-- 
cgit v1.2.3

From 7e5c363a6f2ae202b928097bd3a1936db46df1b9 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Tue, 2 Nov 2010 00:50:04 +0000
Subject: Initial source checkin

git-svn-id: http://smhasher.googlecode.com/svn/trunk@2 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 AvalancheTest.cpp      | 273 ++++++++++++++++
 AvalancheTest.h        |   8 +
 Bitvec.cpp             | 741 ++++++++++++++++++++++++++++++++++++++++++++
 Bitvec.h               | 185 +++++++++++
 BlockCipher.cpp        | 294 ++++++++++++++++++
 BlockCipher.h          |  90 ++++++
 Cipher.cpp             |   1 +
 Cipher.h               |  16 +
 Core.cpp               | 186 +++++++++++
 Core.h                 |  15 +
 CycleTest.cpp          |   1 +
 CycleTest.h            |  45 +++
 DictionaryTest.cpp     |  61 ++++
 DictionaryTest.h       | 119 +++++++
 DifferentialTest.cpp   |   3 +
 DifferentialTest.h     | 202 ++++++++++++
 Diffusion.cpp          | 204 ++++++++++++
 Diffusion.h            |   1 +
 FWTransform.cpp        | 443 ++++++++++++++++++++++++++
 FWTransform.h          |  12 +
 Hamming.cpp            | 133 ++++++++
 Hamming.h              |   5 +
 Hashes.cpp             | 114 +++++++
 Hashes.h               |  35 +++
 Junk.cpp               |  38 +++
 Junk.h                 |  46 +++
 MurmurHash1.cpp        | 171 ++++++++++
 MurmurHash1.h          |   8 +
 MurmurHash2.cpp        | 502 ++++++++++++++++++++++++++++++
 MurmurHash2.h          |  13 +
 MurmurHash2_test.cpp   |   0
 MurmurHash3.cpp        | 288 +++++++++++++++++
 MurmurHash3.h          |  11 +
 MurmurHash64.cpp       |   0
 MurmurHashAligned.cpp  |   2 +
 MurmurHashAligned2.cpp |   4 +
 MurmurHashNeutral2.cpp |   2 +
 MurmurHashTest.cpp     |  26 ++
 Random.cpp             |  61 ++++
 Random.h               | 144 +++++++++
 SimAnneal.cpp          |  97 ++++++
 SimAnneal.h            |   6 +
 SparseKeyTest.cpp      | 111 +++++++
 SparseKeyTest.h        |  89 ++++++
 Stats.cpp              | 338 ++++++++++++++++++++
 Stats.h                | 559 +++++++++++++++++++++++++++++++++
 StreamCipher.cpp       |  13 +
 StreamCipher.h         |  17 +
 SuperFastHash.cpp      |  68 ++++
 TEA.cpp                |  52 ++++
 TEA.h                  |  23 ++
 Tests.cpp              | 542 ++++++++++++++++++++++++++++++++
 Tests.h                | 195 ++++++++++++
 Types.cpp              |  17 +
 Types.h                | 449 +++++++++++++++++++++++++++
 XTEA.cpp               | 119 +++++++
 XTEA.h                 |  23 ++
 crc.cpp                | 101 ++++++
 crc.h                  |  77 +++++
 lookup3.cpp            |  72 +++++
 main.cpp               | 104 +++++++
 md5.cpp                | 382 +++++++++++++++++++++++
 pstdint.h              | 799 +++++++++++++++++++++++++++++++++++++++++++++++
 scratch.cpp            | 823 +++++++++++++++++++++++++++++++++++++++++++++++++
 sha1.cpp               | 603 ++++++++++++++++++++++++++++++++++++
 sha1.h                 |  89 ++++++
 simplex.cpp            | 171 ++++++++++
 67 files changed, 10442 insertions(+)
 create mode 100644 AvalancheTest.cpp
 create mode 100644 AvalancheTest.h
 create mode 100644 Bitvec.cpp
 create mode 100644 Bitvec.h
 create mode 100644 BlockCipher.cpp
 create mode 100644 BlockCipher.h
 create mode 100644 Cipher.cpp
 create mode 100644 Cipher.h
 create mode 100644 Core.cpp
 create mode 100644 Core.h
 create mode 100644 CycleTest.cpp
 create mode 100644 CycleTest.h
 create mode 100644 DictionaryTest.cpp
 create mode 100644 DictionaryTest.h
 create mode 100644 DifferentialTest.cpp
 create mode 100644 DifferentialTest.h
 create mode 100644 Diffusion.cpp
 create mode 100644 Diffusion.h
 create mode 100644 FWTransform.cpp
 create mode 100644 FWTransform.h
 create mode 100644 Hamming.cpp
 create mode 100644 Hamming.h
 create mode 100644 Hashes.cpp
 create mode 100644 Hashes.h
 create mode 100644 Junk.cpp
 create mode 100644 Junk.h
 create mode 100644 MurmurHash1.cpp
 create mode 100644 MurmurHash1.h
 create mode 100644 MurmurHash2.cpp
 create mode 100644 MurmurHash2.h
 create mode 100644 MurmurHash2_test.cpp
 create mode 100644 MurmurHash3.cpp
 create mode 100644 MurmurHash3.h
 create mode 100644 MurmurHash64.cpp
 create mode 100644 MurmurHashAligned.cpp
 create mode 100644 MurmurHashAligned2.cpp
 create mode 100644 MurmurHashNeutral2.cpp
 create mode 100644 MurmurHashTest.cpp
 create mode 100644 Random.cpp
 create mode 100644 Random.h
 create mode 100644 SimAnneal.cpp
 create mode 100644 SimAnneal.h
 create mode 100644 SparseKeyTest.cpp
 create mode 100644 SparseKeyTest.h
 create mode 100644 Stats.cpp
 create mode 100644 Stats.h
 create mode 100644 StreamCipher.cpp
 create mode 100644 StreamCipher.h
 create mode 100644 SuperFastHash.cpp
 create mode 100644 TEA.cpp
 create mode 100644 TEA.h
 create mode 100644 Tests.cpp
 create mode 100644 Tests.h
 create mode 100644 Types.cpp
 create mode 100644 Types.h
 create mode 100644 XTEA.cpp
 create mode 100644 XTEA.h
 create mode 100644 crc.cpp
 create mode 100644 crc.h
 create mode 100644 lookup3.cpp
 create mode 100644 main.cpp
 create mode 100644 md5.cpp
 create mode 100644 pstdint.h
 create mode 100644 scratch.cpp
 create mode 100644 sha1.cpp
 create mode 100644 sha1.h
 create mode 100644 simplex.cpp

diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp
new file mode 100644
index 0000000..25ee86c
--- /dev/null
+++ b/AvalancheTest.cpp
@@ -0,0 +1,273 @@
+//-----------------------------------------------------------------------------
+// Flipping a single bit of a key should cause an "avalanche" of changes in
+// the hash function's output. Ideally, each output bit should flip 50% of
+// the time - if the probability of an output bit flipping is not 50%, that bit
+// is "biased". Too much bias means that patterns applied to the input will
+// cause "echoes" of the patterns in the output, which in turn can cause the
+// hash function to fail to create an even, random distribution of hash values.
+
+#include "AvalancheTest.h"
+
+#include "Bitvec.h"
+#include "Random.h"
+
+#include <math.h>
+
+// Avalanche fails if a bit is biased by more than 1%
+
+double gc_avalancheFail = 0.01;
+
+//-----------------------------------------------------------------------------
+
+void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
+{
+	// One glyph per counter: '.' means no measurable bias, 'X' means the
+	// scaled bias reached 100% or more.
+	static const char glyphs[] = ".123456789X";
+
+	// Rows are emitted from row index y-1 down to 0.
+	for(int row = y - 1; row >= 0; row--)
+	{
+		printf("[");
+		for(int col = 0; col < x; col++)
+		{
+			// bins is read as x groups of y counters; row picks within a group
+			const double flipRate = double(bins[row + (col*y)]) / double(reps);
+
+			// distance from a 50% flip rate on a 0..1 scale, then scaled
+			const double bias = fabs(flipRate*2 - 1) * scale;
+
+			int level = (int)floor(bias*10);
+
+			if(level > 10) level = 10;
+			if(level < 0) level = 0;
+
+			printf("%c",glyphs[level]);
+		}
+
+		printf("]\n");
+	}
+}
+
+//----------------------------------------------------------------------------
+
+double maxBias ( std::vector<int> & counts, int reps )
+{
+	// Largest deviation from a 50% flip rate over all counters, reported
+	// on a 0..1 scale (0 = unbiased, 1 = always or never flipped).
+	double peak = 0;
+
+	const int n = (int)counts.size();
+
+	for(int idx = 0; idx < n; idx++)
+	{
+		const double rate = double(counts[idx]) / double(reps);
+		const double bias = fabs(rate * 2 - 1);
+
+		peak = (bias > peak) ? bias : peak;
+	}
+
+	return peak;
+}
+
+double rmsBias ( std::vector<int> & counts, int reps )
+{
+	// Root-mean-square of the per-counter bias, where bias is the distance
+	// of the observed flip rate from 50%, on a 0..1 scale.
+	double sumSquares = 0;
+
+	for(int idx = 0; idx < (int)counts.size(); idx++)
+	{
+		const double rate = double(counts[idx]) / reps;
+		const double bias = fabs(rate * 2 - 1);
+
+		sumSquares += bias*bias;
+	}
+
+	const double meanSquare = sumSquares / counts.size();
+
+	return sqrt(meanSquare);
+}
+
+//-----------------------------------------------------------------------------
+
+void calcBias ( pfHash hash, const int nbitsIn, const int nbitsOut, std::vector<int> & counts, int reps )
+{
+	// For 'reps' random keys, flips each key bit in turn and counts, in
+	// counts[iBit*nbitsOut + iOut], how often output bit iOut changed.
+	const int nbytesIn = nbitsIn / 8;
+	const int nbytesOut = nbitsOut / 8;
+
+	// K is the key. A and B are read as nbytesOut-byte hash results below,
+	// so they are sized by the hash width, not the key width.
+	uint8_t * K = new uint8_t[nbytesIn];
+	uint8_t * A = new uint8_t[nbytesOut];
+	uint8_t * B = new uint8_t[nbytesOut];
+
+	Rand r(378473);
+
+	for(int irep = 0; irep < reps; irep++)
+	{
+		r.rand_p(K,nbytesIn);
+		hash(K,nbytesIn,0,A);
+
+		int * cursor = &counts[0];
+		for(int iBit = 0; iBit < nbitsIn; iBit++)
+		{
+			flipbit(K,nbytesIn,iBit);
+			hash(K,nbytesIn,0,B);
+			flipbit(K,nbytesIn,iBit);
+
+			for(int iOut = 0; iOut < nbitsOut; iOut++)
+			{
+				int bitA = getbit(A,nbytesOut,iOut);
+				int bitB = getbit(B,nbytesOut,iOut);
+				(*cursor++) += (bitA ^ bitB);
+			}
+		}
+	}
+	delete [] K;
+	delete [] A;
+	delete [] B;
+}
+
+//-----------------------------------------------------------------------------
+
+bool AvalancheTest ( pfHash hash, const int keybits, const int hashbits, const int reps )
+{
+	// Measures avalanche behaviour for one key size and reports whether the
+	// worst per-bit bias stays within gc_avalancheFail.
+	printf("Avalanche for %3d-bit keys -> %3d-bit hashes, %8d reps - ",keybits,hashbits,reps);
+
+	// One counter per (key bit, hash bit) pair, all starting at zero.
+	std::vector<int> flipCounts(keybits*hashbits,0);
+
+	calcBias(hash,keybits,hashbits,flipCounts,reps);
+
+	const double worstBias = maxBias(flipCounts,reps);
+
+	printf("Max avalanche bias is %f\n",worstBias);
+
+	// The test fails only when the bias strictly exceeds the threshold;
+	// anything else, including an exact tie, passes.
+	const bool passed = !(worstBias > gc_avalancheFail);
+
+	return passed;
+}
+
+//----------------------------------------------------------------------------
+// Computing whether a given mix function produces a low bias can take many 
+// millions of tests when the bias is low.  This code tries to speed up the 
+// process by early-outing if the probability that the bias will fall outside
+// the given range is over 99%
+
+/*
+bool testMixAvalanche32_Fast ( pfMix32 mix, double cutmin, double cutmax, bool winlose )
+{
+	int counts[32*32];
+
+	memset(counts,0,sizeof(counts));
+
+	double pmin = 0;
+	double pmax = 0;
+	double n = 0;
+	double s = 4.75;
+	int w = 0;
+
+	int batchsize = 512;
+
+	for(int iBatch = 0; iBatch < 1024 * 1024; iBatch++)
+	{
+		calcMixBias<uint32_t>(mix,counts,batchsize);
+
+		n = (iBatch+1) * batchsize;
+		w = maxIntBias(32,32,counts,(int)n);
+
+		// compute p such that w is at the bottom of the confidence interval
+
+		double a = s*s*n + n*n;
+		double b = -2.0*double(w)*n - s*s*n;
+		double c = double(w)*double(w);
+
+		SolveQuadratic(a,b,c,pmin,pmax);
+
+		double win = 0;
+		double tie = 0;
+		double lose = 0;
+
+		if(winlose)
+		{
+			if(pmax < cutmax)
+			{
+				printf("\n+!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
+				return true;
+			}
+
+			if(pmin > cutmax)
+			{
+				//printf("\n-!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
+				return false;
+			}
+
+			// doesn't fail or win outright. does it have a chance of winning?
+
+			if(pmin < cutmin)
+			{
+				// pmin:pmax contains cutmin:cutmax
+
+				assert(cutmin > pmin);
+				assert(cutmax < pmax);
+
+				win = (cutmin-pmin) / (pmax-pmin);
+				tie = (cutmax-cutmin) / (pmax-pmin);
+				lose = (pmax-cutmax) / (pmax-pmin);
+			}
+			else
+			{
+				// pmin:pmax overlaps above cutmin:cutmax
+
+				assert(cutmin < pmin);
+
+				win = 0;
+				tie = ((cutmax - pmin) / (pmax-pmin)) * ((cutmax-pmin) / (cutmax-cutmin));
+				lose = (pmax-cutmax) / (pmax-pmin);
+
+				return false;
+			}
+
+			double frac = win + tie*0.5;
+
+			if((pmax-pmin)/(cutmax-cutmin) < 5)
+			{
+				if(frac < 0.20)
+				{
+					// 99% chance of loss
+					//printf("\n--- %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
+					return false;
+				}
+
+				if(frac > 0.80)
+				{
+					// 99% chance of win
+					printf("\n+++ %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
+					return true;
+				}
+			}
+		}
+
+		if(!winlose && (n > 0) && ((int)n % (128 * 1024) == 0))
+		{
+			printf("%f - %f : %f - %d - %f : %f : %f\n",double(w)/n,pmin,pmax,int(n),win,tie,lose);
+		}
+
+	}
+
+	// We failed to determine whether this mix function passes or fails
+
+	printf("\n??? %f - %f : %f",double(w)/n,pmin,pmax);
+
+	return true;
+}
+*/
+
+//-----------------------------------------------------------------------------
diff --git a/AvalancheTest.h b/AvalancheTest.h
new file mode 100644
index 0000000..88a0bc1
--- /dev/null
+++ b/AvalancheTest.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "Types.h"
+
+double maxBias ( std::vector<int> & counts, int reps );
+double rmsBias ( std::vector<int> & counts, int reps );
+
+bool AvalancheTest ( pfHash hash, const int keybits, const int hashbits, const int reps );
diff --git a/Bitvec.cpp b/Bitvec.cpp
new file mode 100644
index 0000000..07ac815
--- /dev/null
+++ b/Bitvec.cpp
@@ -0,0 +1,741 @@
+#include "Bitvec.h"
+
+#include "Random.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#ifndef DEBUG // without DEBUG, swap the assert macro for a do-nothing function
+#undef assert
+void assert ( bool ) // the argument expression is still evaluated; its value is ignored
+{
+}
+#endif
+
+//----------------------------------------------------------------------------
+
+void printbits ( void * blob, int len )
+{
+	// Dumps len bytes as space-separated hex digit pairs inside brackets,
+	// showing '.' in place of any zero nibble.
+	const uint8_t * bytes = (const uint8_t*)blob;
+	printf("[");
+
+	for(int idx = 0; idx < len; idx++)
+	{
+		const int nibbles[2] = { bytes[idx] >> 4, bytes[idx] & 0xF };
+
+		for(int n = 0; n < 2; n++)
+		{
+			if(nibbles[n] == 0) printf(".");
+			else                printf("%01x",nibbles[n]);
+		}
+
+		if(idx + 1 != len) printf(" ");
+	}
+
+	printf("]");
+}
+
+void printbits2 ( uint8_t * k, int nbytes )
+{
+	// Draws the blob as a bitmap, highest byte and highest bit first:
+	// '#' for a set bit, ' ' for a clear one.
+	printf("[");
+
+	int byteIndex = nbytes;
+	while(byteIndex-- > 0)
+	{
+		const uint8_t value = k[byteIndex];
+		for(int mask = 0x80; mask != 0; mask >>= 1)
+		{
+			putc((value & mask) ? '#' : ' ',stdout);
+		}
+	}
+	printf("]");
+}
+
+void printhex32 ( void * blob, int len )
+{
+	// Prints the blob as a brace-enclosed list of 32-bit hex words; len is
+	// in bytes and is expected to be a multiple of 4.
+	assert((len & 3) == 0);
+
+	const uint32_t * words = (const uint32_t*)blob;
+	const int nwords = len/4;
+
+	printf("{ ");
+
+	for(int w = 0; w < nwords; w++) printf("0x%08x, ",words[w]);
+
+	printf("}");
+}
+
+
+//-----------------------------------------------------------------------------
+// Bit-level manipulation
+
+// These are from the "Bit Twiddling Hacks" webpage
+
+uint32_t popcount ( uint32_t v )
+{
+	v -= (v >> 1) & 0x55555555;                         // per-2-bit counts
+	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // per-4-bit counts
+	v = (v + (v >> 4)) & 0xF0F0F0F;                     // per-byte counts
+	// the multiply sums the four byte counts into the top byte
+	return (v * 0x1010101) >> 24;
+}
+
+uint32_t popcount128 ( uint32_t * v )
+{
+	// v points at four 32-bit words; the result is the number of set bits
+	// across all four.
+	uint32_t total = 0;
+
+	for(int word = 0; word < 4; word++) total += popcount(v[word]);
+
+	return total;
+}
+
+uint32_t parity ( uint32_t v )
+{
+	// Fold the word onto its low nibble, then look that nibble's parity up
+	// in 0x6996, used here as a 16-entry table with one bit per entry.
+	v = (v ^ (v >> 16));
+	v = (v ^ (v >> 8));
+	const uint32_t nibble = (v ^ (v >> 4)) & 0xf;
+	return (0x6996 >> nibble) & 1;
+}
+
+uint64_t parity ( uint64_t v )
+{
+	// Fold all 64 bits onto the low nibble, then look that nibble's parity
+	// up in 0x6996, used here as a 16-entry table with one bit per entry.
+	v = (v ^ (v >> 32));
+	v = (v ^ (v >> 16));
+	v = (v ^ (v >> 8));
+	const uint64_t nibble = (v ^ (v >> 4)) & 0xf;
+	return (0x6996 >> nibble) & 1;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t getbit ( void * block, int len, uint32_t bit )
+{
+	// Returns bit number 'bit' of the len-byte block (bit 0 is the lowest
+	// bit of byte 0); bits past the end of the block read as 0.
+	const uint8_t * bytes = (const uint8_t*)block;
+	const int index = bit >> 3;
+
+	if(index >= len) return 0;
+
+	return (bytes[index] >> (bit & 0x7)) & 1;
+}
+
+uint32_t getbit_wrap ( void * block, int len, uint32_t bit )
+{
+	// Reads bit number 'bit', with byte positions past the end of the block
+	// wrapping around to the start (byte index taken modulo len).
+	const uint8_t * bytes = (const uint8_t*)block;
+
+	const int index = (int)(bit >> 3) % len;
+	const uint32_t shift = bit & 0x7;
+
+	return (bytes[index] >> shift) & 1;
+}
+
+void setbit ( void * block, int len, uint32_t bit )
+{
+	// Sets bit number 'bit'; requests past the end of the block are ignored.
+	uint8_t * bytes = (uint8_t*)block;
+	const int index = bit >> 3;
+
+	if(index >= len) return;
+	bytes[index] |= (1 << (bit & 0x7));
+}
+
+void setbit ( void * block, int len, uint32_t bit, uint32_t val )
+{
+	if(val) setbit(block,len,bit); else clearbit(block,len,bit); // nonzero val sets the bit, zero clears it
+}
+
+void clearbit ( void * block, int len, uint32_t bit )
+{
+	// Clears bit number 'bit'; requests past the end of the block are ignored.
+	uint8_t * bytes = (uint8_t*)block;
+	const int index = bit >> 3;
+
+	if(index >= len) return;
+	bytes[index] &= ~(1 << (bit & 0x7));
+}
+
+void flipbit ( void * block, int len, uint32_t bit )
+{
+	// Inverts bit number 'bit'; requests past the end of the block are ignored.
+	uint8_t * bytes = (uint8_t*)block;
+	const int index = bit >> 3;
+
+	if(index >= len) return;
+	bytes[index] ^= (1 << (bit & 0x7));
+}
+
+//-----------------------------------------------------------------------------
+
+void lshift1 ( void * blob, int len, int c )
+{
+	// Bit-at-a-time left shift by c (c >= 0 expected): walk from the top bit
+	// down so sources are read before being overwritten; missing bits read 0.
+	for(int dst = len*8 - 1; dst >= 0; dst--)
+	{
+		setbit(blob,len,dst,getbit(blob,len,dst-c));
+	}
+}
+
+
+void lshift8 ( void * blob, int nbytes, int c )
+{
+	// Left-shifts the nbytes-long blob (byte 0 lowest) by c bits, a byte at
+	// a time. Shifting by the full width or more clears the blob.
+	uint8_t * k = (uint8_t*)blob;
+
+	if(c == 0) return;
+
+	int b = c >> 3;
+	c &= 7;
+
+	// never move or clear more bytes than the blob holds
+	if(b > nbytes) b = nbytes;
+
+	// whole-byte part: move bytes up by b, then zero the vacated low bytes
+	for(int i = nbytes-1; i >= b; i--) k[i] = k[i-b];
+	for(int i = b-1; i >= 0; i--) k[i] = 0;
+
+	if(c == 0) return;
+
+	// sub-byte part: go from the top down so k[i-1] is still unshifted
+	for(int i = nbytes-1; i >= 0; i--)
+	{
+		uint8_t hi = k[i];
+		uint8_t lo = (i == 0) ? 0 : k[i-1];
+
+		k[i] = (hi << c) | (lo >> (8-c));
+	}
+}
+
+void lshift32 ( void * blob, int len, int c )
+{
+	// Left-shifts the len-byte blob (len a multiple of 4) by c bits, working
+	// on 32-bit words. Shifting by the full width or more clears the blob.
+	assert((len & 3) == 0);
+
+	int nbytes  = len;
+	int ndwords = nbytes / 4;
+
+	uint32_t * k = (uint32_t*)blob;
+
+	if(c == 0) return;
+
+	//----------
+
+	int b = c / 32;
+	c &= (32-1);
+
+	// never move or clear more words than the blob holds
+	if(b > ndwords) b = ndwords;
+
+	// whole-word part: move words up by b, then zero the vacated low words
+	for(int i = ndwords-1; i >= b; i--) k[i] = k[i-b];
+	for(int i = b-1; i >= 0; i--) k[i] = 0;
+
+	if(c == 0) return;
+
+	// sub-word part: go from the top down so k[i-1] is still unshifted
+	for(int i = ndwords-1; i >= 0; i--)
+	{
+		uint32_t hi = k[i];
+		uint32_t lo = (i == 0) ? 0 : k[i-1];
+
+		k[i] = (hi << c) | (lo >> (32-c));
+	}
+}
+
+//-----------------------------------------------------------------------------
+
+void rshift1 ( void * blob, int len, int c )
+{
+	// Bit-at-a-time right shift by c (c >= 0 expected): walk from bit 0 up so
+	// sources are read before being overwritten; bits past the end read 0.
+	for(int dst = 0; dst < len*8; dst++)
+	{
+		setbit(blob,len,dst,getbit(blob,len,dst+c));
+	}
+}
+
+void rshift8 ( void * blob, int nbytes, int c )
+{
+	// Right-shifts the nbytes-long blob (byte 0 lowest) by c bits, a byte at
+	// a time. Shifting by the full width or more clears the blob.
+	uint8_t * k = (uint8_t*)blob;
+
+	if(c == 0) return;
+
+	int b = c >> 3;
+	c &= 7;
+
+	// never move or clear more bytes than the blob holds
+	if(b > nbytes) b = nbytes;
+
+	// whole-byte part: move bytes down by b, then zero the vacated high bytes
+	for(int i = 0; i < nbytes-b; i++) k[i] = k[i+b];
+	for(int i = nbytes-b; i < nbytes; i++) k[i] = 0;
+
+	if(c == 0) return;
+
+	// sub-byte part: go from the bottom up so k[i+1] is still unshifted
+	for(int i = 0; i < nbytes; i++)
+	{
+		uint8_t hi = (i == nbytes-1) ? 0 : k[i+1];
+		uint8_t lo = k[i];
+
+		k[i] = (hi << (8-c) ) | (lo >> c);
+	}
+}
+
+void rshift32 ( void * blob, int len, int c )
+{
+	// Right-shifts the len-byte blob (len a multiple of 4) by c bits, working
+	// on 32-bit words. Shifting by the full width or more clears the blob.
+	assert((len & 3) == 0);
+
+	int nbytes  = len;
+	int ndwords = nbytes / 4;
+
+	uint32_t * k = (uint32_t*)blob;
+
+	//----------
+
+	if(c == 0) return;
+
+	int b = c / 32;
+	c &= (32-1);
+
+	// never move or clear more words than the blob holds
+	if(b > ndwords) b = ndwords;
+
+	// whole-word part: move words down by b, then zero the vacated high words
+	for(int i = 0; i < ndwords-b; i++) k[i] = k[i+b];
+	for(int i = ndwords-b; i < ndwords; i++) k[i] = 0;
+
+	if(c == 0) return;
+
+	// sub-word part: go from the bottom up so k[i+1] is still unshifted
+	for(int i = 0; i < ndwords; i++)
+	{
+		uint32_t hi = (i == ndwords-1) ? 0 : k[i+1];
+		uint32_t lo = k[i];
+
+		k[i] = (hi << (32-c) ) | (lo >> c);
+	}
+}
+
+//-----------------------------------------------------------------------------
+
+void lrot1 ( void * blob, int len, int c )
+{
+	// Rotates the blob left by c bits, one bit per pass: save the top bit,
+	// shift everything up by one, then write the saved bit into bit 0.
+	const int topBit = len * 8 - 1;
+
+	for(int pass = c; pass > 0; pass--)
+	{
+		const uint32_t carry = getbit(blob,len,topBit);
+		lshift1(blob,len,1);
+		setbit(blob,len,0,carry);
+	}
+}
+
+void lrot8 ( void * blob, int len, int c )
+{
+	// Rotates the len-byte blob left by c bits: whole bytes first, one byte
+	// position per pass, then the remaining 0..7 bits in a single sweep.
+	uint8_t * bytes = (uint8_t*)blob;
+	const int last = len - 1;
+
+	if(c == 0) return;
+
+	//----------
+
+	const int wholeBytes = c / 8;
+	const int bits = c & (8-1);
+
+	for(int pass = 0; pass < wholeBytes; pass++)
+	{
+		// rotate up by one byte: the top byte wraps around to index 0
+		const uint8_t wrapped = bytes[last];
+
+		for(int i = last; i > 0; i--) bytes[i] = bytes[i-1];
+
+		bytes[0] = wrapped;
+	}
+
+	// the top byte is overwritten first below, so keep a copy for byte 0
+	const uint8_t top = bytes[last];
+
+	if(bits == 0) return;
+
+	for(int i = last; i >= 0; i--)
+	{
+		const uint8_t hi = bytes[i];
+		const uint8_t lo = (i == 0) ? top : bytes[i-1];
+
+		bytes[i] = (hi << bits) | (lo >> (8-bits));
+	}
+}
+
+void lrot32 ( void * blob, int len, int c )
+{
+	// Rotates the len-byte blob (len a multiple of 4) left by c bits: whole
+	// 32-bit words first, one word position per pass, then the remaining
+	// 0..31 bits in a single sweep.
+	assert((len & 3) == 0);
+
+	uint32_t * words = (uint32_t*)blob;
+	const int last = len/4 - 1;
+
+	if(c == 0) return;
+
+	//----------
+
+	const int wholeWords = c / 32;
+	const int bits = c & (32-1);
+
+	for(int pass = 0; pass < wholeWords; pass++)
+	{
+		// rotate up by one word: the top word wraps around to index 0
+		const uint32_t wrapped = words[last];
+
+		for(int i = last; i > 0; i--) words[i] = words[i-1];
+
+		words[0] = wrapped;
+	}
+
+	// the top word is overwritten first below, so keep a copy for word 0
+	const uint32_t top = words[last];
+
+	if(bits == 0) return;
+
+	for(int i = last; i >= 0; i--)
+	{
+		const uint32_t hi = words[i];
+		const uint32_t lo = (i == 0) ? top : words[i-1];
+
+		words[i] = (hi << bits) | (lo >> (32-bits));
+	}
+}
+
+//-----------------------------------------------------------------------------
+
+void rrot1 ( void * blob, int len, int c )
+{
+	// Rotates the blob right by c bits, one bit per pass: save bit 0, shift
+	// everything down by one, then write the saved bit into the top bit.
+	const int topBit = len * 8 - 1;
+
+	for(int pass = c; pass > 0; pass--)
+	{
+		const uint32_t carry = getbit(blob,len,0);
+		rshift1(blob,len,1);
+		setbit(blob,len,topBit,carry);
+	}
+}
+
+void rrot8 ( void * blob, int len, int c )
+{
+	// Rotates the len-byte blob right by c bits: whole bytes first, one byte
+	// position per pass, then the remaining 0..7 bits in a single sweep.
+	uint8_t * bytes = (uint8_t*)blob;
+	const int last = len - 1;
+
+	if(c == 0) return;
+
+	//----------
+
+	const int wholeBytes = c / 8;
+	const int bits = c & (8-1);
+
+	for(int pass = 0; pass < wholeBytes; pass++)
+	{
+		// rotate down by one byte: byte 0 wraps around to the top index
+		const uint8_t wrapped = bytes[0];
+
+		for(int i = 0; i < last; i++) bytes[i] = bytes[i+1];
+
+		bytes[last] = wrapped;
+	}
+
+	if(bits == 0) return;
+
+	//----------
+
+	// byte 0 is overwritten first below, so keep a copy for the top byte
+	const uint8_t bottom = bytes[0];
+
+	for(int i = 0; i <= last; i++)
+	{
+		const uint8_t hi = (i == last) ? bottom : bytes[i+1];
+		const uint8_t lo = bytes[i];
+
+		bytes[i] = (hi << (8-bits)) | (lo >> bits);
+	}
+}
+
+void rrot32 ( void * blob, int len, int c )
+{
+	// Rotates the len-byte blob (len a multiple of 4) right by c bits: whole
+	// 32-bit words first, one word position per pass, then the remaining
+	// 0..31 bits in a single sweep.
+	assert((len & 3) == 0);
+
+	uint32_t * words = (uint32_t*)blob;
+	const int last = len/4 - 1;
+
+	if(c == 0) return;
+
+	//----------
+
+	const int wholeWords = c / 32;
+	const int bits = c & (32-1);
+
+	for(int pass = 0; pass < wholeWords; pass++)
+	{
+		// rotate down by one word: word 0 wraps around to the top index
+		const uint32_t wrapped = words[0];
+
+		for(int i = 0; i < last; i++) words[i] = words[i+1];
+
+		words[last] = wrapped;
+	}
+
+	if(bits == 0) return;
+
+	//----------
+
+	// word 0 is overwritten first below, so keep a copy for the top word
+	const uint32_t bottom = words[0];
+
+	for(int i = 0; i <= last; i++)
+	{
+		const uint32_t hi = (i == last) ? bottom : words[i+1];
+		const uint32_t lo = words[i];
+
+		words[i] = (hi << (32-bits)) | (lo >> bits);
+	}
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t window1 ( void * blob, int len, int start, int count )
+{
+	// Bit-at-a-time version: copies 'count' bits, starting at bit 'start' of
+	// the blob (wrapping past the end), into the low bits of the result.
+	uint32_t result = 0;
+
+	start %= (len*8);
+
+	for(int offset = 0; offset < count; offset++)
+	{
+		setbit(&result,sizeof(result),offset,getbit_wrap(blob,len,start+offset));
+	}
+	return result;
+}
+
+uint32_t window8 ( void * blob, int len, int start, int count )
+{
+	// Extracts 'count' bits beginning at bit 'start', wrapping past the end
+	// of the blob; the 32-bit result is assembled from four byte-sized reads.
+	const uint8_t * bytes = (const uint8_t*)blob;
+
+	start %= (len*8);
+
+	if(count == 0) return 0;
+
+	const int bitOffset  = start & (8-1);
+	const int byteOffset = start / 8;
+
+	uint32_t result = 0;
+
+	for(int lane = 0; lane < 4; lane++)
+	{
+		// each output byte straddles two neighbouring input bytes
+		const uint32_t lo = bytes[(lane + byteOffset + 0) % len];
+		const uint32_t hi = bytes[(lane + byteOffset + 1) % len];
+
+		const uint32_t merged = (hi << (8-bitOffset)) | (lo >> bitOffset);
+
+		result |= (merged << (8*lane));
+	}
+
+	// keep only the low 'count' bits
+	const uint32_t mask = (1 << count) - 1;
+
+	return result & mask;
+}
+
+uint32_t window32 ( void * blob, int len, int start, int count )
+{
+	// Extracts 'count' bits beginning at bit 'start', wrapping past the end
+	// of the blob; works on 32-bit words, so len must be a multiple of 4.
+	start %= (len*8);
+
+	assert((len & 3) == 0);
+
+	const uint32_t * words = (const uint32_t*)blob;
+	const int nwords = len / 4;
+
+	if(count == 0) return 0;
+
+	const int bitOffset  = start & (32-1);
+	const int wordOffset = start / 32;
+
+	// keep only the low 'count' bits of whatever is assembled below
+	const uint32_t mask = (1 << count) - 1;
+
+	// word-aligned start: a single word already holds the window
+	if(bitOffset == 0) return (words[wordOffset] & mask);
+
+	// otherwise the window straddles two neighbouring words
+	const uint32_t lo = words[(wordOffset + 0) % nwords];
+	const uint32_t hi = words[(wordOffset + 1) % nwords];
+
+	const uint32_t merged = (hi << (32-bitOffset)) | (lo >> bitOffset);
+
+	return merged & mask;
+}
+
+//-----------------------------------------------------------------------------
+
+bool test_shift ( void )
+{
+	int nbits   = 64;
+	int nbytes  = nbits / 8;
+	int reps = 10000;
+	// Self-test: each blob shift/rotate variant is compared with the native 64-bit result for counts 0..63.
+	for(int j = 0; j < reps; j++)
+	{
+		if(j % (reps/10) == 0) printf("."); // progress dot every tenth of the run
+
+		uint64_t a = rand_u64();
+		uint64_t b;
+
+		for(int i = 0; i < nbits; i++)
+		{
+			b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));
+			b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));
+			b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));
+
+			b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));
+			b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
+			b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
+
+			b = a; lrot1    (&b,nbytes,i);  assert(b == _rotl64(a,i));
+			b = a; lrot8    (&b,nbytes,i);  assert(b == _rotl64(a,i));
+			b = a; lrot32   (&b,nbytes,i);  assert(b == _rotl64(a,i));
+
+			b = a; rrot1    (&b,nbytes,i);  assert(b == _rotr64(a,i));
+			b = a; rrot8    (&b,nbytes,i);  assert(b == _rotr64(a,i));
+			b = a; rrot32   (&b,nbytes,i);  assert(b == _rotr64(a,i));
+		}
+	}
+	// NOTE(review): when DEBUG is not defined this file replaces assert with a no-op, so PASS is printed regardless.
+	printf("PASS\n");
+	return true;
+}
+
+//-----------------------------------------------------------------------------
+
+template < int nbits >
+bool test_window2 ( void )
+{
+	struct keytype
+	{
+		uint8_t bytes[nbits/8];
+	};
+	// Self-test for an nbits-wide key: window8 and the window() dispatcher must match the bit-at-a-time window1.
+	int nbytes = nbits / 8;
+	int reps = 10000;
+
+	for(int j = 0; j < reps; j++)
+	{
+		if(j % (reps/10) == 0) printf("."); // progress dot every tenth of the run
+
+		keytype k;
+
+		rand_p(&k,nbytes);
+
+		for(int start = 0; start < nbits; start++)
+		{
+			for(int count = 0; count < 32; count++)
+			{
+				uint32_t a = window1(&k,nbytes,start,count);
+				uint32_t b = window8(&k,nbytes,start,count);
+				uint32_t c = window(&k,nbytes,start,count);
+
+				assert(a == b);
+				assert(a == c);
+			}
+		}
+	}
+	// NOTE(review): when DEBUG is not defined this file replaces assert with a no-op, so PASS is printed regardless.
+	printf("PASS %d\n",nbits);
+
+	return true;
+}
+
+bool test_window ( void )
+{
+	int reps = 10000;
+	// Self-test: every window variant must match "rotate right by start, keep the low count bits" on a 64-bit value.
+	for(int j = 0; j < reps; j++)
+	{
+		if(j % (reps/10) == 0) printf("."); // progress dot every tenth of the run
+
+		int nbits   = 64;
+		int nbytes  = nbits / 8;
+
+		uint64_t x = rand_u64();
+
+		for(int start = 0; start < nbits; start++)
+		{
+			for(int count = 0; count < 32; count++)
+			{
+				uint32_t a = (uint32_t)_rotr64(x,start);
+				a &= ((1 << count)-1);
+
+				uint32_t b = window1 (&x,nbytes,start,count);
+				uint32_t c = window8 (&x,nbytes,start,count);
+				uint32_t d = window32(&x,nbytes,start,count);
+				uint32_t e = window  (x,start,count);
+
+				assert(a == b);
+				assert(a == c);
+				assert(a == d);
+				assert(a == e);
+			}
+		}
+	}
+	// NOTE(review): when DEBUG is not defined this file replaces assert with a no-op, so PASS is printed regardless.
+	printf("PASS 64\n");
+	// Repeat the cross-check for every whole-byte key width from 8 to 64 bits.
+	test_window2<8>();
+	test_window2<16>();
+	test_window2<24>();
+	test_window2<32>();
+	test_window2<40>();
+	test_window2<48>();
+	test_window2<56>();
+	test_window2<64>();
+
+	return true;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitvec.h b/Bitvec.h
new file mode 100644
index 0000000..b3988de
--- /dev/null
+++ b/Bitvec.h
@@ -0,0 +1,185 @@
+#pragma once
+
+#include "pstdint.h"
+
+#include <stdlib.h> // for _rotl, _rotr, etc.
+
+//-----------------------------------------------------------------------------
+
+uint32_t parity      ( uint32_t v );
+uint64_t parity      ( uint64_t v );
+
+uint32_t popcount    ( uint32_t v );
+uint32_t popcount128 ( uint32_t * v );
+
+void     printbits   ( void * blob, int len );
+void     printhex32  ( void * blob, int len );
+
+uint32_t getbit      ( void * blob, int len, uint32_t bit );
+uint32_t getbit_wrap ( void * blob, int len, uint32_t bit );
+
+void     setbit      ( void * blob, int len, uint32_t bit );
+void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );
+
+void     clearbit    ( void * blob, int len, uint32_t bit );
+
+void     flipbit     ( void * blob, int len, uint32_t bit );
+
+
+//-----------------------------------------------------------------------------
+// Left and right shift of blobs. The shift(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lshift1  ( void * blob, int len, int c );
+void lshift8  ( void * blob, int len, int c );
+void lshift32 ( void * blob, int len, int c );
+
+void rshift1  ( void * blob, int len, int c );
+void rshift8  ( void * blob, int len, int c );
+void rshift32 ( void * blob, int len, int c );
+
+inline void lshift ( void * blob, int len, int c )
+{
+	// Left-shifts the len-byte blob by c bits, using the 32-bit routine when
+	// len is a multiple of 4 and the byte routine otherwise. blob is passed
+	// through unchanged: it already points at the data, whereas &blob would
+	// make the callee shift this function's own pointer variable.
+	const bool wordSized = ((len & 3) == 0);
+
+	if(wordSized) lshift32(blob,len,c);
+	else          lshift8(blob,len,c);
+}
+
+inline void rshift ( void * blob, int len, int c )
+{
+	// Right-shifts the len-byte blob by c bits, using the 32-bit routine when
+	// len is a multiple of 4 and the byte routine otherwise. blob is passed
+	// through unchanged: it already points at the data, whereas &blob would
+	// make the callee shift this function's own pointer variable.
+	const bool wordSized = ((len & 3) == 0);
+
+	if(wordSized) rshift32(blob,len,c);
+	else          rshift8(blob,len,c);
+}
+
+template < typename T >
+inline void lshift ( T & blob, int c )
+{
+	// Generic in-place left shift of any fixed-size object: a word at a
+	// time when sizeof(T) is a multiple of 4, a byte at a time otherwise.
+	// The object is addressed directly, so it is modified where it lives.
+	const bool wordSized = ((sizeof(T) & 3) == 0);
+
+	if(!wordSized) { lshift8(&blob,sizeof(T),c); return; }
+
+	lshift32(&blob,sizeof(T),c);
+}
+
+template < typename T >
+inline void rshift ( T & blob, int c )
+{
+	// Generic in-place right shift of any fixed-size object: a word at a
+	// time when sizeof(T) is a multiple of 4, a byte at a time otherwise.
+	// Dispatches to the rshift routines; the lshift ones would move the
+	// bits in the opposite direction.
+	const bool wordSized = ((sizeof(T) & 3) == 0);
+
+	if(wordSized) rshift32(&blob,sizeof(T),c);
+	else          rshift8(&blob,sizeof(T),c);
+}
+
+template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; } // 32-bit values use the built-in shift
+template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; } // 64-bit values use the built-in shift
+template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; } // 32-bit values use the built-in shift
+template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; } // 64-bit values use the built-in shift
+
+//-----------------------------------------------------------------------------
+// Left and right rotate of blobs. The rot(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lrot1    ( void * blob, int len, int c );
+void lrot8    ( void * blob, int len, int c );
+void lrot32   ( void * blob, int len, int c );
+
+void rrot1    ( void * blob, int len, int c );
+void rrot8    ( void * blob, int len, int c );
+void rrot32   ( void * blob, int len, int c );
+
+template < typename T >
+inline void lrot ( T & blob, int c )
+{
+	// Generic in-place left rotate of any fixed-size object: a word at a
+	// time when sizeof(T) is a multiple of 4, a byte at a time otherwise.
+	// The object is addressed directly, so it is modified where it lives.
+	const bool wordSized = ((sizeof(T) & 3) == 0);
+
+	if(!wordSized) { lrot8(&blob,sizeof(T),c); return; }
+
+	lrot32(&blob,sizeof(T),c);
+}
+
+template < typename T >
+inline void rrot ( T & blob, int c )
+{
+	// Generic in-place right rotate of any fixed-size object: a word at a
+	// time when sizeof(T) is a multiple of 4, a byte at a time otherwise.
+	// The object is addressed directly, so it is modified where it lives.
+	const bool wordSized = ((sizeof(T) & 3) == 0);
+
+	if(!wordSized) { rrot8(&blob,sizeof(T),c); return; }
+
+	rrot32(&blob,sizeof(T),c);
+}
+
+template<> inline void lrot ( uint32_t & blob, int c ) { blob = _rotl(blob,c); } // NOTE(review): _rotl/_rotr/_rotl64/_rotr64 are expected from <stdlib.h> here; confirm they exist on every target toolchain
+template<> inline void lrot ( uint64_t & blob, int c ) { blob = _rotl64(blob,c); } // 64-bit left rotate
+template<> inline void rrot ( uint32_t & blob, int c ) { blob = _rotr(blob,c); } // 32-bit right rotate
+template<> inline void rrot ( uint64_t & blob, int c ) { blob = _rotr64(blob,c); } // 64-bit right rotate
+
+//-----------------------------------------------------------------------------
+// Bit-windowing functions - select some N-bit subset of the input blob
+
+uint32_t window1  ( void * blob, int len, int start, int count );
+uint32_t window8  ( void * blob, int len, int start, int count );
+uint32_t window32 ( void * blob, int len, int start, int count );
+
+inline uint32_t window ( void * blob, int len, int start, int count )
+{
+	// Picks the windowing routine by blob size: the 32-bit version when len
+	// is a multiple of 4, the byte version otherwise. Both take the same
+	// arguments and return the extracted bits.
+	const bool wordSized = ((len & 3) == 0);
+
+	if(!wordSized) return window8(blob,len,start,count);
+
+	return window32(blob,len,start,count);
+}
+
+/*
+template < typename T >
+inline uint32_t window ( T & blob, int start, int count )
+{
+	if((sizeof(T) & 3) == 0)
+	{
+		return window32(&blob,sizeof(T),start,count);
+	}
+	else
+	{
+		return window8(&blob,sizeof(T),start,count);
+	}
+}
+*/
+
+inline uint32_t window ( uint32_t & blob, int start, int count )
+{
+	const uint32_t lowBits = (1<<count)-1; // mask for the low 'count' bits
+	return _rotr(blob,start) & lowBits; // rotate 'start' down to bit 0, then mask
+}
+
+inline uint32_t window ( uint64_t & blob, int start, int count )
+{
+	const uint32_t lowBits = (1<<count)-1; // mask for the low 'count' bits
+	return (uint32_t)_rotr64(blob,start) & lowBits; // rotate, truncate to 32 bits, then mask
+}
+
+//-----------------------------------------------------------------------------
diff --git a/BlockCipher.cpp b/BlockCipher.cpp
new file mode 100644
index 0000000..414cf92
--- /dev/null
+++ b/BlockCipher.cpp
@@ -0,0 +1,294 @@
+#include "BlockCipher.h"
+
+#include <assert.h>
+#include <memory.h>
+
+//----------------------------------------------------------------------------
+
+BlockCipher::BlockCipher ( void )
+{
+	clear();
+	// No mode requested: start in ECB.
+	setMode(ECB);
+}
+
+BlockCipher::BlockCipher ( CipherMode mode )
+{
+	clear();
+	// Install the encrypt/decrypt handlers for the requested mode.
+	setMode(mode);
+}
+
+BlockCipher::~BlockCipher ( void )   // nothing to release: the class only holds fixed-size arrays, an int and two member-function pointers
+{
+}
+
+void BlockCipher::clear ( void )
+{
+	memset(m_plain,  0, sizeof(m_plain));    // zero every working buffer; the
+	memset(m_input,  0, sizeof(m_input));    // lengths come from the array
+	memset(m_temp,   0, sizeof(m_temp));     // declarations (64 bytes each)
+	memset(m_output, 0, sizeof(m_output));
+	memset(m_crypt,  0, sizeof(m_crypt));
+}
+
+//----------------------------------------------------------------------------
+
+void BlockCipher::setMode ( CipherMode m )
+{
+	pFunc enc = 0, dec = 0;   // handler pair selected for mode m
+	if     (m == ECB)  { enc = &BlockCipher::encrypt_ECB;  dec = &BlockCipher::decrypt_ECB;  }
+	else if(m == ECBN) { enc = &BlockCipher::encrypt_ECBN; dec = &BlockCipher::decrypt_ECBN; }
+	else if(m == CBC)  { enc = &BlockCipher::encrypt_CBC;  dec = &BlockCipher::decrypt_CBC;  }
+	else if(m == CFB)  { enc = &BlockCipher::encrypt_CFB;  dec = &BlockCipher::decrypt_CFB;  }
+	else if(m == OFB)  { enc = &BlockCipher::encrypt_OFB;  dec = &BlockCipher::decrypt_OFB;  }
+	else if(m == PCBC) { enc = &BlockCipher::encrypt_PCBC; dec = &BlockCipher::decrypt_PCBC; }
+	else if(m == CTR)  { enc = &BlockCipher::encrypt_CTR;  dec = &BlockCipher::decrypt_CTR;  }
+	else               { assert(false); enc = &BlockCipher::encrypt_PCBC; dec = &BlockCipher::decrypt_PCBC; }   // unknown mode: assert, then fall back to PCBC
+
+	m_pEncrypt = enc;
+	m_pDecrypt = dec;
+}
+
+//----------------------------------------------------------------------------
+
+void BlockCipher::encrypt ( void * key, int keySize, void * plain, void * crypt, int size )
+{
+	// Encrypts 'size' bytes from plain into crypt, one block at a time, using
+	// the current mode. Only whole blocks are processed; a trailing partial
+	// block is left untouched. All chaining buffers are zeroed first.
+	clear();
+	const int blockSize = getBlockSize();
+	// Blocks are staged in the fixed-size member buffers and blockSize is a
+	// divisor below, so it must be positive and must fit in those buffers.
+	assert((blockSize > 0) && (blockSize <= (int)sizeof(m_plain)));
+	const int blockCount = size / blockSize;
+
+	setKey(key,keySize);
+
+	const uint8_t * in = (const uint8_t*)plain;
+	uint8_t * out = (uint8_t*)crypt;
+
+	for(m_blockIndex = 0; m_blockIndex < blockCount; m_blockIndex++, in += blockSize, out += blockSize)
+	{
+		copy(m_plain,in);          // stage the next plaintext block
+		(this->*m_pEncrypt)();     // the mode handler fills m_crypt
+		copy(out,m_crypt);
+	}
+}
+
+void BlockCipher::decrypt ( void * key, int keySize, void * crypt, void * plain, int size )
+{
+	// Decrypts 'size' bytes from crypt into plain, one block at a time, using
+	// the current mode. Only whole blocks are processed; a trailing partial
+	// block is left untouched. All chaining buffers are zeroed first.
+	clear();
+	const int blockSize = getBlockSize();
+	// Blocks are staged in the fixed-size member buffers and blockSize is a
+	// divisor below, so it must be positive and must fit in those buffers.
+	assert((blockSize > 0) && (blockSize <= (int)sizeof(m_crypt)));
+	const int blockCount = size / blockSize;
+
+	setKey(key,keySize);
+
+	const uint8_t * in = (const uint8_t*)crypt;
+	uint8_t * out = (uint8_t*)plain;
+
+	for(m_blockIndex = 0; m_blockIndex < blockCount; m_blockIndex++, in += blockSize, out += blockSize)
+	{
+		copy(m_crypt,in);          // stage the next ciphertext block
+		(this->*m_pDecrypt)();     // the mode handler fills m_plain
+		copy(out,m_plain);
+	}
+}
+
+//----------------------------------------------------------------------------
+// Electronic Codebook
+
+void BlockCipher::encrypt_ECB ( void )
+{
+	copy(m_crypt,m_plain);
+	// ECB: the block is encrypted on its own (nonce 0); no state is carried between blocks.
+	encrypt(m_crypt,0);
+}
+
+//----------
+
+void BlockCipher::decrypt_ECB ( void )
+{
+	copy(m_plain,m_crypt);
+	// ECB: the block is decrypted on its own (nonce 0); no state is carried between blocks.
+	decrypt(m_plain,0);
+}
+
+//----------------------------------------------------------------------------
+// Electronic Codebook + Nonce
+
+void BlockCipher::encrypt_ECBN ( void )
+{
+	copy(m_crypt,m_plain);
+	// Same as ECB, except the running block index is handed to the cipher as its nonce.
+	encrypt(m_crypt,m_blockIndex);
+}
+
+//----------
+
+void BlockCipher::decrypt_ECBN ( void )
+{
+	copy(m_plain,m_crypt);
+	// Same as ECB, except the running block index is handed to the cipher as its nonce.
+	decrypt(m_plain,m_blockIndex);
+}
+
+//----------------------------------------------------------------------------
+// Cipher Block Chaining
+
+void BlockCipher::encrypt_CBC ( void )
+{
+	xor(m_temp,m_plain,m_input);   // plaintext XOR previous ciphertext (m_input is all zeros for the first block)
+	// Encrypt the mixed block in place.
+	encrypt(m_temp,0);
+	// The result is both this block's ciphertext and the next block's chaining value.
+	copy(m_input,m_temp);
+	copy(m_crypt,m_temp);
+}
+
+//----------
+
+void BlockCipher::decrypt_CBC ( void )
+{
+	copy(m_temp,m_crypt);
+	// Decrypt a copy, so m_crypt still holds the raw ciphertext afterwards.
+	decrypt(m_temp,0);
+	// m_output carries the previous ciphertext block (all zeros for the first block).
+	xor(m_plain,m_temp,m_output);
+	copy(m_output,m_crypt);
+}
+
+//----------------------------------------------------------------------------
+// Cipher Feedback
+
+void BlockCipher::encrypt_CFB ( void )
+{
+	copy(m_temp,m_input);          // previous ciphertext (all zeros for the first block)
+	// Encrypting the previous ciphertext yields this block's keystream.
+	encrypt(m_temp,0);
+	// Ciphertext = keystream XOR plaintext; it is also the next feedback value.
+	xor(m_crypt,m_temp,m_plain);
+	copy(m_input,m_crypt);
+}
+
+//----------
+
+void BlockCipher::decrypt_CFB ( void )
+{
+	copy(m_temp,m_input);          // previous ciphertext (all zeros for the first block)
+	// The keystream is rebuilt with encrypt(), not decrypt(), exactly as on the encrypt side.
+	encrypt(m_temp,0);
+	// Plaintext = keystream XOR ciphertext; the ciphertext is the next feedback value.
+	xor(m_plain,m_temp,m_crypt);
+	copy(m_input,m_crypt);
+}
+
+//----------------------------------------------------------------------------
+// Output Feedback
+
+void BlockCipher::encrypt_OFB ( void )
+{
+	copy(m_temp,m_input);          // previous keystream block (all zeros for the first block)
+	// Encrypting the previous keystream yields the next keystream block.
+	encrypt(m_temp,0);
+	// Ciphertext = keystream XOR plaintext; the keystream itself (not the ciphertext) is fed back.
+	xor(m_crypt,m_temp,m_plain);
+	copy(m_input,m_temp);
+}
+
+//----------
+
+void BlockCipher::decrypt_OFB( void )
+{
+	copy(m_temp,m_input);          // previous keystream block (all zeros for the first block)
+	// The keystream is rebuilt with encrypt(), not decrypt(), exactly as on the encrypt side.
+	encrypt(m_temp,0);
+	// Plaintext = keystream XOR ciphertext; the keystream itself is fed back.
+	xor(m_plain,m_temp,m_crypt);
+	copy(m_input,m_temp);
+}
+
+//----------------------------------------------------------------------------
+// Propagating Cipher Block Chaining
+
+// P = M(i)
+// I = M(i-1)
+// C = C(i-1)
+
+void BlockCipher::encrypt_PCBC ( void )
+{
+	xor(m_temp,m_input,m_crypt);   // previous plaintext XOR previous ciphertext (m_crypt is not overwritten until the end)
+	xor(m_temp,m_temp,m_plain);    // ... XOR current plaintext
+	copy(m_input,m_plain);         // remember this plaintext for the next block
+	// Encrypt the combined block in place.
+	encrypt(m_temp,0);
+	// Publish the ciphertext; it doubles as C(i-1) for the next call.
+	copy(m_crypt,m_temp);
+}
+
+//----------
+
+// P = M(i-1)
+// I = C(i-1)
+// C = C(i)
+
+void BlockCipher::decrypt_PCBC ( void )
+{
+	copy(m_temp,m_crypt);
+	// Decrypt a copy, so m_crypt still holds the raw ciphertext afterwards.
+	decrypt(m_temp,0);
+	// m_plain still holds the previous plaintext and m_input the previous ciphertext; XOR both out.
+	xor(m_plain,m_plain,m_temp);
+	xor(m_plain,m_plain,m_input);
+	// The current ciphertext becomes C(i-1) for the next block.
+	copy(m_input,m_crypt);
+}
+
+//----------------------------------------------------------------------------
+// Counter mode
+
+void BlockCipher::encrypt_CTR ( void )
+{
+	memcpy(m_temp,&m_blockIndex,sizeof(m_blockIndex));   // block counter into the leading bytes, without an aliasing cast
+	// The remaining bytes of m_temp keep the previous keystream block (zeros for block 0).
+	encrypt(m_temp,0);
+	// Ciphertext = keystream XOR plaintext.
+	xor(m_crypt,m_temp,m_plain);
+}
+
+//----------
+
+void BlockCipher::decrypt_CTR ( void )
+{
+	memcpy(m_temp,&m_blockIndex,sizeof(m_blockIndex));   // block counter into the leading bytes, without an aliasing cast
+	// The keystream is rebuilt with encrypt(); the remaining bytes of m_temp keep the previous keystream block.
+	encrypt(m_temp,0);
+	// Plaintext = keystream XOR ciphertext.
+	xor(m_plain,m_temp,m_crypt);
+}
+
+//----------------------------------------------------------------------------
+
+void BlockCipher::copy ( uint8_t * dst, const uint8_t * src )   // copies exactly one cipher block (getBlockSize() bytes)
+{
+	memcpy(dst,src,getBlockSize());   // memcpy: dst and src must not overlap
+}
+
+void BlockCipher::xor ( uint8_t * dst, const uint8_t * a, const uint8_t * b )
+{
+	// dst[i] = a[i] ^ b[i] over one block. dst may be the same buffer as a
+	// or b, since each byte is read before the matching byte is written.
+	for(int left = getBlockSize(); left > 0; left--)
+	{
+		*dst++ = *a++ ^ *b++;
+	}
+}
+
+//----------------------------------------------------------------------------
diff --git a/BlockCipher.h b/BlockCipher.h
new file mode 100644
index 0000000..c5c63ad
--- /dev/null
+++ b/BlockCipher.h
@@ -0,0 +1,90 @@
+#pragma once
+#include "Cipher.h"
+#include "pstdint.h"
+
+//----------------------------------------------------------------------------
+
+class BlockCipher : public Cipher
+{
+public:
+	// Chaining modes selectable through setMode().
+	enum CipherMode
+	{
+		ECB,  // Electronic Codebook
+		ECBN, // Electronic Codebook + Nonce
+		CBC,  // Cipher block chaining
+		CFB,  // Cipher feedback
+		OFB,  // Output feedback
+		PCBC, // Propagating CBC
+		CTR,  // Counter
+		MAX = CTR,   // highest valid mode value
+	};
+
+	//----------
+
+	BlockCipher ( void );              // starts in ECB mode
+	BlockCipher ( CipherMode mode );
+	virtual ~BlockCipher ( void );
+
+	virtual void clear ( void );       // zeroes all five working buffers
+
+	//----------
+	// Subclass interface
+	// Block size in bytes; it must not exceed 64, the size of the working buffers below.
+	virtual int  getBlockSize ( void ) = 0;
+
+	virtual void setKey  ( void * k, int keySize ) = 0;
+	// Transform a single block in place; the mode handlers pass either 0 or the block index as nonce.
+	virtual void encrypt ( void * block, unsigned int nonce ) const = 0;
+	virtual void decrypt ( void * block, unsigned int nonce ) const = 0;
+
+	//----------
+	// Client interface
+
+	void setMode ( CipherMode m );
+	// Whole-buffer operations: reset state, set the key, then process size / getBlockSize() blocks.
+	virtual void encrypt ( void * key, int keySize, void * plain, void * crypt, int size );
+	virtual void decrypt ( void * key, int keySize, void * crypt, void * plain, int size );
+
+	//----------
+
+private:
+	// Per-mode handlers: encrypt_* reads m_plain and writes m_crypt.
+	void encrypt_ECB  ( void );
+	void encrypt_ECBN ( void );
+	void encrypt_CBC  ( void );
+	void encrypt_CFB  ( void );
+	void encrypt_OFB  ( void );
+	void encrypt_PCBC ( void );
+	void encrypt_CTR  ( void );
+	// Per-mode handlers: decrypt_* reads m_crypt and writes m_plain.
+	void decrypt_ECB  ( void );
+	void decrypt_ECBN ( void );
+	void decrypt_CBC  ( void );
+	void decrypt_CFB  ( void );
+	void decrypt_OFB  ( void );
+	void decrypt_PCBC ( void );
+	void decrypt_CTR  ( void );
+
+	//----------
+	// NOTE(review): 'xor' is an alternative operator token in standard C++, so this name may be rejected by conforming compilers - confirm the supported toolchains.
+	virtual void copy ( uint8_t * dst, const uint8_t * src );
+	virtual void xor  ( uint8_t * dst, const uint8_t * a, const uint8_t * b );
+
+	//----------
+	// Working buffers; only the first getBlockSize() bytes are copied or XORed.
+	uint8_t  m_plain[64];    // current plaintext block
+	uint8_t  m_input[64];    // chaining/feedback value carried between blocks
+	uint8_t  m_temp[64];     // scratch block handed to the cipher
+	uint8_t  m_output[64];   // previous ciphertext, used by decrypt_CBC
+	uint8_t  m_crypt[64];    // current ciphertext block
+
+	int m_blockIndex;        // index of the block being processed; nonce for ECBN, counter for CTR
+
+	typedef void (BlockCipher::*pFunc)(void);
+	// Handlers installed by setMode().
+	pFunc m_pEncrypt;
+	pFunc m_pDecrypt;
+};
+
+//----------------------------------------------------------------------------
diff --git a/Cipher.cpp b/Cipher.cpp
new file mode 100644
index 0000000..a1de5e6
--- /dev/null
+++ b/Cipher.cpp
@@ -0,0 +1 @@
+#include "Cipher.h"
\ No newline at end of file
diff --git a/Cipher.h b/Cipher.h
new file mode 100644
index 0000000..5aa4155
--- /dev/null
+++ b/Cipher.h
@@ -0,0 +1,16 @@
+#pragma once
+
+//----------------------------------------------------------------------------
+
+class Cipher
+{
+public:
+	// Abstract interface: key, input buffer, output buffer and byte count.
+	Cipher ( void ) {}
+	virtual ~Cipher ( void ) {}
+	// Note the argument order: encrypt takes (plain, crypt), decrypt takes (crypt, plain).
+	virtual void encrypt ( void * key, int keySize, void * plain, void * crypt, int size ) = 0;
+	virtual void decrypt ( void * key, int keySize, void * crypt, void * plain, int size ) = 0;
+};
+
+//----------------------------------------------------------------------------
diff --git a/Core.cpp b/Core.cpp
new file mode 100644
index 0000000..f42541e
--- /dev/null
+++ b/Core.cpp
@@ -0,0 +1,186 @@
+#include "Core.h"
+
+#include <math.h>
+
+//-----------------------------------------------------------------------------
+
+int SolveLinear ( double a, double b, double & r )
+{
+	// Solves a*x + b = 0 for x. Returns the number of roots stored in r:
+	// 1 when a is non-zero, otherwise 0 with r left untouched.
+	if(a != 0)
+	{
+		r = -b/a;
+		return 1;
+	}
+	return 0;
+}
+
+//----------
+
+int SolveQuadratic ( double a, double b, double c, double & r1, double & r2 )
+{
+	// Real roots of a*x^2 + b*x + c = 0. Returns 2 when r1 != r2, 1 when they
+	// are equal, 0 when the discriminant is negative (nothing written). With
+	// a == 0 the problem goes to SolveLinear and only r1 can be written.
+	if(a == 0) return SolveLinear(b,c,r1);
+
+	const double disc = (b*b) - (4*a*c);
+	if(disc < 0) return 0;
+
+	const double root  = sqrt(disc);
+	const double denom = 2.0 * a;
+
+	r1 = (-b - root) / denom;
+	r2 = (-b + root) / denom;
+
+	return (r1 == r2) ? 1 : 2;
+}
+
+//----------
+
+uint32_t multinv ( uint32_t x )
+{
+	// Multiplicative inverse of x modulo 2^32, built one bit at a time: t
+	// tracks x*y, and whenever bit i of t is set, adding x<<i (i.e. setting
+	// bit i of y) clears it. Bit 0 of y is assumed set, so the result is
+	// only an inverse for odd x.
+	uint32_t y = 1;
+	uint32_t t = x;
+
+	for(int i = 1; i < 32; i++)
+	{
+		// Unsigned one: a plain 1 << 31 would overflow a signed int.
+		const uint32_t b = (uint32_t(1) << i);
+		if(t & b) { y |= b; t += (x << i); }
+	}
+
+	return y;
+}
+
+//-----------------------------------------------------------------------------
+// this is random stuff that needs to go somewhere else
+
+uint32_t modmul ( uint32_t k, uint32_t m )
+{
+	// Full 64-bit product of k and m, minus its own high 32 bits; the low
+	// 32 bits of that difference are returned.
+	const uint64_t product = uint64_t(k) * m;
+	const uint64_t folded  = product - (product >> 32);
+
+	return (uint32_t)folded;
+}
+
+uint32_t splitmul ( uint32_t k, uint32_t m )
+{
+	// 32-bit wrapping product, minus its own upper half shifted down.
+	const uint32_t product = k * m;
+
+	return product - (product >> 16);
+}
+
+uint32_t expand16 ( uint32_t k, uint32_t m )   // modmul applied to k+1 (so k == 0 is not multiplied as zero)
+{
+	return modmul(k+1,m);
+}
+
+bool compare_ham16 ( uint32_t mulA, uint32_t mulB,
+                     int cutoffA, int cutoffXA, int cutoffAB,
+                     int & minA, int & minB, int & minXA, int & minXB, int & minAB )
+{
+	// Exhaustively compares every pair of 16-bit inputs (j,k) under the two
+	// expansions fA = expand16(.,mulA) and fB = expand16(.,mulB), tracking the
+	// minimum popcount distances:
+	//   minA / minB   - between expanded values
+	//   minXA / minXB - input distance + expanded distance
+	//   minAB         - distance under A + distance under B
+	// Returns false as soon as a pair falls below a cutoff (cutoffA applies to
+	// both A and B, cutoffXA to both XA and XB); the minima then only reflect
+	// the pairs visited so far. Returns true if every pair passes.
+	const int count = 65536;
+
+	// 2 x 256KB of tables: kept on the heap rather than the stack.
+	uint32_t * fA = new uint32_t[count];
+	uint32_t * fB = new uint32_t[count];
+
+	for(int i = 0; i < count; i++)
+	{
+		fA[i] = expand16(i,mulA);
+		fB[i] = expand16(i,mulB);
+	}
+
+	minA = 100000;
+	minB = 100000;
+	minXA = 100000;
+	minXB = 100000;
+	minAB = 100000;
+
+	bool alive = true;
+
+	for(int j = 0; alive && (j < count-1); j++)
+	{
+		for(int k = j+1; k < count; k++)
+		{
+			int X = popcount(j^k);
+			int A = popcount(fA[j]^fA[k]);
+			int B = popcount(fB[j]^fB[k]);
+
+			int XA = X+A;
+			int XB = X+B;
+			int AB = A+B;
+
+			if(A < minA) minA = A;
+			if(B < minB) minB = B;
+			if(XA < minXA) minXA = XA;
+			if(XB < minXB) minXB = XB;
+			if(AB < minAB) minAB = AB;
+
+			// The minima are updated before the cutoffs are applied.
+			alive = (A >= cutoffA) && (B >= cutoffA) && (XA >= cutoffXA) && (XB >= cutoffXA) && (AB >= cutoffAB);
+			if(!alive) break;
+		}
+	}
+
+	delete [] fA;
+	delete [] fB;
+	return alive;
+}
+
+bool test_ham16 ( uint32_t mulA, int cutoffA, int cutoffXA, int & minA, int & minXA )
+{
+	int minB,minXB,minAB;
+	// Single-multiplier form: mulA is compared against itself with the AB cutoff set to 0; the B-side minima are discarded.
+	return compare_ham16(mulA,mulA, cutoffA,cutoffXA,0, minA,minB,minXA,minXB,minAB);
+}
+
+inline uint32_t foldmul ( uint32_t k, uint32_t m )
+{
+	// 64-bit product of k and m with its high half XORed into the low half;
+	// the low 32 bits are returned.
+	const uint64_t product = uint64_t(k) * m;
+	const uint64_t folded  = product ^ (product >> 32);
+
+	return (uint32_t)folded;
+}
+
+inline uint32_t revmul ( const uint32_t v, const uint32_t m )
+{
+	// Low half of the 64-bit product v*m minus the high half; when that
+	// subtraction borrows (high > low), one is added back.
+	const uint64_t product = uint64_t(v) * m;
+	const uint32_t lo = (uint32_t)product;
+	const uint32_t hi = (uint32_t)(product >> 32);
+	const uint32_t diff = lo - hi;
+
+	return (hi > lo) ? (diff + 1) : diff;
+}
+
+inline uint32_t idmul ( const uint32_t v, const uint32_t m )
+{
+	// Low 32 bits of v*m minus the high 32 bits (wrapping, no borrow fix-up).
+	const uint64_t product = uint64_t(v) * m;
+
+	return (uint32_t)product - (uint32_t)(product >> 32);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Core.h b/Core.h
new file mode 100644
index 0000000..0f0f32a
--- /dev/null
+++ b/Core.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "Types.h"
+#include "Bitvec.h"
+#include "Random.h"
+
+//-----------------------------------------------------------------------------
+
+int SolveQuadratic ( double a, double b, double c, double & r1, double & r2 );
+
+void AccumDiffCounts ( void * a, void * b, double * counts, int len, double inc );
+
+unsigned int multinv ( unsigned int x );
+
+//-----------------------------------------------------------------------------
diff --git a/CycleTest.cpp b/CycleTest.cpp
new file mode 100644
index 0000000..125e60e
--- /dev/null
+++ b/CycleTest.cpp
@@ -0,0 +1 @@
+#include "CycleTest.h"
diff --git a/CycleTest.h b/CycleTest.h
new file mode 100644
index 0000000..f221674
--- /dev/null
+++ b/CycleTest.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "Types.h"
+
+template < typename hashtype >
+bool CycleTest ( pfHash hash, int cycleLen, int cycleReps, const int reps )
+{
+	// Hashes 'reps' keys, each built by repeating one cycleLen-byte pattern
+	// cycleReps times, and hands the hashes to testhashlist. Always returns
+	// true; any result from testhashlist is not inspected.
+	printf("Keyset 'Cycles' (%dk keys, %d cycles, %d bytes)",reps / 1000,cycleReps,cycleLen);
+
+	bool result = true;
+	std::vector<hashtype> hashes(reps);
+
+	int keyLen = cycleLen * cycleReps;
+
+	// One progress dot per ~10% of the run; clamped to 1 so that fewer than
+	// 10 reps cannot cause a modulo by zero.
+	const int progressStep = (reps >= 10) ? (reps / 10) : 1;
+
+	uint8_t * cycle = new uint8_t[cycleLen + 16];
+	uint8_t * key = new uint8_t[keyLen];
+
+	for(int i = 0; i < reps; i++)
+	{
+		if(i % progressStep == 0) printf(".");
+
+		// oracle() is handed the pattern buffer; its first 4 bytes are then overwritten.
+		oracle(i,0,cycle,cycleLen);
+		*(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
+
+		for(int j = 0; j < keyLen; j++) key[j] = cycle[j % cycleLen];
+
+		hash(key,keyLen,0,&hashes[i]);
+	}
+	printf("\n");
+
+	testhashlist(hashes,true,false);
+
+	delete [] cycle; delete [] key;
+	return result;
+}
+
+
diff --git a/DictionaryTest.cpp b/DictionaryTest.cpp
new file mode 100644
index 0000000..56ed6df
--- /dev/null
+++ b/DictionaryTest.cpp
@@ -0,0 +1,61 @@
+#include "DictionaryTest.h"
+
+#include <intrin.h>
+
+#pragma warning(disable:4996) // fopen is unsafe
+
+
+wordlist g_words;                 // lines read by LoadWords, one entry per input line
+int g_wordcount = 0;              // g_words.size() as of the last LoadWords call
+// Flat views of g_words built by LoadWords: C-string pointers and lengths.
+const char ** g_pwords = NULL;
+int * g_plengths = NULL;
+// Not referenced elsewhere in DictionaryTest.cpp.
+double g_dictoverhead = 0;
+
+//----------------------------------------------------------------------------
+
+void LoadWords ( void )
+{
+	// Reads allwords.txt line by line into g_words, then builds the flat
+	// g_pwords / g_plengths views used by the dictionary tests. A file that
+	// cannot be opened is reported and treated as an empty word list.
+	char buffer[1024];
+
+	FILE * f = fopen("allwords.txt","r");
+
+	while(f && fgets(buffer,sizeof(buffer),f))
+	{
+		// Strip trailing newlines without ever stepping in front of the
+		// buffer (a blank line holds nothing but the newline).
+		size_t len = strlen(buffer);
+
+		while((len > 0) && (buffer[len-1] == 0x0a)) buffer[--len] = 0;
+
+		g_words.push_back(buffer);
+	}
+
+	if(f) fclose(f); else printf("Could not open allwords.txt\n");
+
+	g_wordcount = (int)g_words.size();
+	printf("Loaded %d words\n",g_wordcount);
+
+	g_pwords = new const char*[g_wordcount];
+	g_plengths = new int[g_wordcount];
+
+	for(int i = 0; i < g_wordcount; i++)
+	{
+		g_pwords[i] = g_words[i].c_str();
+		g_plengths[i] = (int)g_words[i].size();
+	}
+}
+
+void DeleteWords ( void )
+{
+	// Free the flat views and reset every global, so a repeated call is harmless.
+	delete [] g_pwords;   g_pwords   = NULL;
+	delete [] g_plengths; g_plengths = NULL;
+	g_words.clear();      g_wordcount = 0;
+}
+
+//----------------------------------------------------------------------------
diff --git a/DictionaryTest.h b/DictionaryTest.h
new file mode 100644
index 0000000..2b82047
--- /dev/null
+++ b/DictionaryTest.h
@@ -0,0 +1,119 @@
+#pragma once
+
+#include "Types.h"
+#include "Stats.h" // for testkeylist_string 
+
+#include <map>
+
+void LoadWords ( void );
+void DeleteWords ( void );
+
+typedef std::vector<std::string> wordlist;
+
+extern wordlist g_words;
+extern int g_wordcount;
+extern const char ** g_pwords;
+extern int * g_plengths;
+
+//-----------------------------------------------------------------------------
+
+
+template< typename hashtype >
+double DictHashTest ( hashfunc<hashtype> hash )
+{
+	// Times one pass of hashing every loaded word (seed 0) with __rdtsc,
+	// repeats the pass 999 times and returns the lowest clocks-per-word
+	// figure observed. The hash results themselves are discarded.
+	const int passes = 999;
+	double fastest = 1.0e90;
+
+	for(int pass = 0; pass < passes; pass++)
+	{
+		const __int64 start = __rdtsc();
+
+		for(int w = 0; w < g_wordcount; w++)
+		{
+			hash(g_pwords[w],g_plengths[w],0);
+		}
+
+		const __int64 stop = __rdtsc();
+
+		const double perWord = double(stop-start) / double(g_wordcount);
+
+		if(perWord < fastest)
+		{
+			fastest = perWord;
+		}
+	}
+
+	return fastest;
+}
+
+//-----------------------------------------------------------------------------
+
+template< typename hashtype >
+void DumpCollisions ( hashfunc<hashtype> hash )
+{
+	// Hashes every loaded word with seed 0, groups the words by hash value
+	// and prints each group holding more than one word, followed by the
+	// number of words beyond the first summed over those groups.
+	printf("\nDumping collisions for seed 0 - \n\n");
+
+	// "> >" and "typename" keep this valid for pre-C++11 compilers and for
+	// compilers that enforce dependent-name rules.
+	typedef std::map< hashtype, std::vector<std::string> > hashmap;
+	hashmap hashes;
+
+	for(int i = 0; i < g_wordcount; i++)
+	{
+		hashes[ hash(g_pwords[i],g_plengths[i],0) ].push_back(g_pwords[i]);
+	}
+
+	int collcount = 0;
+
+	for(typename hashmap::iterator it = hashes.begin(); it != hashes.end(); ++it)
+	{
+		const hashtype value = it->first;   // named so it does not shadow the 'hash' parameter
+		const std::vector<std::string> & strings = it->second;
+
+		if(strings.size() <= 1) continue;
+		collcount += (int)strings.size() - 1;
+
+		// NOTE(review): %08x assumes hashtype is passed like a 32-bit integer - confirm for wider hash types.
+		printf("0x%08x - ",value);
+
+		for(int i = 0; i < (int)strings.size(); i++)
+		{
+			printf("%20s,",strings[i].c_str());
+		}
+		printf("\n");
+	}
+
+	printf("%d collisions\n",collcount);
+}
+
+//----------------------------------------------------------------------------
+
+template< typename hashtype >
+void DictionaryTest ( hashfunc<hashtype> hash )
+{
+	printf("Dictionary-based tests -\n");
+	printf("\n");
+	// Load the word list into the globals used by every step below.
+	LoadWords();
+	// Step 1: speed - best clocks/word over repeated passes.
+	double clocks = DictHashTest<hashtype>(hash);
+
+	printf("All words hashed in min %f clocks/word\n",clocks);
+	printf("\n");
+	// Step 2: hand the word list to testkeylist_string.
+	printf("Testing dictionary stats\n");
+	testkeylist_string<hashtype>(hash,g_words,true,true);
+	printf("\n");
+	// Step 3: list the words that share a hash value.
+	DumpCollisions(hash);
+	// Release the word list again.
+	DeleteWords();
+}
+
+//-----------------------------------------------------------------------------
diff --git a/DifferentialTest.cpp b/DifferentialTest.cpp
new file mode 100644
index 0000000..b356085
--- /dev/null
+++ b/DifferentialTest.cpp
@@ -0,0 +1,3 @@
+#include "DifferentialTest.h"
+
+//----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
new file mode 100644
index 0000000..d5b17db
--- /dev/null
+++ b/DifferentialTest.h
@@ -0,0 +1,202 @@
+//-----------------------------------------------------------------------------
+// Differential collision & distribution tests - generate a bunch of random keys,
+// see what happens to the hash value when we flip a few bits of the key.
+
+#pragma once
+#include "Types.h"
+
+//-----------------------------------------------------------------------------
+// Check all possible keybits-choose-N differentials for collisions, report
+// ones that occur significantly more often than expected.
+
+// Random collisions can happen with probability 1 in 2^32 - if we do more than
+// 2^32 tests, we'll probably see some spurious random collisions, so don't report
+// them.
+
+template < typename keytype, typename hashtype >
+void DiffTest ( pfHash hash, int diffbits, int reps )
+{
+	// For 'reps' random keys, flips every combination of up to 'diffbits'
+	// key bits (via DiffTestRecurse), records the differentials that leave
+	// the hash unchanged and prints them with printdiffs.
+	const int keybits = sizeof(keytype) * 8;
+	const int hashbits = sizeof(hashtype) * 8;
+	double diffcount = chooseUpToK(keybits,diffbits);
+	double testcount = (diffcount * double(reps));
+	double expected  = testcount / 4294967296.0;   // divisor is 2^32
+
+	// One progress dot per ~10% of the run; clamped to 1 so that fewer than
+	// 10 reps cannot cause a modulo by zero.
+	const int progressStep = (reps >= 10) ? (reps / 10) : 1;
+	std::vector<keytype> diffs;
+	keytype k1,k2;
+	hashtype h1,h2;
+
+	printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
+	printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
+
+	for(int i = 0; i < reps; i++)
+	{
+		if(i % progressStep == 0) printf(".");
+		rand_p(&k1,sizeof(k1));
+		k2 = k1;
+		hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
+		DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
+	}
+	printf("\n");
+	printdiffs(diffs,reps);
+	printf("\n");
+}
+
+//----------
+
+template < typename keytype, typename hashtype >
+void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
+{
+	// Flips each key bit from 'start' upward in k2, rehashes, and records
+	// k1 ^ k2 whenever the hash still equals h1. While flips remain in the
+	// budget, recurses on the higher bits so every combination is visited.
+	// Each flip is undone before moving on, so k2 is restored on return.
+	const int keyBitCount = sizeof(keytype)*8;
+	const int remaining = bitsleft - 1;
+
+	for(int bit = start; bit < keyBitCount; bit++)
+	{
+		flipbit(&k2,sizeof(k2),bit);
+
+		hash(&k2,sizeof(k2),0,&h2);
+
+		if(h1 == h2) diffs.push_back(k1 ^ k2);
+
+		if(remaining != 0)
+		{
+			DiffTestRecurse(hash,k1,k2,h1,h2,bit+1,remaining,diffs);
+		}
+
+		flipbit(&k2,sizeof(k2),bit);   // undo the flip
+	}
+}
+
+//----------
+
+template < class keytype >
+void printdiffs ( std::vector<keytype> & diffs, int reps )
+{
+	// Sorts the collected differentials and walks the runs of equal values.
+	// A run of two or more is printed (its bit pattern plus its frequency
+	// as a percentage of reps); a run of exactly one is only tallied as
+	// ignored. The closing line reports the total number of entries and
+	// the ignored count.
+	std::sort(diffs.begin(), diffs.end());
+
+	const int total = (int)diffs.size();
+	int ignore = 0;
+
+	// [runStart, runEnd) always delimits one run of identical values.
+	int runStart = 0;
+
+	while(runStart < total)
+	{
+		int runEnd = runStart + 1;
+
+		// Extend the run while the next value equals its first element.
+		while((runEnd < total) && (diffs[runEnd] == diffs[runStart]))
+		{
+			runEnd++;
+		}
+
+		const int count = runEnd - runStart;
+
+		if(count > 1)
+		{
+			// Print from a copy of the run's value.
+			keytype kp = diffs[runStart];
+
+			double pct = 100 * (double(count) / double(reps));
+
+			printbits((unsigned char*)&kp,sizeof(kp));
+			printf(" - %4.2f%%\n", pct );
+		}
+		else
+		{
+			// A differential seen only once is counted, not printed.
+			ignore++;
+		}
+
+		// Continue with the first element after this run.
+		runStart = runEnd;
+	}
+
+	// Summary line, printed even when the list is empty.
+	printf("%d total collisions, of which %d single collisions were ignored\n",(int)diffs.size(),ignore);
+}
+
+//-----------------------------------------------------------------------------
+// Differential distribution test - for each N-bit input differential, generate
+// a large set of differential key pairs, hash them, and test the output 
+// differentials using our distribution test code.
+
+// This is a very hard test to pass - even if the hash values are well-distributed,
+// the differences between hash values may not be. It's also not entirely relevant
+// for testing hash functions, but it's still interesting.
+
+// This test is a _lot_ of work, as it's essentially a full keyset test for
+// each of a potentially huge number of input differentials. To speed things
+// along, we do only a few distribution tests per keyset instead of the full
+// grid.
+
+// #TODO - put diagram drawing back on
+
+template < typename keytype, typename hashtype >
+void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
+{
+	// Step 1: draw 'trials' random keys and hash each one (seed 0).
+	std::vector<keytype>  keys(trials);
+	std::vector<hashtype> baseHashes(trials);
+	std::vector<hashtype> hashDeltas(trials);
+
+	for(int t = 0; t < trials; t++)
+	{
+		rand_t(keys[t]);
+		hash(&keys[t],sizeof(keys[t]),0,(uint32_t*)&baseHashes[t]);
+	}
+
+	// Step 2: let SparseKeygenRecurse fill the list of input differentials.
+	std::vector<keytype> diffs;
+	keytype seedKey(0);
+
+	SparseKeygenRecurse<keytype>(0,diffbits,true,seedKey,diffs);
+
+	// Step 3: for each differential, collect the XOR between the base hash
+	// and the hash of the modified key, run TestDistributionFast on those
+	// deltas, and fold its scores into 'worst' (maximum) and 'avg' (mean).
+	worst = 0;
+	avg = 0;
+
+	hashtype h2;
+
+	for(size_t d = 0; d < diffs.size(); d++)
+	{
+		keytype & diff = diffs[d];
+
+		for(int t = 0; t < trials; t++)
+		{
+			keytype k2 = keys[t] ^ diff;
+
+			hash(&k2,sizeof(k2),0,&h2);
+
+			hashDeltas[t] = baseHashes[t] ^ h2;
+		}
+
+		double dworst,davg;
+
+		TestDistributionFast(hashDeltas,dworst,davg);
+
+		avg += davg;
+		if(dworst > worst) worst = dworst;
+	}
+
+	avg /= double(diffs.size());
+}
+
+//----------------------------------------------------------------------------
diff --git a/Diffusion.cpp b/Diffusion.cpp
new file mode 100644
index 0000000..6927daa
--- /dev/null
+++ b/Diffusion.cpp
@@ -0,0 +1,204 @@
+#include "Diffusion.h"
+
+#include "Types.h"
+
+#include <memory.h>
+
+//-----------------------------------------------------------------------------
+// check invertibility of diffusion matrix
+
+void TestDiffusionMatrix ( void )
+{
+	// Applies the byte mix "b1 += b2; b2 += b1" (8-bit wrapping) to every
+	// 16-bit input and counts how many 16-bit outputs are never produced.
+	// A count of 0 means every output is hit, i.e. the mix is invertible.
+	//
+	// Earlier variant, kept for reference - explicit matrix m = {3,1,1,3}:
+	//   b1 = uint8_t(a1*m[0]) + uint8_t(a2*m[1]);
+	//   b2 = uint8_t(a1*m[2]) + uint8_t(a2*m[3]);
+
+	const int kInputs = 65536;
+
+	int hits[kInputs];
+
+	memset(hits,0,sizeof(hits));
+
+	for(int in = 0; in < kInputs; in++)
+	{
+		uint8_t b1 = (uint8_t)in;          // low byte of the input
+		uint8_t b2 = (uint8_t)(in >> 8);   // high byte of the input
+
+		b1 += b2;
+		b2 += b1;
+
+		hits[(int(b1) << 8) + b2]++;
+	}
+
+	int missing = 0;
+
+	for(int out = 0; out < kInputs; out++)
+	{
+		if(hits[out] == 0) missing++;
+	}
+
+	printf("missing - %d\n",missing);
+}
+
+//-----------------------------------------------------------------------------
+
+void add_row ( int m[16], int a, int b )
+{
+	// Row a += row b in the row-major 4x4 matrix m.
+	int * dstRow = m + 4*a;
+	const int * srcRow = m + 4*b;
+	for(int col = 0; col < 4; col++) dstRow[col] += srcRow[col];
+}
+
+void sub_row ( int m[16], int a, int b )
+{
+	// Row a -= row b in the row-major 4x4 matrix m.
+	int * dstRow = m + 4*a;
+	const int * srcRow = m + 4*b;
+	for(int col = 0; col < 4; col++) dstRow[col] -= srcRow[col];
+}
+
+//-----------------------------------------------------------------------------
+// search through diffusion matrices computable in N operations, find ones
+// with a maximal number of odd terms
+
+bool check ( const int m[16], std::vector<int> & dst, std::vector<int> & src )
+{
+	// Scores one candidate 4x4 matrix (row-major). A candidate is rejected
+	// when any column sum exceeds 2 in magnitude or when any entry is zero.
+	// Otherwise c counts the odd entries and s the negative ones.
+	int c = 0;
+	int s = 0;
+
+	if(abs(m[0]+m[4]+m[8]+m[12]) > 2) return false;
+	if(abs(m[1]+m[5]+m[9]+m[13]) > 2) return false;
+	if(abs(m[2]+m[6]+m[10]+m[14]) > 2) return false;
+	if(abs(m[3]+m[7]+m[11]+m[15]) > 2) return false;
+
+	for(int i = 0; i < 16; i++)
+	{
+		if(m[i] == 0) return false;
+		c += (abs(m[i]) & 1);
+		if(m[i] < 0) s++;
+	}
+
+	// Matrices with exactly 13 odd and 8 negative entries are reported:
+	// first the expression each of A..D expands to, then the same
+	// operation sequence written as h[] statements.
+	if((c == 13) && (s == 8))
+	{
+		std::string g[4];
+
+		g[0] = "A";
+		g[1] = "B";
+		g[2] = "C";
+		g[3] = "D";
+
+		printf("----------\n");
+
+		for(int i = 0; i < (int)dst.size(); i++)
+		{
+			// dst/src hold 1-based row numbers; a negative src entry means
+			// the source row was subtracted instead of added.
+			const int target = dst[i];
+			const int source = src[i];
+
+			std::string tmp;
+
+			tmp += g[target-1];
+			tmp += (source < 0) ? "-" : "+";
+			tmp += "(";
+			tmp += g[abs(source)-1];
+			tmp += ")";
+
+			g[target-1] = tmp;
+		}
+
+		printf("A : %s\n",g[0].c_str());
+		printf("B : %s\n",g[1].c_str());
+		printf("C : %s\n",g[2].c_str());
+		printf("D : %s\n",g[3].c_str());
+
+		for(int i = 0; i < (int)dst.size(); i++)
+		{
+			const int target = dst[i];
+			const int source = src[i];
+
+			if(source < 0)
+			{
+				printf("h[%1d] -= h[%1d];\n",target,-source);
+			}
+			else
+			{
+				printf("h[%1d] += h[%1d];\n",target,source);
+			}
+		}
+		printf("----------\n");
+	}
+	// Only a matrix whose 16 entries are all odd counts as a hit.
+	return c == 16;
+}
+
+bool difrecurse ( const int m[16], int depth, int maxdepth, int last, std::vector<int> & dst, std::vector<int> & src )
+{
+	// Depth-first enumeration of row-operation sequences of length maxdepth.
+	// Each step is "row i += row j" or "row i -= row j" with i != j and
+	// neither equal to 'last', the row written by the previous step. dst/src
+	// record the sequence (1-based rows, negative src for a subtraction).
+	// Finished sequences go to check(); only that leaf call's result is
+	// returned, every interior level returns false.
+	if(depth == maxdepth) return check(m,dst,src);
+
+	for(int i = 0; i < 4; i++)
+	{
+		dst.push_back(i+1);
+
+		for(int j = 0; (i != last) && (j < 4); j++)
+		{
+			if((j == i) || (j == last)) continue;
+
+			int n[16];
+			memcpy(n,m,sizeof(n));
+
+			// Try the addition first, then the subtraction.
+			for(int sign = +1; sign >= -1; sign -= 2)
+			{
+				const bool adding = (sign > 0);
+				src.push_back(sign * (j+1));
+
+				if(adding) add_row(n,i,j); else sub_row(n,i,j);
+				difrecurse(n,depth+1,maxdepth,i,dst,src);
+				if(adding) sub_row(n,i,j); else add_row(n,i,j);   // restore n
+
+				src.pop_back();
+			}
+		}
+
+		dst.pop_back();
+	}
+
+	return false;
+}
+
+void findDiffuse ( void )
+{
+	// Starts the search from the 4x4 identity matrix (row-major) and
+	// explores operation sequences of length 7.
+	int m[16];
+
+	for(int i = 0; i < 16; i++)
+	{
+		m[i] = ((i % 5) == 0) ? 1 : 0;   // indices 0, 5, 10, 15 form the diagonal
+	}
+
+	std::vector<int> dst;
+	std::vector<int> src;
+
+	difrecurse(m,0,7,-1,dst,src);
+	printf("\n");
+}
+
diff --git a/Diffusion.h b/Diffusion.h
new file mode 100644
index 0000000..7b9637e
--- /dev/null
+++ b/Diffusion.h
@@ -0,0 +1 @@
+#pragma once
\ No newline at end of file
diff --git a/FWTransform.cpp b/FWTransform.cpp
new file mode 100644
index 0000000..cf9ed80
--- /dev/null
+++ b/FWTransform.cpp
@@ -0,0 +1,443 @@
+#include "FWTransform.h"
+
+#include "Random.h"
+
+// FWT1/2/3/4 are tested up to 2^16 against a brute-force implementation.
+
+//----------------------------------------------------------------------------
+
+double test_linear_approximation ( mixfunc<uint32_t> f, uint32_t l, uint32_t mask, int64_t size )
+{
+	// Over inputs x = 0..size-1, counts how often parity(f(x) & mask) differs
+	// from parity(x & l) and returns that count as a fraction of size.
+	int64_t mismatches = 0;
+	for(int64_t n = 0; n < size; n++)
+	{
+		const uint32_t x     = (uint32_t)n;
+		const uint32_t out_p = parity( f(x) & mask );
+		const uint32_t in_p  = parity( x & l );
+		mismatches += (out_p ^ in_p);
+	}
+
+	return double(mismatches) / double(size);
+}
+
+//----------------------------------------------------------------------------
+// In-place, non-recursive FWT transform. Reference implementation.
+
+void FWT1 ( int * v, int64_t count )
+{
+	// Bottom-up butterfly passes over v, in place: the pair distance (span)
+	// doubles each pass until one block covers the whole table. Intended for
+	// power-of-two count; blocks per pass is count/width, rounded down.
+	for(int64_t span = 1; 2*span <= count; span *= 2)
+	{
+		const int64_t width = 2*span;
+		const int64_t blocks = count / width;
+		for(int64_t blk = 0; blk < blocks; blk++)
+		{
+			int * lo = v + blk*width;
+			int * hi = lo + span;
+			for(int64_t j = 0; j < span; j++)
+			{
+				const int a = lo[j];
+				const int b = hi[j];
+				lo[j] = a + b; hi[j] = a - b;
+			}
+		}
+	}
+}
+
+//-----------------------------------------------------------------------------
+// recursive, but fall back to non-recursive for tables of 4k or smaller
+
+// (this proved to be fastest)
+
+void FWT2 ( int * v, int64_t count )
+{
+	// Tables of 4096 entries or fewer are handed to the iterative FWT1.
+	if(count <= 4*1024)
+	{
+		FWT1(v,(int32_t)count);
+		return;
+	}
+
+	// One butterfly pass pairing entry k with entry k+half ...
+	const int64_t half = count/2;
+	int * upper = v + half;
+	for(int64_t k = 0; k < half; k++)
+	{
+		const int lo = v[k], hi = upper[k];
+		v[k] = lo + hi; upper[k] = lo - hi;
+	}
+
+	// ... then each half is transformed on its own.
+	if(count > 2) { FWT2(v,half); FWT2(upper,half); }
+}
+
+//-----------------------------------------------------------------------------
+// fully recursive (slow)
+
+void FWT3 ( int * v, int64_t count )
+{
+	// Top-down recursion: butterfly the two halves against each other, then
+	// recurse into each half while the table has more than 2 entries.
+	const int64_t half = count/2;
+	int * lo = v;
+	int * hi = v + half;
+
+	for(int64_t n = half; n > 0; n--, lo++, hi++)
+	{
+		const int a = *lo;
+		const int b = *hi;
+		*lo = a + b;
+		*hi = a - b;
+	}
+
+	if(count <= 2) return;
+	FWT3(v,half); FWT3(v+half,half);
+}
+
+//----------------------------------------------------------------------------
+// some other method
+
+void FWT4 ( int * data, const int64_t count )
+{
+	// In-place fast Walsh transform, bottom-up: block sizes 2, 4, 8, ... up
+	// to count, each block split into two halves that are butterflied
+	// against each other. count must be a power of two, otherwise the last
+	// partial block indexes past the end of data.
+	//
+	// The previous version started at a block size of 1, which evaluated
+	// int64_t(1) << -1 for the half size (undefined behaviour), and used an
+	// int inner index that could not cover halves of 2^31 entries or more.
+	for (int64_t block = 2; block <= count; block *= 2)
+	{
+		const int64_t half = block / 2;
+
+		for (int64_t j = 0; j < count; j += block)
+		{
+			for (int64_t k = 0; k < half; ++k)
+			{
+				const int64_t ia = j+k;
+				const int64_t ib = ia+half;
+				const int a = data[ia];
+				const int b = data[ib];
+				data[ia] = a+b; data[ib] = a-b;
+			}
+		}
+	}
+}
+
+//----------------------------------------------------------------------------
+// Evaluate a single point in the FWT hierarchy
+
+/*
+int FWTPoint ( mixfunc<uint32_t> f, int level, int nbits, uint32_t y )
+{
+	if(level == 0)
+	{
+		return f(y);
+	}
+	else
+	{
+		uint32_t mask = 1 << (nbits - level);
+
+		if(y & mask)
+		{
+			return 
+		}
+	}
+}
+*/
+
+
+//----------------------------------------------------------------------------
+// compute 2 tiers down into FWT, so we can break a table up into 4 chunks
+
+int computeWalsh2 ( mixfunc<uint32_t> f, int64_t y, int bits, uint32_t mask )
+{
+	// Value of entry y after the two widest butterfly stages (pair distances
+	// 2^(bits-1) and 2^(bits-2)) of the +1/-1 table built from
+	// parity(f(x) & mask). Requires bits >= 2. Shifts are done on uint32_t so
+	// that bits == 32 does not shift a signed 1 into the sign bit.
+	const uint32_t size1 = uint32_t(1) << (bits-1);
+	const uint32_t size2 = uint32_t(1) << (bits-2);
+	const uint32_t x = (uint32_t)y;
+	const int a = parity(f(x                ) & mask) ? 1 : -1;
+	const int b = parity(f(x ^ size2        ) & mask) ? 1 : -1;
+	const int c = parity(f(x ^ size1        ) & mask) ? 1 : -1;
+	const int d = parity(f(x ^ size1 ^ size2) & mask) ? 1 : -1;
+
+	// The entry in the upper half of a pair receives (lower - upper).
+	const int ab = (y & size2) ? b-a : a+b;
+	const int cd = (y & size2) ? d-c : c+d;
+	return (y & size1) ? cd-ab : ab+cd;
+}
+
+int computeWalsh2 ( int * func, int64_t y, int bits )
+{
+	// Table-driven variant: the parity of each func[] entry selects +1 or
+	// -1. Returns entry y after the two widest butterfly stages (pair
+	// distances 2^(bits-1) and 2^(bits-2)). Requires bits >= 2; the shifts
+	// use uint32_t so bits == 32 does not shift a signed 1 into the sign bit.
+	const uint32_t size1 = uint32_t(1) << (bits-1);
+	const uint32_t size2 = uint32_t(1) << (bits-2);
+	const uint32_t x = (uint32_t)y;
+	const int a = parity((uint32_t)func[x                ]) ? 1 : -1;
+	const int b = parity((uint32_t)func[x ^ size2        ]) ? 1 : -1;
+	const int c = parity((uint32_t)func[x ^ size1        ]) ? 1 : -1;
+	const int d = parity((uint32_t)func[x ^ size1 ^ size2]) ? 1 : -1;
+
+	const int ab = (y & size2) ? b-a : a+b;
+	const int cd = (y & size2) ? d-c : c+d;
+
+	return (y & size1) ? cd-ab : ab+cd;
+}
+
+//----------------------------------------------------------------------------
+// this version computes the entire table at once - needs 16 gigs of RAM for
+// 32-bit FWT (!!!)
+
+void find_linear_approximation_walsh ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias )
+{
+	// Builds the full 2^inbits table of +1/-1 values from parity(f(x) & mask),
+	// Walsh-transforms it in place, and reports the index with the largest
+	// absolute coefficient in outL and that absolute value in outBias.
+	//
+	// The table size and the scan index stay 64-bit: with inbits == 32 the
+	// old (int)count cast truncated the allocation size to 0 and the old
+	// unsigned int scan index could never reach count, so the loop never ended.
+	const int64_t count = int64_t(1) << inbits;
+
+	int * table = new int[count];
+
+	// fill table
+	for(int64_t i = 0; i < count; i++)
+	{
+		table[i] = parity(f((uint32_t)i) & mask) ? 1 : -1;
+	}
+
+	// apply walsh transform
+
+	FWT1(table,count);
+
+	// the largest magnitude marks the closest linear approximation to f
+
+	outL = 0;
+	outBias = 0;
+
+	for(int64_t l = 0; l < count; l++)
+	{
+		const int64_t magnitude = abs(table[l]);
+		if(magnitude > outBias) { outBias = magnitude; outL = (uint32_t)l; }
+	}
+
+	delete [] table;
+}
+
+//-----------------------------------------------------------------------------
+// this version breaks the task into 4 pieces, or 4 gigs of RAM for 32-bit FWT
+
+void find_linear_approximation_walsh2 ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias )
+{
+	// Processes the 2^inbits table one quarter at a time: computeWalsh2
+	// supplies each quarter's entries, FWT2 transforms that quarter, and the
+	// largest absolute coefficient seen across all quarters is tracked.
+	// Reports half of that magnitude in outBias and its table index in outL.
+	const int64_t count = int64_t(1) << inbits;
+	const int64_t stride = count/4;
+
+	int * quarter = new int[(int)stride];
+
+	uint32_t bestIndex = 0;
+	int64_t bestMagnitude = 0;
+
+	for(int64_t base = 0; base < count; base += stride)
+	{
+		printf(".");   // one progress mark per quarter
+
+		for(int i = 0; i < stride; i++) quarter[i] = computeWalsh2(f,i+base,inbits,mask);
+
+		FWT2(quarter,stride);
+
+		for(int64_t l = 0; l < stride; l++)
+		{
+			const int magnitude = abs(quarter[l]);
+			if(magnitude <= bestMagnitude) continue;
+			bestMagnitude = magnitude;
+			bestIndex = uint32_t(l)+uint32_t(base);
+		}
+	}
+
+	outBias = bestMagnitude/2;
+	outL = bestIndex;
+
+	delete [] quarter;
+}
+
+
+//----------------------------------------------------------------------------
+
+void printtab ( int * tab, int size )
+{
+	// Prints tab as 16 bracketed rows of size/16 entries. Row j starts at
+	// j*(size/16); the old fixed j*16 start was only right for size == 256.
+	const int cols = size/16;
+	for(int j = 0; j < 16; j++)
+	{
+		printf("[");
+		for(int i = 0; i < cols; i++) printf("%3d ",tab[j*cols+i]);
+		printf("]\n");
+	}
+}
+
+void comparetab ( int * tabA, int * tabB, int size )
+{
+	// Prints "-" if the first size entries of both tables agree and "X"
+	// otherwise. The scan stops at the first difference; no newline is
+	// written.
+	int i = 0;
+
+	while((i < size) && (tabA[i] == tabB[i]))
+	{
+		i++;
+	}
+
+	const bool fail = (i < size);
+	printf(fail ? "X" : "-");
+}
+
+void testFWT ( void )
+{
+	const int bits = 12;
+	const int size = (1 << bits);
+	// Self-check on a random 2^12-entry boolean function: FWT1, FWT2, FWT3 and the split-in-4 scheme are each compared with a brute-force count.
+	int * func = new int[size];      // the boolean function, one 0/1 value per input
+	int * resultA = new int[size];   // brute-force reference
+	int * resultB = new int[size];   // output of the transform under test
+
+	for(int rep = 0; rep < 1; rep++)   // a single repetition
+	{
+		// Generate a random boolean function
+
+		for(int i = 0; i < size; i++)
+		{
+			func[i] = rand_u32() & 1;
+
+			//func[i] = (i ^ (i >> 2)) & 1;
+		}
+
+		//printf("Input boolean function -\n");
+		//printtab(func);
+		//printf("\n");
+
+		// Test against all `size` linear functions x -> parity(x & linfunc)
+		// resultA[linfunc] = (number of inputs k where func[k] equals
+		// parity(k & linfunc)) - size/2
+		memset(resultA,0,size * sizeof(int));
+
+		//printf("Result - \n");
+		for(uint32_t linfunc = 0; linfunc < size; linfunc++)
+		{
+			resultA[linfunc] = 0;
+
+			for(uint32_t k = 0; k < size; k++)
+			{
+				int b1 = func[k];
+				int b2 = parity( k & linfunc );
+
+				if(b1 == b2) resultA[linfunc]++;
+			}
+
+			resultA[linfunc] -= (size/2);
+		}
+
+		//printtab(resultA);
+		//printf("\n");
+
+		// Each FWT below runs on the +1/-1 encoding of func (0 -> -1, else +1);
+		// Test with FWTs
+		// the raw output is negated and halved before comparing with resultA.
+		for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;
+		FWT1(resultB,size);
+		for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
+		comparetab(resultA,resultB,size);   // prints "-" on match, "X" on mismatch
+
+		for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;
+		FWT2(resultB,size);
+		for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
+		comparetab(resultA,resultB,size);
+
+		for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;
+		FWT3(resultB,size);
+		for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
+		comparetab(resultA,resultB,size);
+
+		// Test with subdiv-by-4
+		// computeWalsh2 supplies each quarter's entries, FWT2 finishes that quarter
+		{
+			for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;   // every entry is overwritten by the loop below
+
+			const int64_t count = int64_t(1) << bits;
+			const int64_t stride = count/4;
+
+			for(int64_t j = 0; j < count; j += stride)
+			{
+				for(int i = 0; i < stride; i++)
+				{
+					resultB[i+j] = computeWalsh2(func,i+j,bits);
+				}
+
+				FWT2(&resultB[j],stride);
+			}
+
+			for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
+			comparetab(resultA,resultB,size);
+		}
+
+		printf(" ");   // separator after the four pass/fail marks
+	}
+
+	delete [] func;
+	delete [] resultA;
+	delete [] resultB;
+}
+
+//-----------------------------------------------------------------------------
+// Compare known-good implementation against optimized implementation
+
+void testFWT2 ( void )
+{
+	// Four rounds: draw a random 2^24-entry boolean function, transform a
+	// +1/-1 copy of it with FWT1 and with FWT4, and let comparetab print
+	// whether the two results match.
+	const int bits = 24;
+	const int size = (1 << bits);
+
+	int * func = new int[size];
+	int * resultA = new int[size];
+	int * resultB = new int[size];
+
+	for(int rep = 0; rep < 4; rep++)
+	{
+		for(int i = 0; i < size; i++) func[i] = rand_u32() & 1;
+
+		for(int i = 0; i < size; i++)
+		{
+			const int sign = (func[i] == 0) ? -1 : 1;
+			resultA[i] = sign;
+			resultB[i] = sign;
+		}
+
+		FWT1(resultA,size);
+		FWT4(resultB,size);
+		comparetab(resultA,resultB,size);
+
+		printf(" ");
+	}
+
+	delete [] func;
+	delete [] resultA;
+	delete [] resultB;
+}
\ No newline at end of file
diff --git a/FWTransform.h b/FWTransform.h
new file mode 100644
index 0000000..6979cbd
--- /dev/null
+++ b/FWTransform.h
@@ -0,0 +1,12 @@
+#include "Types.h"
+#include "Bitvec.h"
+
+// Fast Walsh transform stuff. Used for determining how close an arbitrary
+// boolean function is to the set of all possible linear functions.
+
+// Given an arbitrary N-bit mixing function mix(x), we can generate a boolean
+// function out of it by choosing a N-bit mask and computing
+// parity(mix(x) & mask).
+
+// If the mask has 1 bit set, this is equivalent to selecting a column of
+// output bits from the mixing function to test.
diff --git a/Hamming.cpp b/Hamming.cpp
new file mode 100644
index 0000000..e00e5b7
--- /dev/null
+++ b/Hamming.cpp
@@ -0,0 +1,133 @@
+#include "Hamming.h"
+
+#include "Types.h"
+#include "Random.h"
+
+// Code to measure the hamming weight of mix functions, etc.
+
+// (documentation needed)
+
+// If I change N bits of the input, how many bits of the output change on average?
+
+
+//-----------------------------------------------------------------------------
+// compute table of differential hamming weight for input differentials
+// of 1 to 4 bits (a 5-bit pass is not currently run)
+
+void hamtest ( uint32_t (*mix)(uint32_t), uint32_t d, const int reps, double out[33] )
+{
+	// Samples reps random inputs, flips the bits selected by d, and builds a
+	// histogram (as fractions of reps) of how many output bits of mix change.
+	// Each out[] bucket is then raised to the histogram value if that is larger.
+
+	double hist[33] = { 0 };
+
+	for(int n = reps; n > 0; n--)
+	{
+		const uint32_t a = rand_u32();
+		const uint32_t ma = mix(a);
+		const uint32_t mb = mix(a ^ d);
+
+		hist[popcount(ma ^ mb)] += 1.0 / double(reps);
+	}
+
+	// out[] keeps a running per-bucket maximum across calls.
+
+	for(int bucket = 0; bucket < 33; bucket++)
+	{
+		if(hist[bucket] > out[bucket]) out[bucket] = hist[bucket];
+	}
+}
+
+void SparseDiffHamming32 ( uint32_t (*mix)(uint32_t), double accum[33] )
+{
+	// Runs hamtest on every input differential with 1, 2, 3 or 4 bits set,
+	// collecting per-bucket maxima in temp[1]..temp[4] (indexed by the number
+	// of differential bits), then folds those tables into accum[] and scales
+	// the result by 100.
+	//
+	// Fixes relative to the first version:
+	//  - accum is a pointer parameter, so sizeof(accum) was the size of a
+	//    pointer rather than of the 33-entry table; the length is now explicit
+	//    (which also makes the separate zeroing loop unnecessary).
+	//  - bit masks are built from an unsigned 1, since (1 << 31) overflows a
+	//    signed int.
+	memset(accum,0,33 * sizeof(double));
+
+	const int reps = 1000;
+	const uint32_t one = 1;
+
+	// temp[0] and temp[5] are never written by hamtest here (no 0-bit or
+	// 5-bit pass is run) and therefore stay zero.
+
+	double temp[6][33];
+
+	for(int i = 0; i < 6; i++)
+	{
+		memset(temp[i],0,33 * sizeof(double));
+	}
+
+	// d always holds exactly the bits chosen by the enclosing loops: each
+	// level sets its bit on entry and clears it again before moving on.
+
+	uint32_t d = 0;
+
+	for(int i = 0; i < 32; i++)
+	{
+		d ^= (one << i);
+		hamtest(mix,d,reps,temp[1]);
+
+		for(int j = i+1; j < 32; j++)
+		{
+			d ^= (one << j);
+			hamtest(mix,d,reps,temp[2]);
+
+			for(int k = j+1; k < 32; k++)
+			{
+				d ^= (one << k);
+				hamtest(mix,d,reps,temp[3]);
+
+				for(int l = k+1; l < 32; l++)
+				{
+					d ^= (one << l);
+					hamtest(mix,d,reps,temp[4]);
+					d ^= (one << l);
+				}
+
+				d ^= (one << k);
+			}
+
+			d ^= (one << j);
+		}
+
+		d ^= (one << i);
+	}
+
+	// Fold: the value in bucket j of the i-bit table competes for
+	// accum[i+j]; combinations that land past the last bucket are skipped.
+
+	for(int j = 0; j < 33; j++)
+	{
+		for(int i = 0; i < 6; i++)
+		{
+			if((i+j) >= 33) continue;
+
+			double t = temp[i][j];
+
+			if(t > accum[i+j]) accum[i+j] = t;
+		}
+	}
+
+	for(int i = 0; i < 33; i++) accum[i] *= 100;
+}
+
+bool hamless ( int count, double * a, double * b )
+{
+	// Lexicographic a < b over the first count entries; equal runs are not less.
+	for(int i = 0; i < count; i++)
+	{
+		const double lhs = a[i], rhs = b[i];
+		if((lhs < rhs) || (lhs > rhs)) return lhs < rhs;
+	}
+	return false;
+}
diff --git a/Hamming.h b/Hamming.h
new file mode 100644
index 0000000..a372925
--- /dev/null
+++ b/Hamming.h
@@ -0,0 +1,5 @@
+#pragma once
+#include "Types.h"
+
+void SparseDiffHamming32 ( uint32_t (*mix)(uint32_t), double accum[33] );
+bool hamless ( int count, double * a, double * b );
\ No newline at end of file
diff --git a/Hashes.cpp b/Hashes.cpp
new file mode 100644
index 0000000..b27a2f0
--- /dev/null
+++ b/Hashes.cpp
@@ -0,0 +1,114 @@
+#include "Hashes.h"
+
+#include "Random.h"
+
+//----------------------------------------------------------------------------
+// fake / bad hashes
+
+void randhash ( const void *, int, uint32_t, void * out )
+{
+	uint32_t * result = (uint32_t*)out; *result = rand_u32();   // key, length and seed are ignored
+}
+
+void BadHash ( const void * key, int len, uint32_t seed, void * out )
+{
+	// Deliberately weak hash: per input byte, two xor-shift steps on the
+	// state followed by xoring in the byte. Writes 32 bits to out.
+	const uint8_t * p = (const uint8_t*)key;
+	uint32_t state = seed;
+
+	for(int left = len; left > 0; left--, p++)
+	{
+		state ^= state >> 3;
+		state ^= state << 5;
+		state ^= *p;
+	}
+	*(uint32_t*)out = state;
+}
+
+void sumhash ( const void * key, int len, uint32_t seed, void * out )
+{
+	// Byte-sum "hash": starts from the seed, adds each key byte (wrapping
+	// mod 2^32) and stores the 32-bit total in out.
+
+	const uint8_t * bytes = (const uint8_t*)key;
+	uint32_t * result = (uint32_t*)out;
+	uint32_t total = seed;
+
+	for(int i = 0; i < len; ++i) total += bytes[i];
+
+	*result = total;
+}
+
+void DoNothingHash ( const void *, int, uint32_t, void * )
+{
+	// Deliberately empty: all four arguments are ignored and nothing is written.
+}
+
+//-----------------------------------------------------------------------------
+// One-byte-at-a-time hash based on Murmur's mix
+
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t h )
+{
+	// Byte-at-a-time hash: the length is xored into the seed h first, then
+	// each byte is xored in, multiplied by 0x5bd1e995 and folded with >> 16.
+	const uint32_t mul = 0x5bd1e995;
+	const uint8_t * bytes = (const uint8_t*)key;
+
+	h ^= len;
+	for(int i = 0; i < len; i++)
+	{
+		h = (h ^ bytes[i]) * mul;
+		h ^= h >> 16;
+	}
+	return h;
+}
+
+//----------------------------------------------------------------------------
+
+void FNV ( const void * key, int len, uint32_t seed, void * out )
+{
+	// Xors the seed with 2166136261, then for each byte: xor the byte in and
+	// multiply by 16777619 (32-bit wraparound). Stores the result in out.
+	const unsigned int start = 2166136261u;
+	const unsigned int prime = 16777619u;
+
+	const uint8_t * bytes = (const uint8_t*)key;
+	unsigned int h = seed ^ start;
+
+	for(int i = 0; i < len; i++)
+	{
+		h = (h ^ bytes[i]) * prime;
+	}
+	*(uint32_t*)out = h;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t x17 ( const void * key, int len, uint32_t h ) 
+{
+	// h = 17*h + (byte - ' ') per byte, then xor the top half into the bottom.
+	const uint8_t * bytes = (const uint8_t*)key;
+
+	for(int i = 0; i < len; ++i)
+	{
+		h *= 17; h += (bytes[i] - ' ');
+	}
+	return h ^ (h >> 16);
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t Bernstein ( const void * key, int len, uint32_t h ) 
+{
+	// Multiply-by-33-and-add over the key bytes, starting from h.
+	const uint8_t * cur = (const uint8_t*)key;
+
+	for(int n = len; n > 0; --n)
+	{
+		h = (h << 5) + h + *cur++;
+	}
+	return h;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Hashes.h b/Hashes.h
new file mode 100644
index 0000000..8bf998e
--- /dev/null
+++ b/Hashes.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "Types.h"
+
+#include "MurmurHash1.h"
+#include "MurmurHash2.h"
+#include "MurmurHash3.h"
+
+void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
+void FNV                   ( const void * key, int len, uint32_t seed, void * out );
+void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
+void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
+void md5_32                ( const void * key, int len, uint32_t seed, void * out );
+void crc32                 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash2_test      ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	uint32_t * result = (uint32_t*)out; *result = MurmurHash1(key,len,seed);   // stores the 32-bit return value through out
+}
+
+inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	uint32_t * result = (uint32_t*)out; *result = MurmurHash2(key,len,seed);   // stores the 32-bit return value through out
+}
+
+inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	uint32_t * result = (uint32_t*)out; *result = MurmurHash2A(key,len,seed);   // stores the 32-bit return value through out
+}
+
diff --git a/Junk.cpp b/Junk.cpp
new file mode 100644
index 0000000..62e700c
--- /dev/null
+++ b/Junk.cpp
@@ -0,0 +1,38 @@
+#include "Junk.h"
+
+#include "Random.h"
+
+//-----------------------------------------------------------------------------
+// Given a 64->32 bit compression function and a set of differentials, compute
+// the number of collisions
+
+typedef uint32_t (*pfCompress32) ( uint64_t x );
+
+int TestCompress ( pfCompress32 comp, std::vector<uint64_t> & diffs, const int reps )
+{
+	// For each differential, draws reps random 64-bit inputs and counts how
+	// often comp gives the same output for x and x ^ differential. Returns
+	// the sum of those counts, ignoring differentials that collided at most
+	// once.
+	int total = 0;
+
+	for(std::vector<uint64_t>::const_iterator it = diffs.begin(); it != diffs.end(); ++it)
+	{
+		const uint64_t d = *it;
+		int hits = 0;
+
+		for(int n = 0; n < reps; n++)
+		{
+			const uint64_t a = rand_u64();
+			const uint32_t ca = comp(a);
+			const uint32_t cb = comp(a ^ d);
+
+			hits += (ca == cb) ? 1 : 0;
+		}
+
+		total += (hits > 1) ? hits : 0;
+	}
+	return total;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Junk.h b/Junk.h
new file mode 100644
index 0000000..a4fc5fd
--- /dev/null
+++ b/Junk.h
@@ -0,0 +1,46 @@
+#include "Types.h"
+
+//-----------------------------------------------------------------------------
+
+template < typename mixtype >
+void calcMixBias ( mixtype (*mix)(mixtype), std::vector<int>& bins, int reps )
+{
+	// For reps random keys K: flip each input bit in turn and, for every
+	// output bit that differs between mix(K) and mix(K with that bit
+	// flipped), increment bins[inputBit*outbits + outputBit].
+	const int inbits  = sizeof(mixtype) * 8;
+	const int outbits = sizeof(mixtype) * 8;
+	mixtype K;
+
+	for(int irep = 0; irep < reps; irep++)
+	{
+		rand_t(K);
+		const mixtype A = mix(K);
+
+		for(int iBit = 0; iBit < inbits; iBit++)
+		{
+			const mixtype flipped = K ^ (mixtype(1) << iBit);
+			const mixtype C = A ^ mix(flipped);
+			int * row = &bins[iBit*outbits];
+			for(int iOut = 0; iOut < outbits; iOut++)
+			{
+				row[iOut] += (C >> iOut) & 1;
+			}
+		}
+	}
+}
+
+//----------
+
+template < typename mixtype >
+double calcMixBias ( mixtype (*mix)(mixtype), int reps )
+{
+	// Convenience overload: tallies into a fresh, zeroed bits x bits table
+	// and returns maxBias() of that table.
+	const int side = sizeof(mixtype) * 8;
+	std::vector<int> tally(side*side);
+	calcMixBias<mixtype>(mix,tally,reps);
+	return maxBias(tally,reps);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp
new file mode 100644
index 0000000..4322bb8
--- /dev/null
+++ b/MurmurHash1.cpp
@@ -0,0 +1,171 @@
+#include "MurmurHash1.h"
+
+//-----------------------------------------------------------------------------
+// MurmurHash1, by Austin Appleby
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
+{
+	const unsigned int m = 0xc6a4a793;   // multiplier used by every mixing step
+	// Hashes len bytes at key to 32 bits; the result depends on seed.
+	const int r = 16;                    // shift used by the tail mix
+
+	unsigned int h = seed ^ (len * m);   // the length is folded into the initial state
+
+	// Body: consume the key 4 bytes at a time
+	// (the word is loaded straight from data, whatever its alignment)
+	const unsigned char * data = (const unsigned char *)key;
+
+	while(len >= 4)
+	{
+		unsigned int k = *(unsigned int *)data;
+
+		h += k;
+		h *= m;
+		h ^= h >> 16;   // same shift amount as r
+
+		data += 4;
+		len -= 4;
+	}
+	
+	// Tail: the remaining 1-3 bytes, with deliberate case fall-through
+	
+	switch(len)
+	{
+	case 3:
+		h += data[2] << 16;
+	case 2:
+		h += data[1] << 8;
+	case 1:
+		h += data[0];
+		h *= m;
+		h ^= h >> r;
+	};
+ 
+	// Final mixing rounds
+
+	h *= m;
+	h ^= h >> 10;
+	h *= m;
+	h ^= h >> 17;
+
+	return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash1Aligned, by Austin Appleby
+
+// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance should be equal to or better than the simple version.
+
+unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
+{
+	const unsigned int m = 0xc6a4a793;
+	const int r = 16;
+	// Hashes len bytes at key; when key is misaligned, 32-bit loads are only issued from 4-byte-aligned addresses.
+	const unsigned char * data = (const unsigned char *)key;
+
+	unsigned int h = seed ^ (len * m);
+	// Low two address bits, taken via uintptr_t: casting the pointer itself to int is rejected or truncates on 64-bit targets.
+	int align = (int)((uintptr_t)data & 3);
+
+	if(align && (len >= 4))
+	{
+		// Pre-load the temp registers
+		// (the 4-align bytes in front of the next aligned address, moved to the top of t)
+		unsigned int t = 0, d = 0;
+
+		switch(align)
+		{
+			case 1: t |= data[2] << 16;
+			case 2: t |= data[1] << 8;
+			case 3: t |= data[0];
+		}
+
+		t <<= (8 * align);
+
+		data += 4-align;
+		len -= 4-align;
+
+		int sl = 8 * (4-align);   // left shift for the bytes taken from the newly loaded word
+		int sr = 8 * align;       // right shift for the bytes carried over in t
+
+		// Mix
+		// (each step joins carried-over bytes from t with bytes from the next aligned word d)
+		while(len >= 4)
+		{
+			d = *(unsigned int *)data;
+			t = (t >> sr) | (d << sl);
+			h += t;
+			h *= m;
+			h ^= h >> r;
+			t = d;
+
+			data += 4;
+			len -= 4;
+		}
+
+		// Handle leftover data in temp registers
+		// (pack = how many of the remaining bytes are joined with the carry in t)
+		int pack = len < align ? len : align;
+
+		d = 0;
+
+		switch(pack)
+		{
+		case 3: d |= data[2] << 16;
+		case 2: d |= data[1] << 8;
+		case 1: d |= data[0];
+		case 0: h += (t >> sr) | (d << sl);
+				h *= m;
+				h ^= h >> r;
+		}
+
+		data += pack;
+		len -= pack;
+	}
+	else
+	{
+		// Aligned key, or fewer than 4 bytes: plain 4-byte loads.
+		while(len >= 4)
+		{
+			h += *(unsigned int *)data;
+			h *= m;
+			h ^= h >> r;
+
+			data += 4;
+			len -= 4;
+		}
+	}
+
+	//----------
+	// Handle tail bytes
+
+	switch(len)
+	{
+	case 3: h += data[2] << 16;
+	case 2: h += data[1] << 8;
+	case 1: h += data[0];
+			h *= m;
+			h ^= h >> r;
+	};
+
+	h *= m;
+	h ^= h >> 10;
+	h *= m;
+	h ^= h >> 17;
+
+	return h;
+}
+
diff --git a/MurmurHash1.h b/MurmurHash1.h
new file mode 100644
index 0000000..e297035
--- /dev/null
+++ b/MurmurHash1.h
@@ -0,0 +1,8 @@
+#include "pstdint.h"
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
new file mode 100644
index 0000000..349ed8e
--- /dev/null
+++ b/MurmurHash2.cpp
@@ -0,0 +1,502 @@
+#include "MurmurHash2.h"
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, by Austin Appleby
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
+{
+	// 'm' and 'r' are mixing constants generated offline.
+	// They're not really 'magic', they just happen to work well.
+
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+
+	// Initialize the hash to a 'random' value
+	// (the seed xored with the key length)
+	uint32_t h = seed ^ len;
+
+	// Mix 4 bytes at a time into the hash
+	// (each word is loaded straight from data, whatever its alignment)
+	const unsigned char * data = (const unsigned char *)key;
+
+	while(len >= 4)
+	{
+		uint32_t k = *(uint32_t*)data;
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		data += 4;
+		len -= 4;
+	}
+
+	// Handle the last few bytes of the input array
+	// (cases fall through on purpose; nothing happens when len is 0)
+	switch(len)
+	{
+	case 3: h ^= data[2] << 16;
+	case 2: h ^= data[1] << 8;
+	case 1: h ^= data[0];
+			h *= m;
+	};
+
+	// Do a few final mixes of the hash to ensure the last few
+	// bytes are well-incorporated.
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+
+uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
+{
+	const uint64_t m = 0xc6a4a7935bd1e995;   // 64-bit multiplier used by every mixing step
+	const int r = 47;                        // shift used by every xor-shift step
+	// Hashes len bytes at key to 64 bits using 64-bit arithmetic throughout.
+	uint64_t h = seed ^ (len * m);           // the length is folded into the initial state
+
+	const uint64_t * data = (const uint64_t *)key;
+	const uint64_t * end = data + (len/8);   // one past the last whole 8-byte word
+	// Body: one 8-byte word per step, loaded straight from key whatever its alignment
+	while(data != end)
+	{
+		uint64_t k = *data++;
+
+		k *= m; 
+		k ^= k >> r; 
+		k *= m; 
+		
+		h ^= k;
+		h *= m; 
+	}
+
+	const unsigned char * data2 = (const unsigned char*)data;   // the 0-7 bytes after the last whole word
+	// Tail: cases fall through on purpose; nothing happens when no bytes remain
+	switch(len & 7)
+	{
+	case 7: h ^= uint64_t(data2[6]) << 48;
+	case 6: h ^= uint64_t(data2[5]) << 40;
+	case 5: h ^= uint64_t(data2[4]) << 32;
+	case 4: h ^= uint64_t(data2[3]) << 24;
+	case 3: h ^= uint64_t(data2[2]) << 16;
+	case 2: h ^= uint64_t(data2[1]) << 8;
+	case 1: h ^= uint64_t(data2[0]);
+	        h *= m;
+	};
+	// Final mixing
+	h ^= h >> r;
+	h *= m;
+	h ^= h >> r;
+
+	return h;
+} 
+
+
+// 64-bit hash for 32-bit platforms
+
+uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
+{
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+	// Produces a 64-bit hash from two 32-bit states, using 32-bit arithmetic only.
+	uint32_t h1 = uint32_t(seed) ^ len;    // low half of the seed, xored with the length
+	uint32_t h2 = uint32_t(seed >> 32);    // high half of the seed
+
+	const uint32_t * data = (const uint32_t *)key;
+	// Body: 8 bytes per step, the first word into h1 and the second into h2
+	while(len >= 8)
+	{
+		uint32_t k1 = *data++;
+		k1 *= m; k1 ^= k1 >> r; k1 *= m;
+		h1 *= m; h1 ^= k1;
+		len -= 4;
+
+		uint32_t k2 = *data++;
+		k2 *= m; k2 ^= k2 >> r; k2 *= m;
+		h2 *= m; h2 ^= k2;
+		len -= 4;
+	}
+	// One more whole word, if present, goes into h1
+	if(len >= 4)
+	{
+		uint32_t k1 = *data++;
+		k1 *= m; k1 ^= k1 >> r; k1 *= m;
+		h1 *= m; h1 ^= k1;
+		len -= 4;
+	}
+	// Tail: the last 1-3 bytes go into h2 (cases fall through on purpose)
+	switch(len)
+	{
+	case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+	case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+	case 1: h2 ^= ((unsigned char*)data)[0];
+			h2 *= m;
+	};
+	// Final mixing: the two halves are xored into each other in turn
+	h1 ^= h2 >> 18; h1 *= m;
+	h2 ^= h1 >> 22; h2 *= m;
+	h1 ^= h2 >> 17; h1 *= m;
+	h2 ^= h1 >> 19; h2 *= m;
+
+	uint64_t h = h1;
+	// h1 becomes the upper 32 bits of the result, h2 the lower 32
+	h = (h << 32) | h2;
+
+	return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2A, by Austin Appleby
+
+// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 
+// construction. Bulk speed should be identical to Murmur2, small-key speed 
+// will be 10%-20% slower due to the added overhead at the end of the hash.
+
+// This variant fixes a minor issue where null keys were more likely to
+// collide with each other than expected, and also makes the function
+// more amenable to incremental implementations.
+
+#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+// mmix scrambles k in place (so k is modified) and folds it into h; it expects m and r to be visible where it is expanded.
+uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
+{
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+	uint32_t l = len;   // original length, mixed in at the end
+
+	const unsigned char * data = (const unsigned char *)key;
+
+	uint32_t h = seed;   // unlike MurmurHash2, the length is not part of the initial state
+	// Body: one 4-byte word per step, loaded straight from data whatever its alignment
+	while(len >= 4)
+	{
+		uint32_t k = *(uint32_t*)data;
+
+		mmix(h,k);
+
+		data += 4;
+		len -= 4;
+	}
+
+	uint32_t t = 0;   // the remaining 0-3 bytes packed into one word (stays 0 when none remain)
+	// cases fall through on purpose
+	switch(len)
+	{
+	case 3: t ^= data[2] << 16;
+	case 2: t ^= data[1] << 8;
+	case 1: t ^= data[0];
+	};
+	// The tail word and the length are always mixed in, even when the tail is empty
+	mmix(h,t);
+	mmix(h,l);
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+//-----------------------------------------------------------------------------
+// CMurmurHash2A, by Austin Appleby
+
+// This is a sample implementation of MurmurHash2A designed to work 
+// incrementally.
+
+// Usage - 
+
+// CMurmurHash2A hasher
+// hasher.Begin(seed);
+// hasher.Add(data1,size1);
+// hasher.Add(data2,size2);
+// ...
+// hasher.Add(dataN,sizeN);
+// uint32_t hash = hasher.End()
+
+class CMurmurHash2A
+{
+public:
+	// Resets all state; call before the first Add().
+	void Begin ( uint32_t seed = 0 )
+	{
+		m_hash  = seed;
+		m_tail  = 0;
+		m_count = 0;
+		m_size  = 0;
+	}
+	// Feeds len more bytes into the hash.
+	void Add ( const unsigned char * data, int len )
+	{
+		m_size += len;
+		// First complete any partial word left over from an earlier Add() ...
+		MixTail(data,len);
+		// ... then mix whole 4-byte words straight from the input ...
+		while(len >= 4)
+		{
+			uint32_t k = *(uint32_t*)data;
+
+			mmix(m_hash,k);
+
+			data += 4;
+			len -= 4;
+		}
+		// ... and stash the final 0-3 bytes in m_tail.
+		MixTail(data,len);
+	}
+	// Mixes in the pending tail and the total size, applies the final mixing and returns the hash.
+	uint32_t End ( void )
+	{
+		mmix(m_hash,m_tail);   // mmix modifies its second argument, so m_tail ...
+		mmix(m_hash,m_size);   // ... and m_size are overwritten here
+
+		m_hash ^= m_hash >> 13;
+		m_hash *= m;
+		m_hash ^= m_hash >> 15;
+
+		return m_hash;
+	}
+
+private:
+	// Mixing constants read by the mmix macro.
+	static const uint32_t m = 0x5bd1e995;
+	static const int r = 24;
+	// Consumes bytes one at a time while input remains and either fewer than 4 bytes are left or a partial word is pending; advances data and decrements len.
+	void MixTail ( const unsigned char * & data, int & len )
+	{
+		while( len && ((len<4) || m_count) )
+		{
+			m_tail |= (*data++) << (m_count * 8);   // first pending byte lands in the lowest 8 bits
+
+			m_count++;
+			len--;
+
+			if(m_count == 4)   // a full word has been collected: mix it and start over
+			{
+				mmix(m_hash,m_tail);
+				m_tail = 0;
+				m_count = 0;
+			}
+		}
+	}
+
+	uint32_t m_hash;    // running hash state
+	uint32_t m_tail;    // pending bytes not yet mixed
+	uint32_t m_count;   // number of bytes held in m_tail (0-3 between calls)
+	uint32_t m_size;    // total number of bytes passed to Add()
+};
+
+//-----------------------------------------------------------------------------
+// MurmurHashNeutral2, by Austin Appleby
+
+// Same as MurmurHash2, but endian- and alignment-neutral.
+// Half the speed though, alas.
+
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
+{
+	// MurmurHash2 mixing, but every 32-bit word is assembled from
+	// individual bytes (lowest byte first), so no unaligned or
+	// byte-order-dependent loads are performed.
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+
+	const unsigned char * p = (const unsigned char *)key;
+	uint32_t h = seed ^ len;
+
+	// Body: whole 4-byte groups.
+	for( ; len >= 4; p += 4, len -= 4)
+	{
+		uint32_t k = (uint32_t)p[0]
+		           | ((uint32_t)p[1] << 8)
+		           | ((uint32_t)p[2] << 16)
+		           | ((uint32_t)p[3] << 24);
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h = (h * m) ^ k;
+	}
+
+	// Tail: the remaining 1-3 bytes are xored in, then one multiply.
+
+	if(len > 0)
+	{
+		if(len >= 3) h ^= (uint32_t)p[2] << 16;
+		if(len >= 2) h ^= (uint32_t)p[1] << 8;
+		h ^= p[0];
+		h *= m;
+	}
+
+	// Final mixing.
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance will be lower than MurmurHash2
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+// MIX scrambles k in place and folds it into h; it expects r to be visible where it is expanded.
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
+{
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+	// Hashes len bytes at key; when key is misaligned, 32-bit loads are only issued from 4-byte-aligned addresses.
+	const unsigned char * data = (const unsigned char *)key;
+
+	uint32_t h = seed ^ len;
+	// Low two address bits, taken via uintptr_t: casting the pointer itself to int is rejected or truncates on 64-bit targets.
+	int align = (int)((uintptr_t)data & 3);
+
+	if(align && (len >= 4))
+	{
+		// Pre-load the temp registers
+		// (the 4-align bytes in front of the next aligned address, moved to the top of t)
+		uint32_t t = 0, d = 0;
+
+		switch(align)
+		{
+			case 1: t |= data[2] << 16;
+			case 2: t |= data[1] << 8;
+			case 3: t |= data[0];
+		}
+
+		t <<= (8 * align);
+
+		data += 4-align;
+		len -= 4-align;
+
+		int sl = 8 * (4-align);   // left shift for the bytes taken from the newly loaded word
+		int sr = 8 * align;       // right shift for the bytes carried over in t
+
+		// Mix
+		// (each step joins carried-over bytes from t with bytes from the next aligned word d)
+		while(len >= 4)
+		{
+			d = *(uint32_t *)data;
+			t = (t >> sr) | (d << sl);
+
+			uint32_t k = t;
+
+			MIX(h,k,m);
+
+			t = d;
+
+			data += 4;
+			len -= 4;
+		}
+
+		// Handle leftover data in temp registers
+
+		d = 0;
+
+		if(len >= align)
+		{
+			// Enough bytes remain to complete one more full word with the carry in t.
+			switch(align)
+			{
+			case 3: d |= data[2] << 16;
+			case 2: d |= data[1] << 8;
+			case 1: d |= data[0];
+			}
+
+			uint32_t k = (t >> sr) | (d << sl);
+			MIX(h,k,m);
+
+			data += align;
+			len -= align;
+
+			//----------
+			// Handle tail bytes
+
+			switch(len)
+			{
+			case 3: h ^= data[2] << 16;
+			case 2: h ^= data[1] << 8;
+			case 1: h ^= data[0];
+					h *= m;
+			};
+		}
+		else
+		{
+			// Carry plus remaining bytes make up less than a word: xor them in as the tail.
+			switch(len)
+			{
+			case 3: d |= data[2] << 16;
+			case 2: d |= data[1] << 8;
+			case 1: d |= data[0];
+			case 0: h ^= (t >> sr) | (d << sl);
+					h *= m;
+			}
+		}
+
+		h ^= h >> 13;
+		h *= m;
+		h ^= h >> 15;
+
+		return h;
+	}
+	else
+	{
+		// Aligned key, or fewer than 4 bytes: plain 4-byte loads.
+		while(len >= 4)
+		{
+			uint32_t k = *(uint32_t *)data;
+
+			MIX(h,k,m);
+
+			data += 4;
+			len -= 4;
+		}
+
+		//----------
+		// Handle tail bytes
+
+		switch(len)
+		{
+		case 3: h ^= data[2] << 16;
+		case 2: h ^= data[1] << 8;
+		case 1: h ^= data[0];
+				h *= m;
+		};
+
+		h ^= h >> 13;
+		h *= m;
+		h ^= h >> 15;
+
+		return h;
+	}
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash2.h b/MurmurHash2.h
new file mode 100644
index 0000000..e3b00da
--- /dev/null
+++ b/MurmurHash2.h
@@ -0,0 +1,13 @@
+#include "pstdint.h"
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );
+uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );
+uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
+uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash2_test.cpp b/MurmurHash2_test.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
new file mode 100644
index 0000000..581d1d3
--- /dev/null
+++ b/MurmurHash3.cpp
@@ -0,0 +1,288 @@
+#include "MurmurHash3.h"
+
+#include <stdlib.h>    // for _rotl
+
+#pragma warning(disable:4100)
+
+//-----------------------------------------------------------------------------
+// need to replace this
+
+inline uint32_t kmix ( uint32_t k, uint32_t c1, uint32_t c2 )
+{
+	// key mix: multiply by c1, rotate left 11 bits, multiply by c2
+	const uint32_t scaled  = k * c1;
+	const uint32_t rotated = _rotl(scaled,11);
+
+	return rotated * c2;
+}
+
+// block mix - h becomes (h*5 + 0xa6b84e31) ^ kmix(k,c1,c2)
+
+inline void bmix1 ( uint32_t & h, uint32_t k, uint32_t c1, uint32_t c2 )
+{
+	const uint32_t mixed = kmix(k,c1,c2);
+
+	h = (h * 5) + 0xa6b84e31;
+	h = h ^ mixed;
+}
+
+// xor before mul is faster on x64 - h becomes (h ^ kmix(k,c1,c2))*3 + 0xa6b84e31
+
+inline void bmix2 ( uint32_t & h, uint32_t k, uint32_t c1, uint32_t c2 )
+{
+	const uint32_t mixed = kmix(k,c1,c2);
+
+	h = h ^ mixed;
+	h = (h * 3) + 0xa6b84e31;
+}
+
+// block constant mix - c1 becomes c1*9 + 0x273581d8, c2 becomes c2*5 + 0xee700bac
+
+inline void cmix ( uint32_t & c1, uint32_t & c2 )
+{
+	c1 *= 9; c1 += 0x273581d8;
+	c2 *= 5; c2 += 0xee700bac;
+}
+
+// finalizer mix - avalanches all bits to within 0.25% bias
+
+inline uint32_t fmix32 ( uint32_t h )
+{
+	// two rounds of "xor the high bits down, then multiply"
+	h = (h ^ (h >> 16)) * 0x85ebca6b;
+	h = (h ^ (h >> 13)) * 0xc2b2ae35;
+
+	// last fold; no multiply follows it
+	h = h ^ (h >> 16);
+	return h;
+}
+
+// 64-bit finalizer mix - avalanches all bits to within 0.05% bias
+
+inline uint64_t fmix64 ( uint64_t k )
+{
+	// two rounds of "xor the high bits down, then multiply"
+	k = (k ^ (k >> 33)) * 0xff51afd7ed558ccd;
+	k = (k ^ (k >> 33)) * 0xc4ceb9fe1a85ec53;
+
+	// last fold; no multiply follows it
+	k = k ^ (k >> 33);
+	return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
+{
+	// 32-bit hash of 'len' bytes at 'key'; the result is stored through 'out'.
+	// NOTE(review): assumes len >= 0 and that unaligned 32-bit reads are OK - confirm.
+
+	const uint8_t * bytes = (const uint8_t*)key;
+	const int nblocks = len / 4;
+	const int ntail   = len & 3;
+
+	uint32_t hash = 0x971e137b ^ seed;
+
+	uint32_t c1 = 0x95543787;
+	uint32_t c2 = 0x2ad7eb25;
+
+	//----------
+	// body - one bmix1 per whole 4-byte block, stepping the constants each time
+
+	const uint32_t * blocks = (const uint32_t *)bytes;
+
+	for(int i = 0; i < nblocks; i++)
+	{
+		bmix1(hash,blocks[i],c1,c2);
+		cmix(c1,c2);
+	}
+
+	//----------
+	// tail - the 1..3 leftover bytes are packed low byte first into one key
+
+	if(ntail != 0)
+	{
+		const uint8_t * tail = bytes + nblocks*4;
+		uint32_t k = tail[0];
+		if(ntail >= 2) k ^= tail[1] << 8;
+		if(ntail == 3) k ^= tail[2] << 16;
+		bmix1(hash,k,c1,c2);
+	}
+
+	//----------
+	// finalization - xor in the length, then avalanche
+
+	*(uint32_t*)out = fmix32(hash ^ len);
+}
+
+//-----------------------------------------------------------------------------
+
+void merge64 ( uint32_t h[2], const uint32_t * blocks, uint32_t c1, uint32_t c2 )
+{
+	// rotate both lanes, cross-add them, then mix one input word into each
+	uint32_t lo = _rotl(h[0],9);
+	uint32_t hi = _rotl(h[1],24);
+	lo += hi;
+	hi += lo;
+	h[0] = lo; h[1] = hi;
+	bmix1(h[0],blocks[0],c1,c2);
+	bmix1(h[1],blocks[1],c1,c2);
+}
+
+//----------
+
+void MurmurHash3_x86_64 ( const void * data, int len, uint32_t seed, void * out )
+{
+	// 64-bit hash of 'len' bytes at 'data', written to 'out' as two 32-bit words.
+	// NOTE(review): assumes len >= 0 and that unaligned 32-bit reads are OK - confirm.
+
+	uint32_t h[2] = { 0x8de1c3ac ^ seed, 0xbab98226 ^ seed };
+
+	uint32_t c1 = 0x95543787;
+	uint32_t c2 = 0x2ad7eb25;
+
+	//----------
+	// body - consume whole 8-byte blocks, stepping the constants after each
+
+	const uint32_t * blocks = (const uint32_t *)data;
+	int remaining = len;
+
+	for(; remaining >= 8; remaining -= 8, blocks += 2)
+	{
+		merge64(h,blocks,c1,c2);
+		cmix(c1,c2);
+	}
+
+	//----------
+	// tail - pack the 1..7 leftover bytes, low byte first, into two zeroed words
+
+	if(remaining > 0)
+	{
+		const uint8_t * tail = (const uint8_t*)blocks;
+		uint32_t k[2] = { 0, 0 };
+
+		for(int i = 0; i < remaining; i++)
+		{
+			// byte i lands in word i/4 at bit offset 8*(i%4)
+			k[i >> 2] ^= tail[i] << ((i & 3) * 8);
+		}
+
+		merge64(h,k,c1,c2);
+	}
+
+	//----------
+	// finalization
+	// NOTE(review): only the tail length (0..7) is mixed in here, because the
+	// block loop has already subtracted the bytes it consumed; confirm whether
+	// the full length was intended.
+
+	h[1] ^= remaining;
+
+	h[0] =  fmix32(h[0]);
+	h[1] ^= kmix(h[0],c1,c2);
+	h[0] ^= fmix32(h[1]);
+	h[1] ^= kmix(h[0],c1,c2);
+
+	uint32_t * result = (uint32_t*)out;
+	result[0] = h[0];
+	result[1] = h[1];
+}
+
+//-----------------------------------------------------------------------------
+
+void merge128 ( uint32_t h[4], const uint32_t * blocks, uint32_t c1, uint32_t c2 )
+{
+	static const int rot[4] = { 3, 10, 19, 26 };
+
+	// rotate every lane by its own amount
+	for(int i = 0; i < 4; i++)
+	{
+		h[i] = _rotl(h[i],rot[i]);
+	}
+
+	// lane 0 gathers the other three lanes, then is added back into each of them
+	for(int i = 1; i < 4; i++) h[0] += h[i];
+	for(int i = 1; i < 4; i++) h[i] += h[0];
+
+	// mix one input word into each lane
+	for(int i = 0; i < 4; i++)
+	{
+		bmix1(h[i],blocks[i],c1,c2);
+	}
+}
+
+//----------
+
+void MurmurHash3_x86_128 ( const void * data, int len, uint32_t seed, void * out )
+{
+	// 128-bit hash of 'len' bytes at 'data', written to 'out' as four 32-bit words.
+	//
+	// 'out' is a void* so that this definition matches the prototype declared in
+	// MurmurHash3.h; with a uint32_t* parameter the declared function would have
+	// no definition. Callers holding a uint32_t* still convert implicitly.
+	// NOTE(review): assumes len >= 0 and that unaligned 32-bit reads are OK - confirm.
+
+	uint32_t h[4] =
+	{
+		0x8de1c3ac ^ seed,
+		0xbab98226 ^ seed,
+		0xfcba5b2d ^ seed,
+		0x32452e3e ^ seed
+	};
+
+	uint32_t c1 = 0x95543787;
+	uint32_t c2 = 0x2ad7eb25;
+
+	//----------
+	// body - consume whole 16-byte blocks, stepping the constants after each
+
+	const uint32_t * blocks = (const uint32_t *)data;
+
+	while(len >= 16)
+	{
+		merge128(h,blocks,c1,c2);
+		cmix(c1,c2);
+
+		blocks += 4;
+		len -= 16;
+	}
+
+	//----------
+	// tail - pack the 1..15 leftover bytes, low byte first, into four zeroed words
+
+	if(len > 0)
+	{
+		const uint8_t * tail = (const uint8_t*)blocks;
+		uint32_t k[4] = { 0, 0, 0, 0 };
+
+		for(int i = 0; i < len; i++)
+		{
+			// byte i lands in word i/4 at bit offset 8*(i%4); shift as unsigned
+			k[i >> 2] ^= (uint32_t)tail[i] << ((i & 3) * 8);
+		}
+
+		merge128(h,k,c1,c2);
+	}
+
+	//----------
+	// finalization
+	// NOTE(review): 'len' is only the tail length here (0..15) because the block
+	// loop decremented it; kept so hash values do not change - confirm intent.
+
+	h[3] ^= len;
+
+	h[0] ^= fmix32(h[1]); h[2] ^= fmix32(h[3]);
+	h[1] ^= kmix(h[0],c1,c2); h[3] ^= kmix(h[2],c1,c2);
+	h[3] ^= fmix32(h[0]); h[1] ^= fmix32(h[2]);
+	h[0] ^= kmix(h[3],c1,c2); h[2] ^= kmix(h[1],c1,c2);
+	h[1] ^= fmix32(h[0]); h[3] ^= fmix32(h[2]);
+
+	uint32_t * result = (uint32_t*)out;
+	result[0] = h[0];
+	result[1] = h[1];
+	result[2] = h[2];
+	result[3] = h[3];
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash3.h b/MurmurHash3.h
new file mode 100644
index 0000000..5e19064
--- /dev/null
+++ b/MurmurHash3.h
@@ -0,0 +1,11 @@
+#include "pstdint.h"
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x64_32  ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x86_64  ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash64.cpp b/MurmurHash64.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/MurmurHashAligned.cpp b/MurmurHashAligned.cpp
new file mode 100644
index 0000000..716dda6
--- /dev/null
+++ b/MurmurHashAligned.cpp
@@ -0,0 +1,2 @@
+#include "stdafx.h"
+
diff --git a/MurmurHashAligned2.cpp b/MurmurHashAligned2.cpp
new file mode 100644
index 0000000..23dced4
--- /dev/null
+++ b/MurmurHashAligned2.cpp
@@ -0,0 +1,4 @@
+#include "stdafx.h"
+
+#pragma warning(disable:4311)
+
diff --git a/MurmurHashNeutral2.cpp b/MurmurHashNeutral2.cpp
new file mode 100644
index 0000000..716dda6
--- /dev/null
+++ b/MurmurHashNeutral2.cpp
@@ -0,0 +1,2 @@
+#include "stdafx.h"
+
diff --git a/MurmurHashTest.cpp b/MurmurHashTest.cpp
new file mode 100644
index 0000000..6b18f53
--- /dev/null
+++ b/MurmurHashTest.cpp
@@ -0,0 +1,26 @@
+#include "pstdint.h"
+
+uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );
+uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );
+uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
+uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
+
+// Test adapters: each calls the named hash and stores its 32-bit result through 'out'.
+void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	*(uint32_t*)out = MurmurHash1(key,len,seed);
+}
+
+void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	*(uint32_t*)out = MurmurHash2(key,len,seed);
+}
+
+void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	*(uint32_t*)out = MurmurHash2A(key,len,seed);
+}
+
diff --git a/Random.cpp b/Random.cpp
new file mode 100644
index 0000000..12d7077
--- /dev/null
+++ b/Random.cpp
@@ -0,0 +1,61 @@
+#include "Random.h"
+// Global generators; each is constructed with a different seed (1 through 4).
+Rand g_rand1(1);
+Rand g_rand2(2);
+Rand g_rand3(3);
+Rand g_rand4(4);
+
+//-----------------------------------------------------------------------------
+// Pseudo-random oracle. Mix avalanches x/y/z to < 0.07% bias.
+
+inline void omix ( uint32_t & x, uint32_t & y, uint32_t & z )
+{
+	const uint64_t mult = 0x65a3d38b;
+
+	// two identical rounds; each 64-bit product is split across the other two words
+	for(int round = 0; round < 2; round++)
+	{
+		uint64_t p = x * mult; y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32);
+		         p = z * mult; x ^= (uint32_t)p; y ^= (uint32_t)(p >> 32);
+		         p = y * mult; z ^= (uint32_t)p; x ^= (uint32_t)(p >> 32);
+	}
+}
+
+void oracle ( uint32_t key, uint32_t nonce, void * blob, int size )
+{
+	// Fills 'blob' with 'size' bytes that depend only on key, nonce and size.
+
+	uint32_t x = 0x498b3bc5 ^ key;
+	uint32_t y = 0x9c3ed699 ^ nonce;
+	uint32_t z = 0x5a05089a ^ size;
+
+	uint8_t * cursor = (uint8_t*)blob;
+
+	while(size > 0)
+	{
+		omix(x,y,z);
+
+		// '>=' so that a chunk of exactly 4 bytes is stored as a whole word; the
+		// switch below has no case for 4 and would leave those bytes unwritten
+		if(size >= 4)
+		{
+			*(uint32_t*)cursor = x;
+			cursor += 4;
+			size -= 4;
+		}
+		else
+		{
+			// 1..3 trailing bytes: the low bytes of x, lowest at cursor[0]
+			switch(size)
+			{
+			case 3: cursor[2] = (uint8_t)(x >> 16);
+			case 2: cursor[1] = (uint8_t)(x >>  8);
+			case 1: cursor[0] = (uint8_t)(x >>  0);
+			};
+
+			return;
+		}
+	}
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Random.h b/Random.h
new file mode 100644
index 0000000..87ed656
--- /dev/null
+++ b/Random.h
@@ -0,0 +1,144 @@
+#pragma once
+
+#include "Types.h"
+
+//-----------------------------------------------------------------------------
+// random oracle (stateless)
+
+void oracle ( uint32_t key, uint32_t nonce, void * blob, int size );
+
+//-----------------------------------------------------------------------------
+// Xorshift-based RNG from George Marsaglia, algorithm taken from Wikipedia
+
+struct Rand
+{
+	// xorshift state words; rand_u32() shifts them along and refills w
+	uint32_t x;
+	uint32_t y;
+	uint32_t z;
+	uint32_t w;
+
+	// default construction behaves like Rand(0)
+	Rand()
+	{
+		reseed(uint32_t(0));
+	}
+
+	Rand( uint32_t seed )
+	{
+		reseed(seed);
+	}
+
+	// advances the state by one step and returns the new w
+	uint32_t rand_u32 ( void )
+	{
+		const uint32_t t = x ^ (x << 11);
+		const uint32_t u = t ^ (t >> 8);
+		const uint32_t v = w ^ (w >> 19);
+
+		x = y;
+		y = z;
+		z = w;
+		w = v ^ u;
+
+		return w;
+	}
+
+	// resets the state from a 32-bit seed, then discards the first 10 outputs
+	void reseed ( uint32_t seed )
+	{
+		x = seed ^ 0x498b3bc5;
+		y = seed ^ 0x9c3ed699;
+		z = seed ^ 0x5a05089a;
+		w = seed ^ 0x2c8a5c59;
+
+		for(int skip = 10; skip > 0; skip--) rand_u32();
+	}
+
+	// same, but each state word takes a different 32-bit window of the 64-bit seed
+	void reseed ( uint64_t seed )
+	{
+		x = (uint32_t)(seed >>  0) ^ 0x498b3bc5;
+		y = (uint32_t)(seed >> 11) ^ 0x9c3ed699;
+		z = (uint32_t)(seed >> 22) ^ 0x5a05089a;
+		w = (uint32_t)(seed >> 32) ^ 0x2c8a5c59;
+
+		for(int skip = 10; skip > 0; skip--) rand_u32();
+	}
+
+	//-----------------------------------------------------------------------------
+	// implicit conversions - reading a Rand as an integer draws fresh output
+
+	operator uint32_t ( void )
+	{
+		return rand_u32();
+	}
+
+	operator uint64_t ( void )
+	{
+		// first draw is the high half, second draw the low half
+		const uint64_t hi = rand_u32();
+		const uint64_t lo = rand_u32();
+
+		return (hi << 32) | lo;
+	}
+
+	// fills 'bytes' bytes at 'blob': whole words first, then one draw per odd byte
+	void rand_p ( void * blob, int bytes )
+	{
+		uint32_t * word = (uint32_t*)blob;
+
+		for(; bytes >= 4; bytes -= 4) *word++ = rand_u32();
+
+		uint8_t * rest = (uint8_t*)word;
+
+		for(int left = bytes; left > 0; left--) *rest++ = (uint8_t)rand_u32();
+	}
+};
+
+//-----------------------------------------------------------------------------
+
+extern Rand g_rand1;
+// free-function shorthands that draw from the global g_rand1
+inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); }
+inline uint64_t rand_u64 ( void ) { return g_rand1.operator uint64_t(); }
+
+inline void rand_p ( void * blob, int bytes )
+{
+	// fills 'bytes' bytes at 'blob' using the free function rand_u32()
+	uint32_t * word = (uint32_t*)blob;
+
+	// whole 32-bit words first
+	for(; bytes >= 4; bytes -= 4)
+	{
+		*word++ = rand_u32();
+	}
+
+	// then a separate draw for each leftover byte, keeping its low 8 bits
+	uint8_t * rest = (uint8_t*)word;
+
+	for(int left = bytes; left > 0; left--)
+		*rest++ = (uint8_t)rand_u32();
+}
+
+//-----------------------------------------------------------------------------
+// generic fallback: overwrite the object's raw bytes with random data
+template < typename T >
+inline void rand_t ( T & t )
+{
+	rand_p(&t,(int)sizeof(t));
+}
+// 32- and 64-bit integers are drawn directly instead of being filled byte-wise
+template<> inline void rand_t ( uint32_t & t ) { t = rand_u32(); }
+template<> inline void rand_t ( uint64_t & t ) { t = rand_u64(); }
+
+template<> inline void rand_t ( u128 & t )
+{
+	// four consecutive 32-bit draws, written in increasing address order
+	uint32_t * words = (uint32_t*)&t;
+
+	for(int i = 0; i < 4; i++)
+		words[i] = rand_u32();
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SimAnneal.cpp b/SimAnneal.cpp
new file mode 100644
index 0000000..0096598
--- /dev/null
+++ b/SimAnneal.cpp
@@ -0,0 +1,97 @@
+#include "SimAnneal.h"
+
+#include "Types.h"
+#include "Random.h"
+
+//-----------------------------------------------------------------------------
+// Pseudo-simulated-annealing
+
+double SimAnneal ( void * block, int len, pfFitness fit, pfDump dump, int nFlip, int reps )
+{
+	// Hill-climbs 'block' (len bytes) in place to maximise fit() and returns the
+	// final score.
+	//
+	// Every pass starts from the best block so far and tries variants with nFlip
+	// bits flipped. A pass that finds a better block adopts it and widens the
+	// search (nFlip+1); a pass that finds none narrows it (nFlip-1). The search
+	// ends when nFlip reaches zero.
+
+	double baseScore = fit(block,len);
+
+	uint8_t * baseBlock = new uint8_t[len];
+	uint8_t * tempBlock = new uint8_t[len];
+	uint8_t * bestBlock = new uint8_t[len];
+
+	memcpy(baseBlock,block,len);
+	memcpy(bestBlock,block,len);
+
+	while(nFlip > 0)
+	{
+		printf("fit - %f, bits - %2d, dump - ",baseScore,nFlip);
+
+		dump(baseBlock,len);
+
+		double bestScore = baseScore;
+
+		// nFlip == 1 walks every bit in order; otherwise 'reps' random nFlip-bit tries
+		const int tries = (nFlip == 1) ? len*8 : reps;
+
+		for(int attempt = 0; attempt < tries; attempt++)
+		{
+			printf(".");
+
+			memcpy(tempBlock,baseBlock,len);
+
+			if(nFlip == 1)
+			{
+				flipbit(tempBlock,len,attempt);
+			}
+			else
+			{
+				// each flip picks its bit position at random (positions may repeat)
+				for(int flip = 0; flip < nFlip; flip++)
+				{
+					flipbit( tempBlock, len, rand_u32() % (len*8) );
+				}
+			}
+
+			const double tempScore = fit(tempBlock,len);
+
+			// stop at the first improvement rather than searching for the best one
+			if(tempScore > bestScore)
+			{
+				bestScore = tempScore;
+				memcpy(bestBlock,tempBlock,len);
+				break;
+			}
+		}
+
+		printf("\n");
+
+		// If we found a better solution, expand space starting from that solution
+		// Otherwise, shrink space around previous best
+
+		if(bestScore > baseScore)
+		{
+			memcpy(baseBlock,bestBlock,len);
+			baseScore = bestScore;
+
+			nFlip++;
+		}
+		else
+		{
+			nFlip--;
+		}
+	}
+
+	memcpy(block,baseBlock,len);
+
+	// release the scratch buffers
+	delete [] baseBlock;
+	delete [] tempBlock;
+	delete [] bestBlock;
+
+	return baseScore;
+}
+
+
diff --git a/SimAnneal.h b/SimAnneal.h
new file mode 100644
index 0000000..1670bbe
--- /dev/null
+++ b/SimAnneal.h
@@ -0,0 +1,6 @@
+#pragma once
+
+typedef double (*pfFitness) ( void * block, int len );
+typedef void   (*pfDump)    ( void * block, int len );
+
+double SimAnneal ( void * block, int len, pfFitness fit, pfDump dump, int nFlip, int reps );
\ No newline at end of file
diff --git a/SparseKeyTest.cpp b/SparseKeyTest.cpp
new file mode 100644
index 0000000..234b6bb
--- /dev/null
+++ b/SparseKeyTest.cpp
@@ -0,0 +1,111 @@
+#include "SparseKeyTest.h"
+
+#include "Types.h"
+#include "Stats.h" // for testkeylist
+
+//----------------------------------------------------------------------------
+
+template < int keybits, typename hashtype >
+bool SparseKeyTest3 ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
+{
+	// Builds every Blob<keybits> key with exactly (or, if inclusive, up to) 'setbits'
+	// bits set and runs the list through testkeylist(); returns its verdict.
+	printf("Testing %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
+
+	typedef Blob<keybits> keytype;
+	std::vector<keytype> keys;
+
+	keytype k;
+	memset(&k,0,sizeof(k));
+
+	// the inclusive set also contains the all-zero key
+	if(inclusive) keys.push_back(k);
+	SparseKeygenRecurse(0,setbits,inclusive,k,keys);
+
+	// the byte count is a size_t product; convert the whole product so it matches %d
+	printf("%d keys, %d bytes\n",(int)keys.size(),(int)(keys.size() * sizeof(keytype)));
+
+	bool result = testkeylist<keytype,hashtype>(hash,keys,testColl,testDist,drawDiagram);
+	printf("\n");
+	return result;
+}
+
+//----------------------------------------------------------------------------
+
+template< typename hashtype >
+bool SparsePermuteKeyTest2 ( hashfunc<hashtype> hash, bool testColl, bool testDist, bool drawDiagram )
+{
+	// Hashes the orderings that SPKeygenRecurse2 generates from a key made of
+	// single-bit 32-bit words and returns testkeylist()'s verdict.
+
+	typedef Blob<320> keytype;
+	std::vector<keytype> keys;
+
+	printf("Testing %d-bit sparse-permute keys - ",(int)(sizeof(keytype)*8));
+
+	//----------
+
+	keytype key;
+
+	const int ndwords = sizeof(keytype) / 4;
+	uint32_t * dwords = (uint32_t*)&key;
+
+	for(int i = 0; i < ndwords; i++)
+	{
+		// (i+2)*3 passes 31 once i reaches 9, and shifting a 32-bit value by 32
+		// or more is undefined - wrap the bit index into 0..31 explicitly
+		dwords[i] = uint32_t(1) << (((i+2) * 3) % 32);
+	}
+
+	SPKeygenRecurse2(key,0,keys);
+
+	printf("%d keys, %d bytes\n",(int)keys.size(),(int)(keys.size() * sizeof(keytype)));
+
+	//----------
+
+	return testkeylist<keytype,hashtype>(hash,keys,testColl,testDist,drawDiagram);
+}
+
+//----------------------------------------------------------------------------
+// Inclusive test produces about the same distribution on poor hashes, and
+// tends to create more collisions.
+
+template < typename hashtype >
+bool SparseKeyTest2 ( hashfunc<hashtype> hash, bool drawDiagram )
+{
+	// every sub-test runs even after an earlier one has failed; the return
+	// value is true only when all of them passed
+
+	const bool ok32   = SparseKeyTest3<32,hashtype>(hash,6,true,true,true,drawDiagram);
+	const bool ok40   = SparseKeyTest3<40,hashtype>(hash,6,true,true,true,drawDiagram);
+	const bool ok48   = SparseKeyTest3<48,hashtype>(hash,5,true,true,true,drawDiagram);
+	const bool ok56   = SparseKeyTest3<56,hashtype>(hash,5,true,true,true,drawDiagram);
+
+	const bool ok64   = SparseKeyTest3<64,hashtype>(hash,5,true,true,true,drawDiagram);
+	const bool ok96   = SparseKeyTest3<96,hashtype>(hash,4,true,true,true,drawDiagram);
+	const bool ok256  = SparseKeyTest3<256,hashtype>(hash,3,true,true,true,drawDiagram);
+	const bool ok1536 = SparseKeyTest3<1536,hashtype>(hash,2,true,true,true,drawDiagram);
+
+	// 192-bit sparse keys with 4 bits set would generate 1.4 gigs of keydata, so
+	// that configuration is not run here
+
+	const bool okPermute = SparsePermuteKeyTest2<hashtype>(hash,true,true,drawDiagram);
+
+	return ok32 && ok40 && ok48 && ok56
+	    && ok64 && ok96 && ok256 && ok1536 && okPermute;
+}
+// Non-template entry points: one overload per hash width, each forwarding to SparseKeyTest2.
+bool SparseKeyTest ( hashfunc<uint32_t> hash, bool drawDiagram )
+{
+	return SparseKeyTest2<uint32_t>(hash,drawDiagram);
+}
+
+bool SparseKeyTest ( hashfunc<uint64_t> hash, bool drawDiagram )
+{
+	return SparseKeyTest2<uint64_t>(hash,drawDiagram);
+}
+
+bool SparseKeyTest ( hashfunc<u128> hash, bool drawDiagram )
+{
+	return SparseKeyTest2<u128>(hash,drawDiagram);
+}
diff --git a/SparseKeyTest.h b/SparseKeyTest.h
new file mode 100644
index 0000000..bfec8c5
--- /dev/null
+++ b/SparseKeyTest.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include "Types.h"
+
+#pragma warning(push)
+#pragma warning(disable:4200) // user-defined type contains zero length array
+#pragma warning(disable:4127) // conditional expression is constant
+
+//-----------------------------------------------------------------------------
+
+template < typename keytype >
+void SparseKeygenRecurse ( int start, int bitsleft, bool inclusive, keytype & k, std::vector<keytype> & keys )
+{
+	// For each bit position from 'start' upwards: flip that bit of 'k', record
+	// the key (always when inclusive, otherwise only when this was the last of
+	// the 'bitsleft' flips), recurse on the higher positions with one flip
+	// fewer, then flip the bit again.
+
+	const int nbytes = sizeof(keytype);
+	const int nbits = nbytes * 8;
+	const bool lastFlip = (bitsleft == 1);
+
+	for(int bit = start; bit < nbits; bit++)
+	{
+		flipbit(&k,nbytes,bit);
+
+		if(inclusive || lastFlip) keys.push_back(k);
+
+		if(bitsleft > 1) SparseKeygenRecurse(bit+1,bitsleft-1,inclusive,k,keys);
+
+		flipbit(&k,nbytes,bit);   // presumably undoes the first flip (flipbit is defined elsewhere)
+	}
+}
+
+//----------
+
+template < typename keytype >
+void SparseKeygenRecurse_R ( int start, int bitsleft, bool inclusive, keytype & k, std::vector<keytype> & keys )
+{
+	// Mirror image of SparseKeygenRecurse: loop index i addresses bit
+	// (nbits-i-1), so positions are visited from the top bit downwards.
+	// The recursion stays in this mirrored variant so that nested levels
+	// address bits the same way as the level that called them.
+
+	const int nbytes = sizeof(keytype);
+	const int nbits = nbytes * 8;
+
+	for(int i = start; i < nbits; i++)
+	{
+		const int bit = nbits-i-1;
+		flipbit(&k,nbytes,bit);
+
+		if(inclusive || (bitsleft == 1)) keys.push_back(k);
+
+		if(bitsleft > 1) SparseKeygenRecurse_R(i+1,bitsleft-1,inclusive,k,keys);
+
+		flipbit(&k,nbytes,bit);
+	}
+}
+
+//----------
+
+template< typename keytype >
+void SPKeygenRecurse2 ( keytype & key, int k, std::vector<keytype> & keys )
+{
+	// Treats 'key' as an array of 32-bit words and appends to 'keys' every
+	// arrangement reachable by permuting words k..ndwords-1; words below k
+	// are left where they are.
+	//assert(keytype::align4);
+
+	const int ndwords = key.nbytes/4;
+	uint32_t * dwords = (uint32_t*)&key;
+
+	if(k != ndwords-1)
+	{
+		for(int i = k; i < ndwords; i++)
+		{
+			swap(dwords[k],dwords[i]);
+			SPKeygenRecurse2(key,k+1,keys);
+			swap(dwords[k],dwords[i]);   // swap back before trying the next word
+		}
+		return;
+	}
+	keys.push_back(key);   // only one word left to place
+}
+
+//-----------------------------------------------------------------------------
+
+#pragma warning(pop)
diff --git a/Stats.cpp b/Stats.cpp
new file mode 100644
index 0000000..31ca481
--- /dev/null
+++ b/Stats.cpp
@@ -0,0 +1,338 @@
+#include "Stats.h"
+
+//-----------------------------------------------------------------------------
+
+// If you want to compute these two statistics, uncomment the code and link with
+// the GSL library.
+
+/*
+extern "C"
+{
+	double gsl_sf_gamma_inc_P(const double a, const double x);
+	double gsl_sf_gamma_inc_Q(const double a, const double x);
+};
+
+// P-val for a set of binomial distributions
+
+void pval_binomial ( int * buckets, int len, int n, double p, double & sdev, double & pval )
+{
+	double c = 0;
+
+	double u = n*p;
+	double s = sqrt(n*p*(1-p));
+
+	for(int i = 0; i < len; i++)
+	{
+		double x = buckets[i];
+
+		double n = (x-u)/s;
+
+		c += n*n;
+	}
+
+	sdev = sqrt(c / len);
+
+	pval = gsl_sf_gamma_inc_P( len/2, c/2 );
+}
+
+// P-val for a histogram - K keys distributed between N buckets
+// Note the (len-1) due to the degree-of-freedom reduction
+
+void pval_pearson ( int * buckets, int len, int keys, double & sdev, double & pval )
+{
+	double c = 0;
+
+	double n = keys;
+	double p = 1.0 / double(len);
+
+	double u = n*p;
+	double s = sqrt(n*p*(1-p));
+
+	for(int i = 0; i < len; i++)
+	{
+		double x = buckets[i];
+
+		double n = (x-u)/s;
+
+		c += n*n;
+	}
+
+	sdev = sqrt(c / len);
+
+	pval = gsl_sf_gamma_inc_P( (len-1)/2, c/2 );
+}
+*/
+
+//----------------------------------------------------------------------------
+
+double erf2 ( double x )
+{
+	// Polynomial approximation of the error function; erf2(-x) == -erf2(x).
+	const double a1 =  0.254829592;
+	const double a2 = -0.284496736;
+	const double a3 =  1.421413741;
+	const double a4 = -1.453152027;
+	const double a5 =  1.061405429;
+	const double p  =  0.3275911;
+
+	const double sign = (x < 0) ? -1 : 1;
+	// fabs, not abs: abs() can resolve to the integer version and truncate x
+	x = fabs(x);
+
+	const double t = 1.0/(1.0 + p*x);
+	const double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x);
+
+	return sign*y;
+}
+
+double normal_cdf ( double u, double s2, double x )
+{
+	// CDF at x of a normal distribution with mean u and variance s2,
+	// evaluated through the erf2 approximation
+	const double z = (x - u) / sqrt(2*s2);
+
+	return (1 + erf2(z)) / 2;
+}
+
+double binom_cdf ( double n, double p, double k )
+{
+	// normal approximation: mean n*p, variance n*p*(1-p)
+	const double mean     = n*p;
+	const double variance = mean*(1-p);
+	return normal_cdf(mean,variance,k);
+}
+
+// return the probability that a random variable from distribution A is greater than a random variable from distribution B
+double comparenorms ( double uA, double sA, double uB, double sB )
+{
+	// evaluates 1 - CDF(0) of a normal with mean uA-uB and variance sA^2+sB^2
+	const double diffMean = uA-uB;
+	const double diffVar  = sA*sA+sB*sB;
+	return 1.0 - normal_cdf(diffMean,diffVar,0);
+}
+
+// convert beta distribution to normal distribution approximation
+
+void beta2norm ( double a, double b, double & u, double & s )
+{
+	const double sum = a+b;
+
+	// mean of Beta(a,b)
+	u = a / sum;
+
+	// standard deviation: sqrt( a*b / ((a+b)^2 * (a+b+1)) )
+	s = sqrt( (a*b) / (sum*sum*(sum+1)) );
+}
+
+#pragma warning(disable : 4189)
+
+double comparecoins ( double hA, double tA, double hB, double tB )
+{
+	// beta2norm turns each (h+1,t+1) pair into a mean and a deviation
+	double meanA, sdevA, meanB, sdevB;
+
+	beta2norm(hA+1,tA+1,meanA,sdevA);
+	beta2norm(hB+1,tB+1,meanB,sdevB);
+
+	// this is not the right way to handle the discontinuity at 0.5, but i don't want to deal with truncated normal distributions...
+	meanA = (meanA < 0.5) ? (1.0 - meanA) : meanA;
+	meanB = (meanB < 0.5) ? (1.0 - meanB) : meanB;
+
+	return 1.0 - comparenorms(meanA,sdevA,meanB,sdevB);
+}
+
+// Binomial distribution using the normal approximation
+
+double binom2 ( double n, double p, double k )
+{
+	const double pi = 3.14159265358979323846264338327950288419716939937510;
+
+	const double u  = n*p;
+	const double s2 = n*p*(1-p);
+
+	// normal density with mean u and variance s2, evaluated at k
+	const double d        = k-u;
+	const double exponent = d*d / (-2.0*s2);
+	const double scale    = sqrt(s2*2.0*pi);
+
+	return exp(exponent) / scale;
+}
+
+double RandWork ( double bucketcount, double keycount )
+{
+	const double avgload = keycount / bucketcount;
+
+	double total = 0;
+
+	if(!(avgload <= 16))
+	{
+		// high load: precision errors screw up the direct calculation, so fall
+		// back to the normal approximation to the binomial distribution, summed
+		// over bucket sizes from a fifth of the average load to five times it
+
+		const double lo = avgload / 5.0;
+		const double hi = avgload * 5.0;
+
+		for(double size = lo; size <= hi; size++)
+		{
+			const double weight = binom2(keycount,1.0 / bucketcount,size);
+
+			total += double((size*size+size) / 2) * weight;
+		}
+	}
+	else
+	{
+		// low load: the expected work can be computed directly
+		// p starts as the chance of a bucket holding 0 keys and steps to i+1 keys
+		double p = pow((bucketcount-1)/bucketcount,keycount);
+		double work = 0;
+
+		for(double i = 0; i < 50; i++)
+		{
+			work  += i;
+			total += work * p;
+
+			p *= (keycount-i) / ( (i+1) * (bucketcount-1) );
+		}
+	}
+
+	return total / avgload;
+}
+
+// Normalized standard deviation.
+
+double nsdev ( int * buckets, int len, int keys )
+{
+	// Root-mean-square of the buckets' z-scores, where each bucket is modelled
+	// as binomial with 'keys' trials and success probability 1/len.
+
+	const double n = len;
+	const double k = keys;
+	const double p = 1.0/n;
+
+	const double mean = k*p;
+	const double sdev = sqrt(k*p*(1-p));
+
+	double sumsq = 0;
+
+	for(int i = 0; i < len; i++)
+	{
+		const double count = buckets[i];
+		const double z = (count - mean) / sdev;
+
+		sumsq += z*z;
+	}
+
+	return sqrt(sumsq / n);
+}
+
+
+double chooseK ( int n, int k )
+{
+	// binomial coefficient C(n,k) as a double; uses C(n,k) == C(n,n-k) to
+	// iterate over the smaller of the two
+	const int terms = (k > (n - k)) ? (n - k) : k;
+
+	double c = 1;
+
+	for(int i = 0; i < terms; i++)
+	{
+		c *= double(n-i) / double(i+1);
+	}
+
+	return c;
+}
+
+double chooseUpToK ( int n, int k )
+{
+	// sum of chooseK(n,i) for i = 1..k; the i = 0 term is not included
+	double total = 0;
+
+	for(int chosen = 1; chosen <= k; ++chosen)
+	{
+		total += chooseK(n,chosen);
+	}
+	return total;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t bitrev ( uint32_t v )
+{
+	// reverse the 32 bits by swapping neighbouring groups of width 1,2,4,8, then 16
+	static const uint32_t mask[4] = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF };
+
+	for(int i = 0; i < 4; i++)
+		v = ((v >> (1 << i)) & mask[i]) | ((v & mask[i]) << (1 << i));
+
+	return (v >> 16) | (v << 16);
+}
+
+//-----------------------------------------------------------------------------
+
+// Distribution "score"
+// TODO - big writeup of what this score means
+
+// Basically, we're computing a constant X that says "The test distribution is as
+// uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
+// the bins". This makes for a nice uniform way to rate a distribution that isn't
+// dependent on the number of bins or the number of keys
+
+// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
+// as distribution weaknesses)
+
+double calcScore ( std::vector<int> const & bins, int keys )
+{
+	const double n = (int)bins.size();
+	const double k = keys;
+
+	// compute rms value
+
+	double sumsq = 0;
+
+	for(std::vector<int>::const_iterator it = bins.begin(); it != bins.end(); ++it)
+	{
+		const double b = *it;
+
+		sumsq += b*b;
+	}
+
+	const double rms = sqrt(sumsq / n);
+
+	// compute fill factor
+
+	const double fill = (k*k - 1) / (n*rms*rms - k);
+
+	// rescale to (0,1) with 0 = good, 1 = bad
+
+	return 1 - (fill / n);
+}
+
+
+//----------------------------------------------------------------------------
+
+void plot ( double n )
+{
+	// prints one character for n: '.' when n*100 rounds to 0, the digit
+	// '1'..'9' for 1..9, and 'X' for anything larger (negative n counts as 0)
+	double scaled = n * 1;
+
+	if(scaled < 0) scaled = 0;
+
+	scaled *= 100;
+
+	if(scaled > 64) scaled = 64;
+
+	const int level = (int)floor(scaled + 0.5);
+
+	if(level == 0)
+	{
+		printf(".");
+		return;
+	}
+
+	const char digit = '0' + char(level);
+	printf("%c",(digit > '9') ? 'X' : digit);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Stats.h b/Stats.h
new file mode 100644
index 0000000..5cae64e
--- /dev/null
+++ b/Stats.h
@@ -0,0 +1,559 @@
+#pragma once
+
+#include "Core.h"
+
+#include <algorithm>
+#include <math.h>
+#include <assert.h>
+#include <float.h>
+
+double calcScore ( std::vector<int> const & bins, int balls );
+
+void plot ( double n );
+
+inline double ExpectedCollisions ( double balls, double bins )
+{
+	return (balls - bins) + pow(1 - 1/bins,balls) * bins; // balls minus the expected number of occupied bins
+}
+
+double comparenorms ( double u1, double s1, double u2, double s2 );
+void beta2norm ( double a, double b, double & u, double & s );
+
+double chooseK ( int b, int k );
+double chooseUpToK ( int n, int k );
+
+inline uint32_t f3mix ( uint32_t k )
+{
+	// Two xor-shift + multiply rounds followed by a final xor-shift.
+	k = (k ^ (k >> 16)) * 0x85ebca6b;
+	k = (k ^ (k >> 13)) * 0xc2b2ae35;
+
+	k ^= k >> 16;
+
+	return k;
+}
+
+//-----------------------------------------------------------------------------
+
+template< typename hashtype >
+int CountCollisions ( std::vector<hashtype> const & hashes )
+{
+	// Work on a sorted copy so that equal hashes become adjacent.
+	std::vector<hashtype> sorted(hashes);
+	std::sort(sorted.begin(),sorted.end());
+
+	// Each adjacent equal pair counts as one collision.
+	int collisions = 0;
+
+	for(size_t i = 1; i < sorted.size(); i++)
+		collisions += (sorted[i] == sorted[i-1]) ? 1 : 0;
+
+	return collisions;
+}
+
+//-----------------------------------------------------------------------------
+
+/*
+template < class keytype, typename hashtype >
+int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
+{
+	int collcount = 0;
+
+	typedef std::map<hashtype,keytype> htab;
+	htab tab;
+
+	for(size_t i = 1; i < keys.size(); i++)
+	{
+		keytype & k1 = keys[i];
+
+		hashtype h = hash(&k1,sizeof(k),0);
+
+		htab::iterator it = tab.find(h);
+
+		if(it != tab.end())
+		{
+			keytype & k2 = (*it).second;
+
+			printf("A: ");
+			printbits(&k1,sizeof(k1));
+			printf("B: ");
+			printbits(&k2,sizeof(k2));
+		}
+		else
+		{
+			htab.insert( htab::value_type(h,k);
+		}
+	}
+
+	return collcount;
+}
+*/
+
+//----------------------------------------------------------------------------
+
+// Defined further down in this header; declared here so the call below resolves
+// under standard two-phase template lookup even when hashtype is a built-in integer.
+template < typename hashtype >
+double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram );
+
+// Runs the collision and/or distribution checks on a list of hash values.
+// Returns false only when the collision count exceeds twice the expected value;
+// the distribution score is printed but does not influence the result.
+template < typename hashtype >
+bool testhashlist( std::vector<hashtype> & hashes, bool testColl, bool testDist, bool drawDiagram )
+{
+	bool verbose = true;
+	bool result = true;
+
+	if(testColl)
+	{
+		size_t count = hashes.size();
+
+		// Birthday estimate: count*(count-1)/2 pairs, each colliding with probability 2^-bits.
+		double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
+
+		if(verbose) printf("Testing collisions - Expected %8.2f, ",expected);
+
+		// CountCollisions returns an int; it is widened to double for the ratio.
+		double collcount = CountCollisions(hashes);
+		double collscore = collcount / expected;
+
+		if(verbose)
+			printf("actual %8.2f (%5.2fx) \n",collcount,collscore);
+		else
+			printf("Coll score %5.3f, ",collscore);
+
+		// 2x expected collisions = fail
+		if(collscore > 2.0)
+			result = false;
+	}
+
+	//----------
+
+	if(testDist)
+	{
+		if(verbose) printf("Testing distribution - ");
+
+		if(drawDiagram) printf("\n");
+
+		TestDistribution(hashes,drawDiagram);
+	}
+
+	return result;
+}
+
+//-----------------------------------------------------------------------------
+
+// Hashes every key in 'keys' (sizeof(keytype) raw bytes each, seed 0) and
+// forwards the resulting hash list to testhashlist, whose verdict is returned
+// unchanged.
+template < class keytype, typename hashtype >
+bool testkeylist ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
+{
+	// The key count is narrowed to int, matching the loop index below.
+	const int total = (int)keys.size();
+
+	// One output slot per key, filled in key order.
+	std::vector<hashtype> hashes;
+	hashes.resize(total);
+
+	for(int idx = 0; idx < total; idx++)
+	{
+		keytype & current = keys[idx];
+
+		// The key is hashed as the bytes of the object starting at its address.
+		hashes[idx] = hash(&current,sizeof(current),0);
+	}
+
+	// Collision / distribution analysis happens in testhashlist.
+	const bool passed = testhashlist(hashes,testColl,testDist,drawDiagram);
+
+	return passed;
+}
+
+//-----------------------------------------------------------------------------
+
+// Hashes each string's characters (length = size(), seed 0) and runs the
+// requested collision / distribution checks on the resulting hashes.
+template < typename hashtype >
+bool testkeylist_string ( hashfunc<hashtype> hash, std::vector<std::string> & keys, bool testColl, bool testDist )
+{
+	int keycount = (int)keys.size();
+
+	std::vector<hashtype> hashes;
+
+	hashes.resize(keycount);
+
+	for(int ikey = 0; ikey < keycount; ikey++)
+	{
+		std::string & k = keys[ikey];
+
+		hashes[ikey] = hash(&k[0],(int)k.size(),0);
+	}
+
+	// testhashlist takes four arguments and has no defaults; this wrapper has no
+	// drawDiagram parameter of its own, so the diagram is simply turned off.
+	const bool drawDiagram = false;
+
+	bool result = testhashlist(hashes,testColl,testDist,drawDiagram);
+
+	return result;
+}
+
+//-----------------------------------------------------------------------------
+// Bytepair test - generate 16-bit indices from all possible non-overlapping
+// 8-bit sections of the hash value, check distribution on all of them.
+
+// This is a very good test for catching weak intercorrelations between bits - 
+// much harder to pass than the normal distribution test. However, it doesn't
+// really model the normal usage of hash functions in hash table lookup, so
+// I'm not sure it's that useful (and hash functions that fail this test but
+// pass the normal distribution test still work well in practice)
+
+template < typename hashtype >
+double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+	const int hashBits = (int)sizeof(hashtype) * 8;
+
+	// Two 8-bit windows are concatenated into one 16-bit bin index.
+	const int binCount = 65536;
+
+	std::vector<int> histogram(binCount,0);
+
+	double worstScore = 0;
+
+	for(int a = 0; a < hashBits; a++)
+	{
+		if(drawDiagram)
+		{
+			// Blank line between groups of eight rows, then open the row.
+			if((a % 8 == 0) && (a > 0)) printf("\n");
+			printf("[");
+		}
+
+		for(int b = 0; b < hashBits; b++)
+		{
+			// Space between groups of eight columns.
+			if(drawDiagram && (b % 8 == 0) && (b > 0)) printf(" ");
+
+			histogram.assign(binCount,0);
+
+			for(size_t i = 0; i < hashes.size(); i++)
+			{
+				hashtype & value = hashes[i];
+
+				const uint32_t lo = window(&value,sizeof(value),a,8);
+				const uint32_t hi = window(&value,sizeof(value),b,8);
+
+				histogram[lo | (hi << 8)]++;
+			}
+
+			const double score = calcScore(histogram,hashes.size());
+
+			if(drawDiagram) plot(score);
+
+			if(score > worstScore) worstScore = score;
+		}
+
+		if(drawDiagram) printf("]\n");
+	}
+
+	return worstScore;
+}
+
+
+//----------------------------------------------------------------------------
+// Measure the distribution "score" for each possible N-bit span of the hash, for
+// N = 1..20 (maxwidth), and optionally draw a graph of the output. 'X' in the
+// graph = 10% or more deviation from ideal. Returns the worst score seen.
+template< typename hashtype >
+double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+	bool verbose = false;
+
+	const int nbits = sizeof(hashtype) * 8;
+	const int maxwidth = 20;
+
+	std::vector<int> bins;
+
+	double worst = 0;
+	int worstStart = -1;
+	int worstWidth = -1;
+
+	for(int width = 1; width <= maxwidth; width++)
+	{
+		const int bincount = (1 << width);
+
+		//If we don't have enough keys to get 2 per bin, skip the test
+		//if(double(hashes.size()) / double(bincount) < 2.0) continue;
+
+		if(drawDiagram) printf("%2d - [",width);
+
+		for(int start = 0; start < nbits; start++)
+		{
+			bins.clear();
+			bins.resize(bincount, 0);
+
+			for(size_t j = 0; j < hashes.size(); j++)
+			{
+				hashtype & hash = hashes[j];
+
+				uint32_t index = window(&hash,sizeof(hash),start,width);
+
+				bins[index]++;
+			}
+
+			double n = calcScore(bins,(int)hashes.size());
+
+			if(n > worst)
+			{
+				worst = n;
+				worstStart = start;
+				worstWidth = width;
+			}
+
+			if(drawDiagram) plot(n);
+		}
+
+		if(drawDiagram) printf("]\n");
+	}
+
+	if(verbose)
+	{
+		// The last bit index is reduced modulo the hash width rather than a fixed 32.
+		printf("Worst distribution is for (%d:%d) - %f\n",worstStart,(worstStart+worstWidth-1)%nbits,worst);
+	}
+	else
+	{
+		printf("Dist score %6.3f\n",(1.0 - worst) * 100);
+	}
+
+	return worst;
+}
+
+//-----------------------------------------------------------------------------
+// Simplified test - only check 64k distributions, and only on byte boundaries
+
+template < typename hashtype >
+void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
+{
+	const int hashBits = sizeof(hashtype) * 8;
+	const int binCount = 65536;
+
+	std::vector<int> histogram(binCount,0);
+
+	// Outputs: worst (largest) score and the running sum that becomes the mean.
+	dworst = -1.0e90;
+	davg = 0;
+
+	// One 16-bit window per byte offset of the hash.
+	for(int offset = 0; offset < hashBits; offset += 8)
+	{
+		histogram.assign(binCount,0);
+
+		for(size_t i = 0; i < hashes.size(); i++)
+		{
+			hashtype & value = hashes[i];
+
+			const uint32_t slot = window(&value,sizeof(value),offset,16);
+
+			histogram[slot]++;
+		}
+
+		const double score = calcScore(histogram,(int)hashes.size());
+		davg += score;
+		dworst = (score > dworst) ? score : dworst;
+	}
+
+	// Average over the number of byte offsets tested.
+	davg /= double(hashBits/8);
+}
+
+//-----------------------------------------------------------------------------
+
+/*
+struct Stats 
+{
+	enum mode
+	{
+		AVALANCHE,
+		HISTOGRAM,
+	};
+
+	Stats ( int mode, std::vector<int> const & bins, int balls )
+	{
+		switch(mode)
+		{
+		case AVALANCHE:
+			calcAvalanche(bins,balls);
+			break;
+
+		case HISTOGRAM:
+			calcHistogram(bins,balls);
+			break;
+
+		default:
+			assert(false);
+			break;
+		}
+	}
+
+	//----------
+	// Histogram mode
+
+	void calcHistogram ( std::vector<int> const & bins, int balls )
+	{
+		m_nbins  = (int)bins.size();
+		m_nballs = balls;
+
+		m_mean   = 0;
+		m_rms    = 0;
+		m_sigma  = 0;
+		m_max    = -DBL_MAX;
+		m_min    = DBL_MAX;
+
+		for(size_t i = 0; i < bins.size(); i++)
+		{
+			double x = bins[i];
+
+			m_mean += x;
+			m_rms += x*x;
+
+			m_max = x > m_max ? x : m_max;
+			m_min = x < m_min ? x : m_min;
+		}
+
+		m_mean /= m_nbins;
+		m_rms /= m_nbins;
+		m_rms = sqrt(m_rms);
+
+		for(size_t i = 0; i < bins.size(); i++)
+		{
+			double d = bins[i] - m_mean;
+			
+			m_sigma += d*d;
+		}
+
+		m_sigma /= m_nbins;
+		m_sigma = sqrt(m_sigma);
+	}
+	
+	//----------
+	// Normalized standard deviation
+
+	double calcNSD ( std::vector<int> const & bins, int balls )
+	{
+		double n = (int)bins.size();
+		double k = balls;
+		double p = 1.0/n;
+
+		double u = k*p;
+		double s = sqrt(k*p*(1-p));
+
+		double c = 0;
+
+		for(size_t i = 0; i < bins.size(); i++)
+		{
+			double d = bins[i];
+
+			d = (d-u)/s;
+
+			c += d*d;
+		}
+
+		m_nsd = sqrt(c / m_nbins);
+	}
+
+	double calcScore ( std::vector<int> const & bins, int balls )
+	{
+		double n = (int)bins.size();
+		double k = balls;
+
+		// compute rms value
+
+		double r = 0;
+
+		for(size_t i = 0; i < bins.size(); i++)
+		{
+			double b = bins[i];
+
+			r += b*b;
+		}
+
+		r = sqrt(r / n);
+
+		// compute fill factor
+
+		double f = (k*k - 1) / (n*r*r - k);
+
+		// rescale to (0,1) with 0 = good, 1 = bad
+
+		m_score = 1 - (f / n);
+	}
+
+	//----------
+	// Avalanche statistics - convert each table entry to a bias value
+	// and compute stats based on that.
+
+	void calcAvalanche ( std::vector<int> const & bins, int balls )
+	{
+		m_nbins  = (int)bins.size();
+		m_nballs = balls;
+
+		m_mean   = 0;
+		m_rms    = 0;
+		m_sigma  = 0;
+		m_max    = -DBL_MAX;
+		m_min    = DBL_MAX;
+		m_nbad   = 0;
+
+		for(size_t i = 0; i < bins.size(); i++)
+		{
+			double x = (bins[i] / m_nballs) * 2 - 1;
+
+			m_mean += x;
+			m_rms += x*x;
+
+			x = fabs(x);
+
+			if(x > 0.7) m_nbad++;
+
+			m_max = x > m_max ? x : m_max;
+			m_min = x < m_min ? x : m_min;
+		}
+
+		m_mean /= m_nbins;
+		m_rms /= m_nbins;
+		m_rms = sqrt(m_rms);
+
+		for(size_t i = 0; i < bins.size(); i++)
+		{
+			double x = (bins[i] / m_nballs) * 2 - 1;
+
+			double d = x - m_mean;
+			
+			m_sigma += d*d;
+		}
+
+		m_sigma /= m_nbins;
+		m_sigma = sqrt(m_sigma);
+	}
+
+	double m_nbins;
+	double m_nballs;
+
+	double m_mean;
+	double m_rms;
+	double m_sigma;
+
+	double m_nsd;
+	double m_score;
+
+	double m_nbad;
+
+	double m_max;
+	double m_min;
+};
+*/
+
+//-----------------------------------------------------------------------------
diff --git a/StreamCipher.cpp b/StreamCipher.cpp
new file mode 100644
index 0000000..bf9f620
--- /dev/null
+++ b/StreamCipher.cpp
@@ -0,0 +1,13 @@
+#include "StreamCipher.h"
+
+//----------------------------------------------------------------------------
+
+StreamCipher::StreamCipher ( void ) // empty body: this constructor does no work of its own
+{
+}
+
+StreamCipher::~StreamCipher ( void ) // empty body; declared virtual in StreamCipher.h
+{
+}
+
+//----------------------------------------------------------------------------
diff --git a/StreamCipher.h b/StreamCipher.h
new file mode 100644
index 0000000..b78e4db
--- /dev/null
+++ b/StreamCipher.h
@@ -0,0 +1,17 @@
+#pragma once
+#include "Cipher.h"
+
+//----------------------------------------------------------------------------
+
+class StreamCipher : public Cipher
+{
+public:
+	// Abstract interface: encrypt and decrypt are pure virtual, so only subclasses can be instantiated.
+	StreamCipher ( void );
+	virtual ~StreamCipher ( void );
+	// NOTE(review): presumably k = key, p = plaintext, c = ciphertext, size = byte count - confirm against an implementation.
+	virtual void encrypt ( void * k, int keySize, void * p, void * c, int size ) = 0;
+	virtual void decrypt ( void * k, int keySize, void * c, void * p, int size ) = 0;
+};
+
+//----------------------------------------------------------------------------
diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
new file mode 100644
index 0000000..3425634
--- /dev/null
+++ b/SuperFastHash.cpp
@@ -0,0 +1,68 @@
+#include "pstdint.h"
+
+/* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 
+   license. See: 
+   http://www.azillionmonkeys.com/qed/weblicense.html for license details.
+
+   http://www.azillionmonkeys.com/qed/hash.html */
+
+#undef get16bits // drop any earlier definition before one of the two below is chosen
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d))) // direct 16-bit load through a cast pointer
+#endif
+// Fallback for every other compiler: assemble the value from two byte loads, byte 0 in the low half.
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+
+uint32_t SuperFastHash (const char * data, int len) {
+uint32_t hash = 0, tmp;
+int rem;
+// Returns 0 for a NULL pointer or a non-positive length; otherwise a 32-bit hash of data[0..len).
+	if (len <= 0 || data == NULL) return 0;
+
+	rem = len & 3;  /* 0..3 trailing bytes left over after the 4-byte groups */
+	len >>= 2;      /* from here on, len counts 4-byte groups */
+
+	/* Main loop: consumes four input bytes (two 16-bit reads) per iteration */
+	for (;len > 0; len--) {
+		hash  += get16bits (data);
+		tmp    = (get16bits (data+2) << 11) ^ hash;
+		hash   = (hash << 16) ^ tmp;
+		data  += 2*sizeof (uint16_t);
+		hash  += hash >> 11;
+	}
+
+	/* Handle end cases: mix in the 1-3 leftover bytes; rem == 0 matches no case */
+	switch (rem) {
+		case 3:	hash += get16bits (data);
+				hash ^= hash << 16;
+				hash ^= data[sizeof (uint16_t)] << 18; /* NOTE(review): plain char - a byte >= 0x80 sign-extends where char is signed */
+				hash += hash >> 11;
+				break;
+		case 2:	hash += get16bits (data);
+				hash ^= hash << 11;
+				hash += hash >> 17;
+				break;
+		case 1: hash += *data; /* NOTE(review): plain char again, same signedness caveat */
+				hash ^= hash << 10;
+				hash += hash >> 1;
+	}
+
+	/* Force "avalanching" of final 127 bits */
+	hash ^= hash << 3;
+	hash += hash >> 5;
+	hash ^= hash << 4;
+	hash += hash >> 17;
+	hash ^= hash << 25;
+	hash += hash >> 6;
+
+	return hash;
+}
+void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out )
+{
+	uint32_t * result = (uint32_t*)out; // the seed argument is ignored
+	*result = SuperFastHash((const char*)key,len);
+}
\ No newline at end of file
diff --git a/TEA.cpp b/TEA.cpp
new file mode 100644
index 0000000..a84d688
--- /dev/null
+++ b/TEA.cpp
@@ -0,0 +1,52 @@
+#include "TEA.h"
+
+#include <memory.h>
+#include <algorithm>
+
+// The TEA algorithm is public domain
+
+//-----------------------------------------------------------------------------
+
+void TEACipher::setKey ( void * key, int keySize )
+{
+	memset(m_key,0,16); // short keys are zero-padded; bytes past the first 16 are ignored
+	if(key && keySize > 0) memcpy(m_key,key,std::min(keySize,16)); // a negative size would become a huge memcpy length
+}
+
+//----------------------------------------------------------------------------
+
+void TEACipher::encrypt ( void * block, unsigned int /*nonce*/ ) const
+{
+	unsigned int * words = (unsigned int*)block; // two 32-bit halves, updated in place
+	const unsigned int * key = (const unsigned int*)m_key;
+
+	const unsigned int delta = 0x9E3779B9;
+	unsigned int sum = 0;
+
+	for( int round = 0; round < 32; round++ )
+	{
+		sum += delta;
+		words[0] += ((words[1]<<4) + key[0]) ^ (words[1] + sum) ^ ((words[1]>>5) + key[1]);
+		words[1] += ((words[0]<<4) + key[2]) ^ (words[0] + sum) ^ ((words[0]>>5) + key[3]);
+	}
+}
+
+//----------
+
+void TEACipher::decrypt ( void * block, unsigned int /*nonce*/ ) const
+{
+	unsigned int * words = (unsigned int*)block; // two 32-bit halves, updated in place
+	const unsigned int * key = (const unsigned int*)m_key;
+
+	const unsigned int delta = 0x9E3779B9;
+	unsigned int sum = 0xC6EF3720; // delta * 32 (mod 2^32): the value encrypt's sum ends on
+
+	for( int round = 0; round < 32; round++ )
+	{
+		words[1] -= ((words[0]<<4) + key[2]) ^ (words[0] + sum) ^ ((words[0]>>5) + key[3]);
+		words[0] -= ((words[1]<<4) + key[0]) ^ (words[1] + sum) ^ ((words[1]>>5) + key[1]);
+		sum -= delta;
+	}
+}
+
+//----------------------------------------------------------------------------
diff --git a/TEA.h b/TEA.h
new file mode 100644
index 0000000..fff63f1
--- /dev/null
+++ b/TEA.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "BlockCipher.h"
+
+//----------------------------------------------------------------------------
+
+class TEACipher : public BlockCipher
+{
+public:
+	// Block size in bytes.
+	int getBlockSize ( void ) { return 8; }
+	// Stores the key; the definition lives in TEA.cpp.
+	void setKey  ( void * key, int keySize );
+	// Transform one block in place; the TEA.cpp definitions leave the nonce parameter unnamed and unused.
+	void encrypt ( void * block, unsigned int nonce ) const;
+	void decrypt ( void * block, unsigned int nonce ) const;
+
+protected:
+	// Key material held as four 32-bit words (16 bytes).
+	uint32_t m_key[4];
+};
+
+//----------------------------------------------------------------------------
diff --git a/Tests.cpp b/Tests.cpp
new file mode 100644
index 0000000..22bea53
--- /dev/null
+++ b/Tests.cpp
@@ -0,0 +1,542 @@
+#include "Tests.h"
+
+#include "Stats.h"  // for CountCollisions
+#include "Random.h"
+#include "Bitvec.h"
+
+#include <string.h>
+#include <math.h>
+#include <set>
+#include <vector>
+#include <intrin.h>
+
+#pragma warning(disable:4127) // warning C4127: conditional expression is constant
+
+#pragma warning(disable:4985) // warning C4985: 'ceil': attributes not present on previous declaration.
+
+//-----------------------------------------------------------------------------
+// 256k blocks seem to give the best results.
+
+void BulkSpeedTest ( pfHash hash )
+{
+	const int trials = 9999;
+	const int blocksize = 256 * 1024;
+
+	printf("Bulk speed test - %d-byte keys\n",blocksize);
+
+	// 16 spare bytes so the key can start at offsets 0..3 inside the buffer.
+	char * buffer = new char[blocksize + 16];
+
+	rand_p(buffer,blocksize+16);
+
+	uint32_t digest[16];
+
+	for(int align = 0; align < 4; align++)
+	{
+		// Best (highest) bytes-per-cycle figure over all trials at this alignment.
+		double bestbpc = 0;
+
+		for(int itrial = 0; itrial < trials; itrial++)
+		{
+			// Time a single hash call with the CPU timestamp counter.
+			const __int64 begin = __rdtsc();
+
+			hash(buffer + align,blocksize,itrial,digest);
+
+			const __int64 end = __rdtsc();
+
+			blackhole(digest[0]);
+
+			const double bpc = double(blocksize) / double(end-begin);
+			if(bpc > bestbpc) bestbpc = bpc;
+		}
+
+		// Convert to MiB/sec assuming a 3 GHz clock.
+		printf("Alignment %2d - %8.4f bytes/cycle, (%.2f MiB/sec @ 3 ghz)\n",align,bestbpc,bestbpc * 3000000000.0 / 1048576.0);
+	}
+	printf("\n");
+
+	delete [] buffer;
+}
+
+//----------------------------------------------------------------------------
+// Tests the Bit Independence Criteron. Not sure if this still works.
+
+template< int _bits, int _reps, class hashtype >
+void BicTest ( hashfunc<uint32_t> hash )
+{
+	const int bits = _bits;
+	const int reps = _reps;
+	const int bytes = bits / 8;
+	const int hashbytes = sizeof(hashtype);
+	const int hashbits = hashbytes * 8;
+	// counts[in][out1][out2]: how often flipping input bit 'in' changed exactly one of output bits out1/out2.
+	int counts[bits][hashbits][hashbits];
+
+	memset(counts,0,sizeof(counts));
+
+	unsigned char k[bytes];
+
+	for(int irep = 0; irep < reps; irep++)
+	{
+		if(irep % 1000 == 0) printf(".");
+		// Hash a fresh random key, then re-hash it with each input bit flipped in turn.
+		rand_p(k,bytes);
+		unsigned int h1 = hash(k,bytes,0);
+
+		for(int in = 0; in < bits; in++)
+		{
+			flipbit(k,in);
+			unsigned int h2 = hash(k,bytes,0);
+			flipbit(k,in);
+			// Set bits in h are the output bits that changed for this input flip.
+			unsigned int h = h1 ^ h2;
+
+			for(int out1 = 0; out1 < hashbits; out1++)
+			for(int out2 = out1; out2 < hashbits; out2++)
+			{
+				int b1 = (h >> out1) & 1;
+				int b2 = (h >> out2) & 1;
+
+				int b = b1 ^ b2;
+
+				if(b1 ^ b2)
+				{
+					counts[in][out1][out2]++;
+				}
+			}
+		}
+	}
+
+	printf("\n");
+	// Tally of table cells by bias bucket: 0%, 1-4%, 5-32%, 33% and up.
+	int biases[4] = {0,0,0,0};
+
+	for(int i = 0; i < hashbits; i++)
+	{
+		for(int j = 0; j < hashbits; j++)
+		{
+			if(i == j)
+			{
+				printf("\\");
+			}
+			else if(i > j)
+			{
+				printf(" ");
+			}
+			else
+			{
+				double d = double(counts[16][i][j]) / double(reps);
+				// Bias as an integer percentage: 0 when the pair differed in exactly half the reps.
+				int b = (int)(fabs(d * 2 - 1) * 100);
+
+				if(b == 0) printf(".");
+				else if(b < 5) printf("o");
+				else if(b < 33) printf("O");
+				else printf("X");
+
+				if(b == 0)      biases[0]++;
+				else if(b < 5)  biases[1]++;
+				else if(b < 33) biases[2]++;
+				else            biases[3]++;
+			}
+		}
+
+		printf("\n");
+	}
+
+	// NOTE(review): the table above only reads counts[16] (input bit 16) - confirm this is intended and that _bits > 16.
+	printf("Bias distribution - %3d : %3d : %3d : %3d\n",biases[0],biases[1],biases[2],biases[3]);
+
+	printf("\n");
+}
+
+//----------------------------------------------------------------------------
+// Bijection test = hash all possible 32-bit keys, see how many 32-bit values
+// are missing. Each missing value indicates a collision.
+
+void BijectionTest ( hashfunc<uint32_t> hash )
+{
+	const int nbytes = 512 * 1024 * 1024; // 2^32 bits: one per possible 32-bit hash value
+
+	unsigned char * block = new unsigned char[nbytes];
+
+	memset(block,0,nbytes);
+
+	printf("Testing bijection for 32-bit keys");
+
+	unsigned int k = 0;
+	// Mark the hash of every key; k wraps back to 0 after 0xFFFFFFFF, ending the loop.
+	do
+	{
+		unsigned int h = hash(&k,4,0);
+		setbit(block,nbytes,h);
+
+		if(k % 100000000 == 0) printf(".");
+
+		k++;
+	}
+	while(k != 0);
+
+	// Up to 2^32-1 values can be missing, which does not fit in a signed int.
+	unsigned int missing = 0;
+
+	do
+	{
+		if(!getbit(block,nbytes,k)) missing++;
+		k++;
+	}
+	while(k != 0);
+
+	printf("\nMissing values - %u\n",missing);
+
+	delete [] block;
+}
+
+//----------------------------------------------------------------------------
+// Loop counting
+
+// Repeatedly hash the same 4-byte value over and over, and track how many
+// loops are in the output. 
+
+void LoopTest ( hashfunc<uint32_t> hash )
+{
+	const int nbytes = 512 * 1024 * 1024; // one "visited" bit per 32-bit value
+
+	unsigned char * block = new unsigned char[nbytes];
+
+	memset(block,0,nbytes);
+
+	int loops = 0;
+	unsigned int start = 0;
+
+	while(1)
+	{
+		if(!getbit(block,nbytes,start))
+		{
+			unsigned int r = 0;
+			unsigned int h = start;
+
+			printf("Testing loop starting at %u",start);
+			while(1)
+			{
+				setbit(block,nbytes,h);
+				r++;
+				h = hash(&h,4,0);
+				// Stop on any already-visited value. For a bijective hash that can only be
+				// 'start'; otherwise waiting for 'start' to reappear would never terminate.
+				if(getbit(block,nbytes,h)) break;
+
+				if(r % 100000000 == 0) printf(".");
+			}
+
+			// Only a walk that closes on its own start point counts as a loop.
+			if(h == start) loops++;
+			printf("\nStart point %u %s after %u\n",start,(h == start) ? "looped" : "merged into an earlier path",r);
+		}
+
+		start++;
+		if(start == 0) break;
+	}
+
+	printf("Total loops - %d\n",loops);
+
+	delete [] block;
+}
+
+//-----------------------------------------------------------------------------
+// Trickle test, not really usable by itself. Used for testing the diffusion
+// properties of a sequence of adds, xors, rotates, etc - replace the adds and
+// xors with ors, compute how many iterations it takes to convert an input with
+// a single 1 bit into all 1s.
+
+// I was using it to find optimal rotation constants for various types of 
+// diffusion functions, but didn't end up using them
+
+int trickle ( int r[4], int reps )
+{
+	int worst = 1000;
+
+	for(int i = 0; i < 128; i++)
+	{
+		uint32_t t[4] = { 0, 0, 0, 0 };
+
+		// The length is in bytes (cf. setbit(block,nbytes,h) in BijectionTest); t holds
+		// 16 bytes, so passing 4 would describe only the first 32 of the 128 bits.
+		setbit(t,(int)sizeof(t),i);
+
+		for(int j = 0; j < reps; j++)
+		{
+			t[0] |= t[1];
+			t[2] |= t[3];
+			t[1] |= t[0];
+			t[0] |= t[2];
+			t[2] |= t[1];
+			t[1] |= t[0];
+			t[3] |= t[0];
+
+			t[0] = _rotl(t[0],r[0]); 
+			t[1] = _rotl(t[1],r[1]);
+			t[2] = _rotl(t[2],r[2]); 
+			t[3] = _rotl(t[3],r[3]);
+		}
+
+		int p = popcount(t[0]) + popcount(t[1]) +  popcount(t[2]) + popcount(t[3]);
+		if(p < worst) worst = p;
+	}
+
+	return worst;
+}
+
+void TrickleTest ( void )
+{
+	// Searches the rotation-constant combinations and prints every one whose
+	// trickle() result after 6 rounds ties or beats the best seen so far.
+	int best = 0;
+
+	// Enumerate all 2^20 combinations of four 5-bit rotation amounts.
+	for(int combo = 0; combo < 1024*1024; combo++)
+	{
+		int r[4];
+
+		for(int lane = 0; lane < 4; lane++)
+		{
+			r[lane] = (combo >> (lane * 5)) & 31;
+		}
+
+		const int worst = trickle(r,6);
+
+		// Anything below the current best is skipped silently.
+		if(worst < best) continue;
+
+		best = worst;
+
+		printf("\t{");
+
+		for(int lane = 0; lane < 4; lane++)
+		{
+			printf("%2d, ",r[lane]);
+		}
+
+		printf("}, // %3d\n",worst);
+	}
+}
+
+//----------------------------------------------------------------------------
+// Alignment of the keys should not affect the hash value - if it does,
+// something is broken.
+
+void AlignmentTest ( pfHash hash, const int hashbits )
+{
+	const int hashbytes = hashbits / 8;
+
+	printf("Testing alignment handling on small keys..........");
+
+	char bufs[16][64];
+	char * strings[16];
+
+	for(int i = 0; i < 16; i++)
+	{
+		// Round the buffer address up to a 16-byte boundary, then offset by i so
+		// that string i starts at alignment i. The address is held in a size_t:
+		// a uint32_t would truncate pointers on 64-bit targets.
+		size_t b = (size_t)&bufs[i][0];
+		b = (b+15)&(~size_t(15));
+		strings[i] = (char*)(b + i);
+
+		strcpy_s(strings[i],32,"DeadBeefDeadBeef");
+	}
+
+	uint32_t hash1[64];
+	uint32_t hash2[64];
+
+	for(int k = 1; k <= 16; k++)
+	for(int j = 0; j < 15; j++)
+	for(int i = j+1; i < 16; i++)
+	{
+		const char * s1 = strings[i];
+		const char * s2 = strings[j];
+
+		hash(s1,k,0,hash1);
+		hash(s2,k,0,hash2);
+
+		if(memcmp(hash1,hash2,hashbytes) != 0)
+		{
+			printf("*********FAIL*********\n");
+			return;
+		}
+	}
+
+	printf("PASS\n");
+}
+
+//----------------------------------------------------------------------------
+// Appending zero bytes to a key should always cause it to produce a different
+// hash value
+
+void AppendedZeroesTest ( pfHash hash, const int hashbits )
+{
+	const int hashbytes = hashbits/8;
+
+	printf("Testing zero-appending..........");
+
+	for(int rep = 0; rep < 100; rep++)
+	{
+		if(rep % 10 == 0) printf(".");
+
+		// 32 random bytes followed by zero padding.
+		unsigned char key[256];
+		memset(key,0,sizeof(key));
+		rand_p(key,32);
+
+		// 'previous' starts as all zeroes, so the first length is compared
+		// against a zero hash rather than against another key's hash.
+		uint32_t current[16];
+		uint32_t previous[16];
+		memset(current,0,hashbytes);
+		memset(previous,0,hashbytes);
+
+		for(int extra = 0; extra < 32; extra++)
+		{
+			hash(key,32+extra,0,current);
+
+			if(memcmp(current,previous,hashbytes) == 0)
+			{
+				printf("\n*********FAIL*********\n");
+				return;
+			}
+
+			memcpy(previous,current,hashbytes);
+		}
+	}
+
+	printf("\nPASS\n");
+}
+
+//----------------------------------------------------------------------------
+// Flipping a bit of a key should, with very high probability, result in a 
+// different hash.
+
+bool TwiddleTest ( pfHash hash, const int hashbits )
+{
+	// For key lengths of 1..15 bytes, flip each bit of a random key and check
+	// that the first hashbits/8 bytes of the output change. One unchanged hash
+	// is enough to fail the whole test, but all cases are still run.
+	const int hashbytes = hashbits/8;
+
+	printf("Testing bit twiddling..........");
+
+	uint8_t key[256];
+	uint32_t before[16];
+	uint32_t after[16];
+
+	// Stays true only while every single-bit flip so far has changed the
+	// hash output.
+	bool allChanged = true;
+
+	for(int len = 1; len < 16; len++)
+	{
+		const int keybits = len * 8;
+
+		for(int bit = 0; bit < keybits; bit++)
+		{
+			// Fresh random key for every flipped bit.
+			rand_p(key,len);
+
+			hash(key,len,0,before);
+			flipbit(key,len,bit);
+			hash(key,len,0,after);
+
+			const bool unchanged = (memcmp(before,after,hashbytes) == 0);
+
+			if(unchanged) allChanged = false;
+		}
+	}
+
+	if(allChanged)
+		printf("PASS\n");
+	else
+		printf("*********FAIL*********\n");
+
+	return allChanged;
+}
+
+//-----------------------------------------------------------------------------
+// Create a bunch of zero-byte keys of different lengths and check distribution.
+
+// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
+// The distribution table will have some random 1s in the bottom rows due to
+// there not being enough keys for a good test.
+
+void NullKeysetTest ( hashfunc<uint32_t> hash, bool drawDiagram )
+{
+	const int keycount = 64*1024;
+
+	// A single all-zero buffer serves every key; key i is its first i bytes.
+	unsigned char * zeroes = new unsigned char[keycount];
+	memset(zeroes,0,keycount);
+
+	// One hash per key length 0..keycount-1.
+	std::vector<uint32_t> hashes;
+	hashes.resize(keycount);
+
+	printf("Collision test - Hash keyset 100 times, count collisions");
+
+	// Progress dot every tenth of the keyset.
+	const int dotInterval = keycount/10;
+
+	for(int len = 0; len < keycount; len++)
+	{
+		if(len % dotInterval == 0) printf(".");
+
+		hashes[len] = hash(zeroes,len,0);
+	}
+
+	// Collision and distribution checks are both enabled.
+	testhashlist(hashes,true,true,drawDiagram);
+
+	delete [] zeroes;
+}
+
+//-----------------------------------------------------------------------------
+// Simple collections of alphanumeric strings
+
+template < typename hashtype >
+void TextKeyTest2 ( hashfunc<hashtype> hash, bool drawDiagram )
+{
+	const char * s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456790123456789";
+	// NOTE(review): only s[0..63] is ever indexed, and the first digit run reads "012345679" (no '8') - confirm the alphabet is intended.
+	const int nbytes = 7;
+	const int nbits = 56;
+	const int nkeys = 1024*1024;
+
+	// The format string has a single %d, so only nkeys is passed.
+	printf("Testing %d 7-character text keys - \n",nkeys);
+	//----------
+	// generate keyset
+
+	typedef Blob<nbits> keytype;
+
+	std::vector<keytype> keys;
+	keys.reserve(nkeys);
+
+	keytype key;
+
+	uint8_t * bytes = (uint8_t*)&key;
+
+	for(int i = 0; i < nkeys; i++)
+	{
+		for(int j = 0; j < nbytes; j++)
+		{
+			// Character j is selected by the six bits of i starting at bit 3*j.
+			int d = i >> (j * 3);
+			bytes[j] = s[d % 64];
+		}
+
+		keys.push_back(key);
+	}
+
+	//----------
+
+	testkeylist<keytype,hashtype>(hash,keys,true,true,drawDiagram);
+}
diff --git a/Tests.h b/Tests.h
new file mode 100644
index 0000000..975b454
--- /dev/null
+++ b/Tests.h
@@ -0,0 +1,195 @@
+#pragma once
+
+#include "Types.h"
+#include "Random.h"
+//#include "Stats.h"
+
+//#include <intrin.h>
+
+#include "AvalancheTest.h"
+#include "CycleTest.h"
+#include "DifferentialTest.h"
+#include "DictionaryTest.h"
+
+//-----------------------------------------------------------------------------
+
+template< typename hashtype >
+void QuickBrownFox ( hashfunc<hashtype> hash )
+{
+	// Hash two phrases that differ in a single letter and print both digests in hex.
+	const char * const phrases[2] =
+	{
+		"The quick brown fox jumps over the lazy dog",
+		"The quick brown fox jumps over the lazy cog"
+	};
+	hashtype digests[2];
+
+	for(int n = 0; n < 2; n++) hash(phrases[n],(int)strlen(phrases[n]),0,&digests[n]);
+	printf("Quick Brown Fox -\n");
+	for(int n = 0; n < 2; n++) { printhex32(&digests[n],sizeof(hashtype)); printf("\n"); }
+	printf("\n");
+}
+
+//-----------------------------------------------------------------------------
+
+void BulkSpeedTest ( pfHash hash );
+
+/*
+template < typename hashtype >
+void BulkSpeedTest ( hashfunc<hashtype> hash )
+{
+	BulkSpeedTest(hash,sizeof(hashtype) * 8);
+}
+*/
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype, int keysize >
+void TinySpeedTest ( pfHash hash )
+{
+	const int trials = 100000;
+	// Reports the cheapest of `trials` timed runs, each run being 64 hash calls on one keysize-byte key.
+	printf("Small key speed test - %4d-byte keys - ",keysize);
+
+	uint8_t k[keysize];
+	hashtype h;
+
+	double bestcycles = 1e9;
+
+	for(int itrial = 0; itrial < trials; itrial++)
+	{
+		__int64 begin,end;
+		// key is regenerated outside the timed region (rand_p presumably fills k with random bytes)
+		rand_p(k,keysize);
+
+		begin = __rdtsc();
+		// 64 back-to-back calls; the elapsed tick count is divided by 64 below
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		end = __rdtsc();
+		// hand the result to blackhole() so the hash calls cannot be optimized away
+		blackhole(*(uint32_t*)(&h));
+
+		double cycles = double(end-begin) / 64;
+		if(cycles < bestcycles) bestcycles = cycles;
+	}
+
+	double bestbpc = double(keysize) / bestcycles;
+	printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
+}
+
+//-----------------------------------------------------------------------------
+
+void AlignmentTest ( pfHash hash, const int hashbits ); // declaration only; the definition is not in this header
+
+template < typename hashtype >
+void AlignmentTest ( hashfunc<hashtype> hash )
+{
+	AlignmentTest(hash,sizeof(hashtype) * 8); // forwards to the pfHash overload with the hash width in bits
+}
+
+//-----------------------------------------------------------------------------
+
+void AppendedZeroesTest ( pfHash hash, const int hashbits ); // declaration only; the definition is not in this header
+
+template < typename hashtype >
+void AppendedZeroesTest ( hashfunc<hashtype> hash )
+{
+	AppendedZeroesTest(hash,sizeof(hashtype) * 8); // forwards to the pfHash overload with the hash width in bits
+}
+
+//-----------------------------------------------------------------------------
+
+bool TwiddleTest ( pfHash hash, const int hashbits ); // declaration only; the definition is not in this header
+
+template < typename hashtype >
+bool TwiddleTest ( hashfunc<hashtype> hash )
+{
+	return TwiddleTest(hash,sizeof(hashtype) * 8); // forwards to the pfHash overload and returns its verdict
+}
+
+//-----------------------------------------------------------------------------
+
+template < typename hashtype >
+bool AvalancheTest ( hashfunc<hashtype> hash )
+{
+	// Runs the four-argument avalanche test for key sizes of 32, 40, ... 80
+	// bits. Every size is always run; the verdicts are AND-ed together.
+
+	const int hashbits = int(sizeof(hashtype)) * 8;
+	bool ok = true;
+
+	for(int keybits = 32; keybits <= 80; keybits += 8)
+	{
+		ok &= AvalancheTest(hash,keybits,hashbits,2000000);
+	}
+
+	if(ok) return true;
+	printf("*********FAIL*********\n");
+	return false;
+}
+
+//-----------------------------------------------------------------------------
+
+template < typename hashtype >
+bool SparseKeyTest2 ( hashfunc<hashtype> hash, bool drawDiagram ); // declaration only; not defined in this header
+
+template < typename hashtype >
+bool SparseKeyTest ( hashfunc<hashtype> hash, bool drawDiagram )
+{
+	return SparseKeyTest2<hashtype>(hash,drawDiagram); // pure pass-through; the result is returned unchanged
+}
+
+//-----------------------------------------------------------------------------
+// For a given 20-bit window of a 64-bit key, generate all possible keys with
+// bits set in that window
+
+template < typename hashtype >
+void BitrangeKeysetTest ( hashfunc<hashtype> hash, bool drawDiagram )
+{
+	const int keybits    = 64;
+	const int windowbits = 20;
+	const int keycount   = 1 << windowbits;
+
+	// Slide the window across the key one bit at a time.
+
+	for(int lo = 0; lo + windowbits <= keybits; lo++)
+	{
+		const int hi = lo + windowbits - 1;
+
+		printf("Bitrange keyset (%2d,%2d) - %d keys - ",lo,hi,keycount);
+
+		// Every value of the window, shifted into place; all other key bits
+		// stay zero.
+
+		std::vector<uint64_t> keys;
+		keys.reserve(keycount);
+
+		for(uint64_t v = 0; v < (uint64_t)keycount; v++)
+		{
+			keys.push_back(v << lo);
+		}
+
+		testkeylist<uint64_t,hashtype>(hash,keys,true,true,drawDiagram);
+	}
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Types.cpp b/Types.cpp
new file mode 100644
index 0000000..8ae5185
--- /dev/null
+++ b/Types.cpp
@@ -0,0 +1,17 @@
+#include "Types.h"
+
+// Throw a value in the oubliette to prevent the compiler from optimizing away
+// the code that calculated it
+
+#pragma optimize( "", off )
+
+void blackhole ( uint32_t ) // deliberately empty; built between the #pragma optimize off/on pair
+{
+}
+
+uint32_t whitehole ( void ) // always returns 0; built between the #pragma optimize off/on pair
+{
+	return 0;
+}
+
+#pragma optimize( "", on ) 
diff --git a/Types.h b/Types.h
new file mode 100644
index 0000000..7dd7cda
--- /dev/null
+++ b/Types.h
@@ -0,0 +1,449 @@
+#pragma once
+
+#include "pstdint.h"
+#include "Bitvec.h"
+#include <vector>
+#include <assert.h>
+
+void     blackhole ( uint32_t x );
+uint32_t whitehole ( void );
+
+typedef void (*pfHash) ( const void * blob, int len, uint32_t seed, void * out );
+
+template < typename T >
+void swap ( T & a, T & b ) // exchanges a and b through one temporary copy of a
+{
+	T t = a;
+	a = b;
+	b = t;
+}
+
+//-----------------------------------------------------------------------------
+
+template < class T >
+class hashfunc
+{
+public:
+	// Wraps a raw pfHash and tags it with the hash value type T.
+	hashfunc ( pfHash h ) : m_hash(h)
+	{
+	}
+	// Four-argument form: forwards straight to the wrapped function.
+	inline void operator () ( const void * key, int len, uint32_t seed, uint32_t * out )
+	{
+		m_hash(key,len,seed,out);
+	}
+	// Implicit conversion back to the raw function pointer.
+	inline operator pfHash ( void ) const
+	{
+		return m_hash;
+	}
+	// Three-argument form: hashes into a local T and returns it by value.
+	inline T operator () ( const void * key, int len, uint32_t seed ) 
+	{
+		T result;
+
+		m_hash(key,len,seed,(uint32_t*)&result);
+
+		return result;
+	}
+
+	/*
+	T operator () ( T const & key )
+	{
+		T result;
+
+		m_hash(&key,sizeof(T),0,&result);
+
+		return result;
+	}
+	*/
+
+	pfHash m_hash;
+};
+
+//-----------------------------------------------------------------------------
+
+template < class T >
+class mixfunc
+{
+public:
+	// Callable wrapper around a function pointer that maps a T to a T.
+	typedef T (*pfMix) ( T key );
+
+	mixfunc ( pfMix m ) : m_mix(m)
+	{
+	}
+	// Applies the wrapped function to key and returns its result.
+	T operator () ( T key )
+	{
+		return m_mix(key);
+	}
+
+	pfMix m_mix;
+};
+
+//-----------------------------------------------------------------------------
+
+// Fixed-size bit string of _bits bits, stored as (_bits+7)/8 raw bytes.
+// Shifts are delegated to lshift()/rshift(), which are declared elsewhere.
+template < int _bits >
+class Blob
+{
+public:
+	// Default construction leaves the bytes uninitialized.
+	Blob()
+	{
+	}
+
+	Blob ( int x )
+	{
+		// Copy x's bytes in host order, zero the rest, never write past the array.
+		const uint8_t * src = (const uint8_t*)&x;
+		for(int i = 0; i < nbytes; i++)
+		{
+			bytes[i] = (i < (int)sizeof(int)) ? src[i] : 0;
+		}
+	}
+
+	Blob ( const Blob & k )
+	{
+		for(int i = 0; i < nbytes; i++)
+		{
+			bytes[i] = k.bytes[i];
+		}
+	}
+
+	Blob & operator = ( const Blob & k )
+	{
+		for(int i = 0; i < nbytes; i++)
+		{
+			bytes[i] = k.bytes[i];
+		}
+
+		return *this;
+	}
+
+	// Copies up to nbytes bytes from blob and zero-fills whatever is left.
+	void set ( const void * blob, int len )
+	{
+		const uint8_t * k = (const uint8_t*)blob;
+		if(len < 0) len = 0;
+		if(len > nbytes) len = nbytes;
+
+		for(int i = 0; i < len; i++)
+		{
+			bytes[i] = k[i];
+		}
+
+		for(int i = len; i < nbytes; i++)
+		{
+			bytes[i] = 0;
+		}
+	}
+
+	uint8_t & operator [] ( int i )
+	{
+		return bytes[i];
+	}
+
+	const uint8_t & operator [] ( int i ) const
+	{
+		return bytes[i];
+	}
+
+	//----------
+	// boolean operations
+
+	bool operator < ( const Blob & k ) const
+	{
+		for(int i = 0; i < nbytes; i++)
+		{
+			if(bytes[i] < k.bytes[i]) return true;
+			if(bytes[i] > k.bytes[i]) return false;
+		}
+
+		return false;
+	}
+
+	bool operator == ( const Blob & k ) const
+	{
+		for(int i = 0; i < nbytes; i++)
+		{
+			if(bytes[i] != k.bytes[i]) return false;
+		}
+
+		return true;
+	}
+
+	bool operator != ( const Blob & k ) const
+	{
+		return !(*this == k);
+	}
+
+	//----------
+	// bitwise operations
+
+	Blob operator ^ ( const Blob & k ) const
+	{
+		Blob t;
+		for(int i = 0; i < nbytes; i++)
+		{
+			t.bytes[i] = bytes[i] ^ k.bytes[i];
+		}
+		return t;
+	}
+
+	Blob & operator ^= ( const Blob & k )
+	{
+		for(int i = 0; i < nbytes; i++)
+		{
+			bytes[i] ^= k.bytes[i];
+		}
+
+		return *this;
+	}
+
+	int operator & ( int x )
+	{
+		// Read the leading bytes as a host-order int without running past the array.
+		int v = 0;
+		for(int i = 0; i < nbytes && i < (int)sizeof(int); i++) ((uint8_t*)&v)[i] = bytes[i];
+		return v & x;
+	}
+
+	Blob & operator &= ( const Blob & k )
+	{
+		for(int i = 0; i < nbytes; i++)
+		{
+			bytes[i] &= k.bytes[i];
+		}
+
+		return *this;
+	}
+
+	Blob operator << ( int c )
+	{
+		Blob t = *this;
+		lshift(t.bytes,nbytes,c);
+		return t;
+	}
+
+	Blob operator >> ( int c )
+	{
+		Blob t = *this;
+		rshift(t.bytes,nbytes,c);
+		return t;
+	}
+
+	Blob & operator <<= ( int c )
+	{
+		lshift(bytes,nbytes,c);
+		return *this;
+	}
+
+	Blob & operator >>= ( int c )
+	{
+		rshift(bytes,nbytes,c);
+		return *this;
+	}
+
+	//----------
+	// alignN is 1 when nbytes is a whole multiple of N, otherwise 0
+	enum
+	{
+		nbits = _bits,
+		nbytes = (_bits+7)/8,
+
+		align4  = (nbytes & 3)  ? 0 : 1,
+		align8  = (nbytes & 7)  ? 0 : 1,
+		align16 = (nbytes & 15) ? 0 : 1,
+	};
+
+private:
+
+	uint8_t bytes[nbytes];
+};
+
+typedef Blob<128> u128;
+
+//-----------------------------------------------------------------------------
+
+class VBlob : public std::vector<uint8_t>
+{
+public:
+	// Byte-vector counterpart of Blob: starts as len zero bytes.
+	VBlob( int len ) : std::vector<uint8_t>(len,0)
+	{
+	}
+
+	/*
+	VBlob ( const VBlob & k )
+	{
+		for(size_t i = 0; i < size(); i++)
+		{
+			at(i) = k.at(i);
+		}
+	}
+	*/
+
+	/*
+	VBlob & operator = ( const VBlob & k )
+	{
+		for(size_t i = 0; i < size(); i++)
+		{
+			at(i) = k.at(i);
+		}
+
+		return *this;
+	}
+	*/
+
+	void set ( const void * blob, int len )
+	{
+		assert(size() == (size_t)len);
+
+		const uint8_t * k = (const uint8_t*)blob;
+
+		len = len > (int)size() ? (int)size() : len;
+
+		for(int i = 0; i < len; i++)
+		{
+			at(i) = k[i];
+		}
+
+		for(size_t i = len; i < size(); i++)
+		{
+			at(i) = 0;
+		}
+	}
+
+	//----------
+	// boolean operations
+
+	bool operator < ( const VBlob & k ) const
+	{
+		assert(size() == k.size());
+
+		for(size_t i = 0; i < size(); i++)
+		{
+			if(at(i) < k.at(i)) return true;
+			if(at(i) > k.at(i)) return false;
+		}
+
+		return false;
+	}
+
+	bool operator == ( const VBlob & k ) const
+	{
+		assert(size() == k.size());
+
+		for(size_t i = 0; i < size(); i++)
+		{
+			if(at(i) != k.at(i)) return false;
+		}
+
+		return true;
+	}
+
+	bool operator != ( const VBlob & k ) const
+	{
+		assert(size() == k.size());
+
+		return !(*this == k);
+	}
+
+	//----------
+	// bitwise operations
+
+	VBlob operator ^ ( const VBlob & k ) const
+	{
+		assert(size() == k.size());
+
+		VBlob t((int)k.size());
+
+		for(size_t i = 0; i < size(); i++)
+		{
+			t.at(i) = at(i) ^ k.at(i);
+		}
+
+		return t;
+	}
+
+	VBlob & operator ^= ( const VBlob & k )
+	{
+		assert(size() == k.size());
+
+		for(size_t i = 0; i < size(); i++)
+		{
+			at(i) ^= k.at(i);
+		}
+
+		return *this;
+	}
+
+	VBlob & operator &= ( const VBlob & k )
+	{
+		assert(size() == k.size());
+
+		for(size_t i = 0; i < size(); i++)
+		{
+			at(i) &= k.at(i);
+		}
+
+		return *this;
+	}
+
+	VBlob & operator <<= ( int c )
+	{
+		lshift(&at(0),(int)size(),c);
+		return *this;
+	}
+
+	VBlob & operator >>= ( int c )
+	{
+		rshift(&at(0),(int)size(),c);
+		return *this;
+	}
+};
+
+//-----------------------------------------------------------------------------
+
+/*
+class Blobvec
+{
+public:
+
+	Blobvec ( int stride, int size )
+	{
+		m_data = new uint8_t[stride*size];
+	}
+
+	~Blobvec ( void )
+	{
+		delete [] m_data;
+	}
+
+	int size ( void ) const
+	{
+		return m_size;
+	}
+
+	const void * operator [] ( const int index ) const
+	{
+		return &m_data[index * m_stride];
+	}
+
+	void * operator [] ( const int index )
+	{
+		return &m_data[index * m_stride];
+	}
+
+	int m_stride;
+	int m_size;
+
+	uint8_t * m_data;
+};
+*/
+
+//-----------------------------------------------------------------------------
diff --git a/XTEA.cpp b/XTEA.cpp
new file mode 100644
index 0000000..3ec3591
--- /dev/null
+++ b/XTEA.cpp
@@ -0,0 +1,119 @@
+#include "XTEA.h"
+
+#include <algorithm>
+
+static const int g_rounds = 64;
+
+// The XTEA and BTEA algorithms are public domain
+
+//----------------------------------------------------------------------------
+
+void XTEACipher::setKey ( void * key, int keySize )
+{
+	memset(m_key,0,sizeof(m_key)); // keys shorter than 16 bytes end up zero-padded
+	if(key && keySize > 0) memcpy(m_key,key,std::min(keySize,(int)sizeof(m_key))); // longer keys are truncated
+}
+
+//----------------------------------------------------------------------------
+
+void XTEACipher::encrypt ( void * block, unsigned int nonce ) const
+{
+	uint32_t * v = (uint32_t*)block;
+	uint32_t * k = (uint32_t*)m_key;
+
+	uint32_t delta = 0x9E3779B9;
+	uint32_t sum = 0;
+	// Encrypts the two 32-bit words at block in place; the nonce is XORed into v[0] first.
+	v[0] ^= nonce;
+	// Each of the g_rounds iterations updates v[0], advances sum by delta, then updates v[1].
+	for(int i = 0; i < g_rounds; i++)
+	{
+		v[0] += (((v[1] << 4) ^ (v[1] >> 5)) + v[1]) ^ (sum + k[sum & 3]);
+	    
+		sum += delta;
+	    
+		v[1] += (((v[0] << 4) ^ (v[0] >> 5)) + v[0]) ^ (sum + k[(sum>>11) & 3]);
+	}
+}
+
+//----------
+
+void XTEACipher::decrypt ( void * block, unsigned int nonce ) const
+{
+	uint32_t * v = (uint32_t*)block;
+	uint32_t * k = (uint32_t*)m_key;
+
+	uint32_t delta = 0x9E3779B9;
+	uint32_t sum = delta * g_rounds;
+	// Undoes encrypt(): same steps in reverse order, with sum counting down from delta * g_rounds.
+	for(int i = 0; i < g_rounds; i++)
+	{
+		v[1] -= (((v[0] << 4) ^ (v[0] >> 5)) + v[0]) ^ (sum + k[(sum>>11) & 3]);
+	    
+		sum -= delta;
+	    
+		v[0] -= (((v[1] << 4) ^ (v[1] >> 5)) + v[1]) ^ (sum + k[sum & 3]);
+	}
+	// finally remove the nonce that encrypt() XORed into v[0]
+	v[0] ^= nonce;
+}
+
+//----------------------------------------------------------------------------
+
+#define DELTA 0x9e3779b9
+#define MX (((z>>5^y<<2) + (y>>3^z<<4)) ^ ((sum^y) + (k[(p&3)^e] ^ z)))
+
+// Block TEA encryption of the n words at v, in place, with a 4-word key (MX matches the XXTEA form).
+void btea ( unsigned int *v, int n, unsigned int const k[4])
+{
+	if(n < 1) return; // nothing to encrypt; also avoids 52/0 and v[-1] below
+	const int rounds = 6 + (52/n);
+	unsigned int sum = 0;
+	unsigned int y = 0;
+	unsigned int z = v[n-1];
+
+	for(int round = 0; round < rounds; round++)
+	{
+		sum += DELTA;
+		unsigned int e = (sum >> 2) & 3;
+
+		int p; // MX reads p; the last word below uses the value the loop ends on
+		for( p=0; p < n-1; p++ )
+		{
+			y = v[p+1];
+			z = v[p] += MX;
+		}
+
+		y = v[0];
+		z = v[n-1] += MX;
+	}
+}
+
+// Decrypts the n words at v in place; undoes btea() with the same key for n >= 2.
+void btea_decrypt ( unsigned int *v, int n, unsigned int const k[4])
+{
+	if(n < 1) return; // nothing to decrypt; also avoids 52/0 and v[-1] below
+	const int rounds = 6 + (52/n);
+	unsigned int sum = rounds*DELTA;
+	unsigned int y = v[0];
+	unsigned int z = 0;
+
+	for(int round = 0; round < rounds; round++)
+	{
+		unsigned int e = (sum >> 2) & 3;
+		int p;
+
+		for( p = n-1; p > 0; p-- )
+		{
+			z = v[p-1];
+			y = v[p] -= MX;
+		}
+
+		z = v[n-1];
+		y = v[0] -= MX;
+
+		sum -= DELTA;
+	}
+}
+
+//----------------------------------------------------------------------------
diff --git a/XTEA.h b/XTEA.h
new file mode 100644
index 0000000..770248f
--- /dev/null
+++ b/XTEA.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "BlockCipher.h"
+
+//----------------------------------------------------------------------------
+
+class XTEACipher : public BlockCipher
+{
+public:
+	// 8 = two 32-bit words, the amount encrypt()/decrypt() touch in XTEA.cpp.
+	int getBlockSize ( void ) { return 8; }
+	// Stores up to 16 key bytes; shorter keys are zero-padded (see XTEA.cpp).
+	void setKey  ( void * key, int keySize );
+	// Both transform the block in place; nonce is XORed with the first word.
+	void encrypt ( void * block, unsigned int nonce ) const;
+	void decrypt ( void * block, unsigned int nonce ) const;
+
+protected:
+
+	uint32_t m_key[4];
+};
+
+//----------------------------------------------------------------------------
diff --git a/crc.cpp b/crc.cpp
new file mode 100644
index 0000000..7e65d3a
--- /dev/null
+++ b/crc.cpp
@@ -0,0 +1,101 @@
+/*
+ * This file is derived from crc32.c from the zlib-1.1.3 distribution
+ * by Jean-loup Gailly and Mark Adler.
+ */
+
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-1998 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+typedef unsigned long uint32_t;
+typedef unsigned char uint8_t;
+
+
+/* ========================================================================
+ * Table of CRC-32's of all single-byte values (made by make_crc_table)
+ */
+static const uint32_t crc_table[256] = {
+  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+  0x2d02ef8dL
+};
+
+/* ========================================================================= */
+
+#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
+#define DO2(buf)  DO1(buf); DO1(buf);
+#define DO4(buf)  DO2(buf); DO2(buf);
+#define DO8(buf)  DO4(buf); DO4(buf);
+
+/* ========================================================================= */
+
+void crc32 ( const void * key, int len, uint32_t seed, void * out )
+{
+	// Table-driven CRC-32 of key[0..len), starting from seed ^ 0xffffffff; a non-positive len hashes nothing.
+	// NOTE(review): this file typedefs uint32_t as unsigned long, which is 64 bits on LP64 targets - confirm.
+	const uint8_t * buf = (const uint8_t*)key;
+	uint32_t crc = seed ^ 0xffffffffL;
+
+	while (len >= 8)
+	{
+		DO8(buf);
+		len -= 8;
+	}
+
+	while(len-- > 0)
+	{
+		DO1(buf);
+	}
+	crc ^= 0xffffffffL;
+	*(uint32_t*)out = crc;
+}
diff --git a/crc.h b/crc.h
new file mode 100644
index 0000000..893fa61
--- /dev/null
+++ b/crc.h
@@ -0,0 +1,77 @@
+/**********************************************************************
+ *
+ * Filename:    crc.h
+ * 
+ * Description: A header file describing the various CRC standards.
+ *
+ * Notes:       
+ *
+ * 
+ * Copyright (c) 2000 by Michael Barr.  This software is placed into
+ * the public domain and may be used for any purpose.  However, this
+ * notice must not be changed or removed and no warranty is either
+ * expressed or implied by its publication or distribution.
+ **********************************************************************/
+
+#ifndef _crc_h
+#define _crc_h
+
+
+#define FALSE	0
+#define TRUE	!FALSE
+
+/*
+ * Select the CRC standard from the list that follows.
+ */
+#define CRC32
+
+
+#if defined(CRC_CCITT)
+
+typedef unsigned short  crc;
+
+#define CRC_NAME			"CRC-CCITT"
+#define POLYNOMIAL			0x1021
+#define INITIAL_REMAINDER	0xFFFF
+#define FINAL_XOR_VALUE		0x0000
+#define REFLECT_DATA		FALSE
+#define REFLECT_REMAINDER	FALSE
+#define CHECK_VALUE			0x29B1
+
+#elif defined(CRC16)
+
+typedef unsigned short  crc;
+
+#define CRC_NAME			"CRC-16"
+#define POLYNOMIAL			0x8005
+#define INITIAL_REMAINDER	0x0000
+#define FINAL_XOR_VALUE		0x0000
+#define REFLECT_DATA		TRUE
+#define REFLECT_REMAINDER	TRUE
+#define CHECK_VALUE			0xBB3D
+
+#elif defined(CRC32)
+
+typedef unsigned long  crc;
+
+#define CRC_NAME			"CRC-32"
+#define POLYNOMIAL			0x04C11DB7
+#define INITIAL_REMAINDER	0xFFFFFFFF
+#define FINAL_XOR_VALUE		0xFFFFFFFF
+#define REFLECT_DATA		TRUE
+#define REFLECT_REMAINDER	TRUE
+#define CHECK_VALUE			0xCBF43926
+
+#else
+
+#error "One of CRC_CCITT, CRC16, or CRC32 must be #define'd."
+
+#endif
+
+
+void  crcInit(void);
+crc   crcSlow(unsigned char const message[], int nBytes);
+crc   crcFast(unsigned char const message[], int nBytes);
+
+
+#endif /* _crc_h */
\ No newline at end of file
diff --git a/lookup3.cpp b/lookup3.cpp
new file mode 100644
index 0000000..5dd3a42
--- /dev/null
+++ b/lookup3.cpp
@@ -0,0 +1,72 @@
+// lookup3 by Bob Jenkins, code is public domain.
+
+#include "pstdint.h"
+
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+#define mix(a,b,c) \
+{ \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c,4);  \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
+}
+
+uint32_t lookup3 ( const void * key, int length, uint32_t initval )
+{
+	uint32_t a,b,c;                                          /* internal state */
+	// Hashes key[0..length) to 32 bits; length and initval seed all three state words.
+	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+	const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+
+	/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+	while (length > 12)
+	{
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		mix(a,b,c);
+		length -= 12;
+		k += 3;
+	}
+	// NOTE(review): tail cases load whole words and mask the low-order bytes, so bytes past the key may be read.
+	switch(length)
+	{
+		case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+		case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+		case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+		case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+		case 8 : b+=k[1]; a+=k[0]; break;
+		case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+		case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+		case 5 : b+=k[1]&0xff; a+=k[0]; break;
+		case 4 : a+=k[0]; break;
+		case 3 : a+=k[0]&0xffffff; break;
+		case 2 : a+=k[0]&0xffff; break;
+		case 1 : a+=k[0]&0xff; break;
+		case 0 : { return c; }              /* zero length strings require no mixing */
+	}
+
+	final(a,b,c);
+
+	return c;
+}
+
+void lookup3_test ( const void * key, int len, uint32_t seed, void * out ) // same parameter list as pfHash
+{
+	*(uint32_t*)out = lookup3(key,len,seed); // stores the 32-bit result through out
+}
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..658f668
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,104 @@
+#include <stdio.h>
+
+#include <time.h>
+#include "hashes.h"
+#include "tests.h"
+
+#include <windows.h>
+
+#pragma warning(disable:4702)
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype >
+void test ( hashfunc<hashtype> hash, const char * hashname )
+{
+	printf("Testing %s\n",hashname);
+	// Runs the currently enabled checks on one hash; the stages commented out below are disabled.
+	//const int hbytes = sizeof(hashtype);
+	//const int hbits  = hbytes * 8;
+
+	TwiddleTest(hash);
+	AlignmentTest(hash);
+	AppendedZeroesTest(hash);
+	QuickBrownFox(hash);
+	printf("\n");
+	// speed measurements: bulk first, then fixed key sizes of 4-8 and 256 bytes
+	BulkSpeedTest(hash);
+
+	TinySpeedTest<hashtype,4>(hash);
+	TinySpeedTest<hashtype,5>(hash);
+	TinySpeedTest<hashtype,6>(hash);
+	TinySpeedTest<hashtype,7>(hash);
+	TinySpeedTest<hashtype,8>(hash);
+	TinySpeedTest<hashtype,256>(hash);
+	printf("\n");
+
+	// # of bytes in the cycle must be at least # of bytes in the hash output
+
+	//CycleTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000);
+	//CycleTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000);
+	//CycleTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000);
+	//CycleTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000);
+	//CycleTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000);
+
+	printf("\n");
+
+	/*
+	DiffTest< Blob<64>,  hashtype >(hash,5,1000);
+	DiffTest< Blob<128>, hashtype >(hash,4,1000);
+	DiffTest< Blob<256>, hashtype >(hash,3,1000);
+
+	printf("\n");
+
+	AvalancheTest(hash);
+	*/
+
+	SparseKeyTest(hash,false);
+
+	//DictionaryTest(hash);
+	//BitrangeKeysetTest(hash,false);
+	//TextKeyTest(hash.m_hash);
+}
+
+//-----------------------------------------------------------------------------
+
+void optimize_fmix64 ( void );
+
+int main ( void )
+{
+	// Affinity mask 2 restricts the process to logical CPU 1; total clock() time is printed at exit.
+	SetProcessAffinityMask(GetCurrentProcess(),2);
+	clock_t a = clock();
+
+#if 0
+
+	optimize_fmix64();
+
+	//scratchmain();
+
+#else
+
+	//----------
+
+	//test<uint32_t>  ( md5_32,  "MD5, first 32 bits" );
+	//test<uint32_t>  ( lookup3_test,  "Jenkins lookup3" );
+	//test<uint32_t>  ( SuperFastHash,  "SuperFastHash" );
+	//test<uint32_t>  ( MurmurHash2_test,  "MurmurHash2 32-bit" );
+	//test<uint32_t>  ( MurmurHash2A_test,  "MurmurHash2 32-bit" );
+	//test<uint32_t>  ( FNV,  "FNV 32-bit" );
+	//test<uint32_t>  ( crc32,  "CRC-32" );
+	//test<uint32_t>  ( DoNothingHash,  "MurmurHash3 32-bit" );
+
+	//test<uint32_t>  ( MurmurHash3_x86_32,  "MurmurHash3 32-bit" );
+	test<uint64_t>  ( MurmurHash3_x86_64,  "MurmurHash3 64-bit" );
+	//test<k128> ( MurmurHash3_128, "MurmurHash3 128-bit" );
+
+	//test<uint32_t>  ( MurmurHash3x64_32,  "MurmurHash3 32-bit" );
+
+#endif
+
+	clock_t b = clock();
+
+	printf("time %d\n",(int)(b-a));
+}
\ No newline at end of file
diff --git a/md5.cpp b/md5.cpp
new file mode 100644
index 0000000..57bcbf3
--- /dev/null
+++ b/md5.cpp
@@ -0,0 +1,382 @@
+#include <memory.h>
+#include "Types.h"
+
+// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm"
+
+/**
+ * \brief          MD5 context structure (state carried between md5_starts, md5_update and md5_finish)
+ */
+typedef struct
+{
+    unsigned long total[2];     /*!< bytes processed: [0] low 32 bits, [1] incremented on wraparound of [0] */
+    unsigned long state[4];     /*!< intermediate digest state (the four chaining words) */
+    unsigned char buffer[64];   /*!< partial block waiting for enough input to be processed */
+
+    unsigned char ipad[64];     /*!< HMAC: inner padding (not accessed by the functions defined in this file) */
+    unsigned char opad[64];     /*!< HMAC: outer padding (not accessed by the functions defined in this file) */
+}
+md5_context;
+
+/**
+ * \brief          MD5 context setup
+ *
+ * \param ctx      context to be initialized
+ */
+void md5_starts( md5_context *ctx );
+
+/**
+ * \brief          MD5 process buffer
+ *
+ * \param ctx      MD5 context
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen );
+
+/**
+ * \brief          MD5 final digest
+ *
+ * \param ctx      MD5 context
+ * \param output   MD5 checksum result
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( input buffer )
+ *
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ * \param output   MD5 checksum result
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( file contents )
+ *
+ * \param path     input file name
+ * \param output   MD5 checksum result
+ *
+ * \return         0 if successful, 1 if fopen failed,
+ *                 or 2 if fread failed
+ */
+int md5_file( char *path, unsigned char output[16] );
+
+/**
+ * \brief          MD5 HMAC context setup
+ *
+ * \param ctx      HMAC context to be initialized
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ */
+void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
+
+/**
+ * \brief          MD5 HMAC process buffer
+ *
+ * \param ctx      HMAC context
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ */
+void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
+
+/**
+ * \brief          MD5 HMAC final digest
+ *
+ * \param ctx      HMAC context
+ * \param output   MD5 HMAC checksum result
+ */
+void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
+
+/**
+ * \brief          Output = HMAC-MD5( hmac key, input buffer )
+ *
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ * \param output   HMAC-MD5 result
+ */
+void md5_hmac( unsigned char *key, int keylen,
+               unsigned char *input, int ilen,
+               unsigned char output[16] );
+
+/**
+ * \brief          Checkup routine
+ *
+ * \return         0 if successful, or 1 if the test failed
+ */
+int md5_self_test( int verbose );
+
+/*
+ * 32-bit little-endian load/store: GET_ULONG_LE assembles n from bytes b[i..i+3] (b[i] is the low byte); PUT_ULONG_LE stores the low 32 bits of n into b[i..i+3].
+ */
+#ifndef GET_ULONG_LE
+#define GET_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (n) = ( (unsigned long) (b)[(i)    ]       )        \
+        | ( (unsigned long) (b)[(i) + 1] <<  8 )        \
+        | ( (unsigned long) (b)[(i) + 2] << 16 )        \
+        | ( (unsigned long) (b)[(i) + 3] << 24 );       \
+}
+#endif
+
+#ifndef PUT_ULONG_LE
+#define PUT_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (b)[(i)    ] = (unsigned char) ( (n)       );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n) >> 24 );       \
+}
+#endif
+
+/*
+ * Reset ctx for a new message: byte counter to zero, chaining state to the MD5 initial values.
+ */
+void md5_starts( md5_context *ctx )
+{
+    static const unsigned long initial_state[4] =
+        { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };
+    int word;
+
+    ctx->total[0] = ctx->total[1] = 0;
+    for( word = 0; word < 4; word++ )
+        ctx->state[word] = initial_state[word];
+}
+
+static void md5_process( md5_context *ctx, unsigned char data[64] ) /* fold one 64-byte block into ctx->state */
+{
+    unsigned long X[16], A, B, C, D; /* X: the block as 16 words; A-D: working copy of the state */
+
+    GET_ULONG_LE( X[ 0], data,  0 ); /* decode the block as sixteen little-endian 32-bit words */
+    GET_ULONG_LE( X[ 1], data,  4 );
+    GET_ULONG_LE( X[ 2], data,  8 );
+    GET_ULONG_LE( X[ 3], data, 12 );
+    GET_ULONG_LE( X[ 4], data, 16 );
+    GET_ULONG_LE( X[ 5], data, 20 );
+    GET_ULONG_LE( X[ 6], data, 24 );
+    GET_ULONG_LE( X[ 7], data, 28 );
+    GET_ULONG_LE( X[ 8], data, 32 );
+    GET_ULONG_LE( X[ 9], data, 36 );
+    GET_ULONG_LE( X[10], data, 40 );
+    GET_ULONG_LE( X[11], data, 44 );
+    GET_ULONG_LE( X[12], data, 48 );
+    GET_ULONG_LE( X[13], data, 52 );
+    GET_ULONG_LE( X[14], data, 56 );
+    GET_ULONG_LE( X[15], data, 60 );
+
+#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n))) /* rotate left by n; the mask drops bits above bit 31 before the right shift */
+
+#define P(a,b,c,d,k,s,t)                                \
+{                                                       \
+    a += F(b,c,d) + X[k] + t; a = S(a,s) + b;           \
+}
+
+    A = ctx->state[0]; /* start from the current chaining values */
+    B = ctx->state[1];
+    C = ctx->state[2];
+    D = ctx->state[3];
+
+#define F(x,y,z) (z ^ (x & (y ^ z))) /* round 1: bitwise select, x ? y : z */
+
+    P( A, B, C, D,  0,  7, 0xD76AA478 );
+    P( D, A, B, C,  1, 12, 0xE8C7B756 );
+    P( C, D, A, B,  2, 17, 0x242070DB );
+    P( B, C, D, A,  3, 22, 0xC1BDCEEE );
+    P( A, B, C, D,  4,  7, 0xF57C0FAF );
+    P( D, A, B, C,  5, 12, 0x4787C62A );
+    P( C, D, A, B,  6, 17, 0xA8304613 );
+    P( B, C, D, A,  7, 22, 0xFD469501 );
+    P( A, B, C, D,  8,  7, 0x698098D8 );
+    P( D, A, B, C,  9, 12, 0x8B44F7AF );
+    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
+    P( B, C, D, A, 11, 22, 0x895CD7BE );
+    P( A, B, C, D, 12,  7, 0x6B901122 );
+    P( D, A, B, C, 13, 12, 0xFD987193 );
+    P( C, D, A, B, 14, 17, 0xA679438E );
+    P( B, C, D, A, 15, 22, 0x49B40821 );
+
+#undef F
+
+#define F(x,y,z) (y ^ (z & (x ^ y))) /* round 2: bitwise select, z ? x : y */
+
+    P( A, B, C, D,  1,  5, 0xF61E2562 );
+    P( D, A, B, C,  6,  9, 0xC040B340 );
+    P( C, D, A, B, 11, 14, 0x265E5A51 );
+    P( B, C, D, A,  0, 20, 0xE9B6C7AA );
+    P( A, B, C, D,  5,  5, 0xD62F105D );
+    P( D, A, B, C, 10,  9, 0x02441453 );
+    P( C, D, A, B, 15, 14, 0xD8A1E681 );
+    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );
+    P( A, B, C, D,  9,  5, 0x21E1CDE6 );
+    P( D, A, B, C, 14,  9, 0xC33707D6 );
+    P( C, D, A, B,  3, 14, 0xF4D50D87 );
+    P( B, C, D, A,  8, 20, 0x455A14ED );
+    P( A, B, C, D, 13,  5, 0xA9E3E905 );
+    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );
+    P( C, D, A, B,  7, 14, 0x676F02D9 );
+    P( B, C, D, A, 12, 20, 0x8D2A4C8A );
+
+#undef F
+    
+#define F(x,y,z) (x ^ y ^ z) /* round 3: XOR of all three inputs */
+
+    P( A, B, C, D,  5,  4, 0xFFFA3942 );
+    P( D, A, B, C,  8, 11, 0x8771F681 );
+    P( C, D, A, B, 11, 16, 0x6D9D6122 );
+    P( B, C, D, A, 14, 23, 0xFDE5380C );
+    P( A, B, C, D,  1,  4, 0xA4BEEA44 );
+    P( D, A, B, C,  4, 11, 0x4BDECFA9 );
+    P( C, D, A, B,  7, 16, 0xF6BB4B60 );
+    P( B, C, D, A, 10, 23, 0xBEBFBC70 );
+    P( A, B, C, D, 13,  4, 0x289B7EC6 );
+    P( D, A, B, C,  0, 11, 0xEAA127FA );
+    P( C, D, A, B,  3, 16, 0xD4EF3085 );
+    P( B, C, D, A,  6, 23, 0x04881D05 );
+    P( A, B, C, D,  9,  4, 0xD9D4D039 );
+    P( D, A, B, C, 12, 11, 0xE6DB99E5 );
+    P( C, D, A, B, 15, 16, 0x1FA27CF8 );
+    P( B, C, D, A,  2, 23, 0xC4AC5665 );
+
+#undef F
+
+#define F(x,y,z) (y ^ (x | ~z)) /* round 4 */
+
+    P( A, B, C, D,  0,  6, 0xF4292244 );
+    P( D, A, B, C,  7, 10, 0x432AFF97 );
+    P( C, D, A, B, 14, 15, 0xAB9423A7 );
+    P( B, C, D, A,  5, 21, 0xFC93A039 );
+    P( A, B, C, D, 12,  6, 0x655B59C3 );
+    P( D, A, B, C,  3, 10, 0x8F0CCC92 );
+    P( C, D, A, B, 10, 15, 0xFFEFF47D );
+    P( B, C, D, A,  1, 21, 0x85845DD1 );
+    P( A, B, C, D,  8,  6, 0x6FA87E4F );
+    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
+    P( C, D, A, B,  6, 15, 0xA3014314 );
+    P( B, C, D, A, 13, 21, 0x4E0811A1 );
+    P( A, B, C, D,  4,  6, 0xF7537E82 );
+    P( D, A, B, C, 11, 10, 0xBD3AF235 );
+    P( C, D, A, B,  2, 15, 0x2AD7D2BB );
+    P( B, C, D, A,  9, 21, 0xEB86D391 );
+
+#undef F /* only F is undefined; S and P stay defined past the end of this function */
+
+    ctx->state[0] += A; /* add this block's result into the running state */
+    ctx->state[1] += B;
+    ctx->state[2] += C;
+    ctx->state[3] += D;
+}
+
+/*
+ * Absorb ilen bytes of input: advance the byte counter, compress every
+ * 64-byte block that becomes complete, and keep any remainder in
+ * ctx->buffer for the next call.  A non-positive ilen is a no-op.
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen )
+{
+    unsigned long buffered;   /* bytes already waiting in ctx->buffer */
+    int room;                 /* bytes needed to complete that block  */
+
+    if( ilen <= 0 )
+        return;
+
+    buffered = ctx->total[0] & 0x3F;
+    room = 64 - buffered;
+
+    /* byte count is kept as two 32-bit words; carry into the high word on wraparound */
+    ctx->total[0] = ( ctx->total[0] + ilen ) & 0xFFFFFFFF;
+    if( ctx->total[0] < (unsigned long) ilen )
+        ctx->total[1]++;
+
+    /* top up a partially filled buffer first, if the input can complete it */
+    if( buffered != 0 && ilen >= room )
+    {
+        memcpy( ctx->buffer + buffered, input, room );
+        md5_process( ctx, ctx->buffer );
+        input += room;
+        ilen  -= room;
+        buffered = 0;
+    }
+
+    /* whole blocks are compressed straight from the caller's memory */
+    for( ; ilen >= 64; input += 64, ilen -= 64 )
+    {
+        md5_process( ctx, input );
+    }
+
+    /* stash the tail after whatever is still buffered */
+    if( ilen > 0 )
+    {
+        memcpy( ctx->buffer + buffered, input, ilen );
+    }
+}
+
+static const unsigned char md5_padding[64] =
+{
+    /* First byte 0x80 (a single 1 bit), then zeros: the 63 elements
+     * without an initializer are zero-initialized.  md5_finish feeds
+     * a prefix of this array to md5_update as the message padding. */
+    0x80
+};
+
+/*
+ * Finish the message: append padding and the 64-bit bit length, then write the 16-byte digest to output.
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] )
+{
+    unsigned char bitlen[8];
+    unsigned long bits_lo, bits_hi;
+    unsigned long tail, pad_bytes;
+    int word;
+
+    /* bit count = byte count * 8, split across two 32-bit words */
+    bits_lo = ( ctx->total[0] <<  3 );
+    bits_hi = ( ctx->total[0] >> 29 ) | ( ctx->total[1] <<  3 );
+    PUT_ULONG_LE( bits_lo, bitlen, 0 );
+    PUT_ULONG_LE( bits_hi, bitlen, 4 );
+
+    /* pad so that the 8 length bytes end exactly on a 64-byte boundary */
+    tail = ctx->total[0] & 0x3F;
+    if( tail < 56 ) pad_bytes = 56 - tail;
+    else            pad_bytes = 120 - tail;
+
+    md5_update( ctx, (unsigned char *) md5_padding, pad_bytes );
+    md5_update( ctx, bitlen, 8 );
+
+    for( word = 0; word < 4; word++ )
+        PUT_ULONG_LE( ctx->state[word], output, word * 4 );
+}
+
+/*
+ * One-shot helper: writes the 16-byte MD5 digest of the ilen bytes at input to output.
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] )
+{
+    md5_context scratch;
+    md5_context *hasher = &scratch;
+
+    md5_starts( hasher );
+    md5_update( hasher, input, ilen );
+    md5_finish( hasher, output );
+    memset( hasher, 0, sizeof( *hasher ) );   /* clear the working state before returning */
+}
+
+unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )
+{
+	// Hash the key with MD5 and return only the first unsigned int of the
+	// digest as it is laid out in memory; the seed argument is ignored.
+	unsigned int digest_words[4];
+	unsigned char * digest_bytes = (unsigned char *)digest_words;
+
+	md5((unsigned char *)input,len,digest_bytes);
+	return digest_words[0];
+}
+
+void md5_32            ( const void * key, int len, uint32_t /*seed*/, void * out )
+{
+	// The seed is ignored; stores the first word of the MD5 digest of key at out.
+	unsigned int digest_words[4];
+	uint32_t * result = (uint32_t*)out;
+	md5((unsigned char*)key,len,(unsigned char*)digest_words);
+	*result = digest_words[0];
+}
\ No newline at end of file
diff --git a/pstdint.h b/pstdint.h
new file mode 100644
index 0000000..12c108a
--- /dev/null
+++ b/pstdint.h
@@ -0,0 +1,799 @@
+/*  A portable stdint.h
+ ****************************************************************************
+ *  BSD License:
+ ****************************************************************************
+ *
+ *  Copyright (c) 2005-2007 Paul Hsieh
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************
+ *
+ *  Version 0.1.11
+ *
+ *  The ANSI C standard committee, for the C99 standard, specified the
+ *  inclusion of a new standard include file called stdint.h.  This is
+ *  a very useful and long desired include file which contains several
+ *  very precise definitions for integer scalar types that is
+ *  critically important for making portable several classes of
+ *  applications including cryptography, hashing, variable length
+ *  integer libraries and so on.  But for most developers it's likely
+ *  useful just for programming sanity.
+ *
+ *  The problem is that most compiler vendors have decided not to
+ *  implement the C99 standard, and the next C++ language standard
+ *  (which has a lot more mindshare these days) will be a long time in
+ *  coming and it's unknown whether or not it will include stdint.h or
+ *  how much adoption it will have.  Either way, it will be a long time
+ *  before all compilers come with a stdint.h and it also does nothing
+ *  for the extremely large number of compilers available today which
+ *  do not include this file, or anything comparable to it.
+ *
+ *  So that's what this file is all about.  It's an attempt to build a
+ *  single universal include file that works on as many platforms as
+ *  possible to deliver what stdint.h is supposed to.  A few things
+ *  that should be noted about this file:
+ *
+ *    1) It is not guaranteed to be portable and/or present an identical
+ *       interface on all platforms.  The extreme variability of the
+ *       ANSI C standard makes this an impossibility right from the
+ *       very get go. Its really only meant to be useful for the vast
+ *       majority of platforms that possess the capability of
+ *       implementing usefully and precisely defined, standard sized
+ *       integer scalars.  Systems which are not intrinsically 2s
+ *       complement may produce invalid constants.
+ *
+ *    2) There is an unavoidable use of non-reserved symbols.
+ *
+ *    3) Other standard include files are invoked.
+ *
+ *    4) This file may come in conflict with future platforms that do
+ *       include stdint.h.  The hope is that one or the other can be
+ *       used with no real difference.
+ *
+ *    5) In the current version, if your platform can't represent
+ *       int32_t, int16_t and int8_t, it just dumps out with a compiler
+ *       error.
+ *
+ *    6) 64 bit integers may or may not be defined.  Test for their
+ *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
+ *       Note that this is different from the C99 specification which
+ *       requires the existence of 64 bit support in the compiler.  If
+ *       this is not defined for your platform, yet it is capable of
+ *       dealing with 64 bits then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities.
+ *
+ *    7) (u)intptr_t may or may not be defined.  Test for its presence
+ *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
+ *       for your platform, then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities, not
+ *       because its optional.
+ *
+ *    8) The following might not be defined even if your platform is
+ *       capable of defining it:
+ *
+ *       WCHAR_MIN
+ *       WCHAR_MAX
+ *       (u)int64_t
+ *       PTRDIFF_MIN
+ *       PTRDIFF_MAX
+ *       (u)intptr_t
+ *
+ *    9) The following have not been defined:
+ *
+ *       WINT_MIN
+ *       WINT_MAX
+ *
+ *   10) The criteria for defining (u)int_least(*)_t isn't clear,
+ *       except for systems which don't have a type that precisely
+ *       defined 8, 16, or 32 bit types (which this include file does
+ *       not support anyways). Default definitions have been given.
+ *
+ *   11) The criteria for defining (u)int_fast(*)_t isn't something I
+ *       would trust to any particular compiler vendor or the ANSI C
+ *       committee.  It is well known that "compatible systems" are
+ *       commonly created that have very different performance
+ *       characteristics from the systems they are compatible with,
+ *       especially those whose vendors make both the compiler and the
+ *       system.  Default definitions have been given, but its strongly
+ *       recommended that users never use these definitions for any
+ *       reason (they do *NOT* deliver any serious guarantee of
+ *       improved performance -- not in this file, nor any vendor's
+ *       stdint.h).
+ *
+ *   12) The following macros:
+ *
+ *       PRINTF_INTMAX_MODIFIER
+ *       PRINTF_INT64_MODIFIER
+ *       PRINTF_INT32_MODIFIER
+ *       PRINTF_INT16_MODIFIER
+ *       PRINTF_LEAST64_MODIFIER
+ *       PRINTF_LEAST32_MODIFIER
+ *       PRINTF_LEAST16_MODIFIER
+ *       PRINTF_INTPTR_MODIFIER
+ *
+ *       are strings which have been defined as the modifiers required
+ *       for the "d", "u" and "x" printf formats to correctly output
+ *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
+ *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
+ *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
+ *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
+ *       defined if INT64_MAX is not defined.  These are an extension
+ *       beyond what C99 specifies must be in stdint.h.
+ *
+ *       In addition, the following macros are defined:
+ *
+ *       PRINTF_INTMAX_HEX_WIDTH
+ *       PRINTF_INT64_HEX_WIDTH
+ *       PRINTF_INT32_HEX_WIDTH
+ *       PRINTF_INT16_HEX_WIDTH
+ *       PRINTF_INT8_HEX_WIDTH
+ *       PRINTF_INTMAX_DEC_WIDTH
+ *       PRINTF_INT64_DEC_WIDTH
+ *       PRINTF_INT32_DEC_WIDTH
+ *       PRINTF_INT16_DEC_WIDTH
+ *       PRINTF_INT8_DEC_WIDTH
+ *
+ *       Which specifies the maximum number of characters required to
+ *       print the number of that type in either hexadecimal or decimal.
+ *       These are an extension beyond what C99 specifies must be in
+ *       stdint.h.
+ *
+ *  Compilers tested (all with 0 warnings at their highest respective
+ *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
+ *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
+ *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
+ *
+ *  This file should be considered a work in progress.  Suggestions for
+ *  improvements, especially those which increase coverage are strongly
+ *  encouraged.
+ *
+ *  Acknowledgements
+ *
+ *  The following people have made significant contributions to the
+ *  development and testing of this file:
+ *
+ *  Chris Howie
+ *  John Steele Scott
+ *  Dave Thorup
+ *
+ */
+
+#include <stddef.h>
+#include <limits.h>
+#include <signal.h>
+
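+/*
+ *  When a real stdint.h is available (C99 compiler, Open Watcom, or gcc with _STDINT_H; on the Mac OS X version of gcc this is _STDINT_H_),
+ *  include it, fill in the PRINTF_INT*_MODIFIER and *_WIDTH extension macros, and define _PSTDINT_H_INCLUDED so the fallback definitions below are skipped.
+ */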
+
+#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED)
+#include <stdint.h>
+#define _PSTDINT_H_INCLUDED
+# ifndef PRINTF_INT64_MODIFIER
+#  define PRINTF_INT64_MODIFIER "ll"
+# endif
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+# ifndef PRINTF_INTMAX_MODIFIER
+#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INT64_HEX_WIDTH
+#  define PRINTF_INT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_INT32_HEX_WIDTH
+#  define PRINTF_INT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_INT16_HEX_WIDTH
+#  define PRINTF_INT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_INT8_HEX_WIDTH
+#  define PRINTF_INT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_INT64_DEC_WIDTH
+#  define PRINTF_INT64_DEC_WIDTH "20"
+# endif
+# ifndef PRINTF_INT32_DEC_WIDTH
+#  define PRINTF_INT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_INT16_DEC_WIDTH
+#  define PRINTF_INT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_INT8_DEC_WIDTH
+#  define PRINTF_INT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+
+/*
+ *  Something really weird is going on with Open Watcom.  Just pull some of
+ *  these duplicated definitions from Open Watcom's stdint.h file for now.
+ */
+
+# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
+#  if !defined (INT64_C)
+#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
+#  endif
+#  if !defined (UINT64_C)
+#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
+#  endif
+#  if !defined (INT32_C)
+#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
+#  endif
+#  if !defined (UINT32_C)
+#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
+#  endif
+#  if !defined (INT16_C)
+#   define INT16_C(x)   (x)
+#  endif
+#  if !defined (UINT16_C)
+#   define UINT16_C(x)  (x)
+#  endif
+#  if !defined (INT8_C)
+#   define INT8_C(x)   (x)
+#  endif
+#  if !defined (UINT8_C)
+#   define UINT8_C(x)  (x)
+#  endif
+#  if !defined (UINT64_MAX)
+#   define UINT64_MAX  18446744073709551615ULL
+#  endif
+#  if !defined (INT64_MAX)
+#   define INT64_MAX  9223372036854775807LL
+#  endif
+#  if !defined (UINT32_MAX)
+#   define UINT32_MAX  4294967295UL
+#  endif
+#  if !defined (INT32_MAX)
+#   define INT32_MAX  2147483647L
+#  endif
+#  if !defined (INTMAX_MAX)
+#   define INTMAX_MAX INT64_MAX
+#  endif
+#  if !defined (INTMAX_MIN)
+#   define INTMAX_MIN INT64_MIN
+#  endif
+# endif
+#endif
+
+#ifndef _PSTDINT_H_INCLUDED
+#define _PSTDINT_H_INCLUDED
+
+#ifndef SIZE_MAX
+# define SIZE_MAX (~(size_t)0)
+#endif
+
+/*
+ *  Deduce the type assignments from limits.h under the assumption that
+ *  integer sizes in bits are powers of 2, and follow the ANSI
+ *  definitions.
+ */
+
+#ifndef UINT8_MAX
+# define UINT8_MAX 0xff
+#endif
+#ifndef uint8_t
+# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
+    typedef unsigned char uint8_t;
+#   define UINT8_C(v) ((uint8_t) v)
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef INT8_MAX
+# define INT8_MAX 0x7f
+#endif
+#ifndef INT8_MIN
+# define INT8_MIN INT8_C(0x80)
+#endif
+#ifndef int8_t
+# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
+    typedef signed char int8_t;
+#   define INT8_C(v) ((int8_t) v)
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef UINT16_MAX
+# define UINT16_MAX 0xffff
+#endif
+#ifndef uint16_t
+#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
+  typedef unsigned int uint16_t;
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+# define UINT16_C(v) ((uint16_t) (v))
+#elif (USHRT_MAX == UINT16_MAX)
+  typedef unsigned short uint16_t;
+# define UINT16_C(v) ((uint16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT16_MAX
+# define INT16_MAX 0x7fff
+#endif
+#ifndef INT16_MIN
+# define INT16_MIN INT16_C(0x8000)
+#endif
+#ifndef int16_t
+#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
+  typedef signed int int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT16_MAX)
+  typedef signed short int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef UINT32_MAX
+# define UINT32_MAX (0xffffffffUL)
+#endif
+#ifndef uint32_t
+#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
+  typedef unsigned long uint32_t;
+# define UINT32_C(v) v ## UL
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (UINT_MAX == UINT32_MAX)
+  typedef unsigned int uint32_t;
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+# define UINT32_C(v) v ## U
+#elif (USHRT_MAX == UINT32_MAX)
+  typedef unsigned short uint32_t;
+# define UINT32_C(v) ((unsigned short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT32_MAX
+# define INT32_MAX (0x7fffffffL)
+#endif
+#ifndef INT32_MIN
+# define INT32_MIN (-INT32_MAX - 1) /* not INT32_C(0x80000000): that hex literal is unsigned, i.e. a positive value */
+#endif
+#ifndef int32_t
+#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
+  typedef signed long int32_t;
+# define INT32_C(v) v ## L
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (INT_MAX == INT32_MAX)
+  typedef signed int int32_t;
+# define INT32_C(v) v
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT32_MAX)
+  typedef signed short int32_t;
+# define INT32_C(v) ((short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+/*
+ *  The macro stdint_int64_defined is temporarily used to record
+ *  whether or not 64-bit integer support is available.  It must be
+ *  defined for any 64-bit integer extensions for new platforms that
+ *  are added.  First case: a C99 compiler, which guarantees long long.
+ */
+
+#undef stdint_int64_defined
+#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
+# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# endif
+#endif
+
+#if !defined (stdint_int64_defined)
+# if defined(__GNUC__)
+#  define stdint_int64_defined
+   __extension__ typedef long long int64_t;
+   __extension__ typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
+#  define stdint_int64_defined
+   typedef __int64 int64_t;
+   typedef unsigned __int64 uint64_t;
+#  define UINT64_C(v) v ## UI64
+#  define  INT64_C(v) v ## I64
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "I64"
+#  endif
+# endif
+#endif
+
+#if !defined (LONG_LONG_MAX) && defined (INT64_C)
+# define LONG_LONG_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (ULONG_LONG_MAX) && defined (UINT64_C)
+# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
+#endif
+
+#if !defined (INT64_MAX) && defined (INT64_C)
+# define INT64_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (INT64_MIN) && defined (INT64_C)
+# define INT64_MIN (-INT64_MAX - 1) /* 9223372036854775808 does not fit in a signed 64-bit literal, so it cannot be negated directly */
+#endif
+#if !defined (UINT64_MAX) && defined (UINT64_C)
+# define UINT64_MAX UINT64_C (18446744073709551615)
+#endif
+
+/*
+ *  Width of hexadecimal for number field.
+ */
+
+#ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16"
+#endif
+#ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+#endif
+#ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+#endif
+#ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+#endif
+
+#ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "20"
+#endif
+#ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+#endif
+
+/*
+ *  Ok, let's not worry about 128 bit integers for now.  Moore's law says
+ *  we don't need to worry about that until about 2040 at which point
+ *  we'll have bigger things to worry about.
+ */
+
+#ifdef stdint_int64_defined
+  typedef int64_t intmax_t;
+  typedef uint64_t uintmax_t;
+# define  INTMAX_MAX   INT64_MAX
+# define  INTMAX_MIN   INT64_MIN
+# define UINTMAX_MAX  UINT64_MAX
+# define UINTMAX_C(v) UINT64_C(v)
+# define  INTMAX_C(v)  INT64_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+#else
+  typedef int32_t intmax_t;
+  typedef uint32_t uintmax_t;
+# define  INTMAX_MAX   INT32_MAX
+# define UINTMAX_MAX  UINT32_MAX
+# define UINTMAX_C(v) UINT32_C(v)
+# define  INTMAX_C(v)  INT32_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
+# endif
+#endif
+
+/*
+ *  Because this file currently only supports platforms which have
+ *  precise powers of 2 as bit sizes for the default integers, the
+ *  least definitions are all trivial.  It's possible that a future
+ *  version of this file could have different definitions.
+ */
+
+#ifndef stdint_least_defined /* define the least-width types unless this macro says they already exist */
+  typedef   int8_t   int_least8_t; /* each least-width type is a plain alias of the exact-width type */
+  typedef  uint8_t  uint_least8_t;
+  typedef  int16_t  int_least16_t;
+  typedef uint16_t uint_least16_t;
+  typedef  int32_t  int_least32_t;
+  typedef uint32_t uint_least32_t;
+# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
+# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
+# define  UINT_LEAST8_MAX  UINT8_MAX /* limits are forwarded from the exact-width limits */
+# define   INT_LEAST8_MAX   INT8_MAX
+# define UINT_LEAST16_MAX UINT16_MAX
+# define  INT_LEAST16_MAX  INT16_MAX
+# define UINT_LEAST32_MAX UINT32_MAX
+# define  INT_LEAST32_MAX  INT32_MAX
+# define   INT_LEAST8_MIN   INT8_MIN
+# define  INT_LEAST16_MIN  INT16_MIN
+# define  INT_LEAST32_MIN  INT32_MIN
+# ifdef stdint_int64_defined /* 64-bit variants only when a 64-bit type is available */
+    typedef  int64_t  int_least64_t;
+    typedef uint64_t uint_least64_t;
+#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
+#   define UINT_LEAST64_MAX UINT64_MAX
+#   define  INT_LEAST64_MAX  INT64_MAX
+#   define  INT_LEAST64_MIN  INT64_MIN
+# endif
+#endif /* !stdint_least_defined */
+#undef stdint_least_defined /* internal bookkeeping macro: not left defined for includers */
+
+/*
+ *  The ANSI C committee pretending to know or specify anything about
+ *  performance is the epitome of misguided arrogance.  The mandate of
+ *  this file is to *ONLY* ever support that absolute minimum
+ *  definition of the fast integer types, for compatibility purposes.
+ *  No extensions, and no attempt to suggest what may or may not be a
+ *  faster integer type will ever be made in this file.  Developers are
+ *  warned to stay away from these types when using this or any other
+ *  stdint.h.
+ */
+
+typedef   int_least8_t   int_fast8_t; /* every fast type is an alias of the least type of the same width */
+typedef  uint_least8_t  uint_fast8_t;
+typedef  int_least16_t  int_fast16_t;
+typedef uint_least16_t uint_fast16_t;
+typedef  int_least32_t  int_fast32_t;
+typedef uint_least32_t uint_fast32_t;
+#define  UINT_FAST8_MAX  UINT_LEAST8_MAX /* limits are forwarded from the least-width limits */
+#define   INT_FAST8_MAX   INT_LEAST8_MAX
+#define UINT_FAST16_MAX UINT_LEAST16_MAX
+#define  INT_FAST16_MAX  INT_LEAST16_MAX
+#define UINT_FAST32_MAX UINT_LEAST32_MAX
+#define  INT_FAST32_MAX  INT_LEAST32_MAX
+#define   INT_FAST8_MIN   INT_LEAST8_MIN
+#define  INT_FAST16_MIN  INT_LEAST16_MIN
+#define  INT_FAST32_MIN  INT_LEAST32_MIN
+#ifdef stdint_int64_defined /* 64-bit variants only when a 64-bit type is available */
+  typedef  int_least64_t  int_fast64_t;
+  typedef uint_least64_t uint_fast64_t;
+# define UINT_FAST64_MAX UINT_LEAST64_MAX
+# define  INT_FAST64_MAX  INT_LEAST64_MAX
+# define  INT_FAST64_MIN  INT_LEAST64_MIN
+#endif
+
+#undef stdint_int64_defined /* last use of the internal 64-bit marker; not left defined for includers */
+
+/*
+ *  Whatever piecemeal, per compiler thing we can do about the wchar_t
+ *  type limits.
+ */
+
+#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) /* only attempted for these compilers */
+# include <wchar.h>
+# ifndef WCHAR_MIN /* fallback used only when <wchar.h> did not supply the limit */
+#  define WCHAR_MIN 0
+# endif
+# ifndef WCHAR_MAX /* NOTE(review): 0 / (wchar_t)-1 are the right limits only if wchar_t is unsigned */
+#  define WCHAR_MAX ((wchar_t)-1)
+# endif
+#endif
+
+/*
+ *  Whatever piecemeal, per compiler/platform thing we can do about the
+ *  (u)intptr_t types and limits.
+ */
+
+#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED) /* presumably: MSVC headers already declared uintptr_t */
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+#ifndef STDINT_H_UINTPTR_T_DEFINED
+# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
+#  define stdint_intptr_bits 64
+# elif defined (__WATCOMC__) || defined (__TURBOC__)
+#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) /* presumably the 16-bit memory models */
+#    define stdint_intptr_bits 16
+#  else
+#    define stdint_intptr_bits 32
+#  endif
+# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
+#  define stdint_intptr_bits 32
+# elif defined (__INTEL_COMPILER)
+/* TODO -- what will Intel do about x86-64? */
+# endif
+
+# ifdef stdint_intptr_bits /* width known: derive every pointer-sized name from the exact-width ones */
+#  define stdint_intptr_glue3_i(a,b,c)  a##b##c
+#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c) /* extra level so stdint_intptr_bits expands before pasting */
+#  ifndef PRINTF_INTPTR_MODIFIER
+#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
+#  endif
+#  ifndef PTRDIFF_MAX
+#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef PTRDIFF_MIN
+#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef UINTPTR_MAX
+#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MAX
+#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MIN
+#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef INTPTR_C
+#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
+#  endif
+#  ifndef UINTPTR_C
+#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
+#  endif
+  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
+  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
+# else
+/* TODO -- The following is likely wrong for some platforms, and does
+   nothing for the definition of uintptr_t. */
+  typedef ptrdiff_t intptr_t;
+# endif
+# define STDINT_H_UINTPTR_T_DEFINED /* set on every path, including the fallback that defines no uintptr_t */
+#endif
+
+/*
+ *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
+ *  Built as (2^(N-2) - 1) * 2 + 1: shifting a 1 into the sign bit is undefined.
+ */
+#ifndef SIG_ATOMIC_MAX
+# define SIG_ATOMIC_MAX ((((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-2)) - 1) * 2) + 1)
+#endif
+
+#endif
+
+#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
+
+/* 
+ *  Please compile with the maximum warning settings to make sure macros are not
+ *  defined more than once.
+ */
+ 
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+ 
+#define glue3_aux(x,y,z) x ## y ## z
+#define glue3(x,y,z) glue3_aux(x,y,z) /* extra level so the arguments are macro-expanded before pasting */
+/* DECLU(bits) / DECLI(bits): declare a zero-initialised variable uN / iN of the exact-width type. */
+#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0);
+#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0);
+/* DECL(us,bits): dispatch to DECLU or DECLI; us is U or I. */
+#define DECL(us,bits) glue3(DECL,us,) (bits)
+/* TESTUMAX(bits): set uN to all ones and compare with UINTN_MAX.  Operators stay outside glue3: pasting one onto an identifier is not a valid token. */
+#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits)
+ 
+int main () { /* self-test: prints a "Something wrong" line per failed check; always returns EXIT_SUCCESS */
+	DECL(I,8) /* declares zero-initialised i8, u8, i16, ... one variable per exact-width type */
+	DECL(U,8)
+	DECL(I,16)
+	DECL(U,16)
+	DECL(I,32)
+	DECL(U,32)
+#ifdef INT64_MAX
+	DECL(I,64)
+	DECL(U,64)
+#endif
+	intmax_t imax = INTMAX_C(0);
+	uintmax_t umax = UINTMAX_C(0);
+	char str0[256], str1[256];
+
+	sprintf (str0, "%d %x\n", 0, ~0); /* reference text, produced from plain int arguments */
+	/* Every zero-valued variable below, printed with its own format, must reproduce str0. */
+	sprintf (str1, "%d %x\n",  i8, ~0); /* trailing ~0: presumably so a wrong length modifier garbles the %x field */
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
+	sprintf (str1, "%u %x\n",  u8, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
+	sprintf (str1, "%d %x\n",  i16, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
+	sprintf (str1, "%u %x\n",  u16, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);	
+	sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
+	sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
+#ifdef INT64_MAX	
+	sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0); /* only i64 is printed; u64 is used by TESTUMAX(64) alone */
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
+#endif
+	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
+	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
+	if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);	
+	/* Check each UINTn_MAX against an all-ones value of the same width. */
+	TESTUMAX(8);
+	TESTUMAX(16);
+	TESTUMAX(32);
+#ifdef INT64_MAX
+	TESTUMAX(64);
+#endif
+
+	return EXIT_SUCCESS;
+}
+
+#endif
diff --git a/scratch.cpp b/scratch.cpp
new file mode 100644
index 0000000..0043966
--- /dev/null
+++ b/scratch.cpp
@@ -0,0 +1,823 @@
+#include <stdio.h>
+#include <tchar.h>
+
+#include "Types.h"
+#include "Stats.h"
+#include "Tests.h"
+#include "Hamming.h"
+#include "Junk.h"
+#include "SimAnneal.h"
+
+#include <vector>
+#include <set>
+#include <map>
+#include <math.h>
+#include <intrin.h>
+
+#pragma warning(disable : 4702)
+
+//-----------------------------------------------------------------------------
+// Prints the first nbits/8 bytes of a key as "{0x..,0x..,};" followed by a newline.
+template < int nbits > 
+void printkey ( Blob<nbits> & k )
+{
+	int nbytes = nbits/8; // integer division: bits beyond a whole byte are not printed
+
+	printf("{");
+
+	uint8_t * d = (uint8_t*)&k; // view the object's storage as raw bytes
+
+	for(int i = 0; i < nbytes; i++)
+	{
+		printf("0x%02x,",d[i]); // every byte, including the last, is followed by a comma
+	}
+	printf("};\n");
+}
+
+
+//-----------------------------------------------------------------------------
+// Test code for Murmur3's mix function
+
+/*
+uint32_t i1 = 0x95543787;
+uint32_t i2 = 0x2ad7eb25;
+
+uint32_t m1 = 9;
+uint32_t a1 = 0x273581d8;
+
+uint32_t m2 = 5;
+uint32_t a2 = 0xee700bac;
+
+uint32_t m3 = 3;
+uint32_t a3 = 0xa6b84e31;
+
+uint32_t r1 = 5;
+
+int stage = 0;
+
+uint32_t m3mix ( uint32_t k )
+{
+	//return rand_u32();
+
+	uint32_t h = 0x971e137b;
+	uint32_t c1 = i1;
+	uint32_t c2 = i2;
+
+	for(int i = 0; i < stage; i++)
+	{
+		h   = h*m3+a3;
+		c1  = c1*m1+a1;
+		c2  = c2*m2+a2;
+	}
+
+	k  *= c1;
+	k   = _rotl(k,r1);
+	h   = h*m3+a3;
+	k  *= c2;
+	c1  = c1*m1+a1;
+	c2  = c2*m2+a2;
+	h  ^= k;
+
+	return h;
+}
+*/
+
+/*
+uint32_t m1 = 0x85ebca6b;
+uint32_t m2 = 0xc2b2ae35;
+uint32_t m3 = 0x893ed583;
+
+int s1 = 16;
+int s2 = 13;
+int s3 = 16;
+
+uint32_t fmix ( uint32_t k )
+{
+	return rand_u32();
+
+	k ^= k >> 16;
+	k *= 0x85ebca6b;
+	k ^= k >> 13;
+	k *= 0xc2b2ae35;
+	k ^= k >> 16;
+
+	return k;
+}
+*/
+
+//-----------------------------------------------------------------------------
+
+/*
+struct mixconfig
+{
+	uint32_t m1;
+	uint32_t m2;
+};
+
+mixconfig mc = 
+{
+	0x010d5a2d,
+	0xd3636b39,
+};
+
+uint32_t fmix32 ( uint32_t k )
+{
+	//return rand_u32();
+
+	k ^= k >> 16;
+	k *= mc.m1;
+	k ^= k >> 16;
+	k *= mc.m2;
+	k ^= k >> 16;
+
+	return k;
+}
+
+double mixfit ( void * block, int )
+{
+	mixconfig * pc = (mixconfig*)block;
+
+	mc.m1 = pc->m1 | 1;
+	mc.m2 = pc->m2 | 1;
+
+	Stats s = testMixAvalanche<uint32_t>(mixfunc<uint32_t>(blahmix),2000000);
+
+	return 1.0 - s.m_max;
+}
+
+void mixdump ( void * block, int )
+{
+	mixconfig * pc = (mixconfig*)block;
+
+	printf("0x%08x 0x%08x",pc->m1, pc->m2 );
+}
+*/
+
+//-----------------------------------------------------------------------------
+// SimAnneal optimize of fmix64
+
+struct mixconfig // parameter block for fmix64_test; the shift fields are currently commented out
+{
+	//uint8_t s1;
+	uint64_t m1; // multiplier applied after the first xor-shift in fmix64_test
+	//uint8_t s2;
+	uint64_t m2; // multiplier applied after the second xor-shift in fmix64_test
+	//uint8_t s3;
+};
+// Constants read by fmix64_test; fmix64_fit overwrites them with each candidate.
+mixconfig mc = { 0xff51afd7ed558ccd, 0xc4ceb9fe1a85ec53 };
+
+uint64_t fmix64_test ( uint64_t k ) // 64-bit mix: xor-shift 33, multiply by mc.m1, xor-shift 33, multiply by mc.m2, xor-shift 33
+{
+	k ^= k >> 33; // fold the upper bits into the lower ones
+	//k ^= k >> mc.s1;
+
+	k *= mc.m1; // multiplier comes from the global mc, so results change when mc is updated
+
+	k ^= k >> 33;
+	//k ^= k >> mc.s2;
+
+	k *= mc.m2;
+
+	k ^= k >> 33;
+	//k ^= k >> mc.s3;
+
+	return k;
+}
+
+double fmix64_fit ( void * block, int ) // fitness callback: block points at a candidate mixconfig; the int argument is unused
+{
+	mixconfig * pc = (mixconfig*)block;
+
+	mc.m1 = pc->m1 | 1; // copy the candidate into the global mc, forcing both multipliers odd
+	mc.m2 = pc->m2 | 1;
+
+	//mc.s1 = pc->s1 & 63;
+	//mc.s2 = pc->s1 & 63;
+	//mc.s3 = pc->s1 & 63;
+
+	double bias = calcMixBias<uint64_t>(fmix64_test,50000000); // presumably a bias estimate over 50M samples -- confirm in calcMixBias
+
+	return 1.0 - bias; // larger return value means lower measured bias
+}
+
+void fmix64_dump ( void * block, int ) // prints the two multipliers of the mixconfig at block; the int argument is unused
+{
+	mixconfig * pc = (mixconfig*)block;
+
+	//pc->s1 &= 63;
+	//pc->s2 &= 63;
+	//pc->s3 &= 63;
+
+	//printf("{ %2d, 0x%016I64x, %2d, 0x%016I64x, %2d }; ",pc->s1, pc->m1, pc->s2, pc->m2, pc->s3 );
+	printf("{ 0x%016I64x, 0x%016I64x }; ", pc->m1, pc->m2 ); // I64 is the Microsoft CRT size prefix for 64-bit integers; no newline is printed
+}
+
+uint32_t fmix32_test ( uint32_t h ) // 32-bit mix with fixed constants: shifts 16/13/16, two multiplies
+{
+	h ^= h >> 16; // fold the upper half into the lower half
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+void optimize_fmix64 ( void ) // NOTE(review): despite the name, this only measures fmix32_test; the SimAnneal run is commented out
+{
+	printf("lskdflksj\n"); // placeholder marker text
+	double bias = calcMixBias<uint32_t>(fmix32_test,500000000);
+
+	printf("%f\n",bias);
+
+	//SimAnneal(&mc,sizeof(mc),fmix64_fit,fmix64_dump,4,100);
+}
+
+
+//-----------------------------------------------------------------------------
+// Fitness == distribution of Hamming weights.
+// Optimize mix by minmaxing Hamming weights
+
+// (we want the smallest differential hamming weight to be as large as possible)
+
+void HammingOptimize ( uint32_t (*mix)(uint32_t) ) // mix: the 32-bit mixing function being evaluated
+{
+	double best[33]; // best result so far; NOTE(review): only best[0] is initialised before hamless() first reads the array
+	best[0] = 2000000000;
+
+	double c[33]; // result of the current evaluation; presumably one slot per Hamming weight 0..32
+
+	printf("0x%08x\n",rand_u32());
+
+	//for(m3 = 0; m3 < 32; m3++)
+	
+	for(int i = 0; i < 100000; i++) // nothing in this function varies between iterations; the parameter loops are commented out
+	{
+		//for(r1 = 12; r1 < 18; r1++)
+		{
+			memset(c,0,sizeof(c));
+			SparseDiffHamming32(mix,c);
+
+			if(hamless(33,c,best)) // presumably true when c ranks better than best -- confirm against hamless
+			{
+				memcpy(best,c,sizeof(c)); // c and best have the same size, so sizeof(c) covers both
+
+				//printf("{%6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f } - ",c[0],c[1],c[2],c[3],c[4],c[5],c[6],c[7],c[8]);
+
+				printf("{");
+
+				for(int i = 0; i < 33; i++) printf("%6.3f ",c[i]); // this i shadows the outer loop counter
+				printf("} - ");
+
+				//printf("0x%08x, %2d, 0x%08x %2d\n",m1,r1,m2,m3);
+				//printf("0x%08x, 0x%08x\n",m1,m2);
+				printf("\n");
+			}
+		}
+	}
+}
+
+//-----------------------------------------------------------------------------
+
+u128 mix128 ( u128 h2 ) // mixes the by-value argument in place over 30 rounds and returns it
+{
+	uint32_t * h = (uint32_t*)&h2; // the value is accessed as four 32-bit words h[0]..h[3]
+
+	for(int i = 0; i < 30; i++)
+	{
+		h[0] = _rotl(h[0],3); // rotate each word left by its own amount
+		h[1] = _rotl(h[1],10);
+		h[2] = _rotl(h[2],19);
+		h[3] = _rotl(h[3],26);
+
+		h[0] += h[1]; // accumulate the other three words into word 0
+		h[0] += h[2];
+		h[0] += h[3];
+
+		h[1] += h[0]; // then add the new word 0 back into each of the others
+		h[2] += h[0];
+		h[3] += h[0];
+	}
+
+	return h2;
+}
+
+//-----------------------------------------------------------------------------
+
+void scratchmain ( void ) // currently does nothing: the whole body below is inside a block comment
+{
+	/*
+	double worst = 1000;
+
+	double worstStage = 0;
+
+	for(stage = 0; stage < 16; stage++)
+	{
+		Stats s = testMixAvalanche<uint32_t>(mixfunc<uint32_t>(m3mix),300);
+
+		if(s.m_nbad > worstStage) worstStage = s.m_nbad;
+	}
+
+	if(worstStage < worst)
+	{
+		worst = worstStage;
+
+		printf("%3.4f : 0x%08x 0x%08x %2d 0x%08x %2d 0x%08x %2d 0x%08x %2d\n",worst,i1,i2,m1,a1,m2,a2,m3,a3,r1);
+	}
+
+	//----------
+
+	for(int i = 0; i < 1000000; i++)
+	{
+		for(m1 = 3; m1 < 10; m1 += 2)
+		for(m2 = 3; m2 < 10; m2 += 2)
+		for(m3 = 3; m3 < 10; m3 += 2)
+		for(r1 = 0; r1 < 32; r1++)
+		//for(int bit = 0; bit < 32; bit++)
+		{
+			//i2 ^= (1 << bit);
+
+			if(m1 == 7) continue;
+			if(m2 == 7) continue;
+			if(m3 == 7) continue;
+
+			double worstStage = 0;
+
+			for(stage = 0; stage < 16; stage++)
+			{
+				Stats s = testMixAvalanche<uint32_t>(mixfunc<uint32_t>(m3mix),300);
+
+				if(s.m_nbad > worstStage) worstStage = s.m_nbad;
+			}
+
+			if(worstStage < worst)
+			{
+				worst = worstStage;
+
+				printf("%3.4f : 0x%08x 0x%08x %2d 0x%08x %2d 0x%08x %2d 0x%08x %2d\n",worst,i1,i2,m1,a1,m2,a2,m3,a3,r1);
+			}
+			else
+			{
+				//i2 ^= (1 << bit);
+			}
+		}
+
+		//i1 = rand_u32();
+		//i2 = rand_u32();
+
+		//a1 = rand_u32();
+		//a2 = rand_u32();
+		//a3 = rand_u32();
+	}
+	*/
+}
+
+//-----------------------------------------------------------------------------
+
+/*
+void Pathological ( void )
+{
+	std::set<uint32_t> s;
+
+	uint32_t c = 0;
+	uint32_t seed = 0xdeadbeef * 16;
+
+	for(int j = 0; j < 5000; j++)
+	{
+		for(uint32_t i = 0; i < 10000; i++)
+		{
+			uint32_t key[4] = {c,c,c,c};
+
+			uint32_t hash = MurmurHash2(key,16,seed);
+
+			//v.push_back(hash);
+			s.insert(hash);
+			c++;
+		}
+
+		printf("%8d %8f\n",s.size(),double(s.size()) / double(c));
+	}
+}
+*/
+
+/*
+void Pathological ( void )
+{
+	const int nbytes = 512 * 1024 * 1024;
+
+	unsigned char * block = new unsigned char[nbytes];
+
+	memset(block,0,nbytes);
+
+	unsigned int k = 0;
+	unsigned int key[256];
+	unsigned int collisions = 0;
+	
+	do
+	{
+		for(int i = 0; i < 256; i++) key[i] = k;
+
+		unsigned int h;
+		h = MurmurHash2(&key[0],256*4,(0xdeadbeef * 16));
+		//MurmurHash3_x86_32(&key[0],32,(0xdeadbeef * 16),&h);
+
+		//printf("0x%08x\n",h);
+
+		if(getbit(block,nbytes,h))
+		{
+			collisions++;
+		}
+
+		setbit(block,nbytes,h);
+
+		if(k % 10000000 == 0)
+		{
+			printf("%12d : %9d : %f\n",k,collisions,double(collisions) / double(k));
+		}
+
+		k++;
+	}
+	while(k != 0);
+
+	printf("%d total collisions",collisions);
+
+	delete [] block;
+}
+*/
+
+/*
+void Pathological ( void )
+{
+	const int nbytes = 512 * 1024 * 1024;
+
+	unsigned char * block = new unsigned char[nbytes];
+
+	memset(block,0,nbytes);
+
+	unsigned int k = 0;
+	unsigned int unique = 0;
+	
+	do
+	{
+		const uint32_t m = 0xdeadbeef;
+		const int r = 24;
+
+		uint32_t x = 0;
+		uint32_t h = 0;
+
+		x = k;
+		x *= m;
+		x ^= x >> r;
+		x *= m;
+
+		h *= m;
+		h ^= x;
+
+		x = k;
+		x *= m;
+		x ^= x >> r;
+		x *= m;
+
+		h *= m;
+		h ^= x;
+
+		if(!getbit(block,nbytes,h))
+		{
+			unique++;
+		}
+
+		setbit(block,nbytes,h);
+
+		if(k % 10000000 == 0)
+		{
+			printf("%12d : %9d :%f\n",k,unique,double(unique) / double(k));
+		}
+
+		k++;
+	}
+	while(k);
+
+	printf("%d unique",unique);
+
+	delete [] block;
+}
+*/
+
+/*
+void Pathological ( void )
+{
+	typedef std::map<uint32_t,uint32_t> cmap;
+	
+	cmap collisionmap;
+
+	const int nbytes = 512 * 1024 * 1024;
+
+	unsigned char * block = new unsigned char[nbytes];
+
+	memset(block,0,nbytes);
+
+	unsigned int k = 0;
+	unsigned int key[4];
+	unsigned int collisions = 0;
+	
+	do
+	{
+		for(int i = 0; i < 4; i++) key[i] = k;
+
+		unsigned int h;
+		h = MurmurHash2(&key[0],4*sizeof(uint32_t),16);
+		//MurmurHash3_x86_32(&key[0],32,(0xdeadbeef * 16),&h);
+
+		//printf("0x%08x\n",h);
+
+		if(getbit(block,nbytes,h))
+		{
+			collisions++;
+			collisionmap[h]++;
+		}
+
+		setbit(block,nbytes,h);
+
+		if(k % 10000000 == 0)
+		{
+			printf("%12d : %9d : %9d :%f\n",k,collisionmap.size(),collisions,double(collisions) / double(k));
+		}
+
+		k++;
+	}
+	//while(k);
+	while(k <= 200000000);
+
+	uint32_t most = 0;
+	for(cmap::iterator i = collisionmap.begin(); i != collisionmap.end(); ++i)
+	{
+		uint32_t h = (*i).first;
+		uint32_t c = (*i).second;
+
+		if(c > most)
+		{
+			most = c;
+			printf("0x%08x : %d\n",h,c);
+		}
+	}
+
+	printf("%d total collisions",collisions);
+
+	delete [] block;
+}
+*/
+
+/*
+void Pathological ( void )
+{
+	unsigned int k = 0;
+	unsigned int key[4];
+
+	std::vector<uint32_t> v;
+	
+	do
+	{
+		for(int i = 0; i < 4; i++) key[i] = k;
+
+		unsigned int h;
+		h = MurmurHash2(&key[0],4*sizeof(uint32_t),16);
+
+		if(h == 0xb5abf828) 
+		{
+			v.push_back(k);
+		}
+
+		if(k % 100000000 == 0)
+		{
+			printf("%12u : %12d\n",k,v.size());
+		}
+
+
+		k++;
+	}
+	while(k);
+
+	for(size_t i = 0; i < v.size(); i++)
+	{
+		printf("0x%08x,",v[i]);
+		if(i%8==7) printf("\n");
+	}
+	printf("\n");
+}
+*/
+
+/*
+uint32_t bad[] = 
+{
+0x0017f1a9,0x00f8c102,0x01685768,0x01c6d69e,0x02109e20,0x02ea2120,0x03615606,0x03bab745,
+0x03eb73e9,0x03f7db48,0x04391e64,0x04747fa7,0x04b81cf5,0x04fbcab0,0x054bf06a,0x05d33abc,
+0x05d8eb48,0x06560ce6,0x0697bcfa,0x06a40faa,0x071977fb,0x073a4306,0x073eb088,0x0751c777,
+0x07534cb4,0x079d2fbe,0x07a0ba13,0x07cff5fc,0x082b2d13,0x08457c35,0x093de81e,0x09711b75,
+0x097fdb48,0x09ba9060,0x0a06228a,0x0a5f8691,0x0a63881c,0x0a70bcd7,0x0aed67dd,0x0b0ed19a,
+0x0bc68125,0x0c29fe48,0x0ca1eb57,0x0cbfc528,0x0d4017e2,0x0d6d91c2,0x0d7388de,0x0f0133e9,
+0x0f8d17e7,0x0f90e980,0x0fe6be43,0x1033d71d,0x1087872c,0x10b52186,0x12005768,0x12c817e2,
+0x12ed3caf,0x1343eae2,0x137b2949,0x1407d537,0x1462906a,0x156742a0,0x15f44042,0x17204969,
+0x18c86d6a,0x192c6777,0x1950b0f3,0x19548454,0x1961fb59,0x19e92685,0x1a24be52,0x1a72ccfa,
+0x1a7caf9b,0x1a9d7aa6,0x1b9407c9,0x1b9d472c,0x1bdc3c3f,0x1c2a955f,0x1c44f065,0x1c75fda6,
+0x1c934985,0x1cd45315,0x1d1dce3e,0x1d695a2a,0x1e88f490,0x203a3985,0x2050669c,0x20a34f82,
+0x221b4985,0x222718dc,0x2240aa13,0x22a67680,0x24bdf477,0x250ead99,0x255d00e9,0x2652bb8e,
+0x26823b4d,0x27298fd2,0x27bf3042,0x27e2e537,0x282dbcdc,0x295777e2,0x2ab449ff,0x2d347ad3,
+0x2d3c176d,0x2d4c5e25,0x2d72b111,0x2d9f768f,0x2ddfe73b,0x2e00b246,0x2f9f1523,0x2fdbdba7,
+0x30831cfa,0x30cc91ca,0x3129f75c,0x313f9486,0x315255e3,0x31e70a31,0x33490a31,0x33622c30,
+0x33863468,0x3441b8a7,0x349f03ad,0x3715eda6,0x374df66c,0x3766e2fc,0x3848010c,0x385325bb,
+0x38a843f3,0x398e8722,0x39cc0d5b,0x39e572ed,0x3ace4477,0x3afb8c19,0x3b98b8d4,0x3ce6212a,
+0x3cec46c6,0x3d43761a,0x3de45e25,0x3e1e5a2c,0x3f612a36,0x4008f490,0x41431edb,0x4163e9e6,
+0x41742120,0x41854564,0x41ca60f3,0x41fa37f6,0x421e16a3,0x4263b66c,0x42bc7a4a,0x434286ad,
+0x435858a7,0x43bbf5f2,0x43e43d7e,0x442fc96a,0x443e6342,0x44b58d83,0x45378356,0x45df4db0,
+0x46b09971,0x47337cff,0x47f46fc3,0x48023b4d,0x4823a50a,0x49691a36,0x497767dd,0x4a50eadd,
+0x4ad26a3b,0x4b8463b7,0x4bc34e34,0x4bcd5cc3,0x4bf245e3,0x4c62946d,0x4d18b7f9,0x4da4d029,
+0x4dcac8e3,0x4df83139,0x4e2514b8,0x4e859f82,0x4ea95477,0x4ef42c1c,0x4f68a832,0x4f7acba7,
+0x4fa478d9,0x4ffe8c21,0x50ee3486,0x514795c5,0x51948107,0x51c5fce4,0x51e3eaec,0x52015e27,
+0x526260f3,0x5288a930,0x5360193c,0x53e7ac58,0x54a6567b,0x54c72186,0x54cb8f08,0x54dea5f7,
+0x552d9893,0x555d6f96,0x55b80b93,0x56cac69e,0x56fdf9f5,0x5793010a,0x57d7b747,0x57ec6511,
+0x57f0669c,0x57fd9b57,0x5818c523,0x58fe6cff,0x5a011a36,0x5a4ca3a8,0x5b00675e,0x5c50bfbc,
+0x5c6a50f3,0x5d19f667,0x5d2504a9,0x5ddbc685,0x5e85812a,0x5ed4c61f,0x5f4d0056,0x5fd14dba,
+0x5fd77356,0x608b5837,0x60d6c07e,0x610807c9,0x610986bc,0x6194b3b7,0x62f42120,0x62f774b3,
+0x63233736,0x6361c3c1,0x63811ec2,0x64ad27e9,0x650011e5,0x66b945f7,0x66dd8f73,0x67361999,
+0x67471347,0x67760505,0x6789c685,0x68098e1b,0x683ac4a9,0x68ca7a40,0x69b773df,0x69d5acdc,
+0x6a1ec7e7,0x6a202805,0x6a613195,0x6a6a70f8,0x6a74f315,0x6a838109,0x6aaaacbe,0x6af638aa,
+0x6b4727f6,0x6b7bfcc3,0x6d4eb4ac,0x6dc71805,0x6ef55b70,0x6fa82805,0x6fb3f75c,0x6fcd8893,
+0x7014bf91,0x70fc7fc8,0x724ad2f7,0x729b8c19,0x72b8b523,0x735e4f12,0x7378556e,0x73ac5dba,
+0x74b66e52,0x74e8531f,0x754c0ec2,0x7564261f,0x7567c4bd,0x756fc3b7,0x75af8e66,0x75ba9b5c,
+0x7841287f,0x7973ca45,0x7aaa7fc8,0x7ac8f5ed,0x7aec261f,0x7b2c550f,0x7b6cc5bb,0x7d2bf3a3,
+0x7d68ba27,0x7d8f1e39,0x7d98de70,0x7edf3463,0x80626b7a,0x80b1ec4c,0x81ce9727,0x827aca36,
+0x82944f12,0x86352273,0x8831268f,0x885b22f7,0x887d51bd,0x889f261a,0x89259754,0x89bcadba,
+0x8a323fd7,0x8a72ffaa,0x8a792546,0x8ad0549a,0x8b209af1,0x8bbe27e7,0x8c066fc3,0x8c4464b3,
+0x8cd4d306,0x8cee08b6,0x8d4ab321,0x8ecffd5b,0x8f1223e4,0x8f573f73,0x8f871676,0x904958ca,
+0x904f7e66,0x90e53727,0x91711bfe,0x91859d88,0x919dfef4,0x91cb41c2,0x92426c03,0x92c461d6,
+0x92fffef4,0x936c2c30,0x93dd8269,0x94351cd9,0x94c05b7f,0x94e87d04,0x954e3aba,0x95814e43,
+0x95bbcab0,0x96f5f8b6,0x985f48bb,0x99502cb4,0x995a3b43,0x997f2463,0x99ef72ed,0x9a4e3c2b,
+0x9b57a763,0x9b850fb9,0x9bb1f338,0x9bc723cb,0x9be0895d,0x9c3632f7,0x9c7c176d,0x9c810a9c,
+0x9cf586b2,0x9d07aa27,0x9d315759,0x9d8b6aa1,0x9e99eeef,0x9f215f87,0x9f70c96c,0x9fc195cf,
+0x9fef3f73,0xa06af1b8,0xa06d0dbf,0xa0840b00,0xa12e0083,0xa14df1d4,0xa1748ad8,0xa1884c58,
+0xa2ea4e16,0xa307c528,0xa3f0607e,0xa40bfafb,0xa4558d79,0xa547228c,0xa56495c7,0xa5a5a3a3,
+0xa68b4b7f,0xa728daba,0xa78df8b6,0xa8de0999,0xa90e5479,0xa9dd9e3c,0xa9f72f73,0xa9fd51bd,
+0xaab1e329,0xab3aeee7,0xab68a505,0xab9c9eea,0xabfd18dc,0xac125faa,0xac61a49f,0xac9edbac,
+0xacd9ded6,0xad5e2c3a,0xad6451d6,0xae1836b7,0xae639efe,0xae96653c,0xaee4ad99,0xaef795cf,
+0xaf11f9ff,0xaf43c0fd,0xb0845333,0xb0b015b6,0xb0eea241,0xb1114807,0xb28cf065,0xb3db78e8,
+0xb439f81e,0xb483bfa0,0xb4c2f819,0xb4d3f1c7,0xb516a505,0xb55d42a0,0xb5c7a329,0xb65758c0,
+0xb65e9569,0xb66afcc8,0xb72b3e75,0xb7628b5c,0xb7aba667,0xb7bf11ea,0xb7f74f78,0xb801d195,
+0xb8105f89,0xb84c0cc8,0xb8c92e66,0xb8d40676,0xb908db43,0xb90ade7a,0xb917312a,0xb9c66e34,
+0xba10513e,0xba43177c,0xbab89db5,0xbadb932c,0xbbf2fcc8,0xbc2db1e0,0xbc8239f0,0xbd60895d,
+0xbd81f31a,0xbda19e11,0xbe39a2a5,0xbe895e48,0xbe9d1fc8,0xbf150cd7,0xbfb33962,0xbfe0b342,
+0xc04593a3,0xc0eb2d92,0xc10533ee,0xc1393c3a,0xc1745569,0xc2040b00,0xc259dfc3,0xc275319f,
+0xc2a6f89d,0xc2f1049f,0xc2f4a33d,0xc2faa8ac,0xc3284306,0xc33c6ce6,0xc47378e8,0xc53b3962,
+0xc5605e2f,0xc5b70c62,0xc6d5b1ea,0xc700a8c5,0xc8375e48,0xc879049f,0xcb1bfcb9,0xcb25bcf0,
+0xcb3b8eea,0xcbc7a5d4,0xcbd51cd9,0xcc97dfd2,0xcce5ee7a,0xcd109c26,0xcdef49fa,0xce072949,
+0xce1068ac,0xce3ecacc,0xce4f5dbf,0xceb811e5,0xcee91f26,0xd007a8b6,0xd0212d92,0xd0fc1610,
+0xd2c3881c,0xd3167102,0xd5199800,0xd5be050f,0xd60a303d,0xd62c049a,0xd7498c3a,0xd7bf1e57,
+0xd7d02269,0xd8ad7971,0xd8c5dd0e,0xd8f55ccd,0xd94b0667,0xd9934e43,0xd9d14333,0xda61b186,
+0xdad791a1,0xdbca9962,0xdddc5ce6,0xdf127c08,0xdf2add74,0xdfa79c53,0xdfbf7fa5,0xdfe5d291,
+0xe073d3c6,0xe08cdd74,0xe16a60e9,0xe1c1fb59,0xe2755b84,0xe2db193a,0xe2f63e7a,0xe33fb34a,
+0xe348a930,0xe39d18dc,0xe3b2b606,0xe45a2bb1,0xe5bc2bb1,0xe5d54db0,0xe5f955e8,0xe712252d,
+0xe7db1aab,0xe954024b,0xe96d67dd,0xe9890f26,0xe9c117ec,0xe9da047c,0xea08f5ed,0xeabb228c,
+0xeac6473b,0xec01a8a2,0xec26cd6f,0xec3f2edb,0xec58946d,0xed4e744f,0xed6ead99,0xedf7d038,
+0xedf9ec3f,0xee10e980,0xeebadf03,0xeedad054,0xef152ad8,0xf0577fa5,0xf0917bac,0xf094a3a8,
+0xf17d3efe,0xf198d97b,0xf1e26bf9,0xf27c1610,0xf2d4010c,0xf3d70b66,0xf3e742a0,0xf4913823,
+0xf4b5b93a,0xf4d6d7ec,0xf5b5a82d,0xf62f1772,0xf66ae819,0xf69b32f9,0xf6a2eaea,0xf78a303d,
+0xf8c7cd67,0xf923baf1,0xf9297d6a,0xf989f75c,0xfa2bba2c,0xfa755ccd,0xfa96c68a,0xfbea895d,
+0xfc718c19,0xfc84744f,0xfc9ed87f,0xfcc40c5d,0xfcd09f7d,0xfdf78537,0xfe9e2687,0xff8bd979,
+};
+
+void Pathological ( void )
+{
+	// 'm' and 'r' are mixing constants generated offline.
+	// They're not really 'magic', they just happen to work well.
+
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+
+	for(int i = 0; i < 100; i++)
+	{
+		uint32_t h = 0;
+		uint32_t k = bad[i];
+
+		printf("0x%08x : ",k);
+		k *= m;
+		printf("0x%08x : ",k);
+		k ^= k >> r;
+		printf("0x%08x : ",k);
+		k *= m;
+		printf("0x%08x : ",k);
+
+		printf(" - ");
+
+		h = k;
+		printf("0x%08x : ",h);
+		h *= m;
+		printf("0x%08x : ",h);
+		h ^= k;
+		printf("0x%08x : ",h);
+		h *= m;
+		printf("0x%08x : ",h);
+		h ^= k;
+		printf("0x%08x : ",h);
+		h *= m;
+		printf("0x%08x : ",h);
+		h ^= k;
+		printf("0x%08x\n",h);
+
+	}
+}
+*/
+
+/*
+void Pathological ( void )
+{
+	const int nbytes = 512 * 1024 * 1024;
+
+	unsigned char * block = new unsigned char[nbytes];
+
+	memset(block,0,nbytes);
+
+	unsigned int k = 0;
+	unsigned int collisions = 0;
+	
+	do
+	{
+		//const uint32_t m = 0x5bd1e995;
+		unsigned int h = 0;
+
+		uint32_t m1 = 0x5bd1e995;
+		uint32_t m2 = 0x5bd1e995;
+		uint32_t m3 = 0x5bd1e995;
+		uint32_t x;
+
+		x = k; x *= m1; x ^= x >> 25; x *= m2; h ^= x; h *= m3;
+		m2 = m2*9+0x273581d8;
+		x = k; x *= m1; x ^= x >> 25; x *= m2; h ^= x; h *= m3;
+		m2 = m2*9+0x273581d8;
+		
+		//printf("0x%08x : 0x%08x\n",k,h);
+		//h *= 3;
+
+		if(getbit(block,nbytes,h))
+		{
+			collisions++;
+		}
+
+		setbit(block,nbytes,h);
+
+		if(k % 10000000 == 0)
+		{
+			printf("%12u : %9u : %f\n",k,collisions,double(collisions) / double(k));
+		}
+
+		k++;
+	}
+	while(k != 0);
+
+	printf("%u total collisions, %f",collisions,double(collisions) / 4294967296.0);
+
+	delete [] block;
+}
+*/
+
+/*
+// Applying FWT to fmix32 to look for linearities (it found some bias, but nothing above a fraction of a percent)
+
+void find_linear_approximation_walsh2 ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias );
+void find_linear_approximation_walsh  ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias );
+uint32_t test_linear_approximation ( mixfunc<uint32_t> f, uint32_t l, uint32_t mask, int inbits );
+
+uint32_t bitrev ( uint32_t v );
+
+uint32_t FWTMix ( uint32_t x )
+{
+	x ^= x >> 16;
+	x *= 0x85ebca6b;
+	x ^= x >> 13;
+	x *= 0xc2b2ae35;
+	x ^= x >> 16;
+
+	return x;
+}
+
+double test_linear_approximation ( mixfunc<uint32_t> f, uint32_t l, uint32_t mask, int64_t size );
+
+void WalshStuff(void )
+{
+	const int64_t nbits = 32;
+	const int64_t size = int64_t(1) << nbits;
+
+	mixfunc<uint32_t> f(FWTMix);
+
+	for(int i = 0; i < nbits; i++)
+	{
+		uint32_t mask = (1 << i);
+		uint32_t outL = 0;
+		int64_t bias = 0;
+		find_linear_approximation_walsh2(f,mask,nbits,outL,bias);
+
+		double b = test_linear_approximation ( f, outL, mask, size);
+
+		printf("0x%08x, 0x%08x, %8I64d, %f\n",mask,outL,bias,b);
+	}
+}
+*/
\ No newline at end of file
diff --git a/sha1.cpp b/sha1.cpp
new file mode 100644
index 0000000..c4b79b8
--- /dev/null
+++ b/sha1.cpp
@@ -0,0 +1,603 @@
+/*
+ *  sha1.cpp
+ *
+ *  Copyright (C) 1998, 2009
+ *  Paul E. Jones <paulej@packetizer.com>
+ *  All Rights Reserved.
+ *
+ *****************************************************************************
+ *  $Id: sha1.cpp 12 2009-06-22 19:34:25Z paulej $
+ *****************************************************************************
+ *
+ *  Description:
+ *      This class implements the Secure Hashing Standard as defined
+ *      in FIPS PUB 180-1 published April 17, 1995.
+ *
+ *      The Secure Hashing Standard, which uses the Secure Hashing
+ *      Algorithm (SHA), produces a 160-bit message digest for a
+ *      given data stream.  In theory, it is highly improbable that
+ *      two messages will produce the same message digest.  Therefore,
+ *      this algorithm can serve as a means of providing a "fingerprint"
+ *      for a message.
+ *
+ *  Portability Issues:
+ *      SHA-1 is defined in terms of 32-bit "words".  This code was
+ *      written with the expectation that the processor has at least
+ *      a 32-bit machine word size.  If the machine word size is larger,
+ *      the code should still function properly.  One caveat to that
+ *      is that the input functions taking characters and character arrays
+ *      assume that only 8 bits of information are stored in each character.
+ *
+ *  Caveats:
+ *      SHA-1 is designed to work with messages less than 2^64 bits long.
+ *      Although SHA-1 allows a message digest to be generated for
+ *      messages of any number of bits less than 2^64, this implementation
+ *      only works with messages with a length that is a multiple of 8
+ *      bits.
+ *
+ */
+
+
+#include "sha1.h"
+
+/*  
+ *  SHA1
+ *
+ *  Description:
+ *      This is the constructor for the sha1 class.
+ *
+ *  Parameters:
+ *      None.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *
+ */
+SHA1::SHA1()
+{
+    Reset();    // Start in the state Reset() produces: empty message, initial H[] values, flags cleared
+}
+
+/*  
+ *  ~SHA1
+ *
+ *  Description:
+ *      This is the destructor for the sha1 class
+ *
+ *  Parameters:
+ *      None.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *
+ */
+SHA1::~SHA1()
+{
+    // Nothing to do: neither the constructor nor Reset() in this file acquires any resource
+}
+
+/*  
+ *  Reset
+ *
+ *  Description:
+ *      This function will initialize the sha1 class member variables
+ *      in preparation for computing a new message digest.
+ *
+ *  Parameters:
+ *      None.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *
+ */
+void SHA1::Reset()
+{
+    Length_Low          = 0;                // Message bit count, low word
+    Length_High         = 0;                // Message bit count, high word
+    Message_Block_Index = 0;                // Next free octet in Message_Block
+
+    H[0]        = 0x67452301;               // SHA-1 initial hash value, words H0..H4
+    H[1]        = 0xEFCDAB89;
+    H[2]        = 0x98BADCFE;
+    H[3]        = 0x10325476;
+    H[4]        = 0xC3D2E1F0;
+
+    Computed    = false;                    // Result() has not finalized a digest yet
+    Corrupted   = false;                    // No misuse or length overflow recorded
+}
+
+/*  
+ *  Result
+ *
+ *  Description:
+ *      This function will return the 160-bit message digest into the
+ *      array provided.
+ *
+ *  Parameters:
+ *      message_digest_array: [out]
+ *          This is an array of five unsigned integers which will be filled
+ *          with the message digest that has been computed.
+ *
+ *  Returns:
+ *      True if successful, false if it failed.
+ *
+ *  Comments:
+ *
+ */
+bool SHA1::Result(unsigned *message_digest_array)
+{
+    int i;                                  // Counter
+
+    if (Corrupted)                          // Input() flagged misuse or an over-long message
+    {
+        return false;                       // The output array is left untouched
+    }
+
+    if (!Computed)                          // Finalize only once; later calls reuse H[]
+    {
+        PadMessage();                       // Pads and processes the last block(s)
+        Computed = true;                    // From now on Input() marks the object corrupted
+    }
+
+    for(i = 0; i < 5; i++)                  // Caller must supply room for five words
+    {
+        message_digest_array[i] = H[i];
+    }
+
+    return true;
+}
+
+/*  
+ *  Input
+ *
+ *  Description:
+ *      This function accepts an array of octets as the next portion of
+ *      the message.
+ *
+ *  Parameters:
+ *      message_array: [in]
+ *          An array of characters representing the next portion of the
+ *          message.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *      length is the number of octets to read from message_array.
+ */
+void SHA1::Input(   const unsigned char *message_array,
+                    unsigned            length)
+{
+    if (!length)                                // Empty input is a no-op, even after Result()
+    {
+        return;
+    }
+
+    if (Computed || Corrupted)                  // Input after Result(), or after an earlier error
+    {
+        Corrupted = true;
+        return;
+    }
+
+    while(length-- && !Corrupted)               // Stops early once the bit counter overflows
+    {
+        Message_Block[Message_Block_Index++] = (*message_array & 0xFF);
+
+        Length_Low += 8;                        // One octet adds 8 message bits
+        Length_Low &= 0xFFFFFFFF;               // Force it to 32 bits
+        if (Length_Low == 0)                    // Low word wrapped: carry into the high word
+        {
+            Length_High++;
+            Length_High &= 0xFFFFFFFF;          // Force it to 32 bits
+            if (Length_High == 0)
+            {
+                Corrupted = true;               // Message is too long
+            }
+        }
+
+        if (Message_Block_Index == 64)          // Block full: fold it into H[]
+        {
+            ProcessMessageBlock();              // Also resets Message_Block_Index to 0
+        }
+
+        message_array++;
+    }
+}
+
+/*  
+ *  Input
+ *
+ *  Description:
+ *      This function accepts an array of octets as the next portion of
+ *      the message.
+ *
+ *  Parameters:
+ *      message_array: [in]
+ *          An array of characters representing the next portion of the
+ *          message.
+ *      length: [in]
+ *          The length of the message_array
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *
+ */
+void SHA1::Input(   const char  *message_array,
+                    unsigned    length)
+{
+    Input((const unsigned char *) message_array, length);   // Same octets viewed as unsigned; const is kept
+}
+
+/*  
+ *  Input
+ *
+ *  Description:
+ *      This function accepts a single octet as the next message element.
+ *
+ *  Parameters:
+ *      message_element: [in]
+ *          The next octet in the message.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *
+ */
+void SHA1::Input(unsigned char message_element)
+{
+    Input(&message_element, 1);     // Forward the by-value copy as a one-octet array
+}
+
+/*  
+ *  Input
+ *
+ *  Description:
+ *      This function accepts a single octet as the next message element.
+ *
+ *  Parameters:
+ *      message_element: [in]
+ *          The next octet in the message.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *
+ */
+void SHA1::Input(char message_element)
+{
+    Input((unsigned char *) &message_element, 1);   // View the char's storage as one octet
+}
+
+/*  
+ *  operator<<
+ *
+ *  Description:
+ *      This operator makes it convenient to provide character strings to
+ *      the SHA1 object for processing.
+ *
+ *  Parameters:
+ *      message_array: [in]
+ *          The character array to take as input.
+ *
+ *  Returns:
+ *      A reference to the SHA1 object.
+ *
+ *  Comments:
+ *      Each character is assumed to hold 8 bits of information.
+ *
+ */
+SHA1& SHA1::operator<<(const char *message_array)
+{
+    const char *p = message_array;
+
+    while(*p)                       // Stops at the terminating NUL, which is not hashed
+    {
+        Input(*p);                  // *p is a char, so this selects Input(char)
+        p++;
+    }
+
+    return *this;                   // Allows chained << calls
+}
+
+/*  
+ *  operator<<
+ *
+ *  Description:
+ *      This operator makes it convenient to provide character strings to
+ *      the SHA1 object for processing.
+ *
+ *  Parameters:
+ *      message_array: [in]
+ *          The character array to take as input.
+ *
+ *  Returns:
+ *      A reference to the SHA1 object.
+ *
+ *  Comments:
+ *      Each character is assumed to hold 8 bits of information.
+ *
+ */
+SHA1& SHA1::operator<<(const unsigned char *message_array)
+{
+    const unsigned char *p = message_array;
+
+    while(*p)                       // Stops at the first zero octet, which is not hashed
+    {
+        Input(*p);                  // *p is an unsigned char, so this selects Input(unsigned char)
+        p++;
+    }
+
+    return *this;                   // Allows chained << calls
+}
+
+/*  
+ *  operator<<
+ *
+ *  Description:
+ *      This function provides the next octet in the message.
+ *
+ *  Parameters:
+ *      message_element: [in]
+ *          The next octet in the message
+ *
+ *  Returns:
+ *      A reference to the SHA1 object.
+ *
+ *  Comments:
+ *      The character is assumed to hold 8 bits of information.
+ *
+ */
+SHA1& SHA1::operator<<(const char message_element)
+{
+    Input((const unsigned char *) &message_element, 1);     // One octet; the cast no longer drops const
+
+    return *this;                                           // Allows chained << calls
+}
+
+/*  
+ *  operator<<
+ *
+ *  Description:
+ *      This function provides the next octet in the message.
+ *
+ *  Parameters:
+ *      message_element: [in]
+ *          The next octet in the message
+ *
+ *  Returns:
+ *      A reference to the SHA1 object.
+ *
+ *  Comments:
+ *      The character is assumed to hold 8 bits of information.
+ *
+ */
+SHA1& SHA1::operator<<(const unsigned char message_element)
+{
+    Input(&message_element, 1);     // Forward the parameter as a one-octet array
+
+    return *this;                   // Allows chained << calls
+}
+
+/*  
+ *  ProcessMessageBlock
+ *
+ *  Description:
+ *      This function will process the next 512 bits of the message
+ *      stored in the Message_Block array.
+ *
+ *  Parameters:
+ *      None.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *      Many of the variable names in this function, especially the single
+ *      character names, were used because those were the names used
+ *      in the publication.
+ *
+ */
+void SHA1::ProcessMessageBlock()
+{
+    const unsigned K[] =    {               // Constants defined for SHA-1
+                                0x5A827999, // used for t = 0..19
+                                0x6ED9EBA1, // used for t = 20..39
+                                0x8F1BBCDC, // used for t = 40..59
+                                0xCA62C1D6  // used for t = 60..79
+                            };
+    int         t;                          // Loop counter
+    unsigned    temp;                       // Temporary word value
+    unsigned    W[80];                      // Word sequence
+    unsigned    A, B, C, D, E;              // Word buffers
+
+    /*
+     *  Initialize the first 16 words in the array W
+     */
+    for(t = 0; t < 16; t++)
+    {
+        W[t] = ((unsigned) Message_Block[t * 4]) << 24;         // Most-significant octet first
+        W[t] |= ((unsigned) Message_Block[t * 4 + 1]) << 16;
+        W[t] |= ((unsigned) Message_Block[t * 4 + 2]) << 8;
+        W[t] |= ((unsigned) Message_Block[t * 4 + 3]);
+    }
+
+    for(t = 16; t < 80; t++)                // Expand the 16 message words to 80
+    {
+       W[t] = CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
+    }
+
+    A = H[0];                               // Work on copies; H[] is updated only at the end
+    B = H[1];
+    C = H[2];
+    D = H[3];
+    E = H[4];
+
+    for(t = 0; t < 20; t++)                 // f = (B AND C) OR ((NOT B) AND D)
+    {
+        temp = CircularShift(5,A) + ((B & C) | ((~B) & D)) + E + W[t] + K[0];
+        temp &= 0xFFFFFFFF;                 // Keep 32 bits in case unsigned is wider
+        E = D;
+        D = C;
+        C = CircularShift(30,B);
+        B = A;
+        A = temp;
+    }
+
+    for(t = 20; t < 40; t++)                // f = B XOR C XOR D
+    {
+        temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
+        temp &= 0xFFFFFFFF;                 // Keep 32 bits in case unsigned is wider
+        E = D;
+        D = C;
+        C = CircularShift(30,B);
+        B = A;
+        A = temp;
+    }
+
+    for(t = 40; t < 60; t++)                // f = majority of B, C, D
+    {
+        temp = CircularShift(5,A) +
+               ((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
+        temp &= 0xFFFFFFFF;                 // Keep 32 bits in case unsigned is wider
+        E = D;
+        D = C;
+        C = CircularShift(30,B);
+        B = A;
+        A = temp;
+    }
+
+    for(t = 60; t < 80; t++)                // f = B XOR C XOR D
+    {
+        temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
+        temp &= 0xFFFFFFFF;                 // Keep 32 bits in case unsigned is wider
+        E = D;
+        D = C;
+        C = CircularShift(30,B);
+        B = A;
+        A = temp;
+    }
+
+    H[0] = (H[0] + A) & 0xFFFFFFFF;         // Add this block's result into the running digest
+    H[1] = (H[1] + B) & 0xFFFFFFFF;
+    H[2] = (H[2] + C) & 0xFFFFFFFF;
+    H[3] = (H[3] + D) & 0xFFFFFFFF;
+    H[4] = (H[4] + E) & 0xFFFFFFFF;
+
+    Message_Block_Index = 0;                // Block consumed; the next octet starts a new one
+}
+
+/*  
+ *  PadMessage
+ *
+ *  Description:
+ *      According to the standard, the message must be padded to a multiple
+ *      of 512 bits.  The first padding bit must be a '1'.  The last 64 bits
+ *      represent the length of the original message.  All bits in between
+ *      should be 0.  This function will pad the message according to those
+ *      rules by filling the message_block array accordingly.  It will also
+ *      call ProcessMessageBlock() appropriately.  When it returns, it
+ *      can be assumed that the message digest has been computed.
+ *
+ *  Parameters:
+ *      None.
+ *
+ *  Returns:
+ *      Nothing.
+ *
+ *  Comments:
+ *
+ */
+void SHA1::PadMessage()
+{
+    /*
+     *  Check to see if the current message block is too small to hold
+     *  the initial padding bits and length.  If so, we will pad the
+     *  block, process it, and then continue padding into a second block.
+     */
+    if (Message_Block_Index > 55)           // Fewer than 9 octets free (0x80 + 8 length octets)
+    {
+        Message_Block[Message_Block_Index++] = 0x80;    // A '1' bit followed by seven '0' bits
+        while(Message_Block_Index < 64)
+        {
+            Message_Block[Message_Block_Index++] = 0;
+        }
+
+        ProcessMessageBlock();              // Also resets Message_Block_Index to 0
+
+        while(Message_Block_Index < 56)     // Second block: zeros up to the length field
+        {
+            Message_Block[Message_Block_Index++] = 0;
+        }
+    }
+    else
+    {
+        Message_Block[Message_Block_Index++] = 0x80;    // A '1' bit followed by seven '0' bits
+        while(Message_Block_Index < 56)     // Zeros up to the length field
+        {
+            Message_Block[Message_Block_Index++] = 0;
+        }
+
+    }
+
+    /*
+     *  Store the message length as the last 8 octets
+     */
+    Message_Block[56] = (Length_High >> 24) & 0xFF;     // High word first, most-significant octet first
+    Message_Block[57] = (Length_High >> 16) & 0xFF;
+    Message_Block[58] = (Length_High >> 8) & 0xFF;
+    Message_Block[59] = (Length_High) & 0xFF;
+    Message_Block[60] = (Length_Low >> 24) & 0xFF;
+    Message_Block[61] = (Length_Low >> 16) & 0xFF;
+    Message_Block[62] = (Length_Low >> 8) & 0xFF;
+    Message_Block[63] = (Length_Low) & 0xFF;
+
+    ProcessMessageBlock();                  // Final block; H[] now holds the digest
+}
+
+
+/*  
+ *  CircularShift
+ *
+ *  Description:
+ *      This member function will perform a circular shifting operation.
+ *
+ *  Parameters:
+ *      bits: [in]
+ *          The number of bits to shift (1-31)
+ *      word: [in]
+ *          The value to shift (assumes a 32-bit integer)
+ *
+ *  Returns:
+ *      The shifted value.
+ *
+ *  Comments:
+ *
+ */
+unsigned SHA1::CircularShift(int bits, unsigned word)
+{
+    return ((word << bits) & 0xFFFFFFFF) | ((word & 0xFFFFFFFF) >> (32-bits));  // Rotate left; masks keep 32 bits if unsigned is wider
+}
+
+
+//-----------------------------------------------------------------------------
+// Adapter for HashTest
+
+void sha1hash ( const void * key, int len, unsigned int seed, unsigned int * result )
+{
+	SHA1 s;
+
+	s.Input((const unsigned char*)&seed,(unsigned)sizeof(seed));	// seed is hashed first, as its in-memory bytes
+	s.Input((const unsigned char*)key,(len > 0) ? (unsigned)len : 0u);	// a negative len must not wrap to a huge unsigned
+
+	s.Result(result);	// writes five words (160 bits) to result
+}
\ No newline at end of file
diff --git a/sha1.h b/sha1.h
new file mode 100644
index 0000000..c0efa1c
--- /dev/null
+++ b/sha1.h
@@ -0,0 +1,89 @@
+/*
+ *  sha1.h
+ *
+ *  Copyright (C) 1998, 2009
+ *  Paul E. Jones <paulej@packetizer.com>
+ *  All Rights Reserved.
+ *
+ *****************************************************************************
+ *  $Id: sha1.h 12 2009-06-22 19:34:25Z paulej $
+ *****************************************************************************
+ *
+ *  Description:
+ *      This class implements the Secure Hashing Standard as defined
+ *      in FIPS PUB 180-1 published April 17, 1995.
+ *
+ *      Many of the variable names in this class, especially the single
+ *      character names, were used because those were the names used
+ *      in the publication.
+ *
+ *      Please read the file sha1.cpp for more information.
+ *
+ */
+
+#ifndef _SHA1_H_
+#define _SHA1_H_
+
+class SHA1
+{
+
+    public:
+
+        SHA1();
+        virtual ~SHA1();
+
+        /*
+         *  Re-initialize the object so that a new message can be hashed
+         */
+        void Reset();
+
+        /*
+         *  Writes the five-word digest to the array; returns false if corrupted
+         */
+        bool Result(unsigned *message_digest_array);
+
+        /*
+         *  Provide input to SHA1 (pointer forms of operator<< stop at a zero octet)
+         */
+        void Input( const unsigned char *message_array,
+                    unsigned            length);
+        void Input( const char  *message_array,
+                    unsigned    length);
+        void Input(unsigned char message_element);
+        void Input(char message_element);
+        SHA1& operator<<(const char *message_array);
+        SHA1& operator<<(const unsigned char *message_array);
+        SHA1& operator<<(const char message_element);
+        SHA1& operator<<(const unsigned char message_element);
+
+    private:
+
+        /*
+         *  Process the next 512 bits of the message
+         */
+        void ProcessMessageBlock();
+
+        /*
+         *  Pads the final block(s) to 512 bits and appends the message length
+         */
+        void PadMessage();
+
+        /*
+         *  Performs a circular left shift operation
+         */
+        inline unsigned CircularShift(int bits, unsigned word);
+
+        unsigned H[5];                      // Message digest buffers
+
+        unsigned Length_Low;                // Message length in bits (low 32 bits)
+        unsigned Length_High;               // Message length in bits (high 32 bits)
+
+        unsigned char Message_Block[64];    // 512-bit message blocks
+        int Message_Block_Index;            // Index into message block array
+
+        bool Computed;                      // Is the digest computed?
+        bool Corrupted;                     // Is the message digest corrupted?
+    
+};
+
+#endif
diff --git a/simplex.cpp b/simplex.cpp
new file mode 100644
index 0000000..3f08f1d
--- /dev/null
+++ b/simplex.cpp
@@ -0,0 +1,171 @@
+#include <stdio.h>
+#include <set>
+#include <map>
+#include "pstdint.h"
+
+#pragma warning(disable:4996)
+
+struct node;
+
+typedef std::set<node*> nodeset;
+
+struct node
+{
+	node ( void )
+	{
+		name = 0;
+		mark = 0;
+		used = 0;
+		next = 0;
+	}
+
+	uint32_t name;
+	uint32_t mark;
+	uint32_t used;
+
+	node * next;
+
+	nodeset edges;
+};
+
+typedef std::map<uint32_t,node> nodegraph;
+
+nodegraph graph;
+
+bool can_link ( node * A, node * B )
+{
+	if(A->edges.find(B) == A->edges.end()) return false;
+	if(B->edges.find(A) == B->edges.end()) return false;
+
+	return true;
+}
+
+bool can_link_all ( node * A, node * B )
+{
+	node * cursor = A;
+
+	while(cursor)
+	{
+		if(!can_link(cursor,B)) return false;
+
+		cursor = cursor->next;
+	}
+
+	return true;
+}
+
+void print_simplex( node * head )
+{
+	node * cursor = head;
+
+	while(cursor)
+	{
+		printf("0x%08x,",cursor->name);
+		cursor = cursor->next;
+	}
+	printf("\n");
+}
+
+void find_simplex ( node * head )
+{
+	bool found = false;
+
+	for(nodeset::iterator it = head->edges.begin(); it != head->edges.end(); it++)
+	{
+		node * next = (*it);
+
+		if(next->mark) continue;
+		if(next->name > head->name) continue;
+
+		if(can_link_all(head,next))
+		{
+			found = true;
+			next->mark = head->mark + 1;
+			next->next = head;
+
+			find_simplex(next);
+
+			next->mark = 0;
+			next->next = 0;
+		}
+	}
+
+	if(!found && (head->mark > 3))
+	{
+		bool used = false;
+
+		node * cursor = head;
+
+		while(cursor)
+		{
+			if(cursor->used) used = true;
+
+			cursor = cursor->next;
+		}
+
+		if(!used) 
+		{
+			print_simplex(head);
+
+			node * cursor = head;
+
+			while(cursor)
+			{
+				cursor->used = 1;
+				cursor = cursor->next;
+			}
+		}
+	}
+}
+
+int simplex_main ( int argc, char * argv[] )
+{
+	if(argc < 2)
+	{
+		printf("blah\n");
+		return 1;
+	}
+
+	FILE * file = fopen(argv[1],"r");
+
+	if(!file)
+	{
+		printf("Couldn't open file\n");
+		return 1;
+	}
+
+	char buffer[512];
+
+	while(fgets(buffer,512,file))
+	{
+		uint32_t nameA;
+		uint32_t nameB;
+
+		int found = sscanf(buffer,"0x%08x,0x%08x",&nameA,&nameB);
+
+		if(found != 2) continue;
+
+		node * nodeA = &graph[nameA];
+		node * nodeB = &graph[nameB];
+
+		nodeA->name = nameA;
+		nodeB->name = nameB;
+
+		nodeA->edges.insert(nodeB);
+		nodeB->edges.insert(nodeA);
+	}
+
+	for(std::map<uint32_t,node>::iterator it = graph.begin(); it != graph.end(); it++)
+	{
+		node & n = (*it).second;
+
+		n.mark = 1;
+
+		find_simplex(&n);
+
+		n.mark = 0;
+	}
+
+	return 0;
+}
+
-- 
cgit v1.2.3


From 8c49498591d3ae0ea3e3fffce716becd4a8e7df1 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Tue, 2 Nov 2010 00:53:07 +0000
Subject: Add project files

git-svn-id: http://smhasher.googlecode.com/svn/trunk@3 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 SMHasher.sln    |  26 +++
 SMHasher.vcproj | 589 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 615 insertions(+)
 create mode 100644 SMHasher.sln
 create mode 100644 SMHasher.vcproj

diff --git a/SMHasher.sln b/SMHasher.sln
new file mode 100644
index 0000000..f8c7a41
--- /dev/null
+++ b/SMHasher.sln
@@ -0,0 +1,26 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual Studio 2008
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SMHasher", "SMHasher.vcproj", "{AF3C61C4-642A-425B-928D-CEC37C678442}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|Win32.ActiveCfg = Debug|Win32
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|Win32.Build.0 = Debug|Win32
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|x64.ActiveCfg = Debug|x64
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|x64.Build.0 = Debug|x64
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|Win32.ActiveCfg = Release|Win32
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|Win32.Build.0 = Release|Win32
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|x64.ActiveCfg = Release|x64
+		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
new file mode 100644
index 0000000..0c553f0
--- /dev/null
+++ b/SMHasher.vcproj
@@ -0,0 +1,589 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="SMHasher"
+	ProjectGUID="{AF3C61C4-642A-425B-928D-CEC37C678442}"
+	RootNamespace="SMHasher"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="196613"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="true"
+				DebugInformationFormat="4"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="2"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="2"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				EnableIntrinsicFunctions="true"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="1"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				EnableIntrinsicFunctions="true"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				WarnAsError="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="1"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Crypto"
+			>
+			<File
+				RelativePath=".\BlockCipher.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\BlockCipher.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Cipher.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Cipher.h"
+				>
+			</File>
+			<File
+				RelativePath=".\StreamCipher.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\StreamCipher.h"
+				>
+			</File>
+			<File
+				RelativePath=".\TEA.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\TEA.h"
+				>
+			</File>
+			<File
+				RelativePath=".\XTEA.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\XTEA.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Hashes"
+			>
+			<File
+				RelativePath=".\crc.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\crc.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Hashes.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Hashes.h"
+				>
+			</File>
+			<File
+				RelativePath=".\lookup3.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\md5.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\MurmurHash1.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\MurmurHash1.h"
+				>
+			</File>
+			<File
+				RelativePath=".\MurmurHash2.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\MurmurHash2.h"
+				>
+			</File>
+			<File
+				RelativePath=".\MurmurHash3.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\MurmurHash3.h"
+				>
+			</File>
+			<File
+				RelativePath=".\sha1.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\sha1.h"
+				>
+			</File>
+			<File
+				RelativePath=".\SuperFastHash.cpp"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Tests"
+			>
+			<File
+				RelativePath=".\AvalancheTest.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\AvalancheTest.h"
+				>
+			</File>
+			<File
+				RelativePath=".\CycleTest.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\CycleTest.h"
+				>
+			</File>
+			<File
+				RelativePath=".\DictionaryTest.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\DictionaryTest.h"
+				>
+			</File>
+			<File
+				RelativePath=".\DifferentialTest.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\DifferentialTest.h"
+				>
+			</File>
+			<File
+				RelativePath=".\SparseKeyTest.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\SparseKeyTest.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Tests.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Tests.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Utils"
+			>
+			<File
+				RelativePath=".\Bitvec.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Bitvec.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Core.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Core.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Random.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Random.h"
+				>
+			</File>
+			<File
+				RelativePath=".\simplex.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Stats.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Stats.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Types.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Types.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Junk"
+			>
+			<File
+				RelativePath=".\Diffusion.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Diffusion.h"
+				>
+			</File>
+			<File
+				RelativePath=".\FWTransform.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\FWTransform.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Hamming.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Hamming.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Junk.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Junk.h"
+				>
+			</File>
+			<File
+				RelativePath=".\SimAnneal.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\SimAnneal.h"
+				>
+			</File>
+		</Filter>
+		<File
+			RelativePath=".\main.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\pstdint.h"
+			>
+		</File>
+		<File
+			RelativePath=".\scratch.cpp"
+			>
+		</File>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
-- 
cgit v1.2.3


From 2e8984f1e39b91dae37a23aea35d30b78f46c096 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Tue, 2 Nov 2010 01:00:16 +0000
Subject: Remove code that's not part of this library

git-svn-id: http://smhasher.googlecode.com/svn/trunk@4 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 BlockCipher.cpp    | 294 -------------------
 BlockCipher.h      |  90 ------
 Cipher.cpp         |   1 -
 Cipher.h           |  16 --
 DictionaryTest.cpp |  61 ----
 DictionaryTest.h   | 119 --------
 Diffusion.cpp      | 204 -------------
 Diffusion.h        |   1 -
 FWTransform.cpp    | 443 ----------------------------
 FWTransform.h      |  12 -
 Hamming.cpp        | 133 ---------
 Hamming.h          |   5 -
 Junk.cpp           |  38 ---
 Junk.h             |  46 ---
 SMHasher.vcproj    | 104 -------
 SimAnneal.cpp      |  97 -------
 SimAnneal.h        |   6 -
 StreamCipher.cpp   |  13 -
 StreamCipher.h     |  17 --
 TEA.cpp            |  52 ----
 TEA.h              |  23 --
 Tests.h            |   1 -
 XTEA.cpp           | 119 --------
 XTEA.h             |  23 --
 main.cpp           |  11 -
 scratch.cpp        | 823 -----------------------------------------------------
 simplex.cpp        | 171 -----------
 27 files changed, 2923 deletions(-)
 delete mode 100644 BlockCipher.cpp
 delete mode 100644 BlockCipher.h
 delete mode 100644 Cipher.cpp
 delete mode 100644 Cipher.h
 delete mode 100644 DictionaryTest.cpp
 delete mode 100644 DictionaryTest.h
 delete mode 100644 Diffusion.cpp
 delete mode 100644 Diffusion.h
 delete mode 100644 FWTransform.cpp
 delete mode 100644 FWTransform.h
 delete mode 100644 Hamming.cpp
 delete mode 100644 Hamming.h
 delete mode 100644 Junk.cpp
 delete mode 100644 Junk.h
 delete mode 100644 SimAnneal.cpp
 delete mode 100644 SimAnneal.h
 delete mode 100644 StreamCipher.cpp
 delete mode 100644 StreamCipher.h
 delete mode 100644 TEA.cpp
 delete mode 100644 TEA.h
 delete mode 100644 XTEA.cpp
 delete mode 100644 XTEA.h
 delete mode 100644 scratch.cpp
 delete mode 100644 simplex.cpp

diff --git a/BlockCipher.cpp b/BlockCipher.cpp
deleted file mode 100644
index 414cf92..0000000
--- a/BlockCipher.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-#include "BlockCipher.h"
-
-#include <assert.h>
-#include <memory.h>
-
-//----------------------------------------------------------------------------
-
-BlockCipher::BlockCipher ( void )
-{
-	clear();
-
-	setMode(ECB);
-}
-
-BlockCipher::BlockCipher ( CipherMode mode )
-{
-	clear();
-
-	setMode(mode);
-}
-
-BlockCipher::~BlockCipher ( void )
-{
-}
-
-void BlockCipher::clear ( void )
-{
-	memset(m_plain,  0, 64);
-	memset(m_input,  0, 64);
-	memset(m_temp,   0, 64);
-	memset(m_output, 0, 64);
-	memset(m_crypt,  0, 64);
-}
-
-//----------------------------------------------------------------------------
-
-void BlockCipher::setMode ( CipherMode m )
-{
-	switch(m)
-	{
-		case ECB:  m_pEncrypt = &BlockCipher::encrypt_ECB;  m_pDecrypt = &BlockCipher::decrypt_ECB;  break;
-		case ECBN: m_pEncrypt = &BlockCipher::encrypt_ECBN; m_pDecrypt = &BlockCipher::decrypt_ECBN; break;
-		case CBC:  m_pEncrypt = &BlockCipher::encrypt_CBC;  m_pDecrypt = &BlockCipher::decrypt_CBC;  break;
-		case CFB:  m_pEncrypt = &BlockCipher::encrypt_CFB;  m_pDecrypt = &BlockCipher::decrypt_CFB;  break;
-		case OFB:  m_pEncrypt = &BlockCipher::encrypt_OFB;  m_pDecrypt = &BlockCipher::decrypt_OFB;  break;
-		case PCBC: m_pEncrypt = &BlockCipher::encrypt_PCBC; m_pDecrypt = &BlockCipher::decrypt_PCBC; break;
-		case CTR:  m_pEncrypt = &BlockCipher::encrypt_CTR;  m_pDecrypt = &BlockCipher::decrypt_CTR;  break;
-
-		default:   assert(false); setMode(PCBC); break;
-	};
-}
-
-//----------------------------------------------------------------------------
-
-void BlockCipher::encrypt ( void * key, int keySize, void * plain, void * crypt, int size )
-{
-	clear();
-
-	uint8_t * in = (uint8_t*)plain;
-	uint8_t * out = (uint8_t*)crypt;
-
-	int blockSize = getBlockSize();
-	int blockCount = size / blockSize;
-
-	setKey(key,keySize);
-
-	for(m_blockIndex = 0; m_blockIndex < blockCount; m_blockIndex++)
-	{
-		copy(m_plain,in);
-
-		(this->*m_pEncrypt)();
-
-		copy(out,m_crypt);
-
-		in  += blockSize;
-		out += blockSize;
-	}
-}
-
-void BlockCipher::decrypt ( void * key, int keySize, void * crypt, void * plain, int size )
-{
-	clear();
-
-	uint8_t * in = (uint8_t*)crypt;
-	uint8_t * out = (uint8_t*)plain;
-
-	int blockSize = getBlockSize();
-	int blockCount = size / blockSize;
-
-	setKey(key,keySize);
-
-	for(m_blockIndex = 0; m_blockIndex < blockCount; m_blockIndex++)
-	{
-		copy(m_crypt,in);
-
-		(this->*m_pDecrypt)();
-
-		copy(out,m_plain);
-
-		in  += blockSize;
-		out += blockSize;
-	}
-}
-
-//----------------------------------------------------------------------------
-// Electronic Codebook
-
-void BlockCipher::encrypt_ECB ( void )
-{
-	copy(m_crypt,m_plain);
-
-	encrypt(m_crypt,0);
-}
-
-//----------
-
-void BlockCipher::decrypt_ECB ( void )
-{
-	copy(m_plain,m_crypt);
-
-	decrypt(m_plain,0);
-}
-
-//----------------------------------------------------------------------------
-// Electronic Codebook + Nonce
-
-void BlockCipher::encrypt_ECBN ( void )
-{
-	copy(m_crypt,m_plain);
-
-	encrypt(m_crypt,m_blockIndex);
-}
-
-//----------
-
-void BlockCipher::decrypt_ECBN ( void )
-{
-	copy(m_plain,m_crypt);
-
-	decrypt(m_plain,m_blockIndex);
-}
-
-//----------------------------------------------------------------------------
-// Cipher Block Chaining
-
-void BlockCipher::encrypt_CBC ( void )
-{
-	xor(m_temp,m_plain,m_input);
-
-	encrypt(m_temp,0);
-
-	copy(m_input,m_temp);
-	copy(m_crypt,m_temp);
-}
-
-//----------
-
-void BlockCipher::decrypt_CBC ( void )
-{
-	copy(m_temp,m_crypt);
-
-	decrypt(m_temp,0);
-
-	xor(m_plain,m_temp,m_output);
-	copy(m_output,m_crypt);
-}
-
-//----------------------------------------------------------------------------
-// Cipher Feedback
-
-void BlockCipher::encrypt_CFB ( void )
-{
-	copy(m_temp,m_input);
-
-	encrypt(m_temp,0);
-
-	xor(m_crypt,m_temp,m_plain);
-	copy(m_input,m_crypt);
-}
-
-//----------
-
-void BlockCipher::decrypt_CFB ( void )
-{
-	copy(m_temp,m_input);
-
-	encrypt(m_temp,0);
-
-	xor(m_plain,m_temp,m_crypt);
-	copy(m_input,m_crypt);
-}
-
-//----------------------------------------------------------------------------
-// Output Feedback
-
-void BlockCipher::encrypt_OFB ( void )
-{
-	copy(m_temp,m_input);
-
-	encrypt(m_temp,0);
-
-	xor(m_crypt,m_temp,m_plain);
-	copy(m_input,m_temp);
-}
-
-//----------
-
-void BlockCipher::decrypt_OFB( void )
-{
-	copy(m_temp,m_input);
-
-	encrypt(m_temp,0);
-
-	xor(m_plain,m_temp,m_crypt);
-	copy(m_input,m_temp);
-}
-
-//----------------------------------------------------------------------------
-// Propagating Cipher Block Chaining
-
-// P = M(i)
-// I = M(i-1)
-// C = C(i-1)
-
-void BlockCipher::encrypt_PCBC ( void )
-{
-	xor(m_temp,m_input,m_crypt);
-	xor(m_temp,m_temp,m_plain);
-	copy(m_input,m_plain);
-
-	encrypt(m_temp,0);
-
-	copy(m_crypt,m_temp);
-}
-
-//----------
-
-// P = M(i-1)
-// I = C(i-1)
-// C = C(i)
-
-void BlockCipher::decrypt_PCBC ( void )
-{
-	copy(m_temp,m_crypt);
-
-	decrypt(m_temp,0);
-
-	xor(m_plain,m_plain,m_temp);
-	xor(m_plain,m_plain,m_input);
-
-	copy(m_input,m_crypt);
-}
-
-//----------------------------------------------------------------------------
-// Counter mode
-
-void BlockCipher::encrypt_CTR ( void )
-{
-	*(int*)m_temp = m_blockIndex;
-
-	encrypt(m_temp,0);
-
-	xor(m_crypt,m_temp,m_plain);
-}
-
-//----------
-
-void BlockCipher::decrypt_CTR ( void )
-{
-	*(int*)m_temp = m_blockIndex;
-
-	encrypt(m_temp,0);
-
-	xor(m_plain,m_temp,m_crypt);
-}
-
-//----------------------------------------------------------------------------
-
-void BlockCipher::copy ( uint8_t * dst, const uint8_t * src )
-{
-	memcpy(dst,src,getBlockSize());
-}
-
-void BlockCipher::xor ( uint8_t * dst, const uint8_t * a, const uint8_t * b )
-{
-	int blockSize = getBlockSize();
-
-	for(int i = 0; i < blockSize; i++)
-	{
-		dst[i] = a[i] ^ b[i];
-	}
-}
-
-//----------------------------------------------------------------------------
diff --git a/BlockCipher.h b/BlockCipher.h
deleted file mode 100644
index c5c63ad..0000000
--- a/BlockCipher.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#pragma once
-#include "Cipher.h"
-#include "pstdint.h"
-
-//----------------------------------------------------------------------------
-
-class BlockCipher : public Cipher
-{
-public:
-
-	enum CipherMode
-	{
-		ECB,  // Electronic Codebook
-		ECBN, // Electronic Codebook + Nonce
-		CBC,  // Cipher block chaining
-		CFB,  // Cipher feedback
-		OFB,  // Output feedback
-		PCBC, // Propagating CBC
-		CTR,  // Counter
-		MAX = CTR,
-	};
-
-	//----------
-
-	BlockCipher ( void );
-	BlockCipher ( CipherMode mode );
-	virtual ~BlockCipher ( void );
-
-	virtual void clear ( void );
-
-	//----------
-	// Subclass interface
-
-	virtual int  getBlockSize ( void ) = 0;
-
-	virtual void setKey  ( void * k, int keySize ) = 0;
-
-	virtual void encrypt ( void * block, unsigned int nonce ) const = 0;
-	virtual void decrypt ( void * block, unsigned int nonce ) const = 0;
-
-	//----------
-	// Client interface
-
-	void setMode ( CipherMode m );
-
-	virtual void encrypt ( void * key, int keySize, void * plain, void * crypt, int size );
-	virtual void decrypt ( void * key, int keySize, void * crypt, void * plain, int size );
-
-	//----------
-
-private:
-
-	void encrypt_ECB  ( void );
-	void encrypt_ECBN ( void );
-	void encrypt_CBC  ( void );
-	void encrypt_CFB  ( void );
-	void encrypt_OFB  ( void );
-	void encrypt_PCBC ( void );
-	void encrypt_CTR  ( void );
-
-	void decrypt_ECB  ( void );
-	void decrypt_ECBN ( void );
-	void decrypt_CBC  ( void );
-	void decrypt_CFB  ( void );
-	void decrypt_OFB  ( void );
-	void decrypt_PCBC ( void );
-	void decrypt_CTR  ( void );
-
-	//----------
-
-	virtual void copy ( uint8_t * dst, const uint8_t * src );
-	virtual void xor  ( uint8_t * dst, const uint8_t * a, const uint8_t * b );
-
-	//----------
-
-	uint8_t  m_plain[64];
-	uint8_t  m_input[64];
-	uint8_t  m_temp[64];
-	uint8_t  m_output[64];
-	uint8_t  m_crypt[64];
-
-	int m_blockIndex;
-
-	typedef void (BlockCipher::*pFunc)(void);
-
-	pFunc m_pEncrypt;
-	pFunc m_pDecrypt;
-};
-
-//----------------------------------------------------------------------------
diff --git a/Cipher.cpp b/Cipher.cpp
deleted file mode 100644
index a1de5e6..0000000
--- a/Cipher.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "Cipher.h"
\ No newline at end of file
diff --git a/Cipher.h b/Cipher.h
deleted file mode 100644
index 5aa4155..0000000
--- a/Cipher.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-//----------------------------------------------------------------------------
-
-class Cipher
-{
-public:
-
-	Cipher ( void ) {}
-	virtual ~Cipher ( void ) {}
-
-	virtual void encrypt ( void * key, int keySize, void * plain, void * crypt, int size ) = 0;
-	virtual void decrypt ( void * key, int keySize, void * crypt, void * plain, int size ) = 0;
-};
-
-//----------------------------------------------------------------------------
diff --git a/DictionaryTest.cpp b/DictionaryTest.cpp
deleted file mode 100644
index 56ed6df..0000000
--- a/DictionaryTest.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "DictionaryTest.h"
-
-#include <intrin.h>
-
-#pragma warning(disable:4996) // fopen is unsafe
-
-
-wordlist g_words;
-int g_wordcount = 0;
-
-const char ** g_pwords = NULL;
-int * g_plengths = NULL;
-
-double g_dictoverhead = 0;
-
-//----------------------------------------------------------------------------
-
-void LoadWords ( void )
-{
-	FILE * f = fopen("allwords.txt","r");
-
-	char buffer[1024];
-
-	while(fgets(buffer,1024,f))
-	{
-		char * cursor = buffer + strlen(buffer);
-
-		while((*cursor == 0x0a) || (*cursor == 0))
-		{
-			*cursor = 0;
-			cursor--;
-		}
-
-		g_words.push_back(buffer);
-	}
-
-	fclose(f);
-
-	g_wordcount = (int)g_words.size();
-
-	printf("Loaded %d words\n",g_wordcount);
-
-	g_pwords = new const char*[g_wordcount];
-	g_plengths = new int[g_wordcount];
-
-	for(int i = 0; i < g_wordcount; i++)
-	{
-		g_pwords[i] = g_words[i].c_str();
-		g_plengths[i] = (int)g_words[i].size();
-	}
-}
-
-void DeleteWords ( void )
-{
-	delete [] g_pwords;
-	delete [] g_plengths;
-
-	g_words.clear();
-}
-
-//----------------------------------------------------------------------------
diff --git a/DictionaryTest.h b/DictionaryTest.h
deleted file mode 100644
index 2b82047..0000000
--- a/DictionaryTest.h
+++ /dev/null
@@ -1,119 +0,0 @@
-#pragma once
-
-#include "Types.h"
-#include "Stats.h" // for testkeylist_string 
-
-#include <map>
-
-void LoadWords ( void );
-void DeleteWords ( void );
-
-typedef std::vector<std::string> wordlist;
-
-extern wordlist g_words;
-extern int g_wordcount;
-extern const char ** g_pwords;
-extern int * g_plengths;
-
-//-----------------------------------------------------------------------------
-
-
-template< typename hashtype >
-double DictHashTest ( hashfunc<hashtype> hash )
-{
-	__int64 begin,end;
-
-	const int reps = 999;
-
-	double best = 1.0e90;
-
-	for(int i = 0; i < reps; i++)
-	{
-		begin = __rdtsc();
-
-		for(int i = 0; i < g_wordcount; i++)
-		{
-			const char * buffer = g_pwords[i];
-			const int len = g_plengths[i];
-
-			hash(buffer,len,0);
-		}
-
-		end = __rdtsc();
-	
-		double clocks = double(end-begin) / double(g_wordcount);
-
-		if(clocks < best) best = clocks;
-	}
-
-	return best;
-}
-
-//-----------------------------------------------------------------------------
-
-template< typename hashtype >
-void DumpCollisions ( hashfunc<hashtype> hash )
-{
-	printf("\nDumping collisions for seed 0 - \n\n");
-
-	typedef std::map<hashtype,std::vector<std::string>> hashmap;
-	hashmap hashes;
-
-	for(int i = 0; i < g_wordcount; i++)
-	{
-		hashtype h = hash(g_pwords[i],g_plengths[i],0);
-
-		hashes[h].push_back(g_pwords[i]);
-	}
-
-	int collcount = 0;
-
-	for(hashmap::iterator it = hashes.begin(); it != hashes.end(); it++)
-	{
-		hashtype hash = (*it).first;
-
-		std::vector<std::string> & strings = (*it).second;
-
-		if(strings.size() > 1)
-		{
-			collcount += (int)strings.size() - 1;
-
-			printf("0x%08x - ",hash);
-
-			for(int i = 0; i < (int)strings.size(); i++)
-			{
-				printf("%20s,",strings[i].c_str());
-			}
-
-			printf("\n");
-		}
-	}
-
-	printf("%d collisions\n",collcount);
-}
-
-//----------------------------------------------------------------------------
-
-template< typename hashtype >
-void DictionaryTest ( hashfunc<hashtype> hash )
-{
-	printf("Dictionary-based tests -\n");
-	printf("\n");
-
-	LoadWords();
-
-	double clocks = DictHashTest<hashtype>(hash);
-
-	printf("All words hashed in min %f clocks/word\n",clocks);
-	printf("\n");
-
-	printf("Testing dictionary stats\n");
-	testkeylist_string<hashtype>(hash,g_words,true,true);
-	printf("\n");
-
-	DumpCollisions(hash);
-
-	DeleteWords();
-}
-
-//-----------------------------------------------------------------------------
diff --git a/Diffusion.cpp b/Diffusion.cpp
deleted file mode 100644
index 6927daa..0000000
--- a/Diffusion.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-#include "Diffusion.h"
-
-#include "Types.h"
-
-#include <memory.h>
-
-//-----------------------------------------------------------------------------
-// check invertibility of diffusion matrix
-
-void TestDiffusionMatrix ( void )
-{
-	//int m[4] = { 3, 1, 1, 3 };
-
-	int tab[65536];
-
-	memset(tab,0,sizeof(tab));
-
-	for(int i = 0; i < 65536; i++)
-	{
-		uint8_t a1 = (uint8_t)i;
-		uint8_t a2 = (uint8_t)(i >> 8);
-
-		//uint8_t b1 = uint8_t(a1 * m[0]) + uint8_t(a2*m[1]);
-		//uint8_t b2 = uint8_t(a1 * m[2]) + uint8_t(a2*m[3]);
-
-		uint8_t b1 = a1;
-		uint8_t b2 = a2;
-
-		b1 += b2;
-		b2 += b1;
-
-		int index = (int(b1) << 8) + b2;
-
-		tab[index]++;
-	}
-
-	int missing = 0;
-
-	for(int i = 0; i < 65536; i++)
-	{
-		if(tab[i] == 0) missing++;
-	}
-
-	printf("missing - %d\n",missing);
-}
-
-//-----------------------------------------------------------------------------
-
-void add_row ( int m[16], int a, int b )
-{
-	for(int i = 0; i < 4; i++)
-	{
-		m[4*a+i] += m[4*b+i];
-	}
-}
-
-void sub_row ( int m[16], int a, int b )
-{
-	for(int i = 0; i < 4; i++)
-	{
-		m[4*a+i] -= m[4*b+i];
-	}
-}
-
-//-----------------------------------------------------------------------------
-// search through diffusion matrices computable in N operations, find ones
-// with a maximal number of odd terms
-
-bool check ( const int m[16], std::vector<int> & dst, std::vector<int> & src )
-{
-	static int best = 0;
-
-	int c = 0;
-	int s = 0;
-
-	if(abs(m[0]+m[4]+m[8]+m[12]) > 2) return false;
-	if(abs(m[1]+m[5]+m[9]+m[13]) > 2) return false;
-	if(abs(m[2]+m[6]+m[10]+m[14]) > 2) return false;
-	if(abs(m[3]+m[7]+m[11]+m[15]) > 2) return false;
-
-	for(int i = 0; i < 16; i++)
-	{
-		if(m[i] == 0) return false;
-
-		int d = abs(m[i]);
-
-		c += (d & 1);
-
-		if(m[i] < 0) s++;
-	}
-
-	if((c == 13) && (s == 8))
-	{
-		std::string g[4];
-
-		g[0] = "A";
-		g[1] = "B";
-		g[2] = "C";
-		g[3] = "D";
-
-		printf("----------\n");
-
-		for(int i = 0; i < (int)dst.size(); i++)
-		{
-			int d = dst[i];
-			int s = src[i];
-
-			std::string tmp;
-
-			tmp += g[d-1];
-
-			tmp += (s < 0) ? "-" : "+";
-
-			tmp += "(";
-			tmp += g[abs(s)-1];
-			tmp += ")";
-
-			g[d-1] = tmp;
-		}
-
-		printf("A : %s\n",g[0].c_str());
-		printf("B : %s\n",g[1].c_str());
-		printf("C : %s\n",g[2].c_str());
-		printf("D : %s\n",g[3].c_str());
-
-		for(int i = 0; i < (int)dst.size(); i++)
-		{
-			int d = dst[i];
-			int s = src[i];
-
-			if(s < 0)
-			{
-				printf("h[%1d] -= h[%1d];\n",d,-s);
-			}
-			else
-			{
-				printf("h[%1d] += h[%1d];\n",d,s);
-			}
-		}
-		printf("----------\n");
-	}
-
-	return c == 16;
-}
-
-bool difrecurse ( const int m[16], int depth, int maxdepth, int last, std::vector<int> & dst, std::vector<int> & src )
-{
-	if(depth == maxdepth)
-	{
-		return check(m,dst,src);
-	}
-
-	for(int i = 0; i < 4; i++)
-	{
-		dst.push_back(i+1);
-
-		for(int j = 0; j < 4; j++)
-		{
-			if(i == j) continue;
-
-			if(i == last) continue;
-			if(j == last) continue;
-
-			int n[16];
-
-			memcpy(n,m,sizeof(n));
-
-			src.push_back(j+1);
-			add_row(n,i,j);
-			difrecurse(n,depth+1,maxdepth,i,dst,src);
-			sub_row(n,i,j);
-			src.pop_back();
-
-			src.push_back(-(j+1));
-			sub_row(n,i,j);
-			difrecurse(n,depth+1,maxdepth,i,dst,src);
-			add_row(n,i,j);
-			src.pop_back();
-		}
-
-		dst.pop_back();
-	}
-
-	return false;
-}
-
-void findDiffuse ( void )
-{
-	int m[16];
-
-	memset(m,0,sizeof(m));
-
-	m[4*0 + 0] = 1;
-	m[4*1 + 1] = 1;
-	m[4*2 + 2] = 1;
-	m[4*3 + 3] = 1;
-
-	std::vector<int> dst;
-	std::vector<int> src;
-
-	difrecurse(m,0,7,-1,dst,src);
-	printf("\n");
-}
-
diff --git a/Diffusion.h b/Diffusion.h
deleted file mode 100644
index 7b9637e..0000000
--- a/Diffusion.h
+++ /dev/null
@@ -1 +0,0 @@
-#pragma once
\ No newline at end of file
diff --git a/FWTransform.cpp b/FWTransform.cpp
deleted file mode 100644
index cf9ed80..0000000
--- a/FWTransform.cpp
+++ /dev/null
@@ -1,443 +0,0 @@
-#include "FWTransform.h"
-
-#include "Random.h"
-
-// FWT1/2/3/4 are tested up to 2^16 against a brute-force implementation.
-
-//----------------------------------------------------------------------------
-
-double test_linear_approximation ( mixfunc<uint32_t> f, uint32_t l, uint32_t mask, int64_t size )
-{
-	int64_t d = 0;
-
-	for(int64_t i = 0; i < size; i++)
-	{
-		uint32_t x = (uint32_t)i;
-		uint32_t b1 = parity( f(x) & mask );
-		uint32_t b2 = parity( x & l );
-
-		d += (b1 ^ b2);
-	}
-
-	return double(d) / double(size);
-}
-
-//----------------------------------------------------------------------------
-// In-place, non-recursive FWT transform. Reference implementation.
-
-void FWT1 ( int * v, int64_t count )
-{
-	for(int64_t width = 2; width <= count; width *= 2)
-	{
-		int64_t blocks = count / width;
-
-		for(int64_t i = 0; i < blocks; i++)
-		{
-			int64_t ia = i * width;
-			int64_t ib = ia + (width/2);
-
-			for(int64_t j = 0; j < (width/2); j++)
-			{
-				int a = v[ia];
-				int b = v[ib];
-				
-				v[ia++] = a + b;
-				v[ib++] = a - b;
-			}
-		}
-	}
-}
-
-//-----------------------------------------------------------------------------
-// recursive, but fall back to non-recursive for tables of 4k or smaler
-
-// (this proved to be fastest)
-
-void FWT2 ( int * v, int64_t count )
-{
-	if(count <= 4*1024) return FWT1(v,(int32_t)count);
-
-	int64_t c = count/2;
-
-	for(int64_t i = 0; i < c; i++) 
-	{
-		int a = v[i];
-		int b = v[i+c];
-		
-		v[i] = a + b;
-		v[i+c] = a - b;
-	}
-
-	if(count > 2)
-	{
-		FWT2(v,c);
-		FWT2(v+c,c);
-	}
-}
-
-//-----------------------------------------------------------------------------
-// fully recursive (slow)
-
-void FWT3 ( int * v, int64_t count )
-{
-	int64_t c = count/2;
-
-	for(int64_t i = 0; i < c; i++) 
-	{
-		int a = v[i];
-		int b = v[i+c];
-		
-		v[i] = a + b;
-		v[i+c] = a - b;
-	}
-
-	if(count > 2)
-	{
-		FWT3(v,c);
-		FWT3(v+c,c);
-	}
-}
-
-//----------------------------------------------------------------------------
-// some other method
-
-void FWT4 ( int * data, const int64_t count )
-{
-	int nbits = 0;
-
-	for(int64_t c = count; c; c >>= 1) nbits++;
-
-	for (int i = 0; i < nbits; i++)
-	{
-		int64_t block = (int64_t(1) << i);
-		int64_t half  = (int64_t(1) << (i-1));
-
-		for (int64_t j = 0; j < count; j += block)
-		{
-			for (int k = 0; k < half; ++k)
-			{
-				int64_t ia = j+k;
-				int64_t ib = j+k+half;
-
-				int a = data[ia];
-				int b = data[ib];
-
-				data[ia] = a+b;
-				data[ib] = a-b;
-			}
-		}
-	}
-}
-
-//----------------------------------------------------------------------------
-// Evaluate a single point in the FWT hierarchy
-
-/*
-int FWTPoint ( mixfunc<uint32_t> f, int level, int nbits, uint32_t y )
-{
-	if(level == 0)
-	{
-		return f(y);
-	}
-	else
-	{
-		uint32_t mask = 1 << (nbits - level);
-
-		if(y & mask)
-		{
-			return 
-		}
-	}
-}
-*/
-
-
-//----------------------------------------------------------------------------
-// compute 2 tiers down into FWT, so we can break a table up into 4 chunks
-
-int computeWalsh2 ( mixfunc<uint32_t> f, int64_t y, int bits, uint32_t mask )
-{
-	uint32_t size1 = 1 << (bits-1);
-	uint32_t size2 = 1 << (bits-2);
-
-	int a = parity(f((uint32_t)y        ) & mask) ? 1 : -1;
-	int b = parity(f((uint32_t)y ^ size2) & mask) ? 1 : -1;
-
-	int ab = (y & size2) ? b-a : a+b;
-
-	int c = parity(f((uint32_t)y ^ size1        ) & mask) ? 1 : -1;
-	int d = parity(f((uint32_t)y ^ size1 ^ size2) & mask) ? 1 : -1;
-
-	int cd = (y & size2) ? d-c : c+d;
-
-	int e = (y & size1) ? cd-ab : ab+cd;
-
-	return e;
-}
-
-int computeWalsh2 ( int * func, int64_t y, int bits )
-{
-	uint32_t size1 = 1 << (bits-1);
-	uint32_t size2 = 1 << (bits-2);
-
-	int a = parity((uint32_t)func[(uint32_t)y        ]) ? 1 : -1;
-	int b = parity((uint32_t)func[(uint32_t)y ^ size2]) ? 1 : -1;
-
-	int ab = (y & size2) ? b-a : a+b;
-
-	int c = parity((uint32_t)func[(uint32_t)y ^ size1        ]) ? 1 : -1;
-	int d = parity((uint32_t)func[(uint32_t)y ^ size1 ^ size2]) ? 1 : -1;
-
-	int cd = (y & size2) ? d-c : c+d;
-
-	int e = (y & size1) ? cd-ab : ab+cd;
-
-	return e;
-}
-
-//----------------------------------------------------------------------------
-// this version computes the entire table at once - needs 16 gigs of RAM for
-// 32-bit FWT (!!!)
-
-void find_linear_approximation_walsh ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias )
-{
-	// create table
-
-	const int64_t count = int64_t(1) << inbits;
-
-	int * table = new int[(int)count];
-
-	// fill table
-
-	for(int64_t i = 0; i < count; i++)
-	{
-		table[i] = parity(f((uint32_t)i) & mask) ? 1 : -1;
-	}
-
-	// apply walsh transform
-
-	FWT1(table,count);
-
-	// find maximum value in transformed table, which corresponds
-	// to closest linear approximation to F
-
-	outL = 0;
-	outBias = 0;
-
-	for(unsigned int l = 0; l < count; l++)
-	{
-		if(abs(table[l]) > outBias)
-		{
-			outBias = abs(table[l]);
-			outL = l;
-		}
-	}
-
-	delete [] table;
-}
-
-//-----------------------------------------------------------------------------
-// this version breaks the task into 4 pieces, or 4 gigs of RAM for 32-bit FWT
-
-void find_linear_approximation_walsh2 ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias )
-{
-	const int64_t count = int64_t(1) << inbits;
- 	const int64_t stride = count/4;
-
-	int * table2 = new int[(int)stride];
-
-	uint32_t worstL = 0;
-	int64_t worstBias = 0;
-
-	for(int64_t j = 0; j < count; j += stride)
-	{
-		printf(".");
-
-		for(int i = 0; i < stride; i++)
-		{
-			table2[i] = computeWalsh2(f,i+j,inbits,mask);
-		}
-
-		FWT2(table2,stride);
-
-		for(int64_t l = 0; l < stride; l++)
-		{
-			if(abs(table2[l]) > worstBias)
-			{
-				worstBias = abs(table2[l]);
-				worstL = uint32_t(l)+uint32_t(j);
-			}
-		}
-	}
-
-	outBias = worstBias/2;
-	outL = worstL;
-
-	delete [] table2;
-}
-
-
-//----------------------------------------------------------------------------
-
-void printtab ( int * tab, int size )
-{
-	for(int j = 0; j < 16; j++)
-	{
-		printf("[");
-		for(int i = 0; i < (size/16); i++)
-		{
-			printf("%3d ",tab[j*16+i]);
-		}
-		printf("]\n");
-	}
-}
-
-void comparetab ( int * tabA, int * tabB, int size )
-{
-	bool fail = false;
-
-	for(int i = 0; i < size; i++)
-	{
-		if(tabA[i] != tabB[i])
-		{
-			fail = true;
-			break;
-		}
-	}
-
-	printf(fail ? "X" : "-");
-}
-
-void testFWT ( void )
-{
-	const int bits = 12;
-	const int size = (1 << bits);
-
-	int * func = new int[size];
-	int * resultA = new int[size];
-	int * resultB = new int[size];
-
-	for(int rep = 0; rep < 1; rep++)
-	{
-		// Generate a random boolean function
-
-		for(int i = 0; i < size; i++)
-		{
-			func[i] = rand_u32() & 1;
-
-			//func[i] = (i ^ (i >> 2)) & 1;
-		}
-
-		//printf("Input boolean function -\n");
-		//printtab(func);
-		//printf("\n");
-
-		// Test against all 256 linear functions
-
-
-		memset(resultA,0,size * sizeof(int));
-
-		//printf("Result - \n");
-		for(uint32_t linfunc = 0; linfunc < size; linfunc++)
-		{
-			resultA[linfunc] = 0;
-
-			for(uint32_t k = 0; k < size; k++)
-			{
-				int b1 = func[k];
-				int b2 = parity( k & linfunc );
-
-				if(b1 == b2) resultA[linfunc]++;
-			}
-
-			resultA[linfunc] -= (size/2);
-		}
-
-		//printtab(resultA);
-		//printf("\n");
-
-
-		// Test with FWTs
-
-		for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;
-		FWT1(resultB,size);
-		for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
-		comparetab(resultA,resultB,size);
-
-		for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;
-		FWT2(resultB,size);
-		for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
-		comparetab(resultA,resultB,size);
-
-		for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;
-		FWT3(resultB,size);
-		for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
-		comparetab(resultA,resultB,size);
-
-		// Test with subdiv-by-4
-
-		{
-			for(int i = 0; i < size; i++) resultB[i] = (func[i] == 0) ? -1 : 1;
-
-			const int64_t count = int64_t(1) << bits;
-			const int64_t stride = count/4;
-
-			for(int64_t j = 0; j < count; j += stride)
-			{
-				for(int i = 0; i < stride; i++)
-				{
-					resultB[i+j] = computeWalsh2(func,i+j,bits);
-				}
-
-				FWT2(&resultB[j],stride);
-			}
-
-			for(int i = 0; i < size; i++) resultB[i] = -resultB[i]/2;
-			comparetab(resultA,resultB,size);
-		}
-
-		printf(" ");
-	}
-
-	delete [] func;
-	delete [] resultA;
-	delete [] resultB;
-}
-
-//-----------------------------------------------------------------------------
-// Compare known-good implementation against optimized implementation
-
-void testFWT2 ( void )
-{
-	const int bits = 24;
-	const int size = (1 << bits);
-
-	int * func = new int[size];
-	int * resultA = new int[size];
-	int * resultB = new int[size];
-
-	for(int rep = 0; rep < 4; rep++)
-	{
-		// Generate a random boolean function
-
-		for(int i = 0; i < size; i++)
-		{
-			func[i] = rand_u32() & 1;
-		}
-
-		// Test with FWTs
-
-		for(int i = 0; i < size; i++) resultA[i] = resultB[i] = (func[i] == 0) ? -1 : 1;
-
-		FWT1(resultA,size);
-		FWT4(resultB,size);
-		
-		comparetab(resultA,resultB,size);
-
-		printf(" ");
-	}
-
-	delete [] func;
-	delete [] resultA;
-	delete [] resultB;
-}
\ No newline at end of file
diff --git a/FWTransform.h b/FWTransform.h
deleted file mode 100644
index 6979cbd..0000000
--- a/FWTransform.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#include "Types.h"
-#include "Bitvec.h"
-
-// Fast Walsh transform stuff. Used for determining how close an arbitrary
-// boolean function is to the set of all possible linear functions.
-
-// Given an arbitrary N-bit mixing function mix(x), we can generate a boolean
-// function out of it by choosing a N-bit mask and computing
-// parity(mix(x) & mask).
-
-// If the mask has 1 bit set, this is equivalent to selecting a column of
-// output bits from the mixing function to test.
diff --git a/Hamming.cpp b/Hamming.cpp
deleted file mode 100644
index e00e5b7..0000000
--- a/Hamming.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#include "Hamming.h"
-
-#include "Types.h"
-#include "Random.h"
-
-// Code to measure the hamming weight of mix functions, etc.
-
-// (documentation needed)
-
-// If I change N bits of the input, how many bits of the output change on average?
-
-
-//-----------------------------------------------------------------------------
-// compute table of differential hamming weight for input differentials
-// up to 5 bits
-
-void hamtest ( uint32_t (*mix)(uint32_t), uint32_t d, const int reps, double out[33] )
-{
-	double temp[33];
-
-	memset(temp,0,sizeof(temp));
-
-	for(int i = 0; i < reps; i++)
-	{
-		uint32_t a = rand_u32();
-		uint32_t b = a ^ d;
-
-		uint32_t ma = mix(a);
-		uint32_t mb = mix(b);
-
-		uint32_t md = ma ^ mb;
-
-		temp[popcount(md)] += 1.0 / double(reps);
-	}
-
-	for(int i = 0; i < 33; i++)
-	{
-		if(temp[i] > out[i]) out[i] = temp[i];
-	}
-}
-
-void SparseDiffHamming32 ( uint32_t (*mix)(uint32_t), double accum[33] )
-{
-	uint32_t d = 0;
-
-	memset(accum,0,sizeof(accum));
-
-	//const double c32_1 = 32;
-	//const double c32_2 = 496;
-	//const double c32_3 = 4960;
-	//const double c32_4 = 35960;
-	//const double c32_5 = 201376;
-	//const double c32[5] = { c32_1, c32_2, c32_3, c32_4, c32_5 };
-
-	const int reps = 1000;
-
-	double temp[6][33];
-
-	for(int i = 0; i < 6; i++)
-	{
-		memset(temp[i],0,33 * sizeof(double));
-	}
-
-	for(int i = 0; i < 32; i++)
-	{
-		d ^= (1 << i);
-		hamtest(mix,d,reps,temp[1]);
-
-		for(int j = i+1; j < 32; j++)
-		{
-			d ^= (1 << j);
-			hamtest(mix,d,reps,temp[2]);
-
-			for(int k = j+1; k < 32; k++)
-			{
-				d ^= (1 << k);
-				hamtest(mix,d,reps,temp[3]);
-
-				for(int l = k+1; l < 32; l++)
-				{
-					d ^= (1 << l);
-					hamtest(mix,d,reps,temp[4]);
-
-					//for(int m = l+1; m < 32; m++)
-					//{
-					//	d ^= (1 << m);
-					//	hamtest(mix,d,reps,temp[5]);
-					//
-					//	d ^= (1 << m);
-					//}
-
-					d ^= (1 << l);
-				}
-				d ^= (1 << k);
-			}
-			d ^= (1 << j);
-		}
-		d ^= (1 << i);
-	}
-
-	for(int i = 0; i < 33; i++)
-	{
-		accum[i] = 0;
-	}
-
-	for(int j = 0; j < 33; j++)
-	{
-		for(int i = 0; i < 6; i++)
-		{
-			if((i+j) >= 33) continue;
-
-			double t = temp[i][j];
-
-			if(t > accum[i+j]) accum[i+j] = t;
-		}
-	}
-
-	for(int i = 0; i < 33; i++)
-	{
-		accum[i] *= 100;
-	}
-}
-
-bool hamless ( int count, double * a, double * b )
-{
-	for(int i = 0; i < count; i++)
-	{
-		if(a[i] < b[i]) return true;
-		if(a[i] > b[i]) return false;
-	}
-
-	return false;
-}
diff --git a/Hamming.h b/Hamming.h
deleted file mode 100644
index a372925..0000000
--- a/Hamming.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-#include "Types.h"
-
-void SparseDiffHamming32 ( uint32_t (*mix)(uint32_t), double accum[33] );
-bool hamless ( int count, double * a, double * b );
\ No newline at end of file
diff --git a/Junk.cpp b/Junk.cpp
deleted file mode 100644
index 62e700c..0000000
--- a/Junk.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "Junk.h"
-
-#include "Random.h"
-
-//-----------------------------------------------------------------------------
-// Given a 64->32 bit compression function and a set of differentials, compute
-// the number of collisions
-
-typedef uint32_t (*pfCompress32) ( uint64_t x );
-
-int TestCompress ( pfCompress32 comp, std::vector<uint64_t> & diffs, const int reps )
-{
-	int total = 0;
-
-	for(int j = 0; j < (int)diffs.size(); j++)
-	{
-		uint64_t d = diffs[j];
-
-		int collisions = 0;
-
-		for(int i = 0; i < reps; i++)
-		{
-			uint64_t a = rand_u64();
-			uint64_t b = a ^ d;
-
-			uint32_t ca = comp(a);
-			uint32_t cb = comp(b);
-
-			if(ca == cb) collisions++;
-		}
-
-		if(collisions > 1) total += collisions;
-	}
-
-	return total;
-}
-
-//-----------------------------------------------------------------------------
diff --git a/Junk.h b/Junk.h
deleted file mode 100644
index a4fc5fd..0000000
--- a/Junk.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#include "Types.h"
-
-//-----------------------------------------------------------------------------
-
-template < typename mixtype >
-void calcMixBias ( mixtype (*mix)(mixtype), std::vector<int>& bins, int reps )
-{
-	const int inbits  = sizeof(mixtype) * 8;
-	const int outbits = sizeof(mixtype) * 8;
-
-	mixtype K,A,B,C;
-
-	for(int irep = 0; irep < reps; irep++)
-	{
-		rand_t(K);
-
-		A = mix(K);
-
-		for(int iBit = 0; iBit < inbits; iBit++)
-		{
-			B = mix(K ^ (mixtype(1) << iBit));
-
-			C = A ^ B;
-
-			for(int iOut = 0; iOut < outbits; iOut++)
-			{
-				bins[(iBit*outbits) + iOut] += (C >> iOut) & 1;
-			}
-		}
-	}
-}
-
-//----------
-
-template < typename mixtype >
-double calcMixBias ( mixtype (*mix)(mixtype), int reps )
-{
-	const int bits = sizeof(mixtype) * 8;
-	std::vector<int> bins(bits*bits);
-
-	calcMixBias<mixtype>(mix,bins,reps);
-
-	return maxBias(bins,reps);
-}
-
-//-----------------------------------------------------------------------------
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index 0c553f0..ab2b022 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -319,50 +319,6 @@
 	<References>
 	</References>
 	<Files>
-		<Filter
-			Name="Crypto"
-			>
-			<File
-				RelativePath=".\BlockCipher.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\BlockCipher.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Cipher.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Cipher.h"
-				>
-			</File>
-			<File
-				RelativePath=".\StreamCipher.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\StreamCipher.h"
-				>
-			</File>
-			<File
-				RelativePath=".\TEA.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\TEA.h"
-				>
-			</File>
-			<File
-				RelativePath=".\XTEA.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\XTEA.h"
-				>
-			</File>
-		</Filter>
 		<Filter
 			Name="Hashes"
 			>
@@ -446,14 +402,6 @@
 				RelativePath=".\CycleTest.h"
 				>
 			</File>
-			<File
-				RelativePath=".\DictionaryTest.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\DictionaryTest.h"
-				>
-			</File>
 			<File
 				RelativePath=".\DifferentialTest.cpp"
 				>
@@ -506,10 +454,6 @@
 				RelativePath=".\Random.h"
 				>
 			</File>
-			<File
-				RelativePath=".\simplex.cpp"
-				>
-			</File>
 			<File
 				RelativePath=".\Stats.cpp"
 				>
@@ -527,50 +471,6 @@
 				>
 			</File>
 		</Filter>
-		<Filter
-			Name="Junk"
-			>
-			<File
-				RelativePath=".\Diffusion.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Diffusion.h"
-				>
-			</File>
-			<File
-				RelativePath=".\FWTransform.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\FWTransform.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Hamming.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Hamming.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Junk.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Junk.h"
-				>
-			</File>
-			<File
-				RelativePath=".\SimAnneal.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\SimAnneal.h"
-				>
-			</File>
-		</Filter>
 		<File
 			RelativePath=".\main.cpp"
 			>
@@ -579,10 +479,6 @@
 			RelativePath=".\pstdint.h"
 			>
 		</File>
-		<File
-			RelativePath=".\scratch.cpp"
-			>
-		</File>
 	</Files>
 	<Globals>
 	</Globals>
diff --git a/SimAnneal.cpp b/SimAnneal.cpp
deleted file mode 100644
index 0096598..0000000
--- a/SimAnneal.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-#include "SimAnneal.h"
-
-#include "Types.h"
-#include "Random.h"
-
-//-----------------------------------------------------------------------------
-// Pseudo-simulated-annealing
-
-double SimAnneal ( void * block, int len, pfFitness fit, pfDump dump, int nFlip, int reps )
-{
-	double baseScore = fit(block,len);
-	double tempScore = 0;
-	double bestScore = 0;
-
-	uint8_t * baseBlock = new uint8_t[len];
-	uint8_t * tempBlock = new uint8_t[len];
-	uint8_t * bestBlock = new uint8_t[len];
-
-	memcpy(baseBlock,block,len);
-	memcpy(tempBlock,block,len);
-	memcpy(bestBlock,block,len);
-
-	while(nFlip)
-	{
-		printf("fit - %f, bits - %2d, dump - ",baseScore,nFlip);
-
-		dump(baseBlock,len);
-
-		bestScore = baseScore;
-
-		if(nFlip == 1)
-		{
-			for(int i = 0; i < len*8; i++)
-			{
-				printf(".");
-
-				memcpy(tempBlock,baseBlock,len);
-				flipbit(tempBlock,len,i);
-
-				tempScore = fit(tempBlock,len);
-
-				if(tempScore > bestScore)
-				{
-					bestScore = tempScore;
-					memcpy(bestBlock,tempBlock,len);
-					break;
-				}
-			}
-		}
-		else
-		{
-			for(int i = 0; i < reps; i++)
-			{
-				//if(i % (reps/10) == 0) printf(".");
-				printf(".");
-
-				memcpy(tempBlock,baseBlock,len);
-
-				for(int i = 0; i < nFlip; i++)
-				{
-					flipbit( tempBlock, len, rand_u32() % (len*8) );
-				}
-
-				tempScore = fit(tempBlock,len);
-
-				if(tempScore > bestScore)
-				{
-					bestScore = tempScore;
-					memcpy(bestBlock,tempBlock,len);
-					break;
-				}
-			}
-		}
-
-		printf("\n");
-
-		// If we found a better solution, expand space starting from that solution
-		// Otherwise, shrink space around previous best
-
-		if(bestScore > baseScore)
-		{
-			memcpy(baseBlock,bestBlock,len);
-			baseScore = bestScore;
-
-			nFlip++;
-		}
-		else
-		{
-			nFlip--;
-		}
-	}
-
-	memcpy(block,baseBlock,len);
-	return baseScore;
-}
-
-
diff --git a/SimAnneal.h b/SimAnneal.h
deleted file mode 100644
index 1670bbe..0000000
--- a/SimAnneal.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-typedef double (*pfFitness) ( void * block, int len );
-typedef void   (*pfDump)    ( void * block, int len );
-
-double SimAnneal ( void * block, int len, pfFitness fit, pfDump dump, int nFlip, int reps );
\ No newline at end of file
diff --git a/StreamCipher.cpp b/StreamCipher.cpp
deleted file mode 100644
index bf9f620..0000000
--- a/StreamCipher.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "StreamCipher.h"
-
-//----------------------------------------------------------------------------
-
-StreamCipher::StreamCipher ( void )
-{
-}
-
-StreamCipher::~StreamCipher ( void )
-{
-}
-
-//----------------------------------------------------------------------------
diff --git a/StreamCipher.h b/StreamCipher.h
deleted file mode 100644
index b78e4db..0000000
--- a/StreamCipher.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-#include "Cipher.h"
-
-//----------------------------------------------------------------------------
-
-class StreamCipher : public Cipher
-{
-public:
-
-	StreamCipher ( void );
-	virtual ~StreamCipher ( void );
-
-	virtual void encrypt ( void * k, int keySize, void * p, void * c, int size ) = 0;
-	virtual void decrypt ( void * k, int keySize, void * c, void * p, int size ) = 0;
-};
-
-//----------------------------------------------------------------------------
diff --git a/TEA.cpp b/TEA.cpp
deleted file mode 100644
index a84d688..0000000
--- a/TEA.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "TEA.h"
-
-#include <memory.h>
-#include <algorithm>
-
-// The TEA algorithm is public domain
-
-//-----------------------------------------------------------------------------
-
-void TEACipher::setKey ( void * key, int keySize )
-{
-	memset(m_key,0,16);
-	memcpy(m_key,key,std::min(keySize,16));
-}
-
-//----------------------------------------------------------------------------
-
-void TEACipher::encrypt ( void * block, unsigned int /*nonce*/ ) const
-{
-	unsigned int * v = (unsigned int*)block;
-	unsigned int * k = (unsigned int*)m_key;
-
-	unsigned int sum   = 0;
-	unsigned int delta = 0x9E3779B9;
-
-	for( int i = 0; i < 32; i++ )
-	{
-		sum += delta;
-		v[0] += ((v[1]<<4) + k[0]) ^ (v[1] + sum) ^ ((v[1]>>5) + k[1]);
-		v[1] += ((v[0]<<4) + k[2]) ^ (v[0] + sum) ^ ((v[0]>>5) + k[3]);
-	}
-}
-
-//----------
-
-void TEACipher::decrypt ( void * block, unsigned int /*nonce*/ ) const
-{
-	unsigned int * v = (unsigned int*)block;
-	unsigned int * k = (unsigned int*)m_key;
-
-	unsigned int sum   = 0xC6EF3720;
-	unsigned int delta = 0x9E3779B9;
-
-	for( int i = 0; i < 32; i++ )
-	{
-		v[1] -= ((v[0]<<4) + k[2]) ^ (v[0] + sum) ^ ((v[0]>>5) + k[3]);
-		v[0] -= ((v[1]<<4) + k[0]) ^ (v[1] + sum) ^ ((v[1]>>5) + k[1]);
-		sum -= delta;
-	}
-}
-
-//----------------------------------------------------------------------------
diff --git a/TEA.h b/TEA.h
deleted file mode 100644
index fff63f1..0000000
--- a/TEA.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-
-#include "BlockCipher.h"
-
-//----------------------------------------------------------------------------
-
-class TEACipher : public BlockCipher
-{
-public:
-
-	int getBlockSize ( void ) { return 8; }
-
-	void setKey  ( void * key, int keySize );
-
-	void encrypt ( void * block, unsigned int nonce ) const;
-	void decrypt ( void * block, unsigned int nonce ) const;
-
-protected:
-
-	uint32_t m_key[4];
-};
-
-//----------------------------------------------------------------------------
diff --git a/Tests.h b/Tests.h
index 975b454..05486af 100644
--- a/Tests.h
+++ b/Tests.h
@@ -9,7 +9,6 @@
 #include "AvalancheTest.h"
 #include "CycleTest.h"
 #include "DifferentialTest.h"
-#include "DictionaryTest.h"
 
 //-----------------------------------------------------------------------------
 
diff --git a/XTEA.cpp b/XTEA.cpp
deleted file mode 100644
index 3ec3591..0000000
--- a/XTEA.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-#include "XTEA.h"
-
-#include <algorithm>
-
-static const int g_rounds = 64;
-
-// The XTEA and BTEA algorithms are public domain
-
-//----------------------------------------------------------------------------
-
-void XTEACipher::setKey ( void * key, int keySize )
-{
-	memset(m_key,0,16);
-	memcpy(m_key,key,std::min(keySize,16));
-}
-
-//----------------------------------------------------------------------------
-
-void XTEACipher::encrypt ( void * block, unsigned int nonce ) const
-{
-	uint32_t * v = (uint32_t*)block;
-	uint32_t * k = (uint32_t*)m_key;
-
-	uint32_t delta = 0x9E3779B9;
-	uint32_t sum = 0;
-
-	v[0] ^= nonce;
-
-	for(int i = 0; i < g_rounds; i++)
-	{
-		v[0] += (((v[1] << 4) ^ (v[1] >> 5)) + v[1]) ^ (sum + k[sum & 3]);
-	    
-		sum += delta;
-	    
-		v[1] += (((v[0] << 4) ^ (v[0] >> 5)) + v[0]) ^ (sum + k[(sum>>11) & 3]);
-	}
-}
-
-//----------
-
-void XTEACipher::decrypt ( void * block, unsigned int nonce ) const
-{
-	uint32_t * v = (uint32_t*)block;
-	uint32_t * k = (uint32_t*)m_key;
-
-	uint32_t delta = 0x9E3779B9;
-	uint32_t sum = delta * g_rounds;
-
-	for(int i = 0; i < g_rounds; i++)
-	{
-		v[1] -= (((v[0] << 4) ^ (v[0] >> 5)) + v[0]) ^ (sum + k[(sum>>11) & 3]);
-	    
-		sum -= delta;
-	    
-		v[0] -= (((v[1] << 4) ^ (v[1] >> 5)) + v[1]) ^ (sum + k[sum & 3]);
-	}
-
-	v[0] ^= nonce;
-}
-
-//----------------------------------------------------------------------------
-
-#define DELTA 0x9e3779b9
-#define MX ((z>>5^y<<2) + (y>>3^z<<4)) ^ ((sum^y) + (k[(p&3)^e] ^ z));
-
-void btea ( unsigned int *v, int n, unsigned int const k[4]) 
-{
-	const int rounds = 6 + (52/n);
-	unsigned int sum = 0;
-
-	unsigned int y = 0;
-	unsigned int z = v[n-1];
-
-	for(int round = 0; round < rounds; round++)
-	{
-		sum += DELTA;
-		unsigned int e = (sum >> 2) & 3;
-
-		int p;
-
-		for( p=0; p < n-1; p++ )
-		{
-			y = v[p+1];
-			z = v[p] += MX;
-		}
-		
-		y = v[0];
-		z = v[n-1] += MX;
-	} 
-}
-
-void btea_decrypt ( unsigned int *v, int n, unsigned int const k[4]) 
-{
-	const int rounds = 6 + (52/n);
-	unsigned int sum = rounds*DELTA;
-
-	unsigned int y = v[0];
-	unsigned int z = 0;
-
-	for(int round = 0; round < rounds; round++)
-	{
-		unsigned int e = (sum >> 2) & 3;
-		
-		int p;
-
-		for( p = n-1; p > 0; p-- )
-		{
-			z = v[p-1];
-			y = v[p] -= MX;
-		}
-		
-		z = v[n-1];
-		y = v[0] -= MX;
-
-		sum -= DELTA;
-	} 
-}
-
-//----------------------------------------------------------------------------
diff --git a/XTEA.h b/XTEA.h
deleted file mode 100644
index 770248f..0000000
--- a/XTEA.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-
-#include "BlockCipher.h"
-
-//----------------------------------------------------------------------------
-
-class XTEACipher : public BlockCipher
-{
-public:
-
-	int getBlockSize ( void ) { return 8; }
-
-	void setKey  ( void * key, int keySize );
-
-	void encrypt ( void * block, unsigned int nonce ) const;
-	void decrypt ( void * block, unsigned int nonce ) const;
-
-protected:
-
-	uint32_t m_key[4];
-};
-
-//----------------------------------------------------------------------------
diff --git a/main.cpp b/main.cpp
index 658f668..1f6e136 100644
--- a/main.cpp
+++ b/main.cpp
@@ -56,7 +56,6 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 
 	SparseKeyTest(hash,false);
 
-	//DictionaryTest(hash);
 	//BitrangeKeysetTest(hash,false);
 	//TextKeyTest(hash.m_hash);
 }
@@ -71,14 +70,6 @@ void main ( void )
 
 	int a = clock();
 
-#if 0
-
-	optimize_fmix64();
-
-	//scratchmain();
-
-#else
-
 	//----------
 
 	//test<uint32_t>  ( md5_32,  "MD5, first 32 bits" );
@@ -96,8 +87,6 @@ void main ( void )
 
 	//test<uint32_t>  ( MurmurHash3x64_32,  "MurmurHash3 32-bit" );
 
-#endif
-
 	int b = clock();
 
 	printf("time %d\n",b-a);
diff --git a/scratch.cpp b/scratch.cpp
deleted file mode 100644
index 0043966..0000000
--- a/scratch.cpp
+++ /dev/null
@@ -1,823 +0,0 @@
-#include <stdio.h>
-#include <tchar.h>
-
-#include "Types.h"
-#include "Stats.h"
-#include "Tests.h"
-#include "Hamming.h"
-#include "Junk.h"
-#include "SimAnneal.h"
-
-#include <vector>
-#include <set>
-#include <map>
-#include <math.h>
-#include <intrin.h>
-
-#pragma warning(disable : 4702)
-
-//-----------------------------------------------------------------------------
-
-template < int nbits > 
-void printkey ( Blob<nbits> & k )
-{
-	int nbytes = nbits/8;
-
-	printf("{");
-
-	uint8_t * d = (uint8_t*)&k;
-
-	for(int i = 0; i < nbytes; i++)
-	{
-		printf("0x%02x,",d[i]);
-	}
-	printf("};\n");
-}
-
-
-//-----------------------------------------------------------------------------
-// Test code for Murmur3's mix function
-
-/*
-uint32_t i1 = 0x95543787;
-uint32_t i2 = 0x2ad7eb25;
-
-uint32_t m1 = 9;
-uint32_t a1 = 0x273581d8;
-
-uint32_t m2 = 5;
-uint32_t a2 = 0xee700bac;
-
-uint32_t m3 = 3;
-uint32_t a3 = 0xa6b84e31;
-
-uint32_t r1 = 5;
-
-int stage = 0;
-
-uint32_t m3mix ( uint32_t k )
-{
-	//return rand_u32();
-
-	uint32_t h = 0x971e137b;
-	uint32_t c1 = i1;
-	uint32_t c2 = i2;
-
-	for(int i = 0; i < stage; i++)
-	{
-		h   = h*m3+a3;
-		c1  = c1*m1+a1;
-		c2  = c2*m2+a2;
-	}
-
-	k  *= c1;
-	k   = _rotl(k,r1);
-	h   = h*m3+a3;
-	k  *= c2;
-	c1  = c1*m1+a1;
-	c2  = c2*m2+a2;
-	h  ^= k;
-
-	return h;
-}
-*/
-
-/*
-uint32_t m1 = 0x85ebca6b;
-uint32_t m2 = 0xc2b2ae35;
-uint32_t m3 = 0x893ed583;
-
-int s1 = 16;
-int s2 = 13;
-int s3 = 16;
-
-uint32_t fmix ( uint32_t k )
-{
-	return rand_u32();
-
-	k ^= k >> 16;
-	k *= 0x85ebca6b;
-	k ^= k >> 13;
-	k *= 0xc2b2ae35;
-	k ^= k >> 16;
-
-	return k;
-}
-*/
-
-//-----------------------------------------------------------------------------
-
-/*
-struct mixconfig
-{
-	uint32_t m1;
-	uint32_t m2;
-};
-
-mixconfig mc = 
-{
-	0x010d5a2d,
-	0xd3636b39,
-};
-
-uint32_t fmix32 ( uint32_t k )
-{
-	//return rand_u32();
-
-	k ^= k >> 16;
-	k *= mc.m1;
-	k ^= k >> 16;
-	k *= mc.m2;
-	k ^= k >> 16;
-
-	return k;
-}
-
-double mixfit ( void * block, int )
-{
-	mixconfig * pc = (mixconfig*)block;
-
-	mc.m1 = pc->m1 | 1;
-	mc.m2 = pc->m2 | 1;
-
-	Stats s = testMixAvalanche<uint32_t>(mixfunc<uint32_t>(blahmix),2000000);
-
-	return 1.0 - s.m_max;
-}
-
-void mixdump ( void * block, int )
-{
-	mixconfig * pc = (mixconfig*)block;
-
-	printf("0x%08x 0x%08x",pc->m1, pc->m2 );
-}
-*/
-
-//-----------------------------------------------------------------------------
-// SimAnneal optimize of fmix64
-
-struct mixconfig
-{
-	//uint8_t s1;
-	uint64_t m1;
-	//uint8_t s2;
-	uint64_t m2;
-	//uint8_t s3;
-};
-
-mixconfig mc = { 0xff51afd7ed558ccd, 0xc4ceb9fe1a85ec53 };
-
-uint64_t fmix64_test ( uint64_t k )
-{
-	k ^= k >> 33;
-	//k ^= k >> mc.s1;
-
-	k *= mc.m1;
-
-	k ^= k >> 33;
-	//k ^= k >> mc.s2;
-
-	k *= mc.m2;
-
-	k ^= k >> 33;
-	//k ^= k >> mc.s3;
-
-	return k;
-}
-
-double fmix64_fit ( void * block, int )
-{
-	mixconfig * pc = (mixconfig*)block;
-
-	mc.m1 = pc->m1 | 1;
-	mc.m2 = pc->m2 | 1;
-
-	//mc.s1 = pc->s1 & 63;
-	//mc.s2 = pc->s1 & 63;
-	//mc.s3 = pc->s1 & 63;
-
-	double bias = calcMixBias<uint64_t>(fmix64_test,50000000);
-
-	return 1.0 - bias;
-}
-
-void fmix64_dump ( void * block, int )
-{
-	mixconfig * pc = (mixconfig*)block;
-
-	//pc->s1 &= 63;
-	//pc->s2 &= 63;
-	//pc->s3 &= 63;
-
-	//printf("{ %2d, 0x%016I64x, %2d, 0x%016I64x, %2d }; ",pc->s1, pc->m1, pc->s2, pc->m2, pc->s3 );
-	printf("{ 0x%016I64x, 0x%016I64x }; ", pc->m1, pc->m2 );
-}
-
-uint32_t fmix32_test ( uint32_t h )
-{
-	h ^= h >> 16;
-	h *= 0x85ebca6b;
-	h ^= h >> 13;
-	h *= 0xc2b2ae35;
-	h ^= h >> 16;
-
-	return h;
-}
-
-void optimize_fmix64 ( void )
-{
-	printf("lskdflksj\n");
-	double bias = calcMixBias<uint32_t>(fmix32_test,500000000);
-
-	printf("%f\n",bias);
-
-	//SimAnneal(&mc,sizeof(mc),fmix64_fit,fmix64_dump,4,100);
-}
-
-
-//-----------------------------------------------------------------------------
-// Fitness == distribution of Hamming weights.
-// Optimize mix by minmaxing Hamming weights
-
-// (we want the smallest differential hamming weight to be as large as possible)
-
-void HammingOptimize ( uint32_t (*mix)(uint32_t) )
-{
-	double best[33];
-	best[0] = 2000000000;
-
-	double c[33];
-
-	printf("0x%08x\n",rand_u32());
-
-	//for(m3 = 0; m3 < 32; m3++)
-	
-	for(int i = 0; i < 100000; i++)
-	{
-		//for(r1 = 12; r1 < 18; r1++)
-		{
-			memset(c,0,sizeof(c));
-			SparseDiffHamming32(mix,c);
-
-			if(hamless(33,c,best))
-			{
-				memcpy(best,c,sizeof(c));
-
-				//printf("{%6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f, %6.3f } - ",c[0],c[1],c[2],c[3],c[4],c[5],c[6],c[7],c[8]);
-
-				printf("{");
-
-				for(int i = 0; i < 33; i++) printf("%6.3f ",c[i]);
-				printf("} - ");
-
-				//printf("0x%08x, %2d, 0x%08x %2d\n",m1,r1,m2,m3);
-				//printf("0x%08x, 0x%08x\n",m1,m2);
-				printf("\n");
-			}
-		}
-	}
-}
-
-//-----------------------------------------------------------------------------
-
-u128 mix128 ( u128 h2 )
-{
-	uint32_t * h = (uint32_t*)&h2;
-
-	for(int i = 0; i < 30; i++)
-	{
-		h[0] = _rotl(h[0],3);
-		h[1] = _rotl(h[1],10);
-		h[2] = _rotl(h[2],19);
-		h[3] = _rotl(h[3],26);
-
-		h[0] += h[1];
-		h[0] += h[2];
-		h[0] += h[3];
-
-		h[1] += h[0];
-		h[2] += h[0];
-		h[3] += h[0];
-	}
-
-	return h2;
-}
-
-//-----------------------------------------------------------------------------
-
-void scratchmain ( void )
-{
-	/*
-	double worst = 1000;
-
-	double worstStage = 0;
-
-	for(stage = 0; stage < 16; stage++)
-	{
-		Stats s = testMixAvalanche<uint32_t>(mixfunc<uint32_t>(m3mix),300);
-
-		if(s.m_nbad > worstStage) worstStage = s.m_nbad;
-	}
-
-	if(worstStage < worst)
-	{
-		worst = worstStage;
-
-		printf("%3.4f : 0x%08x 0x%08x %2d 0x%08x %2d 0x%08x %2d 0x%08x %2d\n",worst,i1,i2,m1,a1,m2,a2,m3,a3,r1);
-	}
-
-	//----------
-
-	for(int i = 0; i < 1000000; i++)
-	{
-		for(m1 = 3; m1 < 10; m1 += 2)
-		for(m2 = 3; m2 < 10; m2 += 2)
-		for(m3 = 3; m3 < 10; m3 += 2)
-		for(r1 = 0; r1 < 32; r1++)
-		//for(int bit = 0; bit < 32; bit++)
-		{
-			//i2 ^= (1 << bit);
-
-			if(m1 == 7) continue;
-			if(m2 == 7) continue;
-			if(m3 == 7) continue;
-
-			double worstStage = 0;
-
-			for(stage = 0; stage < 16; stage++)
-			{
-				Stats s = testMixAvalanche<uint32_t>(mixfunc<uint32_t>(m3mix),300);
-
-				if(s.m_nbad > worstStage) worstStage = s.m_nbad;
-			}
-
-			if(worstStage < worst)
-			{
-				worst = worstStage;
-
-				printf("%3.4f : 0x%08x 0x%08x %2d 0x%08x %2d 0x%08x %2d 0x%08x %2d\n",worst,i1,i2,m1,a1,m2,a2,m3,a3,r1);
-			}
-			else
-			{
-				//i2 ^= (1 << bit);
-			}
-		}
-
-		//i1 = rand_u32();
-		//i2 = rand_u32();
-
-		//a1 = rand_u32();
-		//a2 = rand_u32();
-		//a3 = rand_u32();
-	}
-	*/
-}
-
-//-----------------------------------------------------------------------------
-
-/*
-void Pathological ( void )
-{
-	std::set<uint32_t> s;
-
-	uint32_t c = 0;
-	uint32_t seed = 0xdeadbeef * 16;
-
-	for(int j = 0; j < 5000; j++)
-	{
-		for(uint32_t i = 0; i < 10000; i++)
-		{
-			uint32_t key[4] = {c,c,c,c};
-
-			uint32_t hash = MurmurHash2(key,16,seed);
-
-			//v.push_back(hash);
-			s.insert(hash);
-			c++;
-		}
-
-		printf("%8d %8f\n",s.size(),double(s.size()) / double(c));
-	}
-}
-*/
-
-/*
-void Pathological ( void )
-{
-	const int nbytes = 512 * 1024 * 1024;
-
-	unsigned char * block = new unsigned char[nbytes];
-
-	memset(block,0,nbytes);
-
-	unsigned int k = 0;
-	unsigned int key[256];
-	unsigned int collisions = 0;
-	
-	do
-	{
-		for(int i = 0; i < 256; i++) key[i] = k;
-
-		unsigned int h;
-		h = MurmurHash2(&key[0],256*4,(0xdeadbeef * 16));
-		//MurmurHash3_x86_32(&key[0],32,(0xdeadbeef * 16),&h);
-
-		//printf("0x%08x\n",h);
-
-		if(getbit(block,nbytes,h))
-		{
-			collisions++;
-		}
-
-		setbit(block,nbytes,h);
-
-		if(k % 10000000 == 0)
-		{
-			printf("%12d : %9d : %f\n",k,collisions,double(collisions) / double(k));
-		}
-
-		k++;
-	}
-	while(k != 0);
-
-	printf("%d total collisions",collisions);
-
-	delete [] block;
-}
-*/
-
-/*
-void Pathological ( void )
-{
-	const int nbytes = 512 * 1024 * 1024;
-
-	unsigned char * block = new unsigned char[nbytes];
-
-	memset(block,0,nbytes);
-
-	unsigned int k = 0;
-	unsigned int unique = 0;
-	
-	do
-	{
-		const uint32_t m = 0xdeadbeef;
-		const int r = 24;
-
-		uint32_t x = 0;
-		uint32_t h = 0;
-
-		x = k;
-		x *= m;
-		x ^= x >> r;
-		x *= m;
-
-		h *= m;
-		h ^= x;
-
-		x = k;
-		x *= m;
-		x ^= x >> r;
-		x *= m;
-
-		h *= m;
-		h ^= x;
-
-		if(!getbit(block,nbytes,h))
-		{
-			unique++;
-		}
-
-		setbit(block,nbytes,h);
-
-		if(k % 10000000 == 0)
-		{
-			printf("%12d : %9d :%f\n",k,unique,double(unique) / double(k));
-		}
-
-		k++;
-	}
-	while(k);
-
-	printf("%d unique",unique);
-
-	delete [] block;
-}
-*/
-
-/*
-void Pathological ( void )
-{
-	typedef std::map<uint32_t,uint32_t> cmap;
-	
-	cmap collisionmap;
-
-	const int nbytes = 512 * 1024 * 1024;
-
-	unsigned char * block = new unsigned char[nbytes];
-
-	memset(block,0,nbytes);
-
-	unsigned int k = 0;
-	unsigned int key[4];
-	unsigned int collisions = 0;
-	
-	do
-	{
-		for(int i = 0; i < 4; i++) key[i] = k;
-
-		unsigned int h;
-		h = MurmurHash2(&key[0],4*sizeof(uint32_t),16);
-		//MurmurHash3_x86_32(&key[0],32,(0xdeadbeef * 16),&h);
-
-		//printf("0x%08x\n",h);
-
-		if(getbit(block,nbytes,h))
-		{
-			collisions++;
-			collisionmap[h]++;
-		}
-
-		setbit(block,nbytes,h);
-
-		if(k % 10000000 == 0)
-		{
-			printf("%12d : %9d : %9d :%f\n",k,collisionmap.size(),collisions,double(collisions) / double(k));
-		}
-
-		k++;
-	}
-	//while(k);
-	while(k <= 200000000);
-
-	uint32_t most = 0;
-	for(cmap::iterator i = collisionmap.begin(); i != collisionmap.end(); ++i)
-	{
-		uint32_t h = (*i).first;
-		uint32_t c = (*i).second;
-
-		if(c > most)
-		{
-			most = c;
-			printf("0x%08x : %d\n",h,c);
-		}
-	}
-
-	printf("%d total collisions",collisions);
-
-	delete [] block;
-}
-*/
-
-/*
-void Pathological ( void )
-{
-	unsigned int k = 0;
-	unsigned int key[4];
-
-	std::vector<uint32_t> v;
-	
-	do
-	{
-		for(int i = 0; i < 4; i++) key[i] = k;
-
-		unsigned int h;
-		h = MurmurHash2(&key[0],4*sizeof(uint32_t),16);
-
-		if(h == 0xb5abf828) 
-		{
-			v.push_back(k);
-		}
-
-		if(k % 100000000 == 0)
-		{
-			printf("%12u : %12d\n",k,v.size());
-		}
-
-
-		k++;
-	}
-	while(k);
-
-	for(size_t i = 0; i < v.size(); i++)
-	{
-		printf("0x%08x,",v[i]);
-		if(i%8==7) printf("\n");
-	}
-	printf("\n");
-}
-*/
-
-/*
-uint32_t bad[] = 
-{
-0x0017f1a9,0x00f8c102,0x01685768,0x01c6d69e,0x02109e20,0x02ea2120,0x03615606,0x03bab745,
-0x03eb73e9,0x03f7db48,0x04391e64,0x04747fa7,0x04b81cf5,0x04fbcab0,0x054bf06a,0x05d33abc,
-0x05d8eb48,0x06560ce6,0x0697bcfa,0x06a40faa,0x071977fb,0x073a4306,0x073eb088,0x0751c777,
-0x07534cb4,0x079d2fbe,0x07a0ba13,0x07cff5fc,0x082b2d13,0x08457c35,0x093de81e,0x09711b75,
-0x097fdb48,0x09ba9060,0x0a06228a,0x0a5f8691,0x0a63881c,0x0a70bcd7,0x0aed67dd,0x0b0ed19a,
-0x0bc68125,0x0c29fe48,0x0ca1eb57,0x0cbfc528,0x0d4017e2,0x0d6d91c2,0x0d7388de,0x0f0133e9,
-0x0f8d17e7,0x0f90e980,0x0fe6be43,0x1033d71d,0x1087872c,0x10b52186,0x12005768,0x12c817e2,
-0x12ed3caf,0x1343eae2,0x137b2949,0x1407d537,0x1462906a,0x156742a0,0x15f44042,0x17204969,
-0x18c86d6a,0x192c6777,0x1950b0f3,0x19548454,0x1961fb59,0x19e92685,0x1a24be52,0x1a72ccfa,
-0x1a7caf9b,0x1a9d7aa6,0x1b9407c9,0x1b9d472c,0x1bdc3c3f,0x1c2a955f,0x1c44f065,0x1c75fda6,
-0x1c934985,0x1cd45315,0x1d1dce3e,0x1d695a2a,0x1e88f490,0x203a3985,0x2050669c,0x20a34f82,
-0x221b4985,0x222718dc,0x2240aa13,0x22a67680,0x24bdf477,0x250ead99,0x255d00e9,0x2652bb8e,
-0x26823b4d,0x27298fd2,0x27bf3042,0x27e2e537,0x282dbcdc,0x295777e2,0x2ab449ff,0x2d347ad3,
-0x2d3c176d,0x2d4c5e25,0x2d72b111,0x2d9f768f,0x2ddfe73b,0x2e00b246,0x2f9f1523,0x2fdbdba7,
-0x30831cfa,0x30cc91ca,0x3129f75c,0x313f9486,0x315255e3,0x31e70a31,0x33490a31,0x33622c30,
-0x33863468,0x3441b8a7,0x349f03ad,0x3715eda6,0x374df66c,0x3766e2fc,0x3848010c,0x385325bb,
-0x38a843f3,0x398e8722,0x39cc0d5b,0x39e572ed,0x3ace4477,0x3afb8c19,0x3b98b8d4,0x3ce6212a,
-0x3cec46c6,0x3d43761a,0x3de45e25,0x3e1e5a2c,0x3f612a36,0x4008f490,0x41431edb,0x4163e9e6,
-0x41742120,0x41854564,0x41ca60f3,0x41fa37f6,0x421e16a3,0x4263b66c,0x42bc7a4a,0x434286ad,
-0x435858a7,0x43bbf5f2,0x43e43d7e,0x442fc96a,0x443e6342,0x44b58d83,0x45378356,0x45df4db0,
-0x46b09971,0x47337cff,0x47f46fc3,0x48023b4d,0x4823a50a,0x49691a36,0x497767dd,0x4a50eadd,
-0x4ad26a3b,0x4b8463b7,0x4bc34e34,0x4bcd5cc3,0x4bf245e3,0x4c62946d,0x4d18b7f9,0x4da4d029,
-0x4dcac8e3,0x4df83139,0x4e2514b8,0x4e859f82,0x4ea95477,0x4ef42c1c,0x4f68a832,0x4f7acba7,
-0x4fa478d9,0x4ffe8c21,0x50ee3486,0x514795c5,0x51948107,0x51c5fce4,0x51e3eaec,0x52015e27,
-0x526260f3,0x5288a930,0x5360193c,0x53e7ac58,0x54a6567b,0x54c72186,0x54cb8f08,0x54dea5f7,
-0x552d9893,0x555d6f96,0x55b80b93,0x56cac69e,0x56fdf9f5,0x5793010a,0x57d7b747,0x57ec6511,
-0x57f0669c,0x57fd9b57,0x5818c523,0x58fe6cff,0x5a011a36,0x5a4ca3a8,0x5b00675e,0x5c50bfbc,
-0x5c6a50f3,0x5d19f667,0x5d2504a9,0x5ddbc685,0x5e85812a,0x5ed4c61f,0x5f4d0056,0x5fd14dba,
-0x5fd77356,0x608b5837,0x60d6c07e,0x610807c9,0x610986bc,0x6194b3b7,0x62f42120,0x62f774b3,
-0x63233736,0x6361c3c1,0x63811ec2,0x64ad27e9,0x650011e5,0x66b945f7,0x66dd8f73,0x67361999,
-0x67471347,0x67760505,0x6789c685,0x68098e1b,0x683ac4a9,0x68ca7a40,0x69b773df,0x69d5acdc,
-0x6a1ec7e7,0x6a202805,0x6a613195,0x6a6a70f8,0x6a74f315,0x6a838109,0x6aaaacbe,0x6af638aa,
-0x6b4727f6,0x6b7bfcc3,0x6d4eb4ac,0x6dc71805,0x6ef55b70,0x6fa82805,0x6fb3f75c,0x6fcd8893,
-0x7014bf91,0x70fc7fc8,0x724ad2f7,0x729b8c19,0x72b8b523,0x735e4f12,0x7378556e,0x73ac5dba,
-0x74b66e52,0x74e8531f,0x754c0ec2,0x7564261f,0x7567c4bd,0x756fc3b7,0x75af8e66,0x75ba9b5c,
-0x7841287f,0x7973ca45,0x7aaa7fc8,0x7ac8f5ed,0x7aec261f,0x7b2c550f,0x7b6cc5bb,0x7d2bf3a3,
-0x7d68ba27,0x7d8f1e39,0x7d98de70,0x7edf3463,0x80626b7a,0x80b1ec4c,0x81ce9727,0x827aca36,
-0x82944f12,0x86352273,0x8831268f,0x885b22f7,0x887d51bd,0x889f261a,0x89259754,0x89bcadba,
-0x8a323fd7,0x8a72ffaa,0x8a792546,0x8ad0549a,0x8b209af1,0x8bbe27e7,0x8c066fc3,0x8c4464b3,
-0x8cd4d306,0x8cee08b6,0x8d4ab321,0x8ecffd5b,0x8f1223e4,0x8f573f73,0x8f871676,0x904958ca,
-0x904f7e66,0x90e53727,0x91711bfe,0x91859d88,0x919dfef4,0x91cb41c2,0x92426c03,0x92c461d6,
-0x92fffef4,0x936c2c30,0x93dd8269,0x94351cd9,0x94c05b7f,0x94e87d04,0x954e3aba,0x95814e43,
-0x95bbcab0,0x96f5f8b6,0x985f48bb,0x99502cb4,0x995a3b43,0x997f2463,0x99ef72ed,0x9a4e3c2b,
-0x9b57a763,0x9b850fb9,0x9bb1f338,0x9bc723cb,0x9be0895d,0x9c3632f7,0x9c7c176d,0x9c810a9c,
-0x9cf586b2,0x9d07aa27,0x9d315759,0x9d8b6aa1,0x9e99eeef,0x9f215f87,0x9f70c96c,0x9fc195cf,
-0x9fef3f73,0xa06af1b8,0xa06d0dbf,0xa0840b00,0xa12e0083,0xa14df1d4,0xa1748ad8,0xa1884c58,
-0xa2ea4e16,0xa307c528,0xa3f0607e,0xa40bfafb,0xa4558d79,0xa547228c,0xa56495c7,0xa5a5a3a3,
-0xa68b4b7f,0xa728daba,0xa78df8b6,0xa8de0999,0xa90e5479,0xa9dd9e3c,0xa9f72f73,0xa9fd51bd,
-0xaab1e329,0xab3aeee7,0xab68a505,0xab9c9eea,0xabfd18dc,0xac125faa,0xac61a49f,0xac9edbac,
-0xacd9ded6,0xad5e2c3a,0xad6451d6,0xae1836b7,0xae639efe,0xae96653c,0xaee4ad99,0xaef795cf,
-0xaf11f9ff,0xaf43c0fd,0xb0845333,0xb0b015b6,0xb0eea241,0xb1114807,0xb28cf065,0xb3db78e8,
-0xb439f81e,0xb483bfa0,0xb4c2f819,0xb4d3f1c7,0xb516a505,0xb55d42a0,0xb5c7a329,0xb65758c0,
-0xb65e9569,0xb66afcc8,0xb72b3e75,0xb7628b5c,0xb7aba667,0xb7bf11ea,0xb7f74f78,0xb801d195,
-0xb8105f89,0xb84c0cc8,0xb8c92e66,0xb8d40676,0xb908db43,0xb90ade7a,0xb917312a,0xb9c66e34,
-0xba10513e,0xba43177c,0xbab89db5,0xbadb932c,0xbbf2fcc8,0xbc2db1e0,0xbc8239f0,0xbd60895d,
-0xbd81f31a,0xbda19e11,0xbe39a2a5,0xbe895e48,0xbe9d1fc8,0xbf150cd7,0xbfb33962,0xbfe0b342,
-0xc04593a3,0xc0eb2d92,0xc10533ee,0xc1393c3a,0xc1745569,0xc2040b00,0xc259dfc3,0xc275319f,
-0xc2a6f89d,0xc2f1049f,0xc2f4a33d,0xc2faa8ac,0xc3284306,0xc33c6ce6,0xc47378e8,0xc53b3962,
-0xc5605e2f,0xc5b70c62,0xc6d5b1ea,0xc700a8c5,0xc8375e48,0xc879049f,0xcb1bfcb9,0xcb25bcf0,
-0xcb3b8eea,0xcbc7a5d4,0xcbd51cd9,0xcc97dfd2,0xcce5ee7a,0xcd109c26,0xcdef49fa,0xce072949,
-0xce1068ac,0xce3ecacc,0xce4f5dbf,0xceb811e5,0xcee91f26,0xd007a8b6,0xd0212d92,0xd0fc1610,
-0xd2c3881c,0xd3167102,0xd5199800,0xd5be050f,0xd60a303d,0xd62c049a,0xd7498c3a,0xd7bf1e57,
-0xd7d02269,0xd8ad7971,0xd8c5dd0e,0xd8f55ccd,0xd94b0667,0xd9934e43,0xd9d14333,0xda61b186,
-0xdad791a1,0xdbca9962,0xdddc5ce6,0xdf127c08,0xdf2add74,0xdfa79c53,0xdfbf7fa5,0xdfe5d291,
-0xe073d3c6,0xe08cdd74,0xe16a60e9,0xe1c1fb59,0xe2755b84,0xe2db193a,0xe2f63e7a,0xe33fb34a,
-0xe348a930,0xe39d18dc,0xe3b2b606,0xe45a2bb1,0xe5bc2bb1,0xe5d54db0,0xe5f955e8,0xe712252d,
-0xe7db1aab,0xe954024b,0xe96d67dd,0xe9890f26,0xe9c117ec,0xe9da047c,0xea08f5ed,0xeabb228c,
-0xeac6473b,0xec01a8a2,0xec26cd6f,0xec3f2edb,0xec58946d,0xed4e744f,0xed6ead99,0xedf7d038,
-0xedf9ec3f,0xee10e980,0xeebadf03,0xeedad054,0xef152ad8,0xf0577fa5,0xf0917bac,0xf094a3a8,
-0xf17d3efe,0xf198d97b,0xf1e26bf9,0xf27c1610,0xf2d4010c,0xf3d70b66,0xf3e742a0,0xf4913823,
-0xf4b5b93a,0xf4d6d7ec,0xf5b5a82d,0xf62f1772,0xf66ae819,0xf69b32f9,0xf6a2eaea,0xf78a303d,
-0xf8c7cd67,0xf923baf1,0xf9297d6a,0xf989f75c,0xfa2bba2c,0xfa755ccd,0xfa96c68a,0xfbea895d,
-0xfc718c19,0xfc84744f,0xfc9ed87f,0xfcc40c5d,0xfcd09f7d,0xfdf78537,0xfe9e2687,0xff8bd979,
-};
-
-void Pathological ( void )
-{
-	// 'm' and 'r' are mixing constants generated offline.
-	// They're not really 'magic', they just happen to work well.
-
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
-
-	for(int i = 0; i < 100; i++)
-	{
-		uint32_t h = 0;
-		uint32_t k = bad[i];
-
-		printf("0x%08x : ",k);
-		k *= m;
-		printf("0x%08x : ",k);
-		k ^= k >> r;
-		printf("0x%08x : ",k);
-		k *= m;
-		printf("0x%08x : ",k);
-
-		printf(" - ");
-
-		h = k;
-		printf("0x%08x : ",h);
-		h *= m;
-		printf("0x%08x : ",h);
-		h ^= k;
-		printf("0x%08x : ",h);
-		h *= m;
-		printf("0x%08x : ",h);
-		h ^= k;
-		printf("0x%08x : ",h);
-		h *= m;
-		printf("0x%08x : ",h);
-		h ^= k;
-		printf("0x%08x\n",h);
-
-	}
-}
-*/
-
-/*
-void Pathological ( void )
-{
-	const int nbytes = 512 * 1024 * 1024;
-
-	unsigned char * block = new unsigned char[nbytes];
-
-	memset(block,0,nbytes);
-
-	unsigned int k = 0;
-	unsigned int collisions = 0;
-	
-	do
-	{
-		//const uint32_t m = 0x5bd1e995;
-		unsigned int h = 0;
-
-		uint32_t m1 = 0x5bd1e995;
-		uint32_t m2 = 0x5bd1e995;
-		uint32_t m3 = 0x5bd1e995;
-		uint32_t x;
-
-		x = k; x *= m1; x ^= x >> 25; x *= m2; h ^= x; h *= m3;
-		m2 = m2*9+0x273581d8;
-		x = k; x *= m1; x ^= x >> 25; x *= m2; h ^= x; h *= m3;
-		m2 = m2*9+0x273581d8;
-		
-		//printf("0x%08x : 0x%08x\n",k,h);
-		//h *= 3;
-
-		if(getbit(block,nbytes,h))
-		{
-			collisions++;
-		}
-
-		setbit(block,nbytes,h);
-
-		if(k % 10000000 == 0)
-		{
-			printf("%12u : %9u : %f\n",k,collisions,double(collisions) / double(k));
-		}
-
-		k++;
-	}
-	while(k != 0);
-
-	printf("%u total collisions, %f",collisions,double(collisions) / 4294967296.0);
-
-	delete [] block;
-}
-*/
-
-/*
-// Applying FWT to fmix32 to look for linearities (it found some bias, but nothing above a fraction of a percent)
-
-void find_linear_approximation_walsh2 ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias );
-void find_linear_approximation_walsh  ( mixfunc<uint32_t> f, uint32_t mask, int inbits, uint32_t & outL, int64_t & outBias );
-uint32_t test_linear_approximation ( mixfunc<uint32_t> f, uint32_t l, uint32_t mask, int inbits );
-
-uint32_t bitrev ( uint32_t v );
-
-uint32_t FWTMix ( uint32_t x )
-{
-	x ^= x >> 16;
-	x *= 0x85ebca6b;
-	x ^= x >> 13;
-	x *= 0xc2b2ae35;
-	x ^= x >> 16;
-
-	return x;
-}
-
-double test_linear_approximation ( mixfunc<uint32_t> f, uint32_t l, uint32_t mask, int64_t size );
-
-void WalshStuff(void )
-{
-	const int64_t nbits = 32;
-	const int64_t size = int64_t(1) << nbits;
-
-	mixfunc<uint32_t> f(FWTMix);
-
-	for(int i = 0; i < nbits; i++)
-	{
-		uint32_t mask = (1 << i);
-		uint32_t outL = 0;
-		int64_t bias = 0;
-		find_linear_approximation_walsh2(f,mask,nbits,outL,bias);
-
-		double b = test_linear_approximation ( f, outL, mask, size);
-
-		printf("0x%08x, 0x%08x, %8I64d, %f\n",mask,outL,bias,b);
-	}
-}
-*/
\ No newline at end of file
diff --git a/simplex.cpp b/simplex.cpp
deleted file mode 100644
index 3f08f1d..0000000
--- a/simplex.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-#include <stdio.h>
-#include <set>
-#include <map>
-#include "pstdint.h"
-
-#pragma warning(disable:4996)
-
-struct node;
-
-typedef std::set<node*> nodeset;
-
-struct node
-{
-	node ( void )
-	{
-		name = 0;
-		mark = 0;
-		used = 0;
-		next = 0;
-	}
-
-	uint32_t name;
-	uint32_t mark;
-	uint32_t used;
-
-	node * next;
-
-	nodeset edges;
-};
-
-typedef std::map<uint32_t,node> nodegraph;
-
-nodegraph graph;
-
-bool can_link ( node * A, node * B )
-{
-	if(A->edges.find(B) == A->edges.end()) return false;
-	if(B->edges.find(A) == B->edges.end()) return false;
-
-	return true;
-}
-
-bool can_link_all ( node * A, node * B )
-{
-	node * cursor = A;
-
-	while(cursor)
-	{
-		if(!can_link(cursor,B)) return false;
-
-		cursor = cursor->next;
-	}
-
-	return true;
-}
-
-void print_simplex( node * head )
-{
-	node * cursor = head;
-
-	while(cursor)
-	{
-		printf("0x%08x,",cursor->name);
-		cursor = cursor->next;
-	}
-	printf("\n");
-}
-
-void find_simplex ( node * head )
-{
-	bool found = false;
-
-	for(nodeset::iterator it = head->edges.begin(); it != head->edges.end(); it++)
-	{
-		node * next = (*it);
-
-		if(next->mark) continue;
-		if(next->name > head->name) continue;
-
-		if(can_link_all(head,next))
-		{
-			found = true;
-			next->mark = head->mark + 1;
-			next->next = head;
-
-			find_simplex(next);
-
-			next->mark = 0;
-			next->next = 0;
-		}
-	}
-
-	if(!found && (head->mark > 3))
-	{
-		bool used = false;
-
-		node * cursor = head;
-
-		while(cursor)
-		{
-			if(cursor->used) used = true;
-
-			cursor = cursor->next;
-		}
-
-		if(!used) 
-		{
-			print_simplex(head);
-
-			node * cursor = head;
-
-			while(cursor)
-			{
-				cursor->used = 1;
-				cursor = cursor->next;
-			}
-		}
-	}
-}
-
-int simplex_main ( int argc, char * argv[] )
-{
-	if(argc < 2)
-	{
-		printf("blah\n");
-		return 1;
-	}
-
-	FILE * file = fopen(argv[1],"r");
-
-	if(!file)
-	{
-		printf("Couldn't open file\n");
-		return 1;
-	}
-
-	char buffer[512];
-
-	while(fgets(buffer,512,file))
-	{
-		uint32_t nameA;
-		uint32_t nameB;
-
-		int found = sscanf(buffer,"0x%08x,0x%08x",&nameA,&nameB);
-
-		if(found != 2) continue;
-
-		node * nodeA = &graph[nameA];
-		node * nodeB = &graph[nameB];
-
-		nodeA->name = nameA;
-		nodeB->name = nameB;
-
-		nodeA->edges.insert(nodeB);
-		nodeB->edges.insert(nodeA);
-	}
-
-	for(std::map<uint32_t,node>::iterator it = graph.begin(); it != graph.end(); it++)
-	{
-		node & n = (*it).second;
-
-		n.mark = 1;
-
-		find_simplex(&n);
-
-		n.mark = 0;
-	}
-
-	return 0;
-}
-
-- 
cgit v1.2.3


From ad4b363201477cb33966510b850c2193b1f825fe Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 5 Nov 2010 01:20:58 +0000
Subject: MurmurHash3 is released to beta

(potentially some constant-tweaking yet to be done, but it is quite usable and all variants pass all tests)

git-svn-id: http://smhasher.googlecode.com/svn/trunk@5 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 AvalancheTest.cpp  | 213 ------------
 AvalancheTest.h    | 223 ++++++++++++-
 Bitvec.cpp         |  47 ---
 Bitvec.h           |  76 ++++-
 Core.cpp           | 186 -----------
 Core.h             |  15 -
 CycleTest.cpp      |   1 -
 CycleTest.h        |  45 ---
 DifferentialTest.h | 172 ++++++----
 Hashes.cpp         |  18 +-
 Hashes.h           |  33 +-
 KeysetTest.cpp     | 203 ++++++++++++
 KeysetTest.h       | 354 ++++++++++++++++++++
 MurmurHash3.cpp    | 496 ++++++++++++++++++----------
 MurmurHash3.h      |   5 +-
 Random.cpp         |  53 ---
 Random.h           |  84 ++---
 SMHasher.vcproj    |  26 +-
 SparseKeyTest.cpp  | 111 -------
 SparseKeyTest.h    |  89 -----
 SpeedTest.cpp      |  50 +++
 SpeedTest.h        |  61 ++++
 Stats.cpp          | 249 +-------------
 Stats.h            | 368 +++++----------------
 Tests.cpp          | 542 -------------------------------
 Tests.h            | 194 -----------
 Types.cpp          |   5 +-
 Types.h            | 238 +-------------
 main.cpp           | 424 +++++++++++++++++++++---
 sha1.cpp           | 926 +++++++++++++++++++----------------------------------
 sha1.h             | 110 ++-----
 31 files changed, 2290 insertions(+), 3327 deletions(-)
 delete mode 100644 Core.cpp
 delete mode 100644 Core.h
 delete mode 100644 CycleTest.cpp
 delete mode 100644 CycleTest.h
 create mode 100644 KeysetTest.cpp
 create mode 100644 KeysetTest.h
 delete mode 100644 SparseKeyTest.cpp
 delete mode 100644 SparseKeyTest.h
 create mode 100644 SpeedTest.cpp
 create mode 100644 SpeedTest.h
 delete mode 100644 Tests.cpp
 delete mode 100644 Tests.h

diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp
index 25ee86c..bb68b2f 100644
--- a/AvalancheTest.cpp
+++ b/AvalancheTest.cpp
@@ -1,22 +1,9 @@
-//-----------------------------------------------------------------------------
-// Flipping a single bit of a key should cause an "avalanche" of changes in
-// the hash function's output. Ideally, each output bits should flip 50% of
-// the time - if the probability of an output bit flipping is not 50%, that bit
-// is "biased". Too much bias means that patterns applied to the input will
-// cause "echoes" of the patterns in the output, which in turn can cause the
-// hash function to fail to create an even, random distribution of hash values.
-
 #include "AvalancheTest.h"
 
-#include "Bitvec.h"
 #include "Random.h"
 
 #include <math.h>
 
-// Avalanche fails if a bit is biased by more than 1%
-
-double gc_avalancheFail = 0.01;
-
 //-----------------------------------------------------------------------------
 
 void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
@@ -70,204 +57,4 @@ double maxBias ( std::vector<int> & counts, int reps )
 	return worst;
 }
 
-double rmsBias ( std::vector<int> & counts, int reps )
-{
-	double rms = 0;
-
-	for(int i = 0; i < (int)counts.size(); i++)
-	{
-		double d = double(counts[i]) / reps;
-
-		d = fabs(d * 2 - 1);
-
-		rms += d*d;
-	}
-
-	rms /= counts.size();
-	rms = sqrt(rms);
-
-	return rms;
-}
-
-//-----------------------------------------------------------------------------
-
-void calcBias ( pfHash hash, const int nbitsIn, const int nbitsOut, std::vector<int> & counts, int reps )
-{
-	const int nbytesIn = nbitsIn / 8;
-	const int nbytesOut = nbitsOut / 8;
-
-	uint8_t * K = new uint8_t[nbytesIn];
-	uint8_t * A = new uint8_t[nbytesIn];
-	uint8_t * B = new uint8_t[nbytesIn];
-
-	Rand r(378473);
-
-	for(int irep = 0; irep < reps; irep++)
-	{
-		r.rand_p(K,nbytesIn);
-
-		hash(K,nbytesIn,0,A);
-
-		int * cursor = &counts[0];
-
-		for(int iBit = 0; iBit < nbitsIn; iBit++)
-		{
-			flipbit(K,nbytesIn,iBit);
-			hash(K,nbytesIn,0,B);
-			flipbit(K,nbytesIn,iBit);
-
-			for(int iOut = 0; iOut < nbitsOut; iOut++)
-			{
-				int bitA = getbit(A,nbytesOut,iOut);
-				int bitB = getbit(B,nbytesOut,iOut);
-
-				(*cursor++) += (bitA ^ bitB);
-			}
-		}
-	}
-
-	delete [] K;
-	delete [] A;
-	delete [] B;
-}
-
-//-----------------------------------------------------------------------------
-
-bool AvalancheTest ( pfHash hash, const int keybits, const int hashbits, const int reps )
-{
-	printf("Avalanche for %3d-bit keys -> %3d-bit hashes, %8d reps - ",keybits,hashbits,reps);
-
-	std::vector<int> bins(keybits*hashbits,0);
-
-	calcBias(hash,keybits,hashbits,bins,reps);
-	
-	double b = maxBias(bins,reps);
-
-	printf("Max avalanche bias is %f\n",b);
-
-	if(b > gc_avalancheFail)
-	{
-		return false;
-	}
-	else
-	{
-		return true;
-	}
-}
-
-//----------------------------------------------------------------------------
-// Computing whether a given mix function produces a low bias can take many 
-// millions of tests when the bias is low.  This code tries to speed up the 
-// process by early-outing if the probability that the bias will fall outside
-// the given range is over 99%
-
-/*
-bool testMixAvalanche32_Fast ( pfMix32 mix, double cutmin, double cutmax, bool winlose )
-{
-	int counts[32*32];
-
-	memset(counts,0,sizeof(counts));
-
-	double pmin = 0;
-	double pmax = 0;
-	double n = 0;
-	double s = 4.75;
-	int w = 0;
-
-	int batchsize = 512;
-
-	for(int iBatch = 0; iBatch < 1024 * 1024; iBatch++)
-	{
-		calcMixBias<uint32_t>(mix,counts,batchsize);
-
-		n = (iBatch+1) * batchsize;
-		w = maxIntBias(32,32,counts,(int)n);
-
-		// compute p such that w is at the bottom of the confidence interval
-
-		double a = s*s*n + n*n;
-		double b = -2.0*double(w)*n - s*s*n;
-		double c = double(w)*double(w);
-
-		SolveQuadratic(a,b,c,pmin,pmax);
-
-		double win = 0;
-		double tie = 0;
-		double lose = 0;
-
-		if(winlose)
-		{
-			if(pmax < cutmax)
-			{
-				printf("\n+!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
-				return true;
-			}
-
-			if(pmin > cutmax)
-			{
-				//printf("\n-!!! %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
-				return false;
-			}
-
-			// doesn't fail or win outright. does it have a chance of winning?
-
-			if(pmin < cutmin)
-			{
-				// pmin:pmax contains cutmin:cutmax
-
-				assert(cutmin > pmin);
-				assert(cutmax < pmax);
-
-				win = (cutmin-pmin) / (pmax-pmin);
-				tie = (cutmax-cutmin) / (pmax-pmin);
-				lose = (pmax-cutmax) / (pmax-pmin);
-			}
-			else
-			{
-				// pmin:pmax overlaps above cutmin:cutmax
-
-				assert(cutmin < pmin);
-
-				win = 0;
-				tie = ((cutmax - pmin) / (pmax-pmin)) * ((cutmax-pmin) / (cutmax-cutmin));
-				lose = (pmax-cutmax) / (pmax-pmin);
-
-				return false;
-			}
-
-			double frac = win + tie*0.5;
-
-			if((pmax-pmin)/(cutmax-cutmin) < 5)
-			{
-				if(frac < 0.20)
-				{
-					// 99% chance of loss
-					//printf("\n--- %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
-					return false;
-				}
-
-				if(frac > 0.80)
-				{
-					// 99% chance of win
-					printf("\n+++ %f - %f : %f - %d\n",double(w)/n,pmin,pmax,int(n));
-					return true;
-				}
-			}
-		}
-
-		if(!winlose && (n > 0) && ((int)n % (128 * 1024) == 0))
-		{
-			printf("%f - %f : %f - %d - %f : %f : %f\n",double(w)/n,pmin,pmax,int(n),win,tie,lose);
-		}
-
-	}
-
-	// We failed to determine whether this mix function passes or fails
-
-	printf("\n??? %f - %f : %f",double(w)/n,pmin,pmax);
-
-	return true;
-}
-*/
-
 //-----------------------------------------------------------------------------
diff --git a/AvalancheTest.h b/AvalancheTest.h
index 88a0bc1..b8f693a 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -1,8 +1,227 @@
+//-----------------------------------------------------------------------------
+// Flipping a single bit of a key should cause an "avalanche" of changes in
+// the hash function's output. Ideally, each output bit should flip 50% of
+// the time - if the probability of an output bit flipping is not 50%, that bit
+// is "biased". Too much bias means that patterns applied to the input will
+// cause "echoes" of the patterns in the output, which in turn can cause the
+// hash function to fail to create an even, random distribution of hash values.
+
+
 #pragma once
 
 #include "Types.h"
+#include "Random.h"
+#include <vector>
+
+// Avalanche fails if a bit is biased by more than 1%
+
+#define AVALANCHE_FAIL 0.01
 
 double maxBias ( std::vector<int> & counts, int reps );
-double rmsBias ( std::vector<int> & counts, int reps );
 
-bool AvalancheTest ( pfHash hash, const int keybits, const int hashbits, const int reps );
+//-----------------------------------------------------------------------------
+
+// Accumulates avalanche counts: for each of 'reps' keys filled by rand_p, flips every key bit in
+// turn and adds 1 to counts[keybit * hashbits + hashbit] for each hash bit that changed.
+template < typename keytype, typename hashtype >
+void calcBias ( pfHash hash, std::vector<int> & counts, int reps )
+{
+	const int keybytes = sizeof(keytype);
+	const int hashbytes = sizeof(hashtype);
+	const int keybits = keybytes * 8;
+	const int hashbits = hashbytes * 8;
+
+	keytype K;
+	hashtype A,B;
+
+	for(int irep = 0; irep < reps; irep++)
+	{
+		// Progress dots; guarded because reps/10 is 0 (modulo by zero) when reps < 10
+		if((reps >= 10) && (irep % (reps/10) == 0)) printf(".");
+		rand_p(&K,keybytes);
+		hash(&K,keybytes,0,&A);
+
+		int * cursor = &counts[0];
+
+		for(int iBit = 0; iBit < keybits; iBit++)
+		{
+			flipbit(&K,keybytes,iBit);
+			hash(&K,keybytes,0,&B);
+			flipbit(&K,keybytes,iBit);
+
+			for(int iOut = 0; iOut < hashbits; iOut++)
+			{
+				int bitA = getbit(&A,hashbytes,iOut);
+				int bitB = getbit(&B,hashbytes,iOut);
+
+				(*cursor++) += (bitA ^ bitB);
+			}
+		}
+	}
+}
+
+//-----------------------------------------------------------------------------
+
+// Prints the worst bias reported by maxBias for this key/hash size; returns false if it exceeds AVALANCHE_FAIL.
+template < typename keytype, typename hashtype >
+bool AvalancheTest ( pfHash hash, const int reps )
+{
+	const int keybytes = sizeof(keytype);
+	const int hashbytes = sizeof(hashtype);
+	const int keybits = keybytes * 8;
+	const int hashbits = hashbytes * 8;
+
+	printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
+
+	//----------
+	// One counter per (key bit, hash bit) pair, accumulated by calcBias
+	std::vector<int> bins(keybits*hashbits,0);
+
+	calcBias<keytype,hashtype>(hash,bins,reps);
+	
+	//----------
+	// maxBias reduces the counters to one value, printed below as a percentage
+	bool result = true;
+
+	double b = maxBias(bins,reps);
+
+	printf(" worst bias is %f%%",b * 100.0);
+
+	if(b > AVALANCHE_FAIL)
+	{
+		printf(" !!!!! ");
+		result = false;
+	}
+
+	printf("\n");
+
+	return result;
+}
+
+//----------------------------------------------------------------------------
+// Tests the Bit Independence Criterion. Stricter than Avalanche, but slow and
+// not really all that useful.
+
+// Bit Independence test for a single key bit: flips 'keybit' in each of 'reps' keys and, for
+// every ordered pair of distinct hash bits, histograms the 2-bit pattern of their changes.
+// Writes the largest deviation to maxBias and, when it is non-zero, the pair to maxA/maxB.
+template< typename keytype, typename hashtype >
+void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
+{
+	const int keybytes = sizeof(keytype);
+	const int hashbytes = sizeof(hashtype);
+	const int hashbits = hashbytes * 8;
+
+	std::vector<int> bins(hashbits*hashbits*4,0);
+
+	keytype key;
+	hashtype h1,h2;
+
+	for(int irep = 0; irep < reps; irep++)
+	{
+		if(verbose && (reps >= 10) && (irep % (reps/10) == 0)) printf(".");	// reps/10 is 0 when reps < 10
+
+		rand_p(&key,keybytes);
+		hash(&key,keybytes,0,&h1);
+
+		flipbit(key,keybit);
+		hash(&key,keybytes,0,&h2);
+
+		hashtype d = h1 ^ h2;	// the difference is a hash value, so it must have the hash's width
+
+		for(int out1 = 0; out1 < hashbits; out1++)
+		for(int out2 = 0; out2 < hashbits; out2++)
+		{
+			if(out1 == out2) continue;
+
+			uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+			bins[(out1 * hashbits + out2) * 4 + b]++;
+		}
+	}
+
+	if(verbose) printf("\n");
+
+	maxBias = 0;
+
+	for(int out1 = 0; out1 < hashbits; out1++)
+	{
+		for(int out2 = 0; out2 < hashbits; out2++)
+		{
+			if(out1 == out2)
+			{
+				if(verbose) printf("\\");
+				continue;
+			}
+
+			double bias = 0;
+
+			for(int b = 0; b < 4; b++)
+			{
+				double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / (double(reps) / 2.0);	// floating-point half, exact for odd reps
+				b2 = fabs(b2 * 2 - 1);
+
+				if(b2 > bias) bias = b2;
+			}
+
+			if(bias > maxBias)
+			{
+				maxBias = bias;
+				maxA = out1;
+				maxB = out2;
+			}
+
+			if(verbose)
+			{
+				if     (bias < 0.01) printf(".");
+				else if(bias < 0.05) printf("o");
+				else if(bias < 0.33) printf("O");
+				else                 printf("X");
+			}
+		}
+
+		if(verbose) printf("\n");
+	}
+}
+
+//----------
+
+// Runs the single-bit BicTest for every key bit, prints the largest bias found together with
+// the (key bit : hash bit pair) that produced it, and returns true if that bias is below 0.05.
+template< typename keytype, typename hashtype >
+bool BicTest ( pfHash hash, const int reps )
+{
+	const int keybytes = sizeof(keytype);
+	const int keybits = keybytes * 8;
+
+	double maxBias = 0;
+	int maxK = 0;
+	int maxA = 0;
+	int maxB = 0;
+
+	for(int i = 0; i < keybits; i++)
+	{
+		// Progress dots; guarded because keybits/10 is 0 (modulo by zero) for keys under 10 bits
+		if((keybits >= 10) && (i % (keybits/10) == 0)) printf(".");
+
+		double bias = 0;
+		int a = 0,b = 0;
+		BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,false);
+
+		if(bias > maxBias)
+		{
+			maxBias = bias;
+			maxK = i;
+			maxA = a;
+			maxB = b;
+		}
+	}
+
+	printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+
+	// Bit independence is harder to pass than avalanche, so we're a bit more lax here.
+	bool result = (maxBias < 0.05);
+	return result;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 07ac815..463c1e4 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -71,53 +71,6 @@ void printhex32 ( void * blob, int len )
 	printf("}");
 }
 
-
-//-----------------------------------------------------------------------------
-// Bit-level manipulation
-
-// These are from the "Bit Twiddling Hacks" webpage
-
-uint32_t popcount ( uint32_t v )
-{
-	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
-	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-	uint32_t c = ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
-
-	return c;
-}
-
-uint32_t popcount128 ( uint32_t * v )
-{
-	uint32_t c = popcount(v[0]);
-
-	c += popcount(v[1]);
-	c += popcount(v[2]);
-	c += popcount(v[3]);
-
-	return c;
-}
-
-uint32_t parity ( uint32_t v )
-{
-	v ^= v >> 16;
-	v ^= v >> 8;
-	v ^= v >> 4;
-	v &= 0xf;
-
-	return (0x6996 >> v) & 1;
-}
-
-uint64_t parity ( uint64_t v )
-{
-	v ^= v >> 32;
-	v ^= v >> 16;
-	v ^= v >> 8;
-	v ^= v >> 4;
-	v &= 0xf;
-
-	return (0x6996 >> v) & 1;
-}
-
 //-----------------------------------------------------------------------------
 
 uint32_t getbit ( void * block, int len, uint32_t bit )
diff --git a/Bitvec.h b/Bitvec.h
index b3988de..fce97d5 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -6,12 +6,6 @@
 
 //-----------------------------------------------------------------------------
 
-uint32_t parity      ( uint32_t v );
-uint64_t parity      ( uint64_t v );
-
-uint32_t popcount    ( uint32_t v );
-uint32_t popcount128 ( uint32_t * v );
-
 void     printbits   ( void * blob, int len );
 void     printhex32  ( void * blob, int len );
 
@@ -25,6 +19,38 @@ void     clearbit    ( void * blob, int len, uint32_t bit );
 
 void     flipbit     ( void * blob, int len, uint32_t bit );
 
+//----------
+
+// Typed getbit: generic T forwards to the (pointer,length) overload; word types shift with a masked index.
+template< typename T >
+inline uint32_t getbit ( T & blob, uint32_t bit )
+{
+	return getbit(&blob,sizeof(blob),bit);
+}
+template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }
+template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }
+
+//----------
+
+// Typed setbit: generic T forwards to the (pointer,length) overload; word types OR in 1 << (masked index).
+template< typename T >
+inline void setbit ( T & blob, uint32_t bit )
+{
+	return setbit(&blob,sizeof(blob),bit);
+}
+template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }
+template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }
+
+//----------
+
+// Typed flipbit: generic T forwards to the (pointer,length) overload; word types XOR 1 << (masked index).
+template< typename T >
+inline void flipbit ( T & blob, uint32_t bit )
+{
+	flipbit(&blob,sizeof(blob),bit);
+}
+template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }
+template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }
 
 //-----------------------------------------------------------------------------
 // Left and right shift of blobs. The shift(N) versions work on chunks of N
@@ -42,11 +68,11 @@ inline void lshift ( void * blob, int len, int c )
 {
 	if((len & 3) == 0)
 	{
-		lshift32(&blob,len,c);
+		lshift32(blob,len,c);
 	}
 	else
 	{
-		lshift8(&blob,len,c);
+		lshift8(blob,len,c);
 	}
 }
 
@@ -54,11 +80,11 @@ inline void rshift ( void * blob, int len, int c )
 {
 	if((len & 3) == 0)
 	{
-		rshift32(&blob,len,c);
+		rshift32(blob,len,c);
 	}
 	else
 	{
-		rshift8(&blob,len,c);
+		rshift8(blob,len,c);
 	}
 }
 
@@ -105,6 +131,30 @@ void rrot1    ( void * blob, int len, int c );
 void rrot8    ( void * blob, int len, int c );
 void rrot32   ( void * blob, int len, int c );
 
+inline void lrot ( void * blob, int len, int c )	// dispatches on blob length in bytes
+{
+	if((len & 3) == 0)	// len is a multiple of 4: use the 32-bit-chunk variant
+	{
+		return lrot32(blob,len,c);
+	}
+	else	// otherwise fall back to the byte-chunk variant
+	{
+		return lrot8(blob,len,c);
+	}
+}
+
+inline void rrot ( void * blob, int len, int c )	// dispatches on blob length in bytes
+{
+	if((len & 3) == 0)	// len is a multiple of 4: use the 32-bit-chunk variant
+	{
+		return rrot32(blob,len,c);
+	}
+	else	// otherwise fall back to the byte-chunk variant
+	{
+		return rrot8(blob,len,c);
+	}
+}
+
 template < typename T >
 inline void lrot ( T & blob, int c )
 {
@@ -155,7 +205,6 @@ inline uint32_t window ( void * blob, int len, int start, int count )
 	}
 }
 
-/*
 template < typename T >
 inline uint32_t window ( T & blob, int start, int count )
 {
@@ -168,15 +217,14 @@ inline uint32_t window ( T & blob, int start, int count )
 		return window8(&blob,sizeof(T),start,count);
 	}
 }
-*/
 
-// template<> 
+template<> 
 inline uint32_t window ( uint32_t & blob, int start, int count )
 {
 	return _rotr(blob,start) & ((1<<count)-1);
 }
 
-// template<> 
+template<> 
 inline uint32_t window ( uint64_t & blob, int start, int count )
 {
 	return (uint32_t)_rotr64(blob,start) & ((1<<count)-1);
diff --git a/Core.cpp b/Core.cpp
deleted file mode 100644
index f42541e..0000000
--- a/Core.cpp
+++ /dev/null
@@ -1,186 +0,0 @@
-#include "Core.h"
-
-#include <math.h>
-
-//-----------------------------------------------------------------------------
-
-int SolveLinear ( double a, double b, double & r )
-{
-	if(a == 0)
-	{
-		return 0;
-	}
-
-	r = -b/a;
-
-	return 1;
-}
-
-//----------
-
-int SolveQuadratic ( double a, double b, double c, double & r1, double & r2 )
-{
-	if(a == 0)
-	{
-		return SolveLinear(b,c,r1);
-	}
-
-	double d = (b*b) - (4*a*c);
-
-	if(d < 0) return 0;
-
-	double d2 = sqrt(d);
-
-	r1 = (-b - d2) / (2.0 * a);
-	r2 = (-b + d2) / (2.0 * a);
-
-	return (r1 == r2) ? 1 : 2;
-}
-
-//----------
-
-uint32_t multinv ( uint32_t x )
-{
-	uint32_t y = 1;
-	uint32_t t = x;
-
-	for(int i = 1; i < 32; i++)
-	{
-		uint32_t b = (1 << i);
-
-		if(t & b)
-		{
-			y |= b;
-			t += (x << i);
-		}
-	}
-
-	return y;
-};
-
-//-----------------------------------------------------------------------------
-// this is random stuff that needs to go somewhere else
-
-uint32_t modmul ( uint32_t k, uint32_t m )
-{
-	uint64_t k2 = k;
-	
-	k2 *= m;
-	k2 -= k2 >> 32;
-
-	return (uint32_t)k2;
-}
-
-uint32_t splitmul ( uint32_t k, uint32_t m )
-{
-	k *= m;
-	k -= k >> 16;
-
-	return k;
-}
-
-uint32_t expand16 ( uint32_t k, uint32_t m )
-{
-	return modmul(k+1,m);
-}
-
-bool compare_ham16 ( uint32_t mulA, uint32_t mulB, 
-                     int cutoffA, int cutoffXA, int cutoffAB,
-                     int & minA, int & minB, int & minXA, int & minXB, int & minAB )
-{
-	const int count = 65536;
-
-	uint32_t fA[count];
-	uint32_t fB[count];
-
-	for(int i = 0; i < count; i++)
-	{
-		fA[i] = expand16(i,mulA);
-		fB[i] = expand16(i,mulB);
-	}
-
-	minA = 100000;
-	minB = 100000;
-
-	minXA = 100000;
-	minXB = 100000;
-	minAB = 100000;
-
-	for(int j =   0; j < count-1; j++)
-	for(int k = j+1; k < count;   k++)
-	{
-		int X = popcount(j^k);
-		int A = popcount(fA[j]^fA[k]);
-		int B = popcount(fB[j]^fB[k]);
-
-		int XA = X+A;
-		int XB = X+B;
-		int AB = A+B;
-
-		if(A < minA) minA = A;
-		if(B < minB) minB = B;
-		if(XA < minXA) minXA = XA;
-		if(XB < minXB) minXB = XB;
-		if(AB < minAB) minAB = AB;
-
-		if(A < cutoffA) 
-			goto dead;
-
-		if(B < cutoffA) 
-			goto dead;
-
-		if(XA < cutoffXA) 
-			goto dead;
-
-		if(XB < cutoffXA) 
-			goto dead;
-
-		if(AB < cutoffAB) 
-			goto dead;
-	}
-
-	return true;
-
-	dead:
-
-	return false;
-}
-
-bool test_ham16 ( uint32_t mulA, int cutoffA, int cutoffXA, int & minA, int & minXA )
-{
-	int minB,minXB,minAB;
-
-	return compare_ham16(mulA,mulA, cutoffA,cutoffXA,0, minA,minB,minXA,minXB,minAB);
-}
-
-inline uint32_t foldmul ( uint32_t k, uint32_t m )
-{
-	uint64_t k2 = k;
-	
-	k2 *= m;
-	k2 ^= k2 >> 32;
-
-	return (uint32_t)k2;
-}
-
-inline uint32_t revmul ( const uint32_t v, const uint32_t m )
-{
-	uint32_t k1 = (uint32_t)(uint64_t(v) * m);
-	uint32_t k2 = (uint32_t)((uint64_t(v) * m) >> 32);
-
-	uint32_t k = k1-k2;
-
-	if(k2 > k1) k++;
-
-	return k;
-}
-
-inline uint32_t idmul ( const uint32_t v, const uint32_t m )
-{
-	uint32_t k1 = (uint32_t)(uint64_t(v) * m);
-	uint32_t k2 = (uint32_t)((uint64_t(v) * m) >> 32);
-
-	return k1 - k2;
-}
-
-//-----------------------------------------------------------------------------
diff --git a/Core.h b/Core.h
deleted file mode 100644
index 0f0f32a..0000000
--- a/Core.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include "Types.h"
-#include "Bitvec.h"
-#include "Random.h"
-
-//-----------------------------------------------------------------------------
-
-int SolveQuadratic ( double a, double b, double c, double & r1, double & r2 );
-
-void AccumDiffCounts ( void * a, void * b, double * counts, int len, double inc );
-
-unsigned int multinv ( unsigned int x );
-
-//-----------------------------------------------------------------------------
diff --git a/CycleTest.cpp b/CycleTest.cpp
deleted file mode 100644
index 125e60e..0000000
--- a/CycleTest.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "CycleTest.h"
diff --git a/CycleTest.h b/CycleTest.h
deleted file mode 100644
index f221674..0000000
--- a/CycleTest.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#pragma once
-
-#include "Types.h"
-
-template < typename hashtype >
-bool CycleTest ( pfHash hash, int cycleLen, int cycleReps, const int reps )
-{
-	printf("Keyset 'Cycles' (%dk keys, %d cycles, %d bytes)",reps / 1000,cycleReps,cycleLen);
-
-	bool result = true;
-
-	std::vector<hashtype> hashes;
-	hashes.resize(reps);
-
-	int keyLen = cycleLen * cycleReps;
-
-	uint8_t * cycle = new uint8_t[cycleLen + 16];
-	uint8_t * key = new uint8_t[keyLen];
-
-	for(int i = 0; i < reps; i++)
-	{
-		if(i % (reps/10) == 0) printf(".");
-
-		oracle(i,0,cycle,cycleLen);
-
-		*(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
-
-		for(int j = 0; j < keyLen; j++)
-		{
-			key[j] = cycle[j % cycleLen];
-		}
-
-		hash(key,keyLen,0,&hashes[i]);
-	}
-	printf("\n");
-
-	testhashlist(hashes,true,false);
-
-	delete [] cycle;
-	delete [] key;
-
-	return result;
-}
-
-
diff --git a/DifferentialTest.h b/DifferentialTest.h
index d5b17db..69e158b 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -3,52 +3,96 @@
 // see what happens to the hash value when we flip a few bits of the key.
 
 #pragma once
+
 #include "Types.h"
+#include <vector>
 
 //-----------------------------------------------------------------------------
-// Check all possible keybits-choose-N differentials for collisions, report
-// ones that occur significantly more often than expected.
+// Sort through the differentials, ignoring collisions that only occurred once
+// (these could be false positives). If any differential occurs more than once, the
+// differential test fails (returns false). 'reps' only scales the printed percentages.
 
-// Random collisions can happen with probability 1 in 2^32 - if we do more than
-// 2^32 tests, we'll probably see some spurious random collisions, so don't report
-// them.
-
-template < typename keytype, typename hashtype >
-void DiffTest ( pfHash hash, int diffbits, int reps )
+template < class keytype >
+bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
 {
-	const int keybits = sizeof(keytype) * 8;
-	const int hashbits = sizeof(hashtype) * 8;
+	std::sort(diffs.begin(), diffs.end());
 
-	double diffcount = chooseUpToK(keybits,diffbits);
-	double testcount = (diffcount * double(reps));
-	double expected  = testcount / 4294967296.0;
+	int count = 1;
+	int ignore = 0;
 
-	std::vector<keytype> diffs;
+	bool result = true;
 
-	keytype k1,k2;
-	hashtype h1,h2;
+	if(diffs.size())
+	{
+		keytype kp = diffs[0];
 
-	printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
-	printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
+		for(int i = 1; i < (int)diffs.size(); i++)
+		{
+			if(diffs[i] == kp)
+			{
+				count++;
+				continue;
+			}
+			else
+			{
+				if(count > 1)
+				{
+					result = false;
 
-	for(int i = 0; i < reps; i++)
-	{
-		if(i % (reps/10) == 0) printf(".");
+					double pct = 100 * (double(count) / double(reps));
 
-		rand_p(&k1,sizeof(k1));
-		k2 = k1;
+					if(dumpCollisions)
+					{
+						printbits((unsigned char*)&kp,sizeof(kp));
+						printf(" - %4.2f%%\n", pct );
+					}
+				}
+				else 
+				{
+					ignore++;
+				}
 
-		hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
+				kp = diffs[i];
+				count = 1;
+			}
+		}
 
-		DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
+		if(count > 1)	// the final run of equal differentials is judged like every earlier run
+		{
+			result = false;
+			double pct = 100 * (double(count) / double(reps));
+			if(dumpCollisions)
+			{
+				printbits((unsigned char*)&kp,sizeof(kp));
+				printf(" - %4.2f%%\n", pct );
+			}
+		}
+		else 
+		{
+			ignore++;
+		}
+	}
+
+	printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
+
+	if(result == false)
+	{
+		printf(" !!!!! ");
 	}
-	printf("\n");
 
-	printdiffs(diffs,reps);
 	printf("\n");
+	printf("\n");
+
+	return result;
 }
 
-//----------
+//-----------------------------------------------------------------------------
+// Check all possible keybits-choose-N differentials for collisions, report
+// ones that occur significantly more often than expected.
+
+// Random collisions can happen with probability 1 in 2^32 - if we do more than
+// 2^32 tests, we'll probably see some spurious random collisions, so don't report
+// them.
 
 template < typename keytype, typename hashtype >
 void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
@@ -79,56 +123,42 @@ void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, h
 
 //----------
 
-template < class keytype >
-void printdiffs ( std::vector<keytype> & diffs, int reps )
+// For each of 'reps' keys filled by rand_p, hashes the key and passes it to DiffTestRecurse along
+// with the 'diffs' vector; the verdict is whatever ProcessDifferentials returns for 'diffs'.
+template < typename keytype, typename hashtype >
+bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
 {
-	std::sort(diffs.begin(), diffs.end());
+	const int keybits = sizeof(keytype) * 8;
+	const int hashbits = sizeof(hashtype) * 8;
 
-	int count = 1;
-	int ignore = 0;
+	double diffcount = chooseUpToK(keybits,diffbits);
+	double testcount = (diffcount * double(reps));
+	double expected  = testcount / pow(2.0,double(hashbits));
 
-	if(diffs.size())
+	std::vector<keytype> diffs;
+
+	keytype k1,k2;
+	hashtype h1,h2;
+
+	printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
+	printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
+
+	for(int i = 0; i < reps; i++)
 	{
-		keytype kp = diffs[0];
+		if((reps >= 10) && (i % (reps/10) == 0)) printf(".");	// guard: reps/10 is 0 when reps < 10
 
-		for(int i = 1; i < (int)diffs.size(); i++)
-		{
-			if(diffs[i] == kp)
-			{
-				count++;
-				continue;
-			}
-			else
-			{
-				if(count > 1)
-				{
-					double pct = 100 * (double(count) / double(reps));
-					printbits((unsigned char*)&kp,sizeof(kp));
-					printf(" - %4.2f%%\n", pct );
-				}
-				else 
-				{
-					ignore++;
-				}
+		rand_p(&k1,sizeof(k1));
+		k2 = k1;
 
-				kp = diffs[i];
-				count = 1;
-			}
-		}
+		hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
 
-		if(count > 1)
-		{
-			double pct = 100 * (double(count) / double(reps));
-			printbits((unsigned char*)&kp,sizeof(kp));
-			printf(" - %4.2f%%\n", pct );
-		}
-		else 
-		{
-			ignore++;
-		}
+		DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
 	}
+	printf("\n");
+	bool result = true;
+	result &= ProcessDifferentials(diffs,reps,dumpCollisions);
 
-	printf("%d total collisions, of which %d single collisions were ignored\n",(int)diffs.size(),ignore);
+	return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -155,9 +185,9 @@ void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst,
 
 	for(int i = 0; i < trials; i++)
 	{
-		rand_t(keys[i]);
+		rand_p(&keys[i],sizeof(keytype));
 
-		hash(&keys[i],sizeof(keys[i]),0,(uint32_t*)&A[i]);
+		hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
 	}
 
 	//----------
diff --git a/Hashes.cpp b/Hashes.cpp
index b27a2f0..f6787c6 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -5,9 +5,23 @@
 //----------------------------------------------------------------------------
 // fake / bad hashes
 
-void randhash ( const void *, int, uint32_t, void * out )
+void randhash_32 ( const void *, int, uint32_t, void * out )
 {
-	*(uint32_t*)out = rand_u32();
+	((uint32_t*)out)[0] = rand_u32();
+}
+
+void randhash_64 ( const void *, int, uint32_t, void * out )
+{
+	uint32_t * words = (uint32_t*)out;	// key, length and seed are ignored
+	for(int w = 0; w < 2; w++) words[w] = rand_u32();	// two random 32-bit words, in order
+}
+
+void randhash_128 ( const void *, int, uint32_t, void * out )
+{
+	((uint32_t*)out)[0] = rand_u32();
+	((uint32_t*)out)[1] = rand_u32();
+	((uint32_t*)out)[2] = rand_u32();
+	((uint32_t*)out)[3] = rand_u32();
 }
 
 void BadHash ( const void * key, int len, uint32_t seed, void * out )
diff --git a/Hashes.h b/Hashes.h
index 8bf998e..1aad04c 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -6,17 +6,37 @@
 #include "MurmurHash2.h"
 #include "MurmurHash3.h"
 
+//----------
+// These are _not_ hash functions (even though people tend to use crc32 as one...)
+
 void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
+void crc32                 ( const void * key, int len, uint32_t seed, void * out );
+
+void randhash_32           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_64           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_128          ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// Cryptographic hashes
+
+void md5_32                ( const void * key, int len, uint32_t seed, void * out );
+void sha1_32a              ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// General purpose hashes
+
 void FNV                   ( const void * key, int len, uint32_t seed, void * out );
 void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
 void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
-void md5_32                ( const void * key, int len, uint32_t seed, void * out );
-void crc32                 ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// MurmurHash2
 
 void MurmurHash2_test      ( const void * key, int len, uint32_t seed, void * out );
 void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * out );
 
 //-----------------------------------------------------------------------------
+// Test harnesses for Murmur1/2
 
 inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
 {
@@ -33,3 +53,12 @@ inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void *
 	*(uint32_t*)out = MurmurHash2A(key,len,seed);
 }
 
+inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	((uint64_t*)out)[0] = MurmurHash64A(key,len,seed);	// adapter: store the returned value as one 64-bit word at out
+}
+
+inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	((uint64_t*)out)[0] = MurmurHash64B(key,len,seed);	// adapter: store the returned value as one 64-bit word at out
+}
\ No newline at end of file
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
new file mode 100644
index 0000000..3424ce6
--- /dev/null
+++ b/KeysetTest.cpp
@@ -0,0 +1,203 @@
+#include "KeysetTest.h"
+
+#include "Random.h"
+
+//-----------------------------------------------------------------------------
+
+void QuickBrownFox ( pfHash hash, const int hashbits )
+{
+	// Hash two sentences that differ in a single letter ("dog" vs "cog")
+	// and print both results so they can be compared by eye.
+	const char * phrases[2] =
+	{
+		"The quick brown fox jumps over the lazy dog",
+		"The quick brown fox jumps over the lazy cog"
+	};
+
+	uint8_t digests[2][128];
+
+	// Both hashes (seed 0) are computed before anything is printed.
+	for(int p = 0; p < 2; p++) hash(phrases[p],(int)strlen(phrases[p]),0,digests[p]);
+
+	for(int p = 0; p < 2; p++)
+	{
+		printf("\"%s\" => ",phrases[p]);
+		printhex32(digests[p],hashbits / 8);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+//----------------------------------------------------------------------------
+// Alignment of the keys should not affect the hash value - if it does,
+// something is broken.
+
+void AlignmentTest ( pfHash hash, const int hashbits )
+{
+	const int hashbytes = hashbits / 8;
+	// Copy one string to 16 differently aligned addresses and check that every copy hashes alike.
+	printf("Testing alignment handling on small keys..........");
+
+	// 64 bytes per slot: up to 15 bytes of padding + an offset of up to 15 + the 32-byte strcpy_s window
+	char bufs[16][64];
+	char * strings[16];
+
+	for(int i = 0; i < 16; i++)
+	{
+		// Hold the address in size_t - uint32_t would truncate 64-bit pointers.
+		size_t b = size_t(&bufs[i][0]);
+		// Round up to a 16-byte boundary; strings[i] then sits i bytes past it.
+		b = (b+15)&(~size_t(15));
+		strings[i] = (char*)(b + i);
+
+		strcpy_s(strings[i],32,"DeadBeefDeadBeef");
+	}
+
+	uint32_t hash1[64];
+	uint32_t hash2[64];
+
+	// Hash the first k bytes at every pair of distinct offsets; the two results must be identical.
+	for(int k = 1; k <= 16; k++)
+	for(int j = 0; j < 15; j++)
+	for(int i = j+1; i < 16; i++)
+	{
+		const char * s1 = strings[i];
+		const char * s2 = strings[j];
+		hash(s1,k,0,hash1);
+		hash(s2,k,0,hash2);
+
+		if(memcmp(hash1,hash2,hashbytes) != 0)
+		{
+			printf("*********FAIL*********\n");
+			return;
+		}
+	}
+
+	printf("PASS\n");
+}
+
+//----------------------------------------------------------------------------
+// Appending zero bytes to a key should always cause it to produce a different
+// hash value
+
+void AppendedZeroesTest ( pfHash hash, const int hashbits )
+{
+	// Take a random 32-byte key followed by zero padding and hash its
+	// prefixes of length 32..63. Every extra zero byte must change the hash,
+	// so each result is compared against the one for the previous length.
+	const int digestLen = hashbits/8;
+
+	printf("Testing zero-appending");
+
+	for(int round = 0; round < 100; round++)
+	{
+		if((round % 10) == 0) printf(".");
+
+		// 32 random bytes up front, zeroes everywhere after them
+		unsigned char padded[256];
+		memset(padded,0,sizeof(padded));
+		rand_p(padded,32);
+
+		// 'previous' starts all-zero, so the first hash is compared against zeroes
+		uint32_t current[16];
+		uint32_t previous[16];
+		memset(current,0,digestLen);
+		memset(previous,0,digestLen);
+
+		for(int extra = 0; extra < 32; extra++)
+		{
+			hash(padded,32+extra,0,current);
+
+			if(!memcmp(current,previous,digestLen))
+			{
+				printf("\n*********FAIL*********\n");
+				return;
+			}
+			memcpy(previous,current,digestLen);
+		}
+	}
+	printf("PASS\n");
+}
+
+//----------------------------------------------------------------------------
+// Basic sanity checks -
+
+// A hash function should not be reading outside the bounds of the key.
+
+// Flipping a bit of a key should, with overwhelmingly high probability,
+// result in a different hash.
+
+// Hashing the same key twice should always produce the same result.
+
+bool SanityTest ( pfHash hash, const int hashbits )
+{
+	bool result = true;
+	// Returns true only if every bit flip changed the hash and every restore reproduced it.
+	const int hashbytes = hashbits/8;
+	const int reps = 100;
+
+	printf("Testing bit twiddling");
+	// The key starts 64 bytes into the buffer: room for 128 key bytes plus 64 bytes of garbage on each side.
+	uint8_t buffer[256];
+	uint8_t * key = &buffer[64];
+
+	uint8_t * h1 = new uint8_t[hashbytes];
+	uint8_t * h2 = new uint8_t[hashbytes];
+
+	for(int irep = 0; irep < reps; irep++)
+	{
+		if(irep % (reps/10) == 0) printf(".");
+
+		for(int len = 1; len <= 128; len++)
+		{
+			// Generate a random key in the middle of the buffer, hash it,
+			// and then fill the space around the key with garbage. If a
+			// broken hash function reads past the ends of the key, it should
+			// fail the "did we get the same hash?" test below.
+
+			rand_p(key,len);
+			hash(key,len,0,h1);
+
+			rand_p(buffer,64);
+			rand_p(key+len,64);
+
+			// Flip a bit, hash the key -> we should get a different result.
+			// Flip it back, hash again -> we should get the same result.
+
+			for(int bit = 0; bit < (len * 8); bit++)
+			{
+				flipbit(key,len,bit);
+				hash(key,len,0,h2);
+
+				if(memcmp(h1,h2,hashbytes) == 0)
+				{
+					result = false;	// flipping one bit left the hash unchanged
+				}
+
+				flipbit(key,len,bit);
+				hash(key,len,0,h2);
+
+				if(memcmp(h1,h2,hashbytes) != 0)
+				{
+					result = false;	// the restored key no longer hashes to h1
+				}
+			}
+		}
+	}
+	// A failure does not stop the loops; a single verdict is printed once they finish.
+	if(result == false)
+	{
+		printf("*********FAIL*********\n");
+	}
+	else
+	{
+		printf("PASS\n");
+	}
+
+	delete [] h1;
+	delete [] h2;
+
+	return result;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/KeysetTest.h b/KeysetTest.h
new file mode 100644
index 0000000..ae2bfcf
--- /dev/null
+++ b/KeysetTest.h
@@ -0,0 +1,354 @@
+//-----------------------------------------------------------------------------
+// Keyset tests generate various sorts of difficult-to-hash keysets and compare
+// the distribution and collision frequency of the hash results against an
+// ideal random distribution
+
+// The sanity checks are also in this cpp/h
+
+#pragma once
+
+#include "Types.h"
+#include "Stats.h"
+
+//-----------------------------------------------------------------------------
+// Sanity tests
+
+bool SanityTest         ( pfHash hash, const int hashbits );
+void QuickBrownFox      ( pfHash hash, const int hashbits );
+void AlignmentTest      ( pfHash hash, const int hashbits );
+void AppendedZeroesTest ( pfHash hash, const int hashbits );
+
+//----------------------------------------------------------------------------
+// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
+// consisting of all possible permutations of those blocks
+
+template< typename hashtype >
+void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
+{
+	// Slots [0,k) are already fixed; try every remaining block in slot k
+	// and recurse. With only the last slot left the permutation is complete.
+	const bool complete = (k == blockcount-1);
+
+	if(!complete)
+	{
+		for(int pick = k; pick < blockcount; pick++)
+		{
+			swap(blocks[k],blocks[pick]);
+			PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
+			swap(blocks[k],blocks[pick]);	// put the blocks back for the next pick
+		}
+	}
+	else
+	{
+		hashtype digest;
+		hash(blocks,blockcount * sizeof(uint32_t),0,&digest);
+		hashes.push_back(digest);
+	}
+}
+
+template< typename hashtype >
+bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+	// Hash every ordering of the given 32-bit blocks, then let TestHashList
+	// judge the resulting hashes; its verdict is the return value.
+	printf("Keyset 'Permutation' - %d blocks - ",blockcount);
+
+	//----------
+	// generate one hash per permutation
+
+	std::vector<hashtype> permHashes;
+	PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,permHashes);
+
+	printf("%d keys\n",(int)permHashes.size());
+
+	//----------
+	// evaluate
+
+	bool passed = true;
+	passed &= TestHashList<hashtype>(permHashes,testColl,testDist,drawDiagram);
+	printf("\n");
+	return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
+
+template < typename keytype, typename hashtype >
+void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
+{
+	const int nbytes = sizeof(keytype);
+	const int nbits = nbytes * 8;
+	// Flips each bit i in [start,nbits) in turn; while bits are left over it recurses on the positions above i.
+	hashtype h;
+
+	for(int i = start; i < nbits; i++)
+	{
+		flipbit(&k,nbytes,i);
+		// Hash the key when this was the last bit to place, or at every depth in inclusive mode.
+		if(inclusive || (bitsleft == 1))
+		{
+			hash(&k,sizeof(keytype),0,&h);
+			hashes.push_back(h);
+		}
+
+		if(bitsleft > 1)
+		{
+			SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
+		}
+		// flip bit i a second time before moving on to the next position
+		flipbit(&k,nbytes,i);
+	}
+}
+
+//----------
+
+template < int keybits, typename hashtype >
+bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
+{
+	// Hash every keybits-wide key that has exactly 'setbits' bits set (or,
+	// when 'inclusive', anywhere from zero to 'setbits' bits set) and pass
+	// the hashes to TestHashList.
+	typedef Blob<keybits> keytype;
+
+	const char * howMany = inclusive ? "up to" : "exactly";
+	printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, howMany, setbits);
+
+	std::vector<hashtype> sparseHashes;
+
+	// start from the all-zero key
+	keytype zeroKey;
+	memset(&zeroKey,0,sizeof(zeroKey));
+
+	if(inclusive)
+	{
+		// the key with no bits set only counts in inclusive mode
+		hashtype zeroHash;
+		hash(&zeroKey,sizeof(keytype),0,&zeroHash);
+		sparseHashes.push_back(zeroHash);
+	}
+
+	SparseKeygenRecurse(hash,0,setbits,inclusive,zeroKey,sparseHashes);
+	printf("%d keys\n",(int)sparseHashes.size());
+
+	bool passed = true;
+	passed &= TestHashList<hashtype>(sparseHashes,testColl,testDist,drawDiagram);
+	printf("\n");
+	return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Windowed' - for all possible N-bit windows of a K-bit key, generate
+// all possible keys with bits set in that window
+
+template < typename keytype, typename hashtype >
+bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
+{
+	const int keybits = sizeof(keytype) * 8;
+	const int keycount = 1 << windowbits;
+	// For each window offset j: hash the keycount = 2^windowbits keys (i << j) and run TestHashList on them.
+	std::vector<hashtype> hashes;
+	hashes.resize(keycount);
+	// 'hashes' is sized once and overwritten for every window position.
+	bool result = true;
+
+	int testcount = (keybits-windowbits);
+	// NOTE(review): the loop below is inclusive (j <= testcount), so it runs testcount+1 windows while the banner reports testcount - confirm which is intended.
+	printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
+
+	for(int j = 0; j <= testcount; j++)
+	{
+		int minbit = j;
+
+		keytype key;
+
+		for(int i = 0; i < keycount; i++)
+		{
+			key = i;
+			key = key << minbit;
+			// i has been moved up to bit offset minbit (through keytype's own '=' and '<<')
+			hash(&key,sizeof(keytype),0,&hashes[i]);
+		}
+
+		printf("Window at %3d - ",j);
+		// result stays true only if TestHashList passes for every window position
+		result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
+
+		//printf("\n");
+	}
+
+	return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
+// bytes.
+
+// (This keyset type is designed to make MurmurHash2 fail)
+
+template < typename hashtype >
+bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
+{
+	// Each key is one random cycleLen-byte pattern repeated cycleReps times.
+	// The first four bytes of the pattern are overwritten with a mix of the
+	// key index (the pattern buffer carries 16 spare bytes for that write).
+	printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
+
+	std::vector<hashtype> cyclicHashes(keycount);
+	const int totalLen = cycleLen * cycleReps;
+
+	uint8_t * pattern = new uint8_t[cycleLen + 16];
+	uint8_t * repeated = new uint8_t[totalLen];
+
+	//----------
+
+	for(int n = 0; n < keycount; n++)
+	{
+		rand_p(pattern,cycleLen);
+		*(uint32_t*)pattern = f3mix(n ^ 0x746a94f1);
+
+		// tile the pattern across the whole key
+		for(int dst = 0, src = 0; dst < totalLen; dst++)
+		{
+			repeated[dst] = pattern[src];
+			if(++src == cycleLen) src = 0;
+		}
+
+		hash(repeated,totalLen,0,&cyclicHashes[n]);
+	}
+
+	//----------
+
+	bool passed = true;
+	passed &= TestHashList(cyclicHashes,true,true,drawDiagram);
+	printf("\n");
+
+	delete [] pattern;
+	delete [] repeated;
+	return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
+// where "core" consists of all possible combinations of the given character
+// set of length N.
+
+template < typename hashtype >
+bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
+{
+	// Keys look like prefix + core + suffix, where the core runs through
+	// every corelen-character string over the alphabet 'coreset'. Key index
+	// n is written into the core as a base-|coreset| number, lowest digit
+	// first.
+	const int alphabetSize = (int)strlen(coreset);
+	const int headLen = (int)strlen(prefix);
+	const int tailLen = (int)strlen(suffix);
+
+	const int keybytes = headLen + corelen + tailLen;
+	const int keycount = (int)pow(double(alphabetSize),double(corelen));
+
+	printf("Keyset 'Text' - keys of form \"%s[",prefix);
+	for(int x = corelen; x > 0; x--) printf("X");
+	printf("]%s\" - %d keys\n",suffix,keycount);
+
+	// key buffer: NUL-terminated; prefix and suffix are written only once
+	uint8_t * text = new uint8_t[keybytes+1];
+	uint8_t * core = text + headLen;
+
+	text[keybytes] = 0;
+	memcpy(text,prefix,headLen);
+	memcpy(core+corelen,suffix,tailLen);
+
+	//----------
+
+	std::vector<hashtype> textHashes(keycount);
+
+	for(int n = 0; n < keycount; n++)
+	{
+		int digits = n;
+		for(int pos = 0; pos < corelen; pos++)
+		{
+			core[pos] = coreset[digits % alphabetSize];
+			digits /= alphabetSize;
+		}
+
+		hash(text,keybytes,0,&textHashes[n]);
+	}
+
+	//----------
+
+	bool passed = true;
+	passed &= TestHashList(textHashes,true,true,drawDiagram);
+	printf("\n");
+	delete [] text;
+	return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
+
+// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
+
+template < typename hashtype >
+bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
+{
+	// One shared all-zero buffer serves every key: key number 'len' is just
+	// its first 'len' bytes, for len = 0 .. keycount-1.
+	int keycount = 64*1024;
+
+	printf("Keyset 'Zeroes' - %d keys\n",keycount);
+
+	unsigned char * zeroes = new unsigned char[keycount];
+	memset(zeroes,0,keycount);
+
+	//----------
+
+	std::vector<hashtype> zeroHashes(keycount);
+
+	for(int len = 0; len < keycount; len++)
+	{
+		hash(zeroes,len,0,&zeroHashes[len]);
+	}
+
+	//----------
+
+	bool passed = true;
+	passed &= TestHashList(zeroHashes,true,true,drawDiagram);
+	printf("\n");
+
+	delete [] zeroes;
+
+	return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
+
+template < typename hashtype >
+bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
+{
+	// Hash one fixed sentence under the seeds 0 .. keycount-1; the key never
+	// changes, only the seed does.
+	const char * const pangram = "The quick brown fox jumps over the lazy dog";
+	const int pangramLen = (int)strlen(pangram);
+
+	printf("Keyset 'Seed' - %d keys\n",keycount);
+
+	//----------
+
+	std::vector<hashtype> seededHashes(keycount);
+
+	for(int seed = 0; seed < keycount; seed++)
+	{
+		hash(pangram,pangramLen,seed,&seededHashes[seed]);
+	}
+
+	//----------
+
+	bool passed = true;
+	passed &= TestHashList(seededHashes,true,true,drawDiagram);
+	printf("\n");
+
+	return passed;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 581d1d3..2942f10 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -2,49 +2,19 @@
 
 #include <stdlib.h>    // for _rotl
 
-#pragma warning(disable:4100)
-
 //-----------------------------------------------------------------------------
-// need to replace this
-
-inline uint32_t kmix ( uint32_t k, uint32_t c1, uint32_t c2 ) 
-{
-	k *= c1; 
-	k  = _rotl(k,11); 
-	k *= c2;
-
-	return k;
-}
-
-// block mix
-
-inline void bmix1 ( uint32_t & h, uint32_t k, uint32_t c1, uint32_t c2 )
-{
-	k = kmix(k,c1,c2);
-	
-	h = h*5+0xa6b84e31;
-	h ^= k;
-}
-
-// xor before mul is faster on x64
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
 
-inline void bmix2 ( uint32_t & h, uint32_t k, uint32_t c1, uint32_t c2 )
+inline uint32_t getblock ( const uint32_t * p, int i )
 {
-	k = kmix(k,c1,c2);
-	
-	h ^= k;
-	h = h*3+0xa6b84e31;
+	return p[i];
 }
 
-// block constant mix
-
-inline void cmix ( uint32_t & c1, uint32_t & c2 )
-{
-	c1 = c1*9+0x273581d8;
-	c2 = c2*5+0xee700bac;
-}
+//----------
+// Finalization mix - force all bits of a hash block to avalanche
 
-// finalizer mix - avalanches all bits to within 0.25% bias
+// avalanches all bits to within 0.25% bias
 
 inline uint32_t fmix32 ( uint32_t h )
 {
@@ -57,232 +27,432 @@ inline uint32_t fmix32 ( uint32_t h )
 	return h;
 }
 
-// 64-bit finalizer mix - avalanches all bits to within 0.05% bias
+//-----------------------------------------------------------------------------
 
-inline uint64_t fmix64 ( uint64_t k )
+inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
 {
-	k ^= k >> 33;
-	k *= 0xff51afd7ed558ccd;
-	k ^= k >> 33;
-	k *= 0xc4ceb9fe1a85ec53;
-	k ^= k >> 33;
+	k1 *= c1; 
+	k1  = _rotl(k1,11); 
+	k1 *= c2;
+	h1 ^= k1;
+	
+	h1 = h1*3+0x52dce729;
 
-	return k;
+	c1 = c1*5+0x7b7d159c;
+	c2 = c2*5+0x6bce6396;
 }
 
-//-----------------------------------------------------------------------------
+//----------
 
 void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
 {
-	uint32_t h = 0x971e137b ^ seed;
+	const uint8_t * data = (const uint8_t*)key;
+	const int nblocks = len / 4;
 
-	const uint8_t * tail = (const uint8_t*)(key) + (len & ~3);
+	uint32_t h1 = 0x971e137b ^ seed;
+
+	uint32_t c1 = 0x95543787;
+	uint32_t c2 = 0x2ad7eb25;
 
 	//----------
 	// body
 
-	const uint32_t * block = (const uint32_t *)tail;
+	const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
 
-	uint32_t c1 = 0x95543787;
-	uint32_t c2 = 0x2ad7eb25;
-
-	for(int l = -(len/4); l; l++)
+	for(int i = -nblocks; i; i++)
 	{
-		bmix1(h,block[l],c1,c2);
-		cmix(c1,c2);
+		uint32_t k1 = getblock(blocks,i);
+
+		bmix32(h1,k1,c1,c2);
 	}
 
 	//----------
 	// tail
 
-	uint32_t k = 0;
+	const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+	uint32_t k1 = 0;
 
 	switch(len & 3)
 	{
-	case 3: k ^= tail[2] << 16;
-	case 2: k ^= tail[1] << 8;
-	case 1: k ^= tail[0];
-			bmix1(h,k,c1,c2);
+	case 3: k1 ^= tail[2] << 16;
+	case 2: k1 ^= tail[1] << 8;
+	case 1: k1 ^= tail[0];
+			bmix32(h1,k1,c1,c2);
 	};
 
 	//----------
 	// finalization
 
-	h ^= len;
+	h1 ^= len;
 
-	h = fmix32(h);
+	h1 = fmix32(h1);
 
-	*(uint32_t*)out = h;
+	*(uint32_t*)out = h1;
 } 
 
 //-----------------------------------------------------------------------------
 
-void merge64 ( uint32_t h[2], const uint32_t * blocks, uint32_t c1, uint32_t c2 )
+inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & k1, uint32_t & k2, uint32_t & c1, uint32_t & c2 )
 {
-	h[0] = _rotl(h[0],9);
-	h[1] = _rotl(h[1],24);
+	k1 *= c1; 
+	k1  = _rotl(k1,11); 
+	k1 *= c2;
+	h1 ^= k1;
+	h1 += h2;
+
+	h2 = _rotl(h2,17);
 
-	h[0] += h[1];
-	h[1] += h[0];
+	k2 *= c2; 
+	k2  = _rotl(k2,11);
+	k2 *= c1;
+	h2 ^= k2;
+	h2 += h1;
 
-	bmix1(h[0],blocks[0],c1,c2);
-	bmix1(h[1],blocks[1],c1,c2);
+	h1 = h1*3+0x52dce729;
+	h2 = h2*3+0x38495ab5;
+
+	c1 = c1*5+0x7b7d159c;
+	c2 = c2*5+0x6bce6396;
 }
 
 //----------
 
-void MurmurHash3_x86_64 ( const void * data, int len, uint32_t seed, void * out )
+void MurmurHash3_x86_64 ( const void * key, const int len, const uint32_t seed, void * out )
 {
-	uint32_t h[2];
+	const uint8_t * data = (const uint8_t*)key;
+	const int nblocks = len / 8;
+
+	uint32_t h1 = 0x8de1c3ac ^ seed;
+	uint32_t h2 = 0xbab98226 ^ seed;
 
-	h[0] = 0x8de1c3ac ^ seed;
-	h[1] = 0xbab98226 ^ seed;
+	uint32_t c1 = 0x95543787;
+	uint32_t c2 = 0x2ad7eb25;
 
 	//----------
 	// body
 
-	const uint32_t * blocks = (const uint32_t *)data;
-
-	uint32_t c1 = 0x95543787;
-	uint32_t c2 = 0x2ad7eb25;
+	const uint32_t * blocks = (const uint32_t *)(data + nblocks*8);
 
-	while(len >= 8)
+	for(int i = -nblocks; i; i++)
 	{
-		merge64(h,blocks,c1,c2);
-		cmix(c1,c2);
+		uint32_t k1 = getblock(blocks,i*2+0);
+		uint32_t k2 = getblock(blocks,i*2+1);
 
-		blocks += 2;
-		len -= 8;
+		bmix32(h1,h2,k1,k2,c1,c2);
 	}
 
 	//----------
 	// tail
 	
-	uint32_t k[2] = { 0, 0 };
+	const uint8_t * tail = (const uint8_t*)(data + nblocks*8);
 
-	const uint8_t * tail = (const uint8_t*)blocks;
+	uint32_t k1 = 0;
+	uint32_t k2 = 0;
 
-	switch(len)
+	switch(len & 7)
 	{
-	case 7: k[1] ^= tail[6] << 16;
-	case 6: k[1] ^= tail[5] << 8;
-	case 5: k[1] ^= tail[4] << 0;
-	case 4: k[0] ^= tail[3] << 24;
-	case 3: k[0] ^= tail[2] << 16;
-	case 2: k[0] ^= tail[1] << 8;
-	case 1: k[0] ^= tail[0] << 0;
-			merge64(h,k,c1,c2);
+	case 7: k2 ^= tail[6] << 16;
+	case 6: k2 ^= tail[5] << 8;
+	case 5: k2 ^= tail[4] << 0;
+	case 4: k1 ^= tail[3] << 24;
+	case 3: k1 ^= tail[2] << 16;
+	case 2: k1 ^= tail[1] << 8;
+	case 1: k1 ^= tail[0] << 0;
+	        bmix32(h1,h2,k1,k2,c1,c2);
 	};
 
 	//----------
 	// finalization
 
-	h[1] ^= len;
+	h2 ^= len;
+
+	h1 += h2;
+	h2 += h1;
 
-	h[0] =  fmix32(h[0]);
-	h[1] ^= kmix(h[0],c1,c2);
-	h[0] ^= fmix32(h[1]);
-	h[1] ^= kmix(h[0],c1,c2);
+	h1 = fmix32(h1);
+	h2 = fmix32(h2);
 
-	((uint32_t*)out)[0] = h[0];
-	((uint32_t*)out)[1] = h[1];
+	h1 += h2;
+	h2 += h1;
+
+	((uint32_t*)out)[0] = h1;
+	((uint32_t*)out)[1] = h2;
 }
 
 //-----------------------------------------------------------------------------
+// This mix is large enough that VC++ refuses to inline it unless we use
+// __forceinline. It's also not all that fast due to register spillage.
 
-void merge128 ( uint32_t h[4], const uint32_t * blocks, uint32_t c1, uint32_t c2 )
+__forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_t & h4, 
+						    uint32_t & k1, uint32_t & k2, uint32_t & k3, uint32_t & k4, 
+						    uint32_t & c1, uint32_t & c2 )
 {
-	h[0] = _rotl(h[0],3);
-	h[1] = _rotl(h[1],10);
-	h[2] = _rotl(h[2],19);
-	h[3] = _rotl(h[3],26);
-
-	h[0] += h[1];
-	h[0] += h[2];
-	h[0] += h[3];
-
-	h[1] += h[0];
-	h[2] += h[0];
-	h[3] += h[0];
-
-	bmix1(h[0],blocks[0],c1,c2);
-	bmix1(h[1],blocks[1],c1,c2);
-	bmix1(h[2],blocks[2],c1,c2);
-	bmix1(h[3],blocks[3],c1,c2);
+	k1 *= c1; 
+	k1  = _rotl(k1,11); 
+	k1 *= c2;
+	h1 ^= k1;
+	h1 += h2;
+	h1 += h3;
+	h1 += h4;
+
+	k2 *= c2; 
+	k2  = _rotl(k2,11);
+	k2 *= c1;
+	h2 ^= k2;
+	h2 += h1;
+
+	h1 = h1*3+0x52dce729;
+	h2 = h2*3+0x38495ab5;
+
+	c1 = c1*5+0x7b7d159c;
+	c2 = c2*5+0x6bce6396;
+
+	k3 *= c1; 
+	k3  = _rotl(k3,11); 
+	k3 *= c2;
+	h3 ^= k3;
+	h3 += h1;
+
+	k4 *= c2; 
+	k4  = _rotl(k4,11);
+	k4 *= c1;
+	h4 ^= k4;
+	h4 += h1;
+
+	h3 = h3*3+0x52dce729;
+	h4 = h4*3+0x38495ab5;
+
+	c1 = c1*5+0x7b7d159c;
+	c2 = c2*5+0x6bce6396;
 }
 
 //----------
 
-void MurmurHash3_x86_128 ( const void * data, int len, uint32_t seed, uint32_t * out )
+void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, void * out )
 {
-	uint32_t h[4] =
+	const uint8_t * data = (const uint8_t*)key;
+	const int nblocks = len / 16;
+
+	uint32_t h1 = 0x8de1c3ac ^ seed;
+	uint32_t h2 = 0xbab98226 ^ seed;
+	uint32_t h3 = 0xfcba5b2d ^ seed;
+	uint32_t h4 = 0x32452e3e ^ seed;
+
+	uint32_t c1 = 0x95543787;
+	uint32_t c2 = 0x2ad7eb25;
+
+	//----------
+	// body
+
+	const uint32_t * blocks = (const uint32_t *)(data);
+
+	for(int i = 0; i < nblocks; i++)
+	{
+		uint32_t k1 = getblock(blocks,i*4+0);
+		uint32_t k2 = getblock(blocks,i*4+1);
+		uint32_t k3 = getblock(blocks,i*4+2);
+		uint32_t k4 = getblock(blocks,i*4+3);
+
+		bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
+	}
+
+	//----------
+	// tail
+
+	const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+	uint32_t k1 = 0;
+	uint32_t k2 = 0;
+	uint32_t k3 = 0;
+	uint32_t k4 = 0;
+
+	switch(len & 15)
 	{
-		0x8de1c3ac ^ seed,
-		0xbab98226 ^ seed,
-		0xfcba5b2d ^ seed,
-		0x32452e3e ^ seed
+	case 15: k4 ^= tail[14] << 16;
+	case 14: k4 ^= tail[13] << 8;
+	case 13: k4 ^= tail[12] << 0;
+	case 12: k3 ^= tail[11] << 24;
+	case 11: k3 ^= tail[10] << 16;
+	case 10: k3 ^= tail[ 9] << 8;
+	case  9: k3 ^= tail[ 8] << 0;
+	case  8: k2 ^= tail[ 7] << 24;
+	case  7: k2 ^= tail[ 6] << 16;
+	case  6: k2 ^= tail[ 5] << 8;
+	case  5: k2 ^= tail[ 4] << 0;
+	case  4: k1 ^= tail[ 3] << 24;
+	case  3: k1 ^= tail[ 2] << 16;
+	case  2: k1 ^= tail[ 1] << 8;
+	case  1: k1 ^= tail[ 0] << 0;
+	         bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
 	};
 
 	//----------
-	// body
+	// finalization
 
-	const uint32_t * blocks = (const uint32_t *)data;
+	h4 ^= len;
 
-	uint32_t c1 = 0x95543787;
-	uint32_t c2 = 0x2ad7eb25;
+	h1 += h2; h1 += h3; h1 += h4;
+	h2 += h1; h3 += h1; h4 += h1;
+
+	h1 = fmix32(h1);
+	h2 = fmix32(h2);
+	h3 = fmix32(h3);
+	h4 = fmix32(h4);
+
+	h1 += h2; h1 += h3; h1 += h4;
+	h2 += h1; h3 += h1; h4 += h1;
+
+	((uint32_t*)out)[0] = h1;
+	((uint32_t*)out)[1] = h2;
+	((uint32_t*)out)[2] = h3;
+	((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+inline uint64_t getblock ( const uint64_t * p, int i )
+{
+	return p[i];	// plain indexed read of the i-th 64-bit word; no byte swapping or alignment handling is done here
+}
+
+//----------
+// Block mix - combine the key bits with the hash bits and scramble everything
+
+inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
+{
+	k1 *= c1; 	// mixes key words k1,k2 into hash state h1,h2; all six reference arguments are modified
+	k1  = _rotl64(k1,23); 
+	k1 *= c2;
+	h1 ^= k1;
+	h1 += h2;
+	// h2 is rotated only after its old value has been added into h1
+	h2 = _rotl64(h2,41);
+	// second key word: same steps with c1 and c2 swapped
+	k2 *= c2; 
+	k2  = _rotl64(k2,23);
+	k2 *= c1;
+	h2 ^= k2;
+	h2 += h1;
+	// h1 and h2 each get their own additive constant
+	h1 = h1*3+0x52dce729;
+	h2 = h2*3+0x38495ab5;
+	// c1/c2 are updated in place, so the caller's next call uses new multipliers
+	c1 = c1*5+0x7b7d159c;
+	c2 = c2*5+0x6bce6396;
+}
+
+//----------
+// Finalization mix - avalanches all bits to within 0.05% bias
+
+inline uint64_t fmix64 ( uint64_t k )
+{
+	k ^= k >> 33;	// xor the upper 31 bits into the lower part of the word
+	k *= 0xff51afd7ed558ccd;
+	k ^= k >> 33;
+	k *= 0xc4ceb9fe1a85ec53;
+	k ^= k >> 33;
+	// k was passed by value: the mixed word is returned and the caller's variable is untouched
+	return k;
+}
+
+//----------
+
+void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
+{
+	const uint8_t * data = (const uint8_t*)key;
+	const int nblocks = len / 16;
+
+	uint64_t h1 = 0x9368e53c2f6af274 ^ seed;
+	uint64_t h2 = 0x586dcd208f7cd3fd ^ seed;
+
+	uint64_t c1 = 0x87c37b91114253d5;
+	uint64_t c2 = 0x4cf5ad432745937f;
+
+	//----------
+	// body
+
+	const uint64_t * blocks = (const uint64_t *)(data);
 
-	while(len >= 16)
+	for(int i = 0; i < nblocks; i++)
 	{
-		merge128(h,blocks,c1,c2);
-		cmix(c1,c2);
+		uint64_t k1 = getblock(blocks,i*2+0);
+		uint64_t k2 = getblock(blocks,i*2+1);
 
-		blocks += 4;
-		len -= 16;
+		bmix64(h1,h2,k1,k2,c1,c2);
 	}
 
 	//----------
 	// tail
 
-	uint32_t k[4] = { 0, 0, 0, 0 };
+	const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
 
-	const uint8_t * tail = (const uint8_t*)blocks;
+	uint64_t k1 = 0;
+	uint64_t k2 = 0;
 
-	switch(len)
+	switch(len & 15)
 	{
-	case 15: k[3] ^= tail[14] << 16;
-	case 14: k[3] ^= tail[13] << 8;
-	case 13: k[3] ^= tail[12] << 0;
-	case 12: k[2] ^= tail[11] << 24;
-	case 11: k[2] ^= tail[10] << 16;
-	case 10: k[2] ^= tail[ 9] << 8;
-	case  9: k[2] ^= tail[ 8] << 0;
-	case  8: k[1] ^= tail[ 7] << 24;
-	case  7: k[1] ^= tail[ 6] << 16;
-	case  6: k[1] ^= tail[ 5] << 8;
-	case  5: k[1] ^= tail[ 4] << 0;
-	case  4: k[0] ^= tail[ 3] << 24;
-	case  3: k[0] ^= tail[ 2] << 16;
-	case  2: k[0] ^= tail[ 1] << 8;
-	case  1: k[0] ^= tail[ 0] << 0;
-			merge128(h,k,c1,c2);
+	case 15: k2 ^= uint64_t(tail[14]) << 48;
+	case 14: k2 ^= uint64_t(tail[13]) << 40;
+	case 13: k2 ^= uint64_t(tail[12]) << 32;
+	case 12: k2 ^= uint64_t(tail[11]) << 24;
+	case 11: k2 ^= uint64_t(tail[10]) << 16;
+	case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+	case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+
+	case  8: k1 ^= uint64_t(tail[ 7]) << 56;
+	case  7: k1 ^= uint64_t(tail[ 6]) << 48;
+	case  6: k1 ^= uint64_t(tail[ 5]) << 40;
+	case  5: k1 ^= uint64_t(tail[ 4]) << 32;
+	case  4: k1 ^= uint64_t(tail[ 3]) << 24;
+	case  3: k1 ^= uint64_t(tail[ 2]) << 16;
+	case  2: k1 ^= uint64_t(tail[ 1]) << 8;
+	case  1: k1 ^= uint64_t(tail[ 0]) << 0;
+	         bmix64(h1,h2,k1,k2,c1,c2);
 	};
 
 	//----------
 	// finalization
 
-	h[3] ^= len;
+	h2 ^= len;
 
-	h[0] ^= fmix32(h[1]); h[2] ^= fmix32(h[3]);
-	h[1] ^= kmix(h[0],c1,c2); h[3] ^= kmix(h[2],c1,c2);
-	h[3] ^= fmix32(h[0]); h[1] ^= fmix32(h[2]);
-	h[0] ^= kmix(h[3],c1,c2); h[2] ^= kmix(h[1],c1,c2);
-	h[1] ^= fmix32(h[0]); h[3] ^= fmix32(h[2]);
+	h1 += h2;
+	h2 += h1;
 
-	out[0] = h[0];
-	out[1] = h[1];
-	out[2] = h[2];
-	out[3] = h[3];
+	h1 = fmix64(h1);
+	h2 = fmix64(h2);
+
+	h1 += h2;
+	h2 += h1;
+
+	((uint64_t*)out)[0] = h1;
+	((uint64_t*)out)[1] = h2;
 }
 
 //-----------------------------------------------------------------------------
+// If we need a smaller hash value, it's faster to just use a portion of the 
+// 128-bit hash
+
+void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out )
+{
+	uint32_t wide[4];	// room for the full 128-bit result
 
+	MurmurHash3_x64_128(key,len,seed,wide);
+	// hand back only the first 32-bit word
+	((uint32_t*)out)[0] = wide[0];
+}
+
+//----------
+
+void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out )
+{
+	uint64_t wide[2];	// room for the full 128-bit result
+
+	MurmurHash3_x64_128(key,len,seed,wide);
+	// hand back only the first 64-bit word
+	((uint64_t*)out)[0] = wide[0];
+}
+
+//-----------------------------------------------------------------------------
diff --git a/MurmurHash3.h b/MurmurHash3.h
index 5e19064..a65faa8 100644
--- a/MurmurHash3.h
+++ b/MurmurHash3.h
@@ -3,9 +3,12 @@
 //-----------------------------------------------------------------------------
 
 void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
-void MurmurHash3_x64_32  ( const void * key, int len, uint32_t seed, void * out );
 void MurmurHash3_x86_64  ( const void * key, int len, uint32_t seed, void * out );
 void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
 
+void MurmurHash3_x64_32  ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x64_64  ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
 //-----------------------------------------------------------------------------
 
diff --git a/Random.cpp b/Random.cpp
index 12d7077..e98f5ef 100644
--- a/Random.cpp
+++ b/Random.cpp
@@ -6,56 +6,3 @@ Rand g_rand3(3);
 Rand g_rand4(4);
 
 //-----------------------------------------------------------------------------
-// Pseudo-random oracle. Mix avalanches x/y/z to < 0.07% bias.
-
-inline void omix ( uint32_t & x, uint32_t & y, uint32_t & z )
-{
-	uint64_t m = 0x65a3d38b;
-	uint64_t t = 0;
-
-	t = x * m; y ^= t; z ^= (t >> 32);
-	t = z * m; x ^= t; y ^= (t >> 32);
-	t = y * m; z ^= t; x ^= (t >> 32);
-	t = x * m; y ^= t; z ^= (t >> 32);
-	t = z * m; x ^= t; y ^= (t >> 32);
-	t = y * m; z ^= t; x ^= (t >> 32);
-}
-
-void oracle ( uint32_t key, uint32_t nonce, void * blob, int size )
-{
-	uint32_t x = 0x498b3bc5;
-	uint32_t y = 0x9c3ed699;
-	uint32_t z = 0x5a05089a;
-
-	x ^= key;
-	y ^= nonce;
-	z ^= size;
-
-	uint8_t * cursor = (uint8_t*)blob;
-
-	while(size)
-	{
-		omix(x,y,z);
-
-		if(size > 4)
-		{
-			*(uint32_t*)cursor = x;
-
-			cursor += 4;
-			size -= 4;
-		}
-		else
-		{
-			switch(size)
-			{
-			case 3: cursor[2] = (uint8_t)(x >> 16);
-			case 2: cursor[1] = (uint8_t)(x >>  8);
-			case 1: cursor[0] = (uint8_t)(x >>  0);
-			};
-
-			return;
-		}
-	}
-}
-
-//-----------------------------------------------------------------------------
diff --git a/Random.h b/Random.h
index 87ed656..033e5f8 100644
--- a/Random.h
+++ b/Random.h
@@ -3,12 +3,8 @@
 #include "Types.h"
 
 //-----------------------------------------------------------------------------
-// random oracle (stateless)
-
-void oracle ( uint32_t key, uint32_t nonce, void * blob, int size );
-
-//-----------------------------------------------------------------------------
-// Xorshift-based RNG from George Marsaglia, algorithm taken from Wikipedia
+// Xorshift RNG based on code by George Marsaglia
+// http://en.wikipedia.org/wiki/Xorshift
 
 struct Rand
 {
@@ -27,54 +23,50 @@ struct Rand
 		reseed(seed);
 	}
 
-	uint32_t rand_u32 ( void )
-	{
-		uint32_t t = x ^ (x << 11);
-
-		x = y; 
-		y = z; 
-		z = w;
-		w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
-
-		return w;
-	}
-
-
 	void reseed ( uint32_t seed )
 	{
 		x = 0x498b3bc5 ^ seed;
-		y = 0x9c3ed699 ^ seed;
-		z = 0x5a05089a ^ seed;
-		w = 0x2c8a5c59 ^ seed; 
+		y = 0;
+		z = 0;
+		w = (x == 0) ? 1 : 0; // mix() never leaves the all-zero state, so keep one word nonzero (only seed 0x498b3bc5 is affected)
 
-		for(int i = 0; i < 10; i++) rand_u32();
+		for(int i = 0; i < 10; i++) mix();
 	}
 
 	void reseed ( uint64_t seed )
 	{
 		x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
-		y = 0x9c3ed699 ^ (uint32_t)(seed >> 11);
-		z = 0x5a05089a ^ (uint32_t)(seed >> 22);
-		w = 0x2c8a5c59 ^ (uint32_t)(seed >> 32); 
+		y = 0x5a05089a ^ (uint32_t)(seed >> 32);
+		z = 0;
+		w = ((x | y) == 0) ? 1 : 0; // mix() never leaves the all-zero state, so keep one word nonzero when the seed cancels both constants
 
-		for(int i = 0; i < 10; i++) rand_u32();
+		for(int i = 0; i < 10; i++) mix();
 	}
 
 	//-----------------------------------------------------------------------------
 
-	operator uint32_t ( void ) 
+	void mix ( void )
+	{
+		uint32_t t = x ^ (x << 11);
+		x = y; y = z; z = w;
+		w = w ^ (w >> 19) ^ t ^ (t >> 8); 
+	}
+
+	uint32_t rand_u32 ( void )
 	{
-		return rand_u32();
+		mix();
+
+		return x;
 	}
 
-	operator uint64_t ( void ) 
+	uint64_t rand_u64 ( void ) 
 	{
-		uint64_t a = rand_u32();
+		mix(); mix(); // advance two steps: with one step the low word of this result would be the high word of the next
 
-		a <<= 32;
-		a |= rand_u32();
+		uint64_t a = x;
+		uint64_t b = y;
 
-		return a;
+		return (a << 32) | b;
 	}
 
 	void rand_p ( void * blob, int bytes )
@@ -100,8 +92,8 @@ struct Rand
 
 extern Rand g_rand1;
 
-inline uint32_t rand_u32 ( void ) { return g_rand1; }
-inline uint64_t rand_u64 ( void ) { return g_rand1; }
+inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); }
+inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); }
 
 inline void rand_p ( void * blob, int bytes )
 {
@@ -122,23 +114,3 @@ inline void rand_p ( void * blob, int bytes )
 }
 
 //-----------------------------------------------------------------------------
-
-template < typename T >
-inline void rand_t ( T & t )
-{
-	rand_p(&,sizeof(t));
-}
-
-template<> inline void rand_t ( uint32_t & t ) { t = rand_u32(); }
-template<> inline void rand_t ( uint64_t & t ) { t = rand_u64(); }
-
-template<> inline void rand_t ( u128 & t )
-{
-	uint32_t * b  = (uint32_t*)&t;
-	b[0] = rand_u32();
-	b[1] = rand_u32();
-	b[2] = rand_u32();
-	b[3] = rand_u32();
-}
-
-//-----------------------------------------------------------------------------
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index ab2b022..f64135a 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -394,14 +394,6 @@
 				RelativePath=".\AvalancheTest.h"
 				>
 			</File>
-			<File
-				RelativePath=".\CycleTest.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\CycleTest.h"
-				>
-			</File>
 			<File
 				RelativePath=".\DifferentialTest.cpp"
 				>
@@ -411,19 +403,19 @@
 				>
 			</File>
 			<File
-				RelativePath=".\SparseKeyTest.cpp"
+				RelativePath=".\KeysetTest.cpp"
 				>
 			</File>
 			<File
-				RelativePath=".\SparseKeyTest.h"
+				RelativePath=".\KeysetTest.h"
 				>
 			</File>
 			<File
-				RelativePath=".\Tests.cpp"
+				RelativePath=".\SpeedTest.cpp"
 				>
 			</File>
 			<File
-				RelativePath=".\Tests.h"
+				RelativePath=".\SpeedTest.h"
 				>
 			</File>
 		</Filter>
@@ -439,11 +431,7 @@
 				>
 			</File>
 			<File
-				RelativePath=".\Core.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Core.h"
+				RelativePath=".\pstdint.h"
 				>
 			</File>
 			<File
@@ -475,10 +463,6 @@
 			RelativePath=".\main.cpp"
 			>
 		</File>
-		<File
-			RelativePath=".\pstdint.h"
-			>
-		</File>
 	</Files>
 	<Globals>
 	</Globals>
diff --git a/SparseKeyTest.cpp b/SparseKeyTest.cpp
deleted file mode 100644
index 234b6bb..0000000
--- a/SparseKeyTest.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "SparseKeyTest.h"
-
-#include "Types.h"
-#include "Stats.h" // for testkeylist
-
-//----------------------------------------------------------------------------
-
-template < int keybits, typename hashtype >
-bool SparseKeyTest3 ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
-{
-	printf("Testing %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
-
-	typedef Blob<keybits> keytype;
-
-	std::vector<keytype> keys;
-
-	keytype k;
-	memset(&k,0,sizeof(k));
-
-	if(inclusive) keys.push_back(k);
-
-	SparseKeygenRecurse(0,setbits,inclusive,k,keys);
-
-	printf("%d keys, %d bytes\n",(int)keys.size(),(int)keys.size() * sizeof(keytype));
-
-	bool result = testkeylist<keytype,hashtype>(hash,keys,testColl,testDist,drawDiagram);
-
-	printf("\n");
-
-	return result;
-}
-
-//----------------------------------------------------------------------------
-
-template< typename hashtype >
-bool SparsePermuteKeyTest2 ( hashfunc<hashtype> hash, bool testColl, bool testDist, bool drawDiagram )
-{
-	bool result = true;
-
-	typedef Blob<320> keytype;
-
-	std::vector<keytype> keys;
-
-	printf("Testing %d-bit sparse-permute keys - ",sizeof(keytype)*8);
-
-	//----------
-
-	keytype key;
-
-	const int ndwords = sizeof(keytype) / 4;
-	uint32_t * dwords = (uint32_t*)&key;
-
-	for(int i = 0; i < ndwords; i++)
-	{
-		dwords[i] = uint32_t(1) << ((i+2) * 3);
-	}
-
-	SPKeygenRecurse2(key,0,keys);
-
-	printf("%d keys, %d bytes\n",(int)keys.size(),(int)keys.size() * sizeof(keytype));
-
-	//----------
-
-	result &= testkeylist<keytype,hashtype>(hash,keys,testColl,testDist,drawDiagram);
-
-	return result;
-}
-
-//----------------------------------------------------------------------------
-// Inclusive test produces about the same distribution on poor hashes, and
-// tends to create more collisions.
-
-template < typename hashtype >
-bool SparseKeyTest2 ( hashfunc<hashtype> hash, bool drawDiagram )
-{
-	bool result = true;
-
-	result &= SparseKeyTest3<32,hashtype>(hash,6,true,true,true,drawDiagram);
-	result &= SparseKeyTest3<40,hashtype>(hash,6,true,true,true,drawDiagram);
-	result &= SparseKeyTest3<48,hashtype>(hash,5,true,true,true,drawDiagram);
-	result &= SparseKeyTest3<56,hashtype>(hash,5,true,true,true,drawDiagram);
-
-	result &= SparseKeyTest3<64,hashtype>(hash,5,true,true,true,drawDiagram);
-	result &= SparseKeyTest3<96,hashtype>(hash,4,true,true,true,drawDiagram); 
-	result &= SparseKeyTest3<256,hashtype>(hash,3,true,true,true,drawDiagram);
-	result &= SparseKeyTest3<1536,hashtype>(hash,2,true,true,true,drawDiagram);
-
-	// 192-bit sparse keys with 4 bits set generates 1.4 gigs of keydata - use
-	// at your own risk
-
-	// SparseKeyTest3<192,4,hashtype>(hash,true,true,true); 
-
-	result &= SparsePermuteKeyTest2<hashtype>(hash,true,true,drawDiagram);
-
-	return result;
-}
-
-bool SparseKeyTest ( hashfunc<uint32_t> hash, bool drawDiagram )
-{
-	return SparseKeyTest2<uint32_t>(hash,drawDiagram);
-}
-
-bool SparseKeyTest ( hashfunc<uint64_t> hash, bool drawDiagram )
-{
-	return SparseKeyTest2<uint64_t>(hash,drawDiagram);
-}
-
-bool SparseKeyTest ( hashfunc<u128> hash, bool drawDiagram )
-{
-	return SparseKeyTest2<u128>(hash,drawDiagram);
-}
diff --git a/SparseKeyTest.h b/SparseKeyTest.h
deleted file mode 100644
index bfec8c5..0000000
--- a/SparseKeyTest.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#pragma once
-
-#include "Types.h"
-
-#pragma warning(push)
-#pragma warning(disable:4200) // user-defined type contains zero length array
-#pragma warning(disable:4127) // conditional expression is constant
-
-//-----------------------------------------------------------------------------
-
-template < typename keytype >
-void SparseKeygenRecurse ( int start, int bitsleft, bool inclusive, keytype & k, std::vector<keytype> & keys )
-{
-	const int nbytes = sizeof(keytype);
-	const int nbits = nbytes * 8;
-
-	for(int i = start; i < nbits; i++)
-	{
-		flipbit(&k,nbytes,i);
-
-		if(inclusive || (bitsleft == 1))
-		{
-			keys.push_back(k);
-		}
-
-		if(bitsleft > 1)
-		{
-			SparseKeygenRecurse(i+1,bitsleft-1,inclusive,k,keys);
-		}
-
-		flipbit(&k,nbytes,i);
-	}
-}
-
-//----------
-
-template < typename keytype >
-void SparseKeygenRecurse_R ( int start, int bitsleft, bool inclusive, keytype & k, std::vector<keytype> & keys )
-{
-	const int nbytes = sizeof(keytype);
-	const int nbits = nbytes * 8;
-
-	for(int i = start; i < nbits; i++)
-	{
-		flipbit(&k,nbytes,(bits-i-1));
-
-		if(inclusive || (bitsleft == 1))
-		{
-			keys.push_back(k);
-		}
-
-		if(bitsleft > 1)
-		{
-			SparseKeygenRecurse(i+1,bitsleft-1,inclusive,k,keys);
-		}
-
-		flipbit(&k,nbytes,(bits-i-1));
-	}
-}
-
-//----------
-
-template< typename keytype >
-void SPKeygenRecurse2 ( keytype & key, int k, std::vector<keytype> & keys )
-{
-	//assert(keytype::align4);
-
-	const int ndwords = key.nbytes/4;
-	uint32_t * dwords = (uint32_t*)&key;
-
-	if(k == ndwords-1)
-	{
-		keys.push_back(key);
-		return;
-	}
-
-	for(int i = k; i < ndwords; i++)
-	{
-		swap(dwords[k],dwords[i]);
-
-		SPKeygenRecurse2(key,k+1,keys);
-
-		swap(dwords[k],dwords[i]);
-	}
-}
-
-//-----------------------------------------------------------------------------
-
-#pragma warning(pop)
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
new file mode 100644
index 0000000..dbfadcb
--- /dev/null
+++ b/SpeedTest.cpp
@@ -0,0 +1,50 @@
+#include "SpeedTest.h"
+
+#include "Random.h"
+
+#include <stdio.h>  // for printf
+#include <intrin.h> // for __rdtsc
+
+//-----------------------------------------------------------------------------
+// Bulk throughput benchmark: hashes one large block at byte offsets 0-7 and prints the best rate seen at each. 256k blocks seem to give the best results.
+
+void BulkSpeedTest ( pfHash hash )
+{
+	const int trials = 9999;
+	const int blocksize = 256 * 1024;
+
+	printf("Bulk speed test - %d-byte keys\n",blocksize);
+
+	char * block = new char[blocksize + 16]; // 16 spare bytes, so block + align (align < 8) still has blocksize bytes behind it
+
+	rand_p(block,blocksize+16); // presumably fills the whole buffer with random bytes - rand_p's body is not in this file
+
+	uint32_t temp[16]; // scratch buffer handed to hash() as its output argument
+
+	for(int align = 0; align < 8; align++)
+	{
+		double bestbpc = 0; // highest bytes-per-cycle over all trials at this offset
+
+		for(int itrial = 0; itrial < trials; itrial++)
+		{
+			__int64 begin,end;
+
+			begin = __rdtsc();
+
+			hash(block + align,blocksize,itrial,temp); // trial index goes in the third argument (the seed slot of the MurmurHash3_* signatures)
+
+			end = __rdtsc();
+
+			blackhole(temp[0]); // presumably keeps the result observably used - blackhole is defined outside this file
+
+			double cycles = double(end-begin);
+			double bpc = double(blocksize) / cycles;
+			if(bpc > bestbpc) bestbpc = bpc;
+		}
+
+		double bestbps = (bestbpc * 3000000000.0 / 1048576.0); // bytes/cycle scaled to MiB/sec at an assumed 3 GHz clock
+		printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
+	}
+
+	delete [] block;
+}
diff --git a/SpeedTest.h b/SpeedTest.h
new file mode 100644
index 0000000..5a5ed54
--- /dev/null
+++ b/SpeedTest.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include "Types.h"
+
+void BulkSpeedTest ( pfHash hash );
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype, int keysize >
+void TinySpeedTest ( pfHash hash ) // Small-key benchmark: prints the lowest average cycles-per-hash seen for keysize-byte keys
+{
+	const int trials = 100000;
+
+	printf("Small key speed test - %4d-byte keys - ",keysize);
+
+	uint8_t k[keysize]; // key buffer, refilled before every trial
+	hashtype h; // receives the hash output
+
+	double bestcycles = 1e9; // lowest per-hash cycle count seen so far
+
+	for(int itrial = 0; itrial < trials; itrial++)
+	{
+		__int64 begin,end;
+
+		rand_p(k,keysize); // new key generated outside the timed region (presumably random bytes - rand_p's body is not in this file)
+
+		begin = __rdtsc();
+		
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
+
+		end = __rdtsc();
+
+		blackhole(*(uint32_t*)(&h)); // presumably keeps the result observably used - blackhole is defined outside this file
+
+		double cycles = double(end-begin) / 64; // 64 = number of hash() calls in the timed region above (16 rows of 4)
+		if(cycles < bestcycles) bestcycles = cycles;
+	}
+
+	double bestbpc = double(keysize) / bestcycles;
+	printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Stats.cpp b/Stats.cpp
index 31ca481..ec51c8a 100644
--- a/Stats.cpp
+++ b/Stats.cpp
@@ -2,231 +2,6 @@
 
 //-----------------------------------------------------------------------------
 
-// If you want to compute these two statistics, uncomment the code and link with
-// the GSL library.
-
-/*
-extern "C"
-{
-	double gsl_sf_gamma_inc_P(const double a, const double x);
-	double gsl_sf_gamma_inc_Q(const double a, const double x);
-};
-
-// P-val for a set of binomial distributions
-
-void pval_binomial ( int * buckets, int len, int n, double p, double & sdev, double & pval )
-{
-	double c = 0;
-
-	double u = n*p;
-	double s = sqrt(n*p*(1-p));
-
-	for(int i = 0; i < len; i++)
-	{
-		double x = buckets[i];
-
-		double n = (x-u)/s;
-
-		c += n*n;
-	}
-
-	sdev = sqrt(c / len);
-
-	pval = gsl_sf_gamma_inc_P( len/2, c/2 );
-}
-
-// P-val for a histogram - K keys distributed between N buckets
-// Note the (len-1) due to the degree-of-freedom reduction
-
-void pval_pearson ( int * buckets, int len, int keys, double & sdev, double & pval )
-{
-	double c = 0;
-
-	double n = keys;
-	double p = 1.0 / double(len);
-
-	double u = n*p;
-	double s = sqrt(n*p*(1-p));
-
-	for(int i = 0; i < len; i++)
-	{
-		double x = buckets[i];
-
-		double n = (x-u)/s;
-
-		c += n*n;
-	}
-
-	sdev = sqrt(c / len);
-
-	pval = gsl_sf_gamma_inc_P( (len-1)/2, c/2 );
-}
-*/
-
-//----------------------------------------------------------------------------
-
-double erf2 ( double x )
-{
-    const double a1 =  0.254829592;
-    const double a2 = -0.284496736;
-    const double a3 =  1.421413741;
-    const double a4 = -1.453152027;
-    const double a5 =  1.061405429;
-    const double p  =  0.3275911;
-
-    double sign = 1;
-    if(x < 0) sign = -1;
-
-    x = abs(x);
-
-    double t = 1.0/(1.0 + p*x);
-    double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x);
-
-    return sign*y;
-}
-
-double normal_cdf ( double u, double s2, double x )
-{
-	x = (x - u) / sqrt(2*s2);
-
-	double c = (1 + erf2(x)) / 2;
-
-	return c;
-}
-
-double binom_cdf ( double n, double p, double k )
-{
-	double u = n*p;
-	double s2 = n*p*(1-p);
-
-	return normal_cdf(u,s2,k);
-}
-
-// return the probability that a random variable from distribution A is greater than a random variable from distribution B
-
-double comparenorms ( double uA, double sA, double uB, double sB )
-{
-	double c = 1.0 - normal_cdf(uA-uB,sA*sA+sB*sB,0);
-
-	return c;
-}
-
-// convert beta distribution to normal distribution approximation
-
-void beta2norm ( double a, double b, double & u, double & s )
-{
-	u = a / (a+b);
-
-	double t1 = a*b;
-	double t2 = a+b;
-	double t3 = t2*t2*(t2+1);
-
-	s = sqrt( t1 / t3 );
-}
-
-#pragma warning(disable : 4189)
-
-double comparecoins ( double hA, double tA, double hB, double tB )
-{
-	double uA,sA,uB,sB;
-
-	beta2norm(hA+1,tA+1,uA,sA);
-	beta2norm(hB+1,tB+1,uB,sB);
-
-	// this is not the right way to handle the discontinuity at 0.5, but i don't want to deal with truncated normal distributions...
-
-	if(uA < 0.5) uA = 1.0 - uA;
-	if(uB < 0.5) uB = 1.0 - uB;
-
-	return 1.0 - comparenorms(uA,sA,uB,sB);
-}
-
-// Binomial distribution using the normal approximation
-
-double binom2 ( double n, double p, double k )
-{
-	double u = n*p;
-	double s2 = n*p*(1-p);
-
-	double a = k-u;
-
-	const double pi = 3.14159265358979323846264338327950288419716939937510;
-
-	a = a*a / (-2.0*s2);
-	a = exp(a) / sqrt(s2*2.0*pi);
-
-	return a;
-}
-
-double RandWork ( double bucketcount, double keycount )
-{
-	double avgload = keycount / bucketcount; 
-
-	double total = 0;
-
-	if(avgload <= 16)
-	{
-		// if the load is low enough we can compute the expected work directly
-
-		double p = pow((bucketcount-1)/bucketcount,keycount);
-
-		double work = 0;
-
-		for(double i = 0; i < 50; i++)
-		{
-			work  += i;
-			total += work * p;
-
-			p *= (keycount-i) / ( (i+1) * (bucketcount-1) );
-		}
-	}
-	else
-	{
-		// otherwise precision errors screw up the calculation, and so we fall back
-		// to the normal approxmation to the binomial distribution
-
-		double min = avgload / 5.0;
-		double max = avgload * 5.0;
-
-		for(double i = min; i <= max; i++)
-		{
-			double p = binom2(keycount,1.0 / bucketcount,i);
-
-			total += double((i*i+i) / 2) * p;
-		}
-	}
-
-	return total / avgload;
-}
-
-// Normalized standard deviation.
-
-double nsdev ( int * buckets, int len, int keys )
-{
-	double n = len;
-	double k = keys;
-	double p = 1.0/n;
-
-	double u = k*p;
-	double s = sqrt(k*p*(1-p));
-
-	double c = 0;
-
-	for(int i = 0; i < len; i++)
-	{
-		double d = buckets[i];
-
-		d = (d-u)/s;
-
-		c += d*d;
-	}
-
-	double nsd = sqrt(c / n);
-
-	return nsd;
-}
-
-
 double chooseK ( int n, int k )
 {
     if(k > (n - k)) k = n - k;
@@ -256,20 +31,6 @@ double chooseUpToK ( int n, int k )
 }
 
 //-----------------------------------------------------------------------------
-
-uint32_t bitrev ( uint32_t v )
-{
-	v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
-	v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
-	v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
-	v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
-	v = ( v >> 16             ) | ( v               << 16);
-
-	return v;
-}
-
-//-----------------------------------------------------------------------------
-
 // Distribution "score"
 // TODO - big writeup of what this score means
 
@@ -281,16 +42,16 @@ uint32_t bitrev ( uint32_t v )
 // (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
 // as distribution weaknesses)
 
-double calcScore ( std::vector<int> const & bins, int keys )
+double calcScore ( const int * bins, const int bincount, const int keycount )
 {
-	double n = (int)bins.size();
-	double k = keys;
+	double n = bincount;
+	double k = keycount;
 
 	// compute rms value
 
 	double r = 0;
 
-	for(size_t i = 0; i < bins.size(); i++)
+	for(int i = 0; i < bincount; i++)
 	{
 		double b = bins[i];
 
@@ -321,7 +82,7 @@ void plot ( double n )
 
 	if(n2 > 64) n2 = 64;
 
-	int n3 = (int)floor(n2 + 0.5);
+	int n3 = (int)n2;
 
 	if(n3 == 0)
 		printf(".");
diff --git a/Stats.h b/Stats.h
index 5cae64e..b4afe2c 100644
--- a/Stats.h
+++ b/Stats.h
@@ -1,13 +1,12 @@
 #pragma once
 
-#include "Core.h"
+#include "Types.h"
 
-#include <algorithm>
 #include <math.h>
-#include <assert.h>
-#include <float.h>
+#include <vector>
+#include <algorithm>   // for std::sort
 
-double calcScore ( std::vector<int> const & bins, int balls );
+double calcScore ( const int * bins, const int bincount, const int ballcount );
 
 void plot ( double n );
 
@@ -16,12 +15,11 @@ inline double ExpectedCollisions ( double balls, double bins )
 	return balls - bins + bins * pow(1 - 1/bins,balls);
 }
 
-double comparenorms ( double u1, double s1, double u2, double s2 );
-void beta2norm ( double a, double b, double & u, double & s );
-
 double chooseK ( int b, int k );
 double chooseUpToK ( int n, int k );
 
+//-----------------------------------------------------------------------------
+
 inline uint32_t f3mix ( uint32_t k )
 {
 	k ^= k >> 16;
@@ -53,7 +51,6 @@ int CountCollisions ( std::vector<hashtype> const & hashes )
 
 //-----------------------------------------------------------------------------
 
-/*
 template < class keytype, typename hashtype >
 int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
 {
@@ -81,20 +78,18 @@ int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
 		}
 		else
 		{
-			htab.insert( htab::value_type(h,k);
+			htab.insert( htab::value_type(h,k) );
 		}
 	}
 
 	return collcount;
 }
-*/
 
 //----------------------------------------------------------------------------
 
 template < typename hashtype >
-bool testhashlist( std::vector<hashtype> & hashes, bool testColl, bool testDist, bool drawDiagram )
+bool TestHashList ( std::vector<hashtype> & hashes, bool testColl, bool testDist, bool drawDiagram )
 {
-	bool verbose = true;
 	bool result = true;
 
 	if(testColl)
@@ -103,39 +98,33 @@ bool testhashlist( std::vector<hashtype> & hashes, bool testColl, bool testDist,
 
 		double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
 
-		if(verbose) printf("Testing collisions - Expected %8.2f, ",expected);
+		printf("Testing collisions   - Expected %8.2f, ",expected);
 
 		double collcount = 0;
 
 		collcount = CountCollisions(hashes);
 
-		if(verbose)
-		{
-			printf("actual %8.2f (%5.2fx) \n",collcount, collcount / expected);
-		}
-		else
-		{
-			double collscore = collcount / expected;
-
-			printf("Coll score %5.3f, ",collscore);
-		}
+		printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
 
 		// 2x expected collisions = fail
 
+		// #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
+		// of a scale factor, otherwise we fail erroneously when the expected number of
+		// collisions is small
+
 		if(double(collcount) / double(expected) > 2.0)
 		{
+			printf(" !!!!! ");
 			result = false;
 		}
+
+		printf("\n");
 	}
 
 	//----------
 
 	if(testDist)
 	{
-		if(verbose) printf("Testing distribution - ");
-
-		if(drawDiagram) printf("\n");
-
 		TestDistribution(hashes,drawDiagram);
 	}
 
@@ -145,7 +134,7 @@ bool testhashlist( std::vector<hashtype> & hashes, bool testColl, bool testDist,
 //-----------------------------------------------------------------------------
 
 template < class keytype, typename hashtype >
-bool testkeylist ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
+bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
 {
 	int keycount = (int)keys.size();
 
@@ -153,49 +142,22 @@ bool testkeylist ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool te
 
 	hashes.resize(keycount);
 
-	//printf("Hashing keyset");
+	printf("Hashing");
 
-	for(int ikey = 0; ikey < keycount; ikey++)
+	for(int i = 0; i < keycount; i++)
 	{
-		keytype & k = keys[ikey];
+		if((keycount >= 10) && (i % (keycount / 10) == 0)) printf("."); // with fewer than 10 keys, keycount / 10 is 0 and the modulo would divide by zero
 
-		//if(ikey % (keycount / 10) == 0) printf(".");
+		keytype & k = keys[i];
 
-		hashes[ikey] = hash(&k,sizeof(k),0);
+		hash(&k,sizeof(k),0,&hashes[i]);
 	}
 
-	//printf("\n");
+	printf("\n");
 
-	bool result = testhashlist(hashes,testColl,testDist,drawDiagram);
+	bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
 
-	return result;
-}
-
-//-----------------------------------------------------------------------------
-
-template < typename hashtype >
-bool testkeylist_string ( hashfunc<hashtype> hash, std::vector<std::string> & keys, bool testColl, bool testDist )
-{
-	int keycount = (int)keys.size();
-
-	std::vector<hashtype> hashes;
-
-	hashes.resize(keycount);
-
-	//printf("Hashing keyset");
-
-	for(int ikey = 0; ikey < keycount; ikey++)
-	{
-		std::string & k = keys[ikey];
-
-		//if(ikey % (keycount / 10) == 0) printf(".");
-
-		hashes[ikey] = hash(&k[0],(int)k.size(),0);
-	}
-
-	//printf("\n");
-
-	bool result = testhashlist(hashes,testColl,testDist);
+	printf("\n");
 
 	return result;
 }
@@ -214,7 +176,7 @@ template < typename hashtype >
 double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
 {
 	const int nbytes = sizeof(hashtype);
-	const int nbits = nbytes * 8;
+	const int hashbits = nbytes * 8;
 	
 	const int nbins = 65536;
 
@@ -222,13 +184,13 @@ double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiag
 
 	double worst = 0;
 
-	for(int a = 0; a < nbits; a++)
+	for(int a = 0; a < hashbits; a++)
 	{
 		if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
 
 		if(drawDiagram) printf("[");
 
-		for(int b = 0; b < nbits; b++)
+		for(int b = 0; b < hashbits; b++)
 		{
 			if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
 
@@ -245,7 +207,7 @@ double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiag
 				bins[pa | (pb << 8)]++;
 			}
 
-			double s = calcScore(bins,hashes.size());
+			double s = calcScore(&bins[0],(int)bins.size(),(int)hashes.size()); // calcScore now takes a raw int array plus explicit counts
 
 			if(drawDiagram) plot(s);
 
@@ -263,48 +225,60 @@ double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiag
 
 
 //----------------------------------------------------------------------------
-// Measure the distribution "score" for each possible N-bit span up to 16 bits
-// and draw a nice graph of the output. 'X' in graph = 10% deviation from ideal.
+// Measure the distribution "score" for each possible N-bit span up to 20 bits
 
 template< typename hashtype >
 double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
 {
-	bool verbose = false;
+	printf("Testing distribution - ");
+
+	if(drawDiagram) printf("\n");
+
+	const int hashbits = sizeof(hashtype) * 8;
 
-	const int nbits = sizeof(hashtype) * 8;
-	const int maxwidth = 20;
+	int maxwidth = 20;
+
+	// We need at least 5 keys per bin to reliably test distribution biases
+	// down to 1%, so don't bother to test sparser distributions than that
+
+	while((maxwidth > 0) && (double(hashes.size()) / double(1 << maxwidth) < 5.0)) // stop at 0: with fewer than 5 hashes the shift count would otherwise go negative
+	{
+		maxwidth--;
+	}
 
 	std::vector<int> bins;
+	bins.resize(1 << maxwidth);
 
 	double worst = 0;
 	int worstStart = -1;
 	int worstWidth = -1;
 
-	for(int width = 1; width <= maxwidth; width++)
+	for(int start = 0; start < hashbits; start++)
 	{
-		const int bincount = (1 << width);
+		int width = maxwidth;
+		int bincount = (1 << width);
 
-		//If we don't have enough keys to get 2 per bin, skip the test
+		memset(&bins[0],0,sizeof(int)*bincount);
 
-		//if(double(hashes.size()) / double(bincount) < 2.0) continue;
+		for(size_t j = 0; j < hashes.size(); j++)
+		{
+			hashtype & hash = hashes[j];
 
-		if(drawDiagram) printf("%2d - [",width);
+			uint32_t index = window(&hash,sizeof(hash),start,width);
 
-		for(int start = 0; start < nbits; start++)
-		{
-			bins.clear();
-			bins.resize(bincount, 0);
+			bins[index]++;
+		}
 
-			for(size_t j = 0; j < hashes.size(); j++)
-			{
-				hashtype & hash = hashes[j];
+		// Test the distribution, then fold the bins in half,
+		// repeat until we're down to 256 bins
 
-				uint32_t index = window(&hash,sizeof(hash),start,width);
+		if(drawDiagram) printf("[");
 
-				bins[index]++;
-			}
+		while(bincount >= 256)
+		{
+			double n = calcScore(&bins[0],bincount,(int)hashes.size());
 
-			double n = calcScore(bins,(int)hashes.size());
+			if(drawDiagram) plot(n);
 
 			if(n > worst)
 			{
@@ -313,20 +287,25 @@ double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
 				worstWidth = width;
 			}
 
-			if(drawDiagram) plot(n);
+			width--;
+			bincount /= 2;
+
+			if(width < 8) break;
+
+			for(int i = 0; i < bincount; i++)
+			{
+				bins[i] += bins[i+bincount];
+			}
 		}
 
 		if(drawDiagram) printf("]\n");
 	}
 
-	if(verbose)
-	{
-		printf("Worst distribution is for (%d:%d) - %f\n",worstStart,(worstStart+worstWidth-1)%32,worst);
-	}
-	else
-	{
-		printf("Dist score %6.3f\n",(1.0 - worst) * 100);
-	}
+	double pct = worst * 100.0;
+
+	printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
+	if(pct >= 1.0) printf(" !!!!! ");
+	printf("\n");
 
 	return worst;
 }
@@ -337,7 +316,7 @@ double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
 template < typename hashtype >
 void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
 {
-	const int nbits = sizeof(hashtype) * 8;
+	const int hashbits = sizeof(hashtype) * 8;
 	const int nbins = 65536;
 	
 	std::vector<int> bins(nbins,0);
@@ -345,7 +324,7 @@ void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, dou
 	dworst = -1.0e90;
 	davg = 0;
 
-	for(int start = 0; start < nbits; start += 8)
+	for(int start = 0; start < hashbits; start += 8)
 	{
 		bins.clear();
 		bins.resize(nbins,0);
@@ -366,194 +345,7 @@ void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, dou
 		if(n > dworst) dworst = n;
 	}
 
-	davg /= double(nbits/8);
+	davg /= double(hashbits/8);
 }
 
 //-----------------------------------------------------------------------------
-
-/*
-struct Stats 
-{
-	enum mode
-	{
-		AVALANCHE,
-		HISTOGRAM,
-	};
-
-	Stats ( int mode, std::vector<int> const & bins, int balls )
-	{
-		switch(mode)
-		{
-		case AVALANCHE:
-			calcAvalanche(bins,balls);
-			break;
-
-		case HISTOGRAM:
-			calcHistogram(bins,balls);
-			break;
-
-		default:
-			assert(false);
-			break;
-		}
-	}
-
-	//----------
-	// Histogram mode
-
-	void calcHistogram ( std::vector<int> const & bins, int balls )
-	{
-		m_nbins  = (int)bins.size();
-		m_nballs = balls;
-
-		m_mean   = 0;
-		m_rms    = 0;
-		m_sigma  = 0;
-		m_max    = -DBL_MAX;
-		m_min    = DBL_MAX;
-
-		for(size_t i = 0; i < bins.size(); i++)
-		{
-			double x = bins[i];
-
-			m_mean += x;
-			m_rms += x*x;
-
-			m_max = x > m_max ? x : m_max;
-			m_min = x < m_min ? x : m_min;
-		}
-
-		m_mean /= m_nbins;
-		m_rms /= m_nbins;
-		m_rms = sqrt(m_rms);
-
-		for(size_t i = 0; i < bins.size(); i++)
-		{
-			double d = bins[i] - m_mean;
-			
-			m_sigma += d*d;
-		}
-
-		m_sigma /= m_nbins;
-		m_sigma = sqrt(m_sigma);
-	}
-	
-	//----------
-	// Normalized standard deviation
-
-	double calcNSD ( std::vector<int> const & bins, int balls )
-	{
-		double n = (int)bins.size();
-		double k = balls;
-		double p = 1.0/n;
-
-		double u = k*p;
-		double s = sqrt(k*p*(1-p));
-
-		double c = 0;
-
-		for(size_t i = 0; i < bins.size(); i++)
-		{
-			double d = bins[i];
-
-			d = (d-u)/s;
-
-			c += d*d;
-		}
-
-		m_nsd = sqrt(c / m_nbins);
-	}
-
-	double calcScore ( std::vector<int> const & bins, int balls )
-	{
-		double n = (int)bins.size();
-		double k = balls;
-
-		// compute rms value
-
-		double r = 0;
-
-		for(size_t i = 0; i < bins.size(); i++)
-		{
-			double b = bins[i];
-
-			r += b*b;
-		}
-
-		r = sqrt(r / n);
-
-		// compute fill factor
-
-		double f = (k*k - 1) / (n*r*r - k);
-
-		// rescale to (0,1) with 0 = good, 1 = bad
-
-		m_score = 1 - (f / n);
-	}
-
-	//----------
-	// Avalanche statistics - convert each table entry to a bias value
-	// and compute stats based on that.
-
-	void calcAvalanche ( std::vector<int> const & bins, int balls )
-	{
-		m_nbins  = (int)bins.size();
-		m_nballs = balls;
-
-		m_mean   = 0;
-		m_rms    = 0;
-		m_sigma  = 0;
-		m_max    = -DBL_MAX;
-		m_min    = DBL_MAX;
-		m_nbad   = 0;
-
-		for(size_t i = 0; i < bins.size(); i++)
-		{
-			double x = (bins[i] / m_nballs) * 2 - 1;
-
-			m_mean += x;
-			m_rms += x*x;
-
-			x = fabs(x);
-
-			if(x > 0.7) m_nbad++;
-
-			m_max = x > m_max ? x : m_max;
-			m_min = x < m_min ? x : m_min;
-		}
-
-		m_mean /= m_nbins;
-		m_rms /= m_nbins;
-		m_rms = sqrt(m_rms);
-
-		for(size_t i = 0; i < bins.size(); i++)
-		{
-			double x = (bins[i] / m_nballs) * 2 - 1;
-
-			double d = x - m_mean;
-			
-			m_sigma += d*d;
-		}
-
-		m_sigma /= m_nbins;
-		m_sigma = sqrt(m_sigma);
-	}
-
-	double m_nbins;
-	double m_nballs;
-
-	double m_mean;
-	double m_rms;
-	double m_sigma;
-
-	double m_nsd;
-	double m_score;
-
-	double m_nbad;
-
-	double m_max;
-	double m_min;
-};
-*/
-
-//-----------------------------------------------------------------------------
diff --git a/Tests.cpp b/Tests.cpp
deleted file mode 100644
index 22bea53..0000000
--- a/Tests.cpp
+++ /dev/null
@@ -1,542 +0,0 @@
-#include "Tests.h"
-
-#include "Stats.h"  // for CountCollisions
-#include "Random.h"
-#include "Bitvec.h"
-
-#include <string.h>
-#include <math.h>
-#include <set>
-#include <vector>
-#include <intrin.h>
-
-#pragma warning(disable:4127)
-
-#pragma warning(disable:4127) // warning C4985: 'ceil': attributes not present on previous declaration.
-
-//-----------------------------------------------------------------------------
-// 256k blocks seem to give the best results.
-
-void BulkSpeedTest ( pfHash hash )
-{
-	const int trials = 9999;
-	const int blocksize = 256 * 1024;
-
-	printf("Bulk speed test - %d-byte keys\n",blocksize);
-
-	char * block = new char[blocksize + 16];
-
-	rand_p(block,blocksize+16);
-
-	uint32_t temp[16];
-
-	for(int align = 0; align < 4; align++)
-	{
-		double bestbpc = 0;
-
-		for(int itrial = 0; itrial < trials; itrial++)
-		{
-			__int64 begin,end;
-
-			begin = __rdtsc();
-
-			hash(block + align,blocksize,itrial,temp);
-
-			end = __rdtsc();
-
-			blackhole(temp[0]);
-
-			double cycles = double(end-begin);
-			double bpc = double(blocksize) / cycles;
-			if(bpc > bestbpc) bestbpc = bpc;
-		}
-
-		double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
-		printf("Alignment %2d - %8.4f bytes/cycle, (%.2f MiB/sec @ 3 ghz)\n",align,bestbpc,bestbps);
-	}
-	printf("\n");
-
-	delete [] block;
-}
-
-//----------------------------------------------------------------------------
-// Tests the Bit Independence Criteron. Not sure if this still works.
-
-template< int _bits, int _reps, class hashtype >
-void BicTest ( hashfunc<uint32_t> hash )
-{
-	const int bits = _bits;
-	const int reps = _reps;
-	const int bytes = bits / 8;
-	const int hashbytes = sizeof(hashtype);
-	const int hashbits = hashbytes * 8;
-
-	int counts[bits][hashbits][hashbits];
-
-	memset(counts,0,sizeof(counts));
-
-	unsigned char k[bytes];
-
-	for(int irep = 0; irep < reps; irep++)
-	{
-		if(irep % 1000 == 0) printf(".");
-
-		rand_p(k,bytes);
-		unsigned int h1 = hash(k,bytes,0);
-
-		for(int in = 0; in < bits; in++)
-		{
-			flipbit(k,in);
-			unsigned int h2 = hash(k,bytes,0);
-			flipbit(k,in);
-
-			unsigned int h = h1 ^ h2;
-
-			for(int out1 = 0; out1 < hashbits; out1++)
-			for(int out2 = out1; out2 < hashbits; out2++)
-			{
-				int b1 = (h >> out1) & 1;
-				int b2 = (h >> out2) & 1;
-
-				int b = b1 ^ b2;
-
-				if(b1 ^ b2)
-				{
-					counts[in][out1][out2]++;
-				}
-			}
-		}
-	}
-
-	printf("\n");
-
-	int biases[4] = {0,0,0,0};
-
-	for(int i = 0; i < hashbits; i++)
-	{
-		for(int j = 0; j < hashbits; j++)
-		{
-			if(i == j)
-			{
-				printf("\\");
-			}
-			else if(i > j)
-			{
-				printf(" ");
-			}
-			else
-			{
-				double d = double(counts[16][i][j]) / double(reps);
-
-				int b = (int)(fabs(d * 2 - 1) * 100);
-
-				if(b == 0) printf(".");
-				else if(b < 5) printf("o");
-				else if(b < 33) printf("O");
-				else printf("X");
-
-				if(b == 0)      biases[0]++;
-				else if(b < 5)  biases[1]++;
-				else if(b < 33) biases[2]++;
-				else            biases[3]++;
-			}
-		}
-
-		printf("\n");
-	}
-
-
-	printf("Bias distribution - %3d : %3d : %3d : %3d\n",biases[0],biases[1],biases[2],biases[3]);
-
-	printf("\n");
-}
-
-//----------------------------------------------------------------------------
-// Bijection test = hash all possible 32-bit keys, see how many 32-bit values
-// are missing. Each missing value indicates a collision.
-
-void BijectionTest ( hashfunc<uint32_t> hash )
-{
-	const int nbytes = 512 * 1024 * 1024;
-
-	unsigned char * block = new unsigned char[nbytes];
-
-	memset(block,0,nbytes);
-
-	printf("Testing bijection for 32-bit keys");
-
-	unsigned int k = 0;
-	
-	do
-	{
-		unsigned int h = hash(&k,4,0);
-		setbit(block,nbytes,h);
-
-		if(k % 100000000 == 0) printf(".");
-
-		k++;
-	}
-	while(k != 0);
-
-	int missing = 0;
-
-	do
-	{
-		if(!getbit(block,nbytes,k)) missing++;
-
-		k++;
-	}
-	while(k != 0);
-
-	printf("\nMissing values - %d\n",missing);
-
-	delete [] block;
-}
-
-//----------------------------------------------------------------------------
-// Loop counting
-
-// Repeatedly hash the same 4-byte value over and over, and track how many
-// loops are in the output. 
-
-void LoopTest ( hashfunc<uint32_t> hash )
-{
-	const int nbytes = 512 * 1024 * 1024;
-
-	unsigned char * block = new unsigned char[nbytes];
-
-	memset(block,0,nbytes);
-
-	int loops = 0;
-	unsigned int start = 0;
-
-	while(1)
-	{
-		if(!getbit(block,nbytes,start)) 
-		{
-			loops++;
-
-			unsigned int r = 0;
-			unsigned int h = start;
-
-			printf("Testing loop starting at %u",start);
-
-			while(1)
-			{
-				setbit(block,nbytes,h);
-				r++;
-				h = hash(&h,4,0);
-
-				if(h == start) break;
-
-				if(r % 100000000 == 0) printf(".");
-			}
-
-			printf("\nStart point %u looped after %u\n",start,r);
-		}
-
-		start++;
-		if(start == 0) break;
-	}
-
-	printf("Total loops - %d\n",loops);
-
-	delete [] block;
-}
-
-//-----------------------------------------------------------------------------
-// Trickle test, not really usable by itself. Used for testing the diffusion
-// properties of a sequence of adds, xors, rotates, etc - replace the adds and
-// xors with ors, compute how many iterations it takes to convert an input with
-// a single 1 bit into all 1s.
-
-// I was using it to find optimal rotation constants for various types of 
-// diffusion functions, but didn't end up using them
-
-int trickle ( int r[4], int reps )
-{
-	int worst = 1000;
-
-	for(int i = 0; i < 128; i++)
-	{
-		uint32_t t[4] = { 0, 0, 0, 0 };
-
-		setbit(t,4,i);
-
-		for(int j = 0; j < reps; j++)
-		{
-			t[0] |= t[1];
-			t[2] |= t[3];
-			t[1] |= t[0];
-			t[0] |= t[2];
-
-			t[2] |= t[1];
-			t[1] |= t[0];
-			t[3] |= t[0];
-
-			t[0] = _rotl(t[0],r[0]); 
-			t[1] = _rotl(t[1],r[1]);
-			t[2] = _rotl(t[2],r[2]); 
-			t[3] = _rotl(t[3],r[3]);
-		}
-
-		int p = popcount(t[0]) + popcount(t[1]) +  popcount(t[2]) + popcount(t[3]);
-
-		if(p < worst) worst = p;
-	}
-
-	return worst;
-}
-
-void TrickleTest ( void )
-{
-	int best = 0;
-
-	int r[4];
-
-	for(int i = 0; i < 1024*1024; i++)
-	{
-		r[0] = (i >>  0) & 31;
-		r[1] = (i >>  5) & 31;
-		r[2] = (i >> 10) & 31;
-		r[3] = (i >> 15) & 31;
-
-		//if(trickle(r,2) < 16) continue;
-		//if(trickle(r,3) < 40) continue;
-		//if(trickle(r,4) < 80) continue;
-
-		int worst = trickle(r,6);
-
-		if(worst >= best)
-		//if(i % 10000 == 0)
-		{
-			best = worst;
-
-			printf("\t{");
-			for(int i = 0; i < 4; i++)
-			{
-				printf("%2d, ",r[i]);
-			}
-			printf("}, // %3d\n",worst);
-		}
-	}
-}
-
-//----------------------------------------------------------------------------
-// Alignment of the keys should not affect the hash value - if it does,
-// something is broken.
-
-void AlignmentTest ( pfHash hash, const int hashbits )
-{
-	const int hashbytes = hashbits / 8;
-
-	printf("Testing alignment handling on small keys..........");
-
-	char bufs[16][64];
-
-	char * strings[16];
-
-	for(int i = 0; i < 16; i++)
-	{
-		uint32_t b = uint32_t(&bufs[i][0]);
-
-		b = (b+15)&(~15);
-
-		strings[i] = (char*)(b + i);
-
-		strcpy_s(strings[i],32,"DeadBeefDeadBeef");
-	}
-
-	uint32_t hash1[64];
-	uint32_t hash2[64];
-
-	for(int k = 1; k <= 16; k++)
-	for(int j = 0; j < 15; j++)
-	for(int i = j+1; i < 16; i++)
-	{
-		const char * s1 = strings[i];
-		const char * s2 = strings[j];
-
-		hash(s1,k,0,hash1);
-		hash(s2,k,0,hash2);
-
-		if(memcmp(hash1,hash2,hashbytes) != 0)
-		{
-			printf("*********FAIL*********\n");
-			return;
-		}
-	}
-
-	printf("PASS\n");
-}
-
-//----------------------------------------------------------------------------
-// Appending zero bytes to a key should always cause it to produce a different
-// hash value
-
-void AppendedZeroesTest ( pfHash hash, const int hashbits )
-{
-	const int hashbytes = hashbits/8;
-
-	printf("Testing zero-appending..........");
-
-	for(int rep = 0; rep < 100; rep++)
-	{
-		if(rep % 10 == 0) printf(".");
-
-		unsigned char key[256];
-
-		memset(key,0,sizeof(key));
-
-		rand_p(key,32);
-
-		uint32_t h1[16];
-		uint32_t h2[16];
-
-		memset(h1,0,hashbytes);
-		memset(h2,0,hashbytes);
-
-		for(int i = 0; i < 32; i++)
-		{
-			hash(key,32+i,0,h1);
-
-			if(memcmp(h1,h2,hashbytes) == 0)
-			{
-				printf("\n*********FAIL*********\n");
-				return;
-			}
-
-			memcpy(h2,h1,hashbytes);
-		}
-	}
-
-	printf("\nPASS\n");
-}
-
-//----------------------------------------------------------------------------
-// Flipping a bit of a key should, with very high probability, result in a 
-// different hash.
-
-bool TwiddleTest ( pfHash hash, const int hashbits )
-{
-	bool result = true;
-
-	const int hashbytes = hashbits/8;
-
-	printf("Testing bit twiddling..........");
-
-	uint8_t key[256];
-	uint32_t h1[16];
-	uint32_t h2[16];
-
-	for(int len = 1; len < 16; len++)
-	{
-		for(int bit = 0; bit < (len * 8); bit++)
-		{
-			rand_p(key,len);
-
-			hash(key,len,0,h1);
-			flipbit(key,len,bit);
-			hash(key,len,0,h2);
-
-			if(memcmp(h1,h2,hashbytes) == 0)
-			{
-				//printf("X");
-				result = false;
-			}
-			else
-			{
-				//printf(".");
-			}
-		}
-
-		//printf("\n");
-	}
-
-	if(result == false)
-		printf("*********FAIL*********\n");
-	else
-		printf("PASS\n");
-
-	return result;
-}
-
-//-----------------------------------------------------------------------------
-// Create a bunch of zero-byte keys of different lengths and check distribution.
-
-// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
-// The distribution table will have some random 1s in the bottom rows due to
-// there not being enough keys for a good test.
-
-void NullKeysetTest ( hashfunc<uint32_t> hash, bool drawDiagram )
-{
-	int keycount = 64*1024;
-
-	unsigned char * nullblock = new unsigned char[keycount];
-	memset(nullblock,0,keycount);
-
-	//----------
-
-	std::vector<uint32_t> hashes;
-
-	hashes.resize(keycount);
-
-	//----------
-
-	printf("Collision test - Hash keyset 100 times, count collisions");
-
-	for(int i = 0; i < keycount; i++)
-	{
-		if(i % (keycount/10) == 0) printf(".");
-
-		uint32_t h = hash(nullblock,i,0);
-		hashes[i] = h;
-	}
-
-	testhashlist(hashes,true,true,drawDiagram);
-
-	delete [] nullblock;
-}
-
-//-----------------------------------------------------------------------------
-// Simple collections of alphanumeric strings
-
-template < typename hashtype >
-void TextKeyTest2 ( hashfunc<hashtype> hash, bool drawDiagram )
-{
-	const char * s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456790123456789";
-
-	const int nbytes = 7;
-	const int nbits = 56;
-	const int nkeys = 1024*1024;
-
-	printf("Testing %d 7-character text keys - \n",nkeys,nbits);
-
-	//----------
-	// generate keyset
-
-	typedef Blob<nbits> keytype;
-
-	std::vector<keytype> keys;
-	keys.reserve(nkeys);
-
-	keytype key;
-
-	uint8_t * bytes = (uint8_t*)&key;
-
-	for(int i = 0; i < nkeys; i++)
-	{
-		for(int j = 0; j < nbytes; j++)
-		{
-			int d = i >> (j * 3);
-
-			bytes[j] = s[d % 64];
-		}
-
-		keys.push_back(key);
-	}
-
-	//----------
-
-	testkeylist<keytype,hashtype>(hash,keys,true,true,drawDiagram);
-}
diff --git a/Tests.h b/Tests.h
deleted file mode 100644
index 05486af..0000000
--- a/Tests.h
+++ /dev/null
@@ -1,194 +0,0 @@
-#pragma once
-
-#include "Types.h"
-#include "Random.h"
-//#include "Stats.h"
-
-//#include <intrin.h>
-
-#include "AvalancheTest.h"
-#include "CycleTest.h"
-#include "DifferentialTest.h"
-
-//-----------------------------------------------------------------------------
-
-template< typename hashtype >
-void QuickBrownFox ( hashfunc<hashtype> hash )
-{
-	const char * text1 = "The quick brown fox jumps over the lazy dog";
-	const char * text2 = "The quick brown fox jumps over the lazy cog";
-
-	hashtype h1, h2;
-
-	hash(text1,(int)strlen(text1),0,&h1);
-	hash(text2,(int)strlen(text2),0,&h2);
-
-	printf("Quick Brown Fox -\n");
-	printhex32(&h1,sizeof(hashtype)); printf("\n");
-	printhex32(&h2,sizeof(hashtype)); printf("\n");
-	printf("\n");
-}
-
-//-----------------------------------------------------------------------------
-
-void BulkSpeedTest ( pfHash hash );
-
-/*
-template < typename hashtype >
-void BulkSpeedTest ( hashfunc<hashtype> hash )
-{
-	BulkSpeedTest(hash,sizeof(hashtype) * 8);
-}
-*/
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype, int keysize >
-void TinySpeedTest ( pfHash hash )
-{
-	const int trials = 100000;
-
-	printf("Small key speed test - %4d-byte keys - ",keysize);
-
-	uint8_t k[keysize];
-	hashtype h;
-
-	double bestcycles = 1e9;
-
-	for(int itrial = 0; itrial < trials; itrial++)
-	{
-		__int64 begin,end;
-
-		rand_p(k,keysize);
-
-		begin = __rdtsc();
-		
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		end = __rdtsc();
-
-		blackhole(*(uint32_t*)(&h));
-
-		double cycles = double(end-begin) / 64;
-		if(cycles < bestcycles) bestcycles = cycles;
-	}
-
-	double bestbpc = double(keysize) / bestcycles;
-	printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
-}
-
-//-----------------------------------------------------------------------------
-
-void AlignmentTest ( pfHash hash, const int hashbits );
-
-template < typename hashtype >
-void AlignmentTest ( hashfunc<hashtype> hash )
-{
-	AlignmentTest(hash,sizeof(hashtype) * 8);
-}
-
-//-----------------------------------------------------------------------------
-
-void AppendedZeroesTest ( pfHash hash, const int hashbits );
-
-template < typename hashtype >
-void AppendedZeroesTest ( hashfunc<hashtype> hash )
-{
-	AppendedZeroesTest(hash,sizeof(hashtype) * 8);
-}
-
-//-----------------------------------------------------------------------------
-
-bool TwiddleTest ( pfHash hash, const int hashbits );
-
-template < typename hashtype >
-bool TwiddleTest ( hashfunc<hashtype> hash )
-{
-	return TwiddleTest(hash,sizeof(hashtype) * 8);
-}
-
-//-----------------------------------------------------------------------------
-
-template < typename hashtype >
-bool AvalancheTest ( hashfunc<hashtype> hash )
-{
-	bool result = true;
-
-	const int nbytes = sizeof(hashtype);
-	const int nbits = nbytes * 8;
-
-	for(int i = 4; i <= 10; i++)
-	{
-		result &= AvalancheTest(hash,8*i,nbits,2000000);
-	}
-
-	if(!result) printf("*********FAIL*********\n");
-
-
-	return result;
-}
-
-//-----------------------------------------------------------------------------
-
-template < typename hashtype >
-bool SparseKeyTest2 ( hashfunc<hashtype> hash, bool drawDiagram );
-
-template < typename hashtype >
-bool SparseKeyTest ( hashfunc<hashtype> hash, bool drawDiagram )
-{
-	return SparseKeyTest2<hashtype>(hash,drawDiagram);
-}
-
-//-----------------------------------------------------------------------------
-// For a given 20-bit window of a 64-bit key, generate all possible keys with
-// bits set in that window
-
-template < typename hashtype >
-void BitrangeKeysetTest ( hashfunc<hashtype> hash, bool drawDiagram )
-{
-	const int keybits = 64;
-
-	for(int j = 0; j <= (keybits-20); j++)
-	{
-		int minbit = j;
-		int maxbit = j+20-1;
-
-		int keycount = 1 << (maxbit - minbit + 1);
-
-		printf("Bitrange keyset (%2d,%2d) - %d keys - ",minbit,maxbit,keycount);
-
-		std::vector<uint64_t> keys;
-		keys.reserve(keycount);
-
-		for(int i = 0; i < keycount; i++)
-		{
-			uint64_t k = i;
-
-			k = k << minbit;
-
-			keys.push_back(k);
-		}
-
-		testkeylist<uint64_t,hashtype>(hash,keys,true,true,drawDiagram);
-	}
-}
-
-//-----------------------------------------------------------------------------
diff --git a/Types.cpp b/Types.cpp
index 8ae5185..04d5c6e 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -1,7 +1,6 @@
 #include "Types.h"
 
-// Throw a value in the oubliette to prevent the compiler from optimizing away
-// the code that calculated it
+//-----------------------------------------------------------------------------
 
 #pragma optimize( "", off )
 
@@ -15,3 +14,5 @@ uint32_t whitehole ( void )
 }
 
 #pragma optimize( "", on ) 
+
+//-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
index 7dd7cda..d31a647 100644
--- a/Types.h
+++ b/Types.h
@@ -2,13 +2,20 @@
 
 #include "pstdint.h"
 #include "Bitvec.h"
-#include <vector>
-#include <assert.h>
+
+//-----------------------------------------------------------------------------
+// If the optimizer detects that a value in a speed test is constant or unused,
+// the optimizer may remove references to it or otherwise create code that
+// would not occur in a real-world application. To prevent the optimizer from
+// doing this we declare two trivial functions that either sink or source data,
+// and bar the compiler from optimizing them.
 
 void     blackhole ( uint32_t x );
 uint32_t whitehole ( void );
 
-typedef void (*pfHash) ( const void * blob, int len, uint32_t seed, void * out );
+//-----------------------------------------------------------------------------
+
+typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
 
 template < typename T >
 void swap ( T & a, T & b )
@@ -29,7 +36,7 @@ public:
 	{
 	}
 
-	inline void operator () ( const void * key, int len, uint32_t seed, uint32_t * out )
+	inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
 	{
 		m_hash(key,len,seed,out);
 	}
@@ -39,7 +46,7 @@ public:
 		return m_hash;
 	}
 
-	inline T operator () ( const void * key, int len, uint32_t seed ) 
+	inline T operator () ( const void * key, const int len, const uint32_t seed ) 
 	{
 		T result;
 
@@ -48,43 +55,11 @@ public:
 		return result;
 	}
 
-	/*
-	T operator () ( T const & key )
-	{
-		T result;
-
-		m_hash(&key,sizeof(T),0,&result);
-
-		return result;
-	}
-	*/
-
 	pfHash m_hash;
 };
 
 //-----------------------------------------------------------------------------
 
-template < class T >
-class mixfunc
-{
-public:
-
-	typedef T (*pfMix) ( T key );
-
-	mixfunc ( pfMix m ) : m_mix(m)
-	{
-	}
-
-	T operator () ( T key )
-	{
-		return m_mix(key);
-	}
-
-	pfMix m_mix;
-};
-
-//-----------------------------------------------------------------------------
-
 template < int _bits >
 class Blob
 {
@@ -220,7 +195,7 @@ public:
 	{
 		Blob t = *this;
 
-		lshift(t.bytes,nbytes,c);
+		lshift(&t.bytes[0],nbytes,c);
 
 		return t;
 	}
@@ -229,21 +204,21 @@ public:
 	{
 		Blob t = *this;
 
-		rshift(t.bytes,nbytes,c);
+		rshift(&t.bytes[0],nbytes,c);
 
 		return t;
 	}
 
 	Blob & operator <<= ( int c )
 	{
-		lshift(bytes,nbytes,c);
+		lshift(&bytes[0],nbytes,c);
 
 		return *this;
 	}
 
 	Blob & operator >>= ( int c )
 	{
-		rshift(bytes,nbytes,c);
+		rshift(&bytes[0],nbytes,c);
 
 		return *this;
 	}
@@ -265,185 +240,6 @@ private:
 	uint8_t bytes[nbytes];
 };
 
-typedef Blob<128> u128;
-
-//-----------------------------------------------------------------------------
-
-class VBlob : public std::vector<uint8_t>
-{
-public:
-
-	VBlob( int len ) : std::vector<uint8_t>(len,0)
-	{
-	}
-
-	/*
-	VBlob ( const VBlob & k )
-	{
-		for(size_t i = 0; i < size(); i++)
-		{
-			at(i) = k.at(i);
-		}
-	}
-	*/
-
-	/*
-	VBlob & operator = ( const VBlob & k )
-	{
-		for(size_t i = 0; i < size(); i++)
-		{
-			at(i) = k.at(i);
-		}
-
-		return *this;
-	}
-	*/
-
-	void set ( const void * VBlob, int len )
-	{
-		assert(size() == (size_t)len);
-
-		const uint8_t * k = (const uint8_t*)VBlob;
-
-		len = len > (int)size() ? (int)size() : len;
-
-		for(int i = 0; i < len; i++)
-		{
-			at(i) = k[i];
-		}
-
-		for(size_t i = len; i < size(); i++)
-		{
-			at(i) = 0;
-		}
-	}
-
-	//----------
-	// boolean operations
-	
-	bool operator < ( const VBlob & k ) const
-	{
-		assert(size() == k.size());
-
-		for(size_t i = 0; i < size(); i++)
-		{
-			if(at(i) < k.at(i)) return true;
-			if(at(i) > k.at(i)) return false;
-		}
-
-		return false;
-	}
-
-	bool operator == ( const VBlob & k ) const
-	{
-		assert(size() == k.size());
-
-		for(size_t i = 0; i < size(); i++)
-		{
-			if(at(i) != k.at(i)) return false;
-		}
-
-		return true;
-	}
-
-	bool operator != ( const VBlob & k ) const
-	{
-		assert(size() == k.size());
-
-		return !(*this == k);
-	}
-
-	//----------
-	// bitwise operations
-
-	VBlob operator ^ ( const VBlob & k ) const 
-	{
-		assert(size() == k.size());
-
-		VBlob t((int)k.size());
-
-		for(size_t i = 0; i < size(); i++)
-		{
-			t.at(i) = at(i) ^ k.at(i);
-		}
-
-		return t;
-	}
-
-	VBlob & operator ^= ( const VBlob & k )
-	{
-		assert(size() == k.size());
-
-		for(size_t i = 0; i < size(); i++)
-		{
-			at(i) ^= k.at(i);
-		}
-
-		return *this;
-	}
-
-	VBlob & operator &= ( const VBlob & k )
-	{
-		assert(size() == k.size());
-
-		for(size_t i = 0; i < size(); i++)
-		{
-			at(i) &= k.at(i);
-		}
-	}
-
-	VBlob & operator <<= ( int c )
-	{
-		lshift(&at(0),(int)size(),c);
-
-		return *this;
-	}
-
-	VBlob & operator >>= ( int c )
-	{
-		rshift(&at(0),(int)size(),c);
-
-		return *this;
-	}
-};
-
-//-----------------------------------------------------------------------------
-
-/*
-class Blobvec
-{
-public:
-
-	Blobvec ( int stride, int size )
-	{
-		m_data = new uint8_t[stride*size];
-	}
-
-	~Blobvec ( void )
-	{
-		delete [] m_data;
-	}
-
-	int size ( void ) const
-	{
-		return m_size;
-	}
-
-	const void * operator [] ( const int index ) const
-	{
-		return &m_data[index * m_stride];
-	}
-
-	void * operator [] ( const int index )
-	{
-		return &m_data[index * m_stride];
-	}
-
-	int m_stride;
-	int m_size;
-
-	uint8_t * m_data;
-};
-*/
+typedef Blob<128> uint128_t;
 
 //-----------------------------------------------------------------------------
diff --git a/main.cpp b/main.cpp
index 1f6e136..25d3c89 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,93 +1,413 @@
 #include <stdio.h>
 
-#include <time.h>
 #include "hashes.h"
-#include "tests.h"
+#include "KeysetTest.h"
+#include "SpeedTest.h"
+#include "AvalancheTest.h"
+#include "DifferentialTest.h"
 
+#include <time.h>
+#include <intrin.h>
 #include <windows.h>
 
-#pragma warning(disable:4702)
+#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
+
+bool g_testAll = false; // when true, test() runs every group regardless of the per-group flags
+
+/* Alternative defaults (currently commented out): every test group enabled.
+bool g_testSanity      = true;
+bool g_testSpeed       = true;
+bool g_testDiff        = true;
+bool g_testAvalanche   = true;
+bool g_testCyclic      = true;
+bool g_testSparse      = true;
+bool g_testPermutation = true;
+bool g_testWindow      = true;
+bool g_testText        = true;
+bool g_testZeroes      = true;
+bool g_testSeed        = true;
+*/
+
+//* Active defaults: every test group disabled; main() opts in via g_testAll or individual flags.
+bool g_testSanity      = false;
+bool g_testSpeed       = false;
+bool g_testDiff        = false;
+bool g_testAvalanche   = false;
+bool g_testCyclic      = false;
+bool g_testSparse      = false;
+bool g_testPermutation = false;
+bool g_testWindow      = false;
+bool g_testText        = false;
+bool g_testZeroes      = false;
+bool g_testSeed        = false;
+//*/
+
+//-----------------------------------------------------------------------------
+
+struct HashInfo
+{
+	pfHash hash;       // hash function handed to test<>()
+	int hashbits;      // declared output width in bits; testHash() dispatches on 32, 64 or 128
+	const char * name; // short name that findHash() compares against
+	const char * desc; // description string; testHash() passes it to test<>() as the printed hash name
+};
+
+HashInfo g_hashes[] = // table searched by findHash(); columns: function, output bits, short name, description
+{
+	{ randhash_32,          32, "rand32",      "Random number generator, 32-bit" },
+	{ randhash_64,          64, "rand64",      "Random number generator, 64-bit" },
+	{ randhash_128,        128, "rand128",     "Random number generator, 128-bit" },
+
+	{ crc32,                32, "crc32",       "CRC-32" },
+	{ DoNothingHash,        32, "donothing32", "Do-Nothing Function (only valid for speed test comparison)" },
+
+	{ md5_32,               32, "md5_32a",     "MD5, first 32 bits of result" },
+	{ sha1_32a,             32, "sha1_32a",    "SHA1, first 32 bits of result" },
+
+	{ FNV,                  32, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
+	{ lookup3_test,         32, "lookup3",     "Bob Jenkins' lookup3" },
+	{ SuperFastHash,        32, "superfast",   "Paul Hsieh's SuperFastHash" },
+	
+	// MurmurHash2
+
+	{ MurmurHash2_test,     32, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
+	{ MurmurHash2A_test,    32, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
+	{ MurmurHash64A_test,   64, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
+	{ MurmurHash64B_test,   64, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
+
+	// MurmurHash3
+
+	{ MurmurHash3_x86_32,   32, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+	{ MurmurHash3_x86_64,   64, "Murmur3B",    "MurmurHash3 for x86, 64-bit" },
+	{ MurmurHash3_x86_128, 128, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+
+	{ MurmurHash3_x64_32,   32, "Murmur3D",    "MurmurHash3 for x64, 32-bit" },
+	{ MurmurHash3_x64_64,   64, "Murmur3E",    "MurmurHash3 for x64, 64-bit" },
+	{ MurmurHash3_x64_128, 128, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+
+};
+
+// Case-insensitive lookup of a g_hashes entry by short name; returns NULL if none matches.
+HashInfo * findHash ( const char * name )
+{
+	for(size_t i = 0; i < sizeof(g_hashes) / sizeof(g_hashes[0]); i++)
+	{
+		if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
+	}
+	return NULL;
+}
 
 //----------------------------------------------------------------------------
 
 template < typename hashtype >
 void test ( hashfunc<hashtype> hash, const char * hashname )
 {
-	printf("Testing %s\n",hashname);
+	const int hashbits = sizeof(hashtype) * 8;
+	// Runs each test group whose g_test* flag (or g_testAll) is set and prints the outcome for 'hash'.
+	printf("-------------------------------------------------------------------------------\n");
+	printf("--- Testing %s\n\n",hashname);
+
+	//-----------------------------------------------------------------------------
+	// Sanity tests
+
+	if(g_testSanity || g_testAll)
+	{
+		printf("[[[ Sanity Tests ]]]\n\n");
+
+		QuickBrownFox(hash,hashbits);
+		SanityTest(hash,hashbits);
+		AlignmentTest(hash,hashbits);
+		AppendedZeroesTest(hash,hashbits);
+		printf("\n");
+	}
+
+	//-----------------------------------------------------------------------------
+	// Speed tests
+
+	if(g_testSpeed || g_testAll)
+	{
+		printf("[[[ Speed Tests ]]]\n\n");
+
+		BulkSpeedTest(hash);
+		printf("\n");
+
+		TinySpeedTest<hashtype,4>(hash);
+		TinySpeedTest<hashtype,8>(hash);
+		TinySpeedTest<hashtype,16>(hash);
+		TinySpeedTest<hashtype,32>(hash);
+		TinySpeedTest<hashtype,64>(hash);
+		TinySpeedTest<hashtype,128>(hash);
+		printf("\n");
+	}
+
+	//-----------------------------------------------------------------------------
+	// Differential tests
+
+	if(g_testDiff || g_testAll)
+	{
+		printf("[[[ Differential Tests ]]]\n\n");
+
+		bool result = true;
+		bool dumpCollisions = false;
+
+		result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
+		result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
+		result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
+
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
+
+	//-----------------------------------------------------------------------------
+	// Avalanche tests.
+	
+	// 2 million reps is enough to measure bias down to ~0.25%
+	
+	if(g_testAvalanche || g_testAll)
+	{
+		printf("[[[ Avalanche Tests ]]]\n\n");
+
+		// Keys are twice the hash width; hashbits is the constant declared at the top of test().
+		bool result = true;
+
+		result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,2000000);
+
+		// The bit independence test is slow and not particularly useful...
+		//result &= BicTest < Blob<hashbits * 2>, hashtype > ( hash, 1000000 );
+
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
 
-	//const int hbytes = sizeof(hashtype);
-	//const int hbits  = hbytes * 8;
+	//-----------------------------------------------------------------------------
+	// Keyset 'Cyclic'
 
-	TwiddleTest(hash);
-	AlignmentTest(hash);
-	AppendedZeroesTest(hash);
-	QuickBrownFox(hash);
-	printf("\n");
+	if(g_testCyclic || g_testAll)
+	{
+		printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
 
-	BulkSpeedTest(hash);
+		bool result = true;
+		bool drawDiagram = false;
 
-	TinySpeedTest<hashtype,4>(hash);
-	TinySpeedTest<hashtype,5>(hash);
-	TinySpeedTest<hashtype,6>(hash);
-	TinySpeedTest<hashtype,7>(hash);
-	TinySpeedTest<hashtype,8>(hash);
-	TinySpeedTest<hashtype,256>(hash);
-	printf("\n");
+		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
+		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
+		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
+		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
+		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
+		
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
 
-	// # of bytes in the cycle must be at least # of bytes in the hash output
+	//-----------------------------------------------------------------------------
+	// Keyset 'Sparse'
 
-	//CycleTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000);
-	//CycleTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000);
-	//CycleTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000);
-	//CycleTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000);
-	//CycleTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000);
+	if(g_testSparse || g_testAll)
+	{
+		printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
 
-	printf("\n");
+		bool result = true;
+		bool drawDiagram = false;
 
-	/*
-	DiffTest< Blob<64>,  hashtype >(hash,5,1000);
-	DiffTest< Blob<128>, hashtype >(hash,4,1000);
-	DiffTest< Blob<256>, hashtype >(hash,3,1000);
+		result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
+		result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
+		result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
+		result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
+		result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
+		result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram); 
+		result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
+		result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
 
-	printf("\n");
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
 
-	AvalancheTest(hash);
-	*/
+	//-----------------------------------------------------------------------------
+	// Keyset 'Permutation'
 
-	SparseKeyTest(hash,false);
+	if(g_testPermutation || g_testAll)
+	{
+		printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
 
-	//BitrangeKeysetTest(hash,false);
-	//TextKeyTest(hash.m_hash);
+		bool result = true;
+		bool drawDiagram = false;
+
+		// This very sparse set of blocks is particularly hard for SuperFastHash
+
+		uint32_t blocks[] =
+		{
+			0x00000000,
+			0x00000001,
+			0x00000002,
+			
+			0x00000400,
+			0x00008000,
+			
+			0x00080000,
+			0x00200000,
+
+			0x20000000,
+			0x40000000,
+			0x80000000,
+		};
+
+		result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
+
+	//-----------------------------------------------------------------------------
+	// Keyset 'Window'
+
+	// Skip distribution test for these - they're too easy to distribute well,
+	// and it generates a _lot_ of testing
+
+	if(g_testWindow || g_testAll)
+	{
+		printf("[[[ Keyset 'Window' Tests ]]]\n\n");
+
+		bool result = true;
+		bool testCollision = true;
+		bool testDistribution = false;
+		bool drawDiagram = false;
+
+		result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
+
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
+
+	//-----------------------------------------------------------------------------
+	// Keyset 'Text'
+
+	if(g_testText || g_testAll)
+	{
+		printf("[[[ Keyset 'Text' Tests ]]]\n\n");
+
+		bool result = true;
+		bool drawDiagram = false;
+
+		const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+
+		result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
+		result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
+		result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
+
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
+
+	//-----------------------------------------------------------------------------
+	// Keyset 'Zeroes'
+
+	if(g_testZeroes || g_testAll)
+	{
+		printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
+
+		bool result = true;
+		bool drawDiagram = false;
+
+		result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
+
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
+
+	//-----------------------------------------------------------------------------
+	// Keyset 'Seed'
+
+	if(g_testSeed || g_testAll)
+	{
+		printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
+
+		bool result = true;
+		bool drawDiagram = false;
+
+		result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
+
+		if(!result) printf("*********FAIL*********\n");
+		printf("\n");
+	}
 }
 
 //-----------------------------------------------------------------------------
 
-void optimize_fmix64 ( void );
+// Looks up 'name' in g_hashes and runs test<> instantiated for the entry's declared
+// output width (32, 64 or 128 bits). Unknown names and unsupported widths are reported.
+void testHash ( const char * name )
+{
+	HashInfo * pInfo = findHash(name);
+
+	if(pInfo == NULL)
+	{
+		printf("Invalid hash '%s' specified\n",name);
+		return;
+	}
+
+	if(pInfo->hashbits == 32)
+	{
+		test<uint32_t>( pInfo->hash, pInfo->desc );
+	}
+	else if(pInfo->hashbits == 64)
+	{
+		test<uint64_t>( pInfo->hash, pInfo->desc );
+	}
+	else if(pInfo->hashbits == 128)
+	{
+		test<uint128_t>( pInfo->hash, pInfo->desc );
+	}
+	else
+	{
+		printf("Invalid hash bit width %d for hash '%s'\n",pInfo->hashbits,pInfo->name);
+	}
+}
+//-----------------------------------------------------------------------------
+
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4702)
 
-void main ( void )
+int main ( int argc, char ** argv ) // runs the selected hashes through testHash() and prints elapsed clock() ticks; argc/argv are unused
 {
 	SetProcessAffinityMask(GetCurrentProcess(),2);
 
 	int a = clock();
 
-	//----------
+	g_testAll = true; // every test group in test() checks this flag, so all groups run
+
+	//g_testWindow = true;
+	//g_testSanity = true;
+	//g_testSpeed = true;
+	//g_testAvalanche = true;
+	//g_testDiff = true;
+	//g_testSparse = true;
+	//g_testPermutation = true;
+
+	//testHash("rand32");
+	//testHash("rand64");
+	//testHash("rand128");
 
-	//test<uint32_t>  ( md5_32,  "MD5, first 32 bits" );
-	//test<uint32_t>  ( lookup3_test,  "Jenkins lookup3" );
-	//test<uint32_t>  ( SuperFastHash,  "SuperFastHash" );
-	//test<uint32_t>  ( MurmurHash2_test,  "MurmurHash2 32-bit" );
-	//test<uint32_t>  ( MurmurHash2A_test,  "MurmurHash2 32-bit" );
-	//test<uint32_t>  ( FNV,  "FNV 32-bit" );
-	//test<uint32_t>  ( crc32,  "CRC-32" );
-	//test<uint32_t>  ( DoNothingHash,  "MurmurHash3 32-bit" );
+	//testHash("fnv");
+	//testHash("superfast");
+	//testHash("lookup3");
 
-	//test<uint32_t>  ( MurmurHash3_x86_32,  "MurmurHash3 32-bit" );
-	test<uint64_t>  ( MurmurHash3_x86_64,  "MurmurHash3 64-bit" );
-	//test<k128> ( MurmurHash3_128, "MurmurHash3 128-bit" );
+	//testHash("murmur2");
+	//testHash("murmur2B");
+	//testHash("murmur2C");
 
-	//test<uint32_t>  ( MurmurHash3x64_32,  "MurmurHash3 32-bit" );
+	testHash("murmur3a"); // findHash() compares names with _stricmp, so this matches the "Murmur3A" entry
+	testHash("murmur3b");
+	testHash("murmur3c");
+
+	testHash("murmur3d");
+	testHash("murmur3e");
+	testHash("murmur3f");
+
+	//----------
 
 	int b = clock();
 
 	printf("time %d\n",b-a);
-}
\ No newline at end of file
+
+	return 0;
+}
diff --git a/sha1.cpp b/sha1.cpp
index c4b79b8..03894d6 100644
--- a/sha1.cpp
+++ b/sha1.cpp
@@ -1,603 +1,323 @@
-/*
- *  sha1.cpp
- *
- *  Copyright (C) 1998, 2009
- *  Paul E. Jones <paulej@packetizer.com>
- *  All Rights Reserved.
- *
- *****************************************************************************
- *  $Id: sha1.cpp 12 2009-06-22 19:34:25Z paulej $
- *****************************************************************************
- *
- *  Description:
- *      This class implements the Secure Hashing Standard as defined
- *      in FIPS PUB 180-1 published April 17, 1995.
- *
- *      The Secure Hashing Standard, which uses the Secure Hashing
- *      Algorithm (SHA), produces a 160-bit message digest for a
- *      given data stream.  In theory, it is highly improbable that
- *      two messages will produce the same message digest.  Therefore,
- *      this algorithm can serve as a means of providing a "fingerprint"
- *      for a message.
- *
- *  Portability Issues:
- *      SHA-1 is defined in terms of 32-bit "words".  This code was
- *      written with the expectation that the processor has at least
- *      a 32-bit machine word size.  If the machine word size is larger,
- *      the code should still function properly.  One caveat to that
- *      is that the input functions taking characters and character arrays
- *      assume that only 8 bits of information are stored in each character.
- *
- *  Caveats:
- *      SHA-1 is designed to work with messages less than 2^64 bits long.
- *      Although SHA-1 allows a message digest to be generated for
- *      messages of any number of bits less than 2^64, this implementation
- *      only works with messages with a length that is a multiple of 8
- *      bits.
- *
- */
-
-
-#include "sha1.h"
-
-/*  
- *  SHA1
- *
- *  Description:
- *      This is the constructor for the sha1 class.
- *
- *  Parameters:
- *      None.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-SHA1::SHA1()
-{
-    Reset();
-}
-
-/*  
- *  ~SHA1
- *
- *  Description:
- *      This is the destructor for the sha1 class
- *
- *  Parameters:
- *      None.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-SHA1::~SHA1()
-{
-    // The destructor does nothing
-}
-
-/*  
- *  Reset
- *
- *  Description:
- *      This function will initialize the sha1 class member variables
- *      in preparation for computing a new message digest.
- *
- *  Parameters:
- *      None.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-void SHA1::Reset()
-{
-    Length_Low          = 0;
-    Length_High         = 0;
-    Message_Block_Index = 0;
-
-    H[0]        = 0x67452301;
-    H[1]        = 0xEFCDAB89;
-    H[2]        = 0x98BADCFE;
-    H[3]        = 0x10325476;
-    H[4]        = 0xC3D2E1F0;
-
-    Computed    = false;
-    Corrupted   = false;
-}
-
-/*  
- *  Result
- *
- *  Description:
- *      This function will return the 160-bit message digest into the
- *      array provided.
- *
- *  Parameters:
- *      message_digest_array: [out]
- *          This is an array of five unsigned integers which will be filled
- *          with the message digest that has been computed.
- *
- *  Returns:
- *      True if successful, false if it failed.
- *
- *  Comments:
- *
- */
-bool SHA1::Result(unsigned *message_digest_array)
-{
-    int i;                                  // Counter
-
-    if (Corrupted)
-    {
-        return false;
-    }
-
-    if (!Computed)
-    {
-        PadMessage();
-        Computed = true;
-    }
-
-    for(i = 0; i < 5; i++)
-    {
-        message_digest_array[i] = H[i];
-    }
-
-    return true;
-}
-
-/*  
- *  Input
- *
- *  Description:
- *      This function accepts an array of octets as the next portion of
- *      the message.
- *
- *  Parameters:
- *      message_array: [in]
- *          An array of characters representing the next portion of the
- *          message.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-void SHA1::Input(   const unsigned char *message_array,
-                    unsigned            length)
-{
-    if (!length)
-    {
-        return;
-    }
-
-    if (Computed || Corrupted)
-    {
-        Corrupted = true;
-        return;
-    }
-
-    while(length-- && !Corrupted)
-    {
-        Message_Block[Message_Block_Index++] = (*message_array & 0xFF);
-
-        Length_Low += 8;
-        Length_Low &= 0xFFFFFFFF;               // Force it to 32 bits
-        if (Length_Low == 0)
-        {
-            Length_High++;
-            Length_High &= 0xFFFFFFFF;          // Force it to 32 bits
-            if (Length_High == 0)
-            {
-                Corrupted = true;               // Message is too long
-            }
-        }
-
-        if (Message_Block_Index == 64)
-        {
-            ProcessMessageBlock();
-        }
-
-        message_array++;
-    }
-}
-
-/*  
- *  Input
- *
- *  Description:
- *      This function accepts an array of octets as the next portion of
- *      the message.
- *
- *  Parameters:
- *      message_array: [in]
- *          An array of characters representing the next portion of the
- *          message.
- *      length: [in]
- *          The length of the message_array
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-void SHA1::Input(   const char  *message_array,
-                    unsigned    length)
-{
-    Input((unsigned char *) message_array, length);
-}
-
-/*  
- *  Input
- *
- *  Description:
- *      This function accepts a single octets as the next message element.
- *
- *  Parameters:
- *      message_element: [in]
- *          The next octet in the message.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-void SHA1::Input(unsigned char message_element)
-{
-    Input(&message_element, 1);
-}
-
-/*  
- *  Input
- *
- *  Description:
- *      This function accepts a single octet as the next message element.
- *
- *  Parameters:
- *      message_element: [in]
- *          The next octet in the message.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-void SHA1::Input(char message_element)
-{
-    Input((unsigned char *) &message_element, 1);
-}
-
-/*  
- *  operator<<
- *
- *  Description:
- *      This operator makes it convenient to provide character strings to
- *      the SHA1 object for processing.
- *
- *  Parameters:
- *      message_array: [in]
- *          The character array to take as input.
- *
- *  Returns:
- *      A reference to the SHA1 object.
- *
- *  Comments:
- *      Each character is assumed to hold 8 bits of information.
- *
- */
-SHA1& SHA1::operator<<(const char *message_array)
-{
-    const char *p = message_array;
-
-    while(*p)
-    {
-        Input(*p);
-        p++;
-    }
-
-    return *this;
-}
-
-/*  
- *  operator<<
- *
- *  Description:
- *      This operator makes it convenient to provide character strings to
- *      the SHA1 object for processing.
- *
- *  Parameters:
- *      message_array: [in]
- *          The character array to take as input.
- *
- *  Returns:
- *      A reference to the SHA1 object.
- *
- *  Comments:
- *      Each character is assumed to hold 8 bits of information.
- *
- */
-SHA1& SHA1::operator<<(const unsigned char *message_array)
-{
-    const unsigned char *p = message_array;
-
-    while(*p)
-    {
-        Input(*p);
-        p++;
-    }
-
-    return *this;
-}
-
-/*  
- *  operator<<
- *
- *  Description:
- *      This function provides the next octet in the message.
- *
- *  Parameters:
- *      message_element: [in]
- *          The next octet in the message
- *
- *  Returns:
- *      A reference to the SHA1 object.
- *
- *  Comments:
- *      The character is assumed to hold 8 bits of information.
- *
- */
-SHA1& SHA1::operator<<(const char message_element)
-{
-    Input((unsigned char *) &message_element, 1);
-
-    return *this;
-}
-
-/*  
- *  operator<<
- *
- *  Description:
- *      This function provides the next octet in the message.
- *
- *  Parameters:
- *      message_element: [in]
- *          The next octet in the message
- *
- *  Returns:
- *      A reference to the SHA1 object.
- *
- *  Comments:
- *      The character is assumed to hold 8 bits of information.
- *
- */
-SHA1& SHA1::operator<<(const unsigned char message_element)
-{
-    Input(&message_element, 1);
-
-    return *this;
-}
-
-/*  
- *  ProcessMessageBlock
- *
- *  Description:
- *      This function will process the next 512 bits of the message
- *      stored in the Message_Block array.
- *
- *  Parameters:
- *      None.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *      Many of the variable names in this function, especially the single
- *      character names, were used because those were the names used
- *      in the publication.
- *
- */
-void SHA1::ProcessMessageBlock()
-{
-    const unsigned K[] =    {               // Constants defined for SHA-1
-                                0x5A827999,
-                                0x6ED9EBA1,
-                                0x8F1BBCDC,
-                                0xCA62C1D6
-                            };
-    int         t;                          // Loop counter
-    unsigned    temp;                       // Temporary word value
-    unsigned    W[80];                      // Word sequence
-    unsigned    A, B, C, D, E;              // Word buffers
-
-    /*
-     *  Initialize the first 16 words in the array W
-     */
-    for(t = 0; t < 16; t++)
-    {
-        W[t] = ((unsigned) Message_Block[t * 4]) << 24;
-        W[t] |= ((unsigned) Message_Block[t * 4 + 1]) << 16;
-        W[t] |= ((unsigned) Message_Block[t * 4 + 2]) << 8;
-        W[t] |= ((unsigned) Message_Block[t * 4 + 3]);
-    }
-
-    for(t = 16; t < 80; t++)
-    {
-       W[t] = CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
-    }
-
-    A = H[0];
-    B = H[1];
-    C = H[2];
-    D = H[3];
-    E = H[4];
-
-    for(t = 0; t < 20; t++)
-    {
-        temp = CircularShift(5,A) + ((B & C) | ((~B) & D)) + E + W[t] + K[0];
-        temp &= 0xFFFFFFFF;
-        E = D;
-        D = C;
-        C = CircularShift(30,B);
-        B = A;
-        A = temp;
-    }
-
-    for(t = 20; t < 40; t++)
-    {
-        temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
-        temp &= 0xFFFFFFFF;
-        E = D;
-        D = C;
-        C = CircularShift(30,B);
-        B = A;
-        A = temp;
-    }
-
-    for(t = 40; t < 60; t++)
-    {
-        temp = CircularShift(5,A) +
-               ((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
-        temp &= 0xFFFFFFFF;
-        E = D;
-        D = C;
-        C = CircularShift(30,B);
-        B = A;
-        A = temp;
-    }
-
-    for(t = 60; t < 80; t++)
-    {
-        temp = CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
-        temp &= 0xFFFFFFFF;
-        E = D;
-        D = C;
-        C = CircularShift(30,B);
-        B = A;
-        A = temp;
-    }
-
-    H[0] = (H[0] + A) & 0xFFFFFFFF;
-    H[1] = (H[1] + B) & 0xFFFFFFFF;
-    H[2] = (H[2] + C) & 0xFFFFFFFF;
-    H[3] = (H[3] + D) & 0xFFFFFFFF;
-    H[4] = (H[4] + E) & 0xFFFFFFFF;
-
-    Message_Block_Index = 0;
-}
-
-/*  
- *  PadMessage
- *
- *  Description:
- *      According to the standard, the message must be padded to an even
- *      512 bits.  The first padding bit must be a '1'.  The last 64 bits
- *      represent the length of the original message.  All bits in between
- *      should be 0.  This function will pad the message according to those
- *      rules by filling the message_block array accordingly.  It will also
- *      call ProcessMessageBlock() appropriately.  When it returns, it
- *      can be assumed that the message digest has been computed.
- *
- *  Parameters:
- *      None.
- *
- *  Returns:
- *      Nothing.
- *
- *  Comments:
- *
- */
-void SHA1::PadMessage()
-{
-    /*
-     *  Check to see if the current message block is too small to hold
-     *  the initial padding bits and length.  If so, we will pad the
-     *  block, process it, and then continue padding into a second block.
-     */
-    if (Message_Block_Index > 55)
-    {
-        Message_Block[Message_Block_Index++] = 0x80;
-        while(Message_Block_Index < 64)
-        {
-            Message_Block[Message_Block_Index++] = 0;
-        }
-
-        ProcessMessageBlock();
-
-        while(Message_Block_Index < 56)
-        {
-            Message_Block[Message_Block_Index++] = 0;
-        }
-    }
-    else
-    {
-        Message_Block[Message_Block_Index++] = 0x80;
-        while(Message_Block_Index < 56)
-        {
-            Message_Block[Message_Block_Index++] = 0;
-        }
-
-    }
-
-    /*
-     *  Store the message length as the last 8 octets
-     */
-    Message_Block[56] = (Length_High >> 24) & 0xFF;
-    Message_Block[57] = (Length_High >> 16) & 0xFF;
-    Message_Block[58] = (Length_High >> 8) & 0xFF;
-    Message_Block[59] = (Length_High) & 0xFF;
-    Message_Block[60] = (Length_Low >> 24) & 0xFF;
-    Message_Block[61] = (Length_Low >> 16) & 0xFF;
-    Message_Block[62] = (Length_Low >> 8) & 0xFF;
-    Message_Block[63] = (Length_Low) & 0xFF;
-
-    ProcessMessageBlock();
-}
-
-
-/*  
- *  CircularShift
- *
- *  Description:
- *      This member function will perform a circular shifting operation.
- *
- *  Parameters:
- *      bits: [in]
- *          The number of bits to shift (1-31)
- *      word: [in]
- *          The value to shift (assumes a 32-bit integer)
- *
- *  Returns:
- *      The shifted value.
- *
- *  Comments:
- *
- */
-unsigned SHA1::CircularShift(int bits, unsigned word)
-{
-    return ((word << bits) & 0xFFFFFFFF) | ((word & 0xFFFFFFFF) >> (32-bits));
-}
-
-
-//-----------------------------------------------------------------------------
-// Adapter for HashTest
-
-void sha1hash ( const void * key, int len, unsigned int seed, unsigned int * result )
-{
-	SHA1 s;
-
-	s.Input((const unsigned char*)&seed,4);
-	s.Input((const unsigned char*)key,len);
-
-	s.Result(result);
-}
\ No newline at end of file
+/*
+SHA-1 in C
+By Steve Reid <sreid@sea-to-sky.net>
+100% Public Domain
+
+-----------------
+Modified 7/98
+By James H. Brown <jbrown@burgoyne.com>
+Still 100% Public Domain
+
+Corrected a problem which generated improper hash values on 16 bit machines
+Routine SHA1Update changed from
+	void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
+len)
+to
+	void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
+long len)
+
+The 'len' parameter was declared an int which works fine on 32 bit machines.
+However, on 16 bit machines an int is too small for the shifts being done
+against
+it.  This caused the hash function to generate incorrect values if len was
+greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().
+
+Since the file IO in main() reads 16K at a time, any file 8K or larger would
+be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million
+"a"s).
+
+I also changed the declaration of variables i & j in SHA1Update to
+unsigned long from unsigned int for the same reason.
+
+These changes should make no difference to any 32 bit implementations since
+an
+int and a long are the same size in those environments.
+
+--
+I also corrected a few compiler warnings generated by Borland C.
+1. Added #include <process.h> for exit() prototype
+2. Removed unused variable 'j' in SHA1Final
+3. Changed exit(0) to return(0) at end of main.
+
+ALL changes I made can be located by searching for comments containing 'JHB'
+-----------------
+Modified 8/98
+By Steve Reid <sreid@sea-to-sky.net>
+Still 100% public domain
+
+1- Removed #include <process.h> and used return() instead of exit()
+2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)
+3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net
+
+-----------------
+Modified 4/01
+By Saul Kravitz <Saul.Kravitz@celera.com>
+Still 100% PD
+Modified to run on Compaq Alpha hardware.
+
+-----------------
+Modified 07/2002
+By Ralph Giles <giles@ghostscript.com>
+Still 100% public domain
+modified for use with stdint types, autoconf
+code cleanup, removed attribution comments
+switched SHA1Final() argument order for consistency
+use SHA1_ prefix for public api
+move public api to sha1.h
+*/
+
+/*
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+  A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+  84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+  34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "sha1.h"
+
+#pragma warning(disable : 4267)
+#pragma warning(disable : 4996)
+#pragma warning(disable : 4100)
+
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
+
+#define rol _rotl /* rotate left; _rotl comes from the Microsoft C runtime, so this alias is compiler-specific */
+
+/* blk0() and blk() perform the initial expand; blk() and the little-endian blk0() write their result back into block->l[]. */
+/* I got the idea of expanding during the round function from SSLeay */
+/* FIXME: can we do this in an endian-proof way? */
+
+#ifdef WORDS_BIGENDIAN
+#define blk0(i) block->l[i]
+#else /* WORDS_BIGENDIAN not defined: reverse the byte order of word i in place */
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF))
+#endif /* WORDS_BIGENDIAN */
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1; each expands to two statements (z+=...; w=rol(w,30);) */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash one 512-bit block into state[]. Core of the algorithm; buffer is only read (it is copied locally). */
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])
+{
+    uint32_t a, b, c, d, e;
+    typedef union {
+        uint8_t c[64];
+        uint32_t l[16];
+    } CHAR64LONG16;
+    CHAR64LONG16 block[1];  /* private, aligned workspace: blk0()/blk() overwrite block->l[] */
+
+    memcpy(block, buffer, 64);  /* never write through the caller's const (possibly unaligned) data */
+
+    /* Copy context->state[] to working vars */
+    a = state[0];
+    b = state[1];
+    c = state[2];
+    d = state[3];
+    e = state[4];
+
+    /* 4 rounds of 20 operations each. Loop unrolled. */
+    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+
+    /* Add the working vars back into context.state[] */
+    state[0] += a;
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+    state[4] += e;
+
+    /* Wipe variables */
+    a = b = c = d = e = 0;
+}
+
+
+/* SHA1_Init - reset a context: load the SHA-1 initial hash words and zero the bit counter */
+void SHA1_Init(SHA1_CTX* context)
+{
+    /* SHA1 initialization constants */
+    static const uint32_t initial_state[5] = {
+        0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
+    };
+    int word;
+
+    for (word = 0; word < 5; word++) context->state[word] = initial_state[word];
+    context->count[0] = context->count[1] = 0;
+}
+
+
+/* Feed len bytes at data into the hash; call as many times as needed before SHA1_Final(). */
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
+{
+    size_t i, j;
+    const uint32_t lowbits = (uint32_t)(len << 3);  /* low 32 bits of the bit length being added */
+    j = (context->count[0] >> 3) & 63;               /* bytes already pending in buffer[] */
+    if ((context->count[0] += lowbits) < lowbits) context->count[1]++;  /* carry out of the low word */
+
+    context->count[1] += (uint32_t)(len >> 29);      /* bits of len*8 above the low 32 */
+
+    if ((j + len) > 63)
+    {
+        memcpy(&context->buffer[j], data, (i = 64-j));  /* top up the pending block to 64 bytes */
+        SHA1_Transform(context->state, context->buffer);
+        /* hash the remaining whole blocks straight from the caller's data */
+        for ( ; i + 63 < len; i += 64)
+        {
+            SHA1_Transform(context->state, data + i);
+        }
+
+        j = 0;
+    }
+    else i = 0;
+    memcpy(&context->buffer[j], &data[i], len - i);  /* keep the unprocessed tail for the next call */
+}
+
+
+/* Append the 0x80 pad byte, zero padding and the 64-bit bit count, then write the 20-byte digest. */
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
+{
+    uint32_t i;
+    uint8_t  finalcount[8];
+    /* Serialize the bit count (count[1] first, most-significant byte first) before padding changes it. */
+    for (i = 0; i < 8; i++) {
+        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
+         >> ((3-(i & 3)) * 8) ) & 255);  /* Endian independent */
+    }
+    SHA1_Update(context, (uint8_t *)"\200", 1);  /* 0x80: a single 1 bit followed by zeros */
+    while ((context->count[0] & 504) != 448) {   /* until 56 bytes (448 bits) of the block are filled */
+        SHA1_Update(context, (uint8_t *)"\0", 1);
+    }
+    SHA1_Update(context, finalcount, 8);  /* Should cause a SHA1_Transform() */
+    for (i = 0; i < SHA1_DIGEST_SIZE; i++) {     /* emit each state word most-significant byte first */
+        digest[i] = (uint8_t)
+         ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
+    }
+
+    /* Wipe variables: the context is zeroed, so call SHA1_Init() again before reusing it */
+    i = 0;
+    memset(context->buffer, 0, 64);
+    memset(context->state, 0, 20);
+    memset(context->count, 0, 8);
+    memset(finalcount, 0, 8);	/* SWR */
+}
+
+//-----------------------------------------------------------------------------
+
+// Hash-harness adapter: SHA-1 of key, with the first 4 digest bytes copied to out (seed is not used).
+void sha1_32a ( const void * key, int len, uint32_t seed, void * out )
+{
+	SHA1_CTX ctx;
+	uint8_t sha[SHA1_DIGEST_SIZE];
+
+	SHA1_Init(&ctx);
+	SHA1_Update(&ctx, (const uint8_t*)key, len);
+	SHA1_Final(&ctx, sha);
+
+	memcpy(out, sha, 4);
+}
+
+//-----------------------------------------------------------------------------
+// self test
+
+//#define TEST
+
+#ifdef TEST
+
+static char *test_data[] = {     /* inputs; entry [2] is only a label - main() generates that message itself */
+    "abc",
+    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+    "A million repetitions of 'a'"};
+static char *test_results[] = {  /* expected digests, in the exact format digest_to_hex() produces */
+    "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",
+    "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",
+    "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};
+
+
+/* Format digest as space-separated groups of 8 upper-case hex digits (NUL-terminated) in output. */
+void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)
+{
+    char *cursor = output;
+    int n;
+
+    for (n = 0; n < SHA1_DIGEST_SIZE; n++) {
+        cursor += sprintf(cursor, "%02X", digest[n]);
+        if ((n & 3) == 3) {   /* a separator follows every 4th byte */
+            cursor += sprintf(cursor, " ");
+        }
+    }
+
+    *(cursor - 1) = '\0';     /* replace the trailing separator with the terminator */
+}
+
+int main(int argc, char** argv)
+{
+    int k;
+    SHA1_CTX context;
+    uint8_t digest[20];
+    char output[80];
+    /* Self-test driver: checks three test vectors; returns 0 if all match, 1 at the first mismatch. */
+    fprintf(stdout, "verifying SHA-1 implementation... ");
+    /* The first two vectors are short literal strings, each hashed with a single SHA1_Update() call. */
+    for (k = 0; k < 2; k++){
+        SHA1_Init(&context);
+        SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k]));
+        SHA1_Final(&context, digest);
+	digest_to_hex(digest, output);
+        /* non-zero strcmp(): the formatted digest differs from the expected string */
+        if (strcmp(output, test_results[k])) {
+            fprintf(stdout, "FAIL\n");
+            fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]);
+            fprintf(stderr,"\t%s returned\n", output);
+            fprintf(stderr,"\t%s is correct\n", test_results[k]);
+            return (1);
+        }
+    }
+    /* the million-'a' vector is fed one byte per SHA1_Update() call; test_data[2] is only its label */
+    SHA1_Init(&context);
+    for (k = 0; k < 1000000; k++)
+        SHA1_Update(&context, (uint8_t*)"a", 1);
+    SHA1_Final(&context, digest);
+    digest_to_hex(digest, output);
+    if (strcmp(output, test_results[2])) {
+        fprintf(stdout, "FAIL\n");
+        fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]);
+        fprintf(stderr,"\t%s returned\n", output);
+        fprintf(stderr,"\t%s is correct\n", test_results[2]);
+        return (1);
+    }
+
+    /* success */
+    fprintf(stdout, "ok\n");
+    return(0);
+}
+#endif /* TEST */
diff --git a/sha1.h b/sha1.h
index c0efa1c..a866aac 100644
--- a/sha1.h
+++ b/sha1.h
@@ -1,89 +1,21 @@
-/*
- *  sha1.h
- *
- *  Copyright (C) 1998, 2009
- *  Paul E. Jones <paulej@packetizer.com>
- *  All Rights Reserved.
- *
- *****************************************************************************
- *  $Id: sha1.h 12 2009-06-22 19:34:25Z paulej $
- *****************************************************************************
- *
- *  Description:
- *      This class implements the Secure Hashing Standard as defined
- *      in FIPS PUB 180-1 published April 17, 1995.
- *
- *      Many of the variable names in this class, especially the single
- *      character names, were used because those were the names used
- *      in the publication.
- *
- *      Please read the file sha1.cpp for more information.
- *
- */
-
-#ifndef _SHA1_H_
-#define _SHA1_H_
-
-class SHA1
-{
-
-    public:
-
-        SHA1();
-        virtual ~SHA1();
-
-        /*
-         *  Re-initialize the class
-         */
-        void Reset();
-
-        /*
-         *  Returns the message digest
-         */
-        bool Result(unsigned *message_digest_array);
-
-        /*
-         *  Provide input to SHA1
-         */
-        void Input( const unsigned char *message_array,
-                    unsigned            length);
-        void Input( const char  *message_array,
-                    unsigned    length);
-        void Input(unsigned char message_element);
-        void Input(char message_element);
-        SHA1& operator<<(const char *message_array);
-        SHA1& operator<<(const unsigned char *message_array);
-        SHA1& operator<<(const char message_element);
-        SHA1& operator<<(const unsigned char message_element);
-
-    private:
-
-        /*
-         *  Process the next 512 bits of the message
-         */
-        void ProcessMessageBlock();
-
-        /*
-         *  Pads the current message block to 512 bits
-         */
-        void PadMessage();
-
-        /*
-         *  Performs a circular left shift operation
-         */
-        inline unsigned CircularShift(int bits, unsigned word);
-
-        unsigned H[5];                      // Message digest buffers
-
-        unsigned Length_Low;                // Message length in bits
-        unsigned Length_High;               // Message length in bits
-
-        unsigned char Message_Block[64];    // 512-bit message blocks
-        int Message_Block_Index;            // Index into message block array
-
-        bool Computed;                      // Is the digest computed?
-        bool Corrupted;                     // Is the message digest corruped?
-    
-};
-
-#endif
+/* public api for steve reid's public domain SHA-1 implementation */
+/* this file is in the public domain */
+
+#pragma once
+
+#include "pstdint.h"
+
+struct SHA1_CTX
+{
+    uint32_t state[5];    /* running hash value: five 32-bit words, serialized into the 20-byte digest */
+    uint32_t count[2];    /* message length so far, in bits: [0] is the low word, [1] the high word */
+    uint8_t  buffer[64];  /* input bytes not yet hashed (less than one full 64-byte block) */
+};
+
+#define SHA1_DIGEST_SIZE 20
+
+void SHA1_Init(SHA1_CTX* context);
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);
+
+void sha1_32a ( const void * key, int len, uint32_t seed, void * out );
\ No newline at end of file
-- 
cgit v1.2.3


From 31a9e8ef5b491fda1cc2f09ebef2353a30b0eb40 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Tue, 9 Nov 2010 20:29:19 +0000
Subject: MurmurHash3_x86_128 was missing a rotate instruction

git-svn-id: http://smhasher.googlecode.com/svn/trunk@58 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp |  2 ++
 main.cpp        | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 2942f10..c66bd87 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -196,6 +196,8 @@ __forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_
 	h1 += h3;
 	h1 += h4;
 
+	h1 = _rotl(h1,17);
+
 	k2 *= c2; 
 	k2  = _rotl(k2,11);
 	k2 *= c1;
diff --git a/main.cpp b/main.cpp
index 25d3c89..f5fcf04 100644
--- a/main.cpp
+++ b/main.cpp
@@ -42,6 +42,19 @@ bool g_testZeroes      = false;
 bool g_testSeed        = false;
 //*/
 
+
+int64_t g_hashcount = 0;  // number of counterhash() calls made so far
+int64_t g_bytecount = 0;  // running sum of the len arguments passed to counterhash()
+// Instrumentation "hash": tallies the call and its key length, then stores the value of rand_u32() in out.
+void counterhash ( const void * , const int len, const uint32_t , void * out )
+{
+	g_hashcount++;
+	g_bytecount += len;
+
+	*(uint32_t*)out = rand_u32();  // key and seed are ignored; out must have room for 32 bits
+}
+
+
 //-----------------------------------------------------------------------------
 
 struct HashInfo
@@ -54,6 +67,7 @@ struct HashInfo
 
 HashInfo g_hashes[] = 
 {
+	{ counterhash,          32, "count",       "Counts how many times the hash function is called" },
 	{ randhash_32,          32, "rand32",      "Random number generator, 32-bit" },
 	{ randhash_64,          64, "rand64",      "Random number generator, 64-bit" },
 	{ randhash_128,        128, "rand128",     "Random number generator, 128-bit" },
@@ -387,6 +401,14 @@ int main ( int argc, char ** argv )
 	//testHash("rand64");
 	//testHash("rand128");
 
+	//testHash("donothing");
+
+	//testHash("count");
+
+	//printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount);
+
+	//testHash("crc32");
+
 	//testHash("fnv");
 	//testHash("superfast");
 	//testHash("lookup3");
-- 
cgit v1.2.3


From 9d17d0b842230279e2a89b2e65a7dde05494d529 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Wed, 17 Nov 2010 04:07:51 +0000
Subject: remove swap() and use std::swap instead, make alignment test a bit
 more robust

git-svn-id: http://smhasher.googlecode.com/svn/trunk@69 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 KeysetTest.cpp | 31 ++++++++++++++++++++++---------
 KeysetTest.h   |  6 ++++--
 Types.h        |  8 --------
 3 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 3424ce6..70b63de 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -36,27 +36,32 @@ void AlignmentTest ( pfHash hash, const int hashbits )
 {
 	const int hashbytes = hashbits / 8;
 
-	printf("Testing alignment handling on small keys..........");
+	printf("Testing alignment handling..........");
 
-	char bufs[16][64];
+	char testblob[512];
+	rand_p(testblob,512);
 
+	char * bufs[16];
 	char * strings[16];
 
 	for(int i = 0; i < 16; i++)
 	{
-		uint32_t b = uint32_t(&bufs[i][0]);
+		bufs[i] = new char[1024];
+		uint32_t b = uint32_t(bufs[i]);
 
 		b = (b+15)&(~15);
 
 		strings[i] = (char*)(b + i);
-
-		strcpy_s(strings[i],32,"DeadBeefDeadBeef");
+		
+		memcpy(strings[i],testblob,512);
 	}
 
+	bool result = true;
+
 	uint32_t hash1[64];
 	uint32_t hash2[64];
 
-	for(int k = 1; k <= 16; k++)
+	for(int k = 1; k <= 512; k++)
 	for(int j = 0; j < 15; j++)
 	for(int i = j+1; i < 16; i++)
 	{
@@ -68,12 +73,20 @@ void AlignmentTest ( pfHash hash, const int hashbits )
 
 		if(memcmp(hash1,hash2,hashbytes) != 0)
 		{
-			printf("*********FAIL*********\n");
-			return;
+			result = false;
 		}
 	}
 
-	printf("PASS\n");
+	if(!result)
+	{
+		printf("*********FAIL*********\n");
+	}
+	else
+	{
+		printf("PASS\n");
+	}
+
+	for(int i = 0; i < 16; i++) delete [] bufs[i];
 }
 
 //----------------------------------------------------------------------------
diff --git a/KeysetTest.h b/KeysetTest.h
index ae2bfcf..53b61ec 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -10,6 +10,8 @@
 #include "Types.h"
 #include "Stats.h"
 
+#include <algorithm>  // for std::swap
+
 //-----------------------------------------------------------------------------
 // Sanity tests
 
@@ -38,11 +40,11 @@ void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount,
 
 	for(int i = k; i < blockcount; i++)
 	{
-		swap(blocks[k],blocks[i]);
+		std::swap(blocks[k],blocks[i]);
 
 		PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
 
-		swap(blocks[k],blocks[i]);
+		std::swap(blocks[k],blocks[i]);
 	}
 }
 
diff --git a/Types.h b/Types.h
index d31a647..eb2a309 100644
--- a/Types.h
+++ b/Types.h
@@ -17,14 +17,6 @@ uint32_t whitehole ( void );
 
 typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
 
-template < typename T >
-void swap ( T & a, T & b )
-{
-	T t = a;
-	a = b;
-	b = t;
-}
-
 //-----------------------------------------------------------------------------
 
 template < class T >
-- 
cgit v1.2.3


From babb553380f2d223ebc4730830233c5d224963e8 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 28 Feb 2011 06:03:12 +0000
Subject: Tweak Murmur3, new more rigorous sanity test, combinatorial block
 test

git-svn-id: http://smhasher.googlecode.com/svn/trunk@76 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Bitvec.cpp             |  14 +++
 Bitvec.h               |   1 +
 Hashes.cpp             |  32 ++++++-
 Hashes.h               |   4 +
 KeysetTest.cpp         | 178 +++++++++++++-----------------------
 KeysetTest.h           |  56 ++++++++++++
 MurmurHash3.cpp        | 200 ++++++++++++-----------------------------
 MurmurHashAligned.cpp  |   2 +-
 MurmurHashAligned2.cpp |   2 +-
 MurmurHashNeutral2.cpp |   2 +-
 MurmurHashTest.cpp     |  17 ----
 SpeedTest.cpp          |  57 ++++++++++++
 SpeedTest.h            |  55 +-----------
 Types.h                |   1 +
 main.cpp               | 238 ++++++++++++++++++++++++++++++++++++++++---------
 15 files changed, 480 insertions(+), 379 deletions(-)

diff --git a/Bitvec.cpp b/Bitvec.cpp
index 463c1e4..6939980 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -71,6 +71,20 @@ void printhex32 ( void * blob, int len )
 	printf("}");
 }
 
+// Prints len bytes of blob as a C initializer list, e.g. "{ 0x01, 0x02,  };" (no trailing newline).
+void printbytes ( void * blob, int len )
+{
+	const uint8_t * bytes = (const uint8_t*)blob;
+	int n = 0;
+	printf("{ ");
+	while(n < len)
+	{
+		printf("0x%02x, ",bytes[n]);
+		n++;
+	}
+	printf(" };");
+}
+
 //-----------------------------------------------------------------------------
 
 uint32_t getbit ( void * block, int len, uint32_t bit )
diff --git a/Bitvec.h b/Bitvec.h
index fce97d5..1a475d1 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -8,6 +8,7 @@
 
 void     printbits   ( void * blob, int len );
 void     printhex32  ( void * blob, int len );
+void     printbytes  ( void * blob, int len );
 
 uint32_t getbit      ( void * blob, int len, uint32_t bit );
 uint32_t getbit_wrap ( void * blob, int len, uint32_t bit );
diff --git a/Hashes.cpp b/Hashes.cpp
index f6787c6..cebd37f 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -2,6 +2,13 @@
 
 #include "Random.h"
 
+
+#include <stdlib.h>
+//#include <stdint.h>
+#include <assert.h>
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
 //----------------------------------------------------------------------------
 // fake / bad hashes
 
@@ -54,6 +61,20 @@ void sumhash ( const void * key, int len, uint32_t seed, void * out )
 	*(uint32_t*)out = h;
 }
 
+// Trivial "hash": seed plus the sum of the key's whole 32-bit words; the trailing len%4 bytes are ignored.
+void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
+{
+	const uint32_t * words = (const uint32_t*)key;
+	uint32_t sum = seed;
+
+	for(int w = 0, count = len/4; w < count; w++)
+	{
+		sum += words[w];
+	}
+
+	*(uint32_t*)out = sum;
+}
+
 void DoNothingHash ( const void *, int, uint32_t, void * )
 {
 	return;
@@ -62,20 +83,23 @@ void DoNothingHash ( const void *, int, uint32_t, void * )
 //-----------------------------------------------------------------------------
 // One-byte-at-a-time hash based on Murmur's mix
 
-uint32_t MurmurOAAT ( const void * key, int len, uint32_t h )
+void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 
-	h ^= len;
+	uint32_t h = seed ^ len;
 
 	for(int i = 0; i < len; i++)
 	{
 		h ^= data[i];
 		h *= 0x5bd1e995;
-		h ^= h >> 16;
+		h ^= h >> 15;
 	}
 
-	return h;
+	h *= 0x5bd1e995;
+	h ^= h >> 15;
+
+	*(uint32_t*)out = h;
 }
 
 //----------------------------------------------------------------------------
diff --git a/Hashes.h b/Hashes.h
index 1aad04c..a580113 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -9,6 +9,9 @@
 //----------
 // These are _not_ hash functions (even though people tend to use crc32 as one...)
 
+void sumhash               ( const void * key, int len, uint32_t seed, void * out );
+void sumhash32             ( const void * key, int len, uint32_t seed, void * out );
+
 void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
 void crc32                 ( const void * key, int len, uint32_t seed, void * out );
 
@@ -28,6 +31,7 @@ void sha1_32a              ( const void * key, int len, uint32_t seed, void * ou
 void FNV                   ( const void * key, int len, uint32_t seed, void * out );
 void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
 void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
+void MurmurOAAT            ( const void * key, int len, uint32_t seed, void * out );
 
 //----------
 // MurmurHash2
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 70b63de..6436826 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -29,55 +29,81 @@ void QuickBrownFox ( pfHash hash, const int hashbits )
 }
 
 //----------------------------------------------------------------------------
-// Alignment of the keys should not affect the hash value - if it does,
-// something is broken.
+// Basic sanity checks -
 
-void AlignmentTest ( pfHash hash, const int hashbits )
-{
-	const int hashbytes = hashbits / 8;
+// A hash function should not be reading outside the bounds of the key.
 
-	printf("Testing alignment handling..........");
+// Flipping a bit of a key should, with overwhelmingly high probability,
+// result in a different hash.
 
-	char testblob[512];
-	rand_p(testblob,512);
+// Hashing the same key twice should always produce the same result.
+
+// The memory alignment of the key should not affect the hash result.
 
-	char * bufs[16];
-	char * strings[16];
+bool SanityTest ( pfHash hash, const int hashbits )
+{	printf("Testing bit twiddling");
+
+	bool result = true;
 
-	for(int i = 0; i < 16; i++)
+	const int hashbytes = hashbits/8;
+	const int reps = 10;
+	const int keymax = 128;
+	const int pad = 16;
+	const int buflen = keymax + pad*3;
+	
+	uint8_t * buffer1 = new uint8_t[buflen];
+	uint8_t * buffer2 = new uint8_t[buflen];
+
+	uint8_t * hash1 = new uint8_t[hashbytes];
+	uint8_t * hash2 = new uint8_t[hashbytes];
+
+	//----------
+	
+	for(int irep = 0; irep < reps; irep++)
 	{
-		bufs[i] = new char[1024];
-		uint32_t b = uint32_t(bufs[i]);
+		if(irep % (reps/10) == 0) printf(".");
 
-		b = (b+15)&(~15);
+		for(int len = 4; len <= keymax; len++)
+		{
+			for(int offset = pad; offset < pad*2; offset++)
+			{
+				uint8_t * key1 = &buffer1[pad];
+				uint8_t * key2 = &buffer2[pad+offset];
 
-		strings[i] = (char*)(b + i);
-		
-		memcpy(strings[i],testblob,512);
-	}
+				rand_p(buffer1,buflen);
+				rand_p(buffer2,buflen);
 
-	bool result = true;
+				memcpy(key2,key1,len);
 
-	uint32_t hash1[64];
-	uint32_t hash2[64];
+				hash(key1,len,0,hash1);
 
-	for(int k = 1; k <= 512; k++)
-	for(int j = 0; j < 15; j++)
-	for(int i = j+1; i < 16; i++)
-	{
-		const char * s1 = strings[i];
-		const char * s2 = strings[j];
+				for(int bit = 0; bit < (len * 8); bit++)
+				{
+					// Flip a bit, hash the key -> we should get a different result.
 
-		hash(s1,k,0,hash1);
-		hash(s2,k,0,hash2);
+					flipbit(key2,len,bit);
+					hash(key2,len,0,hash2);
 
-		if(memcmp(hash1,hash2,hashbytes) != 0)
-		{
-			result = false;
+					if(memcmp(hash1,hash2,hashbytes) == 0)
+					{
+						result = false;
+					}
+
+					// Flip it back, hash again -> we should get the original result.
+
+					flipbit(key2,len,bit);
+					hash(key2,len,0,hash2);
+
+					if(memcmp(hash1,hash2,hashbytes) != 0)
+					{
+						result = false;
+					}
+				}
+			}
 		}
 	}
 
-	if(!result)
+	if(result == false)
 	{
 		printf("*********FAIL*********\n");
 	}
@@ -86,7 +112,10 @@ void AlignmentTest ( pfHash hash, const int hashbits )
 		printf("PASS\n");
 	}
 
-	for(int i = 0; i < 16; i++) delete [] bufs[i];
+	delete [] hash1;
+	delete [] hash2;
+
+	return result;
 }
 
 //----------------------------------------------------------------------------
@@ -132,85 +161,4 @@ void AppendedZeroesTest ( pfHash hash, const int hashbits )
 	printf("PASS\n");
 }
 
-//----------------------------------------------------------------------------
-// Basic sanity checks -
-
-// A hash function should not be reading outside the bounds of the key.
-
-// Flipping a bit of a key should, with overwhelmingly high probability,
-// result in a different hash.
-
-// Hashing the same key twice should always produce the same result.
-
-bool SanityTest ( pfHash hash, const int hashbits )
-{
-	bool result = true;
-
-	const int hashbytes = hashbits/8;
-	const int reps = 100;
-
-	printf("Testing bit twiddling");
-
-	uint8_t buffer[256];
-	uint8_t * key = &buffer[64];
-
-	uint8_t * h1 = new uint8_t[hashbytes];
-	uint8_t * h2 = new uint8_t[hashbytes];
-
-	for(int irep = 0; irep < reps; irep++)
-	{
-		if(irep % (reps/10) == 0) printf(".");
-
-		for(int len = 1; len <= 128; len++)
-		{
-			// Generate a random key in the middle of the buffer, hash it,
-			// and then fill the space around the key with garbage. If a
-			// broken hash function reads past the ends of the key, it should
-			// fail the "did we get the same hash?" test below.
-
-			rand_p(key,len);
-			hash(key,len,0,h1);
-
-			rand_p(buffer,64);
-			rand_p(key+len,64);
-
-			// Flip a bit, hash the key -> we should get a different result.
-			// Flip it back, hash again -> we should get the same result.
-
-			for(int bit = 0; bit < (len * 8); bit++)
-			{
-				flipbit(key,len,bit);
-				hash(key,len,0,h2);
-
-				if(memcmp(h1,h2,hashbytes) == 0)
-				{
-					result = false;
-				}
-
-				flipbit(key,len,bit);
-				hash(key,len,0,h2);
-
-				if(memcmp(h1,h2,hashbytes) != 0)
-				{
-					result = false;
-				}
-			}
-		}
-	}
-
-	if(result == false)
-	{
-		printf("*********FAIL*********\n");
-	}
-	else
-	{
-		printf("PASS\n");
-	}
-
-	delete [] h1;
-	delete [] h2;
-
-	return result;
-}
-
 //-----------------------------------------------------------------------------
diff --git a/KeysetTest.h b/KeysetTest.h
index 53b61ec..7ef398c 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -20,6 +20,62 @@ void QuickBrownFox      ( pfHash hash, const int hashbits );
 void AlignmentTest      ( pfHash hash, const int hashbits );
 void AppendedZeroesTest ( pfHash hash, const int hashbits );
 
+//-----------------------------------------------------------------------------
+// Keyset 'Combination' - all possible combinations of input blocks
+
+// Recursively builds every key of up to maxlen blocks drawn (with repetition) from blocks[];
+// each key, including every shorter prefix, is hashed with seed 0 and its hash appended to hashes.
+template< typename hashtype >
+void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 
+							    uint32_t * blocks, int blockcount, 
+								pfHash hash, std::vector<hashtype> & hashes )
+{
+	if(len == maxlen) return;
+
+	const int keybytes = int((len+1) * sizeof(uint32_t));
+
+	for(int b = 0; b < blockcount; b++)
+	{
+		key[len] = blocks[b];
+
+		// Hash the key that ends with this block...
+		hashtype digest;
+		hash(key,keybytes,0,&digest);
+		hashes.push_back(digest);
+
+		// ...then extend it by one more block in every possible way.
+		CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
+	}
+}
+
+template< typename hashtype >
+bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+	printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
+
+	// Generate the keyset: hash every key CombinationKeygenRecurse builds from blocks[] in a scratch buffer
+
+	std::vector<hashtype> hashes;
+
+	uint32_t * key = new uint32_t[maxlen];
+
+	CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
+
+	delete [] key;
+
+	printf("%d keys\n",(int)hashes.size());
+
+	// Pass the collected hashes and the caller's flags to TestHashList; its verdict is the return value
+
+	bool result = true;
+
+	result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+	
+	printf("\n");
+
+	return result;
+}
+
 //----------------------------------------------------------------------------
 // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
 // consisting of all possible permutations of those blocks
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index c66bd87..75e0209 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -1,7 +1,12 @@
 #include "MurmurHash3.h"
-
 #include <stdlib.h>    // for _rotl
 
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+
 //-----------------------------------------------------------------------------
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
@@ -12,43 +17,32 @@ inline uint32_t getblock ( const uint32_t * p, int i )
 }
 
 //----------
-// Finalization mix - force all bits of a hash block to avalanche
-
-// avalanches all bits to within 0.25% bias
-
-inline uint32_t fmix32 ( uint32_t h )
-{
-	h ^= h >> 16;
-	h *= 0x85ebca6b;
-	h ^= h >> 13;
-	h *= 0xc2b2ae35;
-	h ^= h >> 16;
-
-	return h;
-}
-
-//-----------------------------------------------------------------------------
 
 inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
 {
+	c1 = c1*5+0x7b7d159c;
+	c2 = c2*5+0x6bce6396;
+
 	k1 *= c1; 
-	k1  = _rotl(k1,11); 
+	k1 = _rotl(k1,11); 
 	k1 *= c2;
-	h1 ^= k1;
-	
-	h1 = h1*3+0x52dce729;
 
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
+	h1 = _rotl(h1,13);
+	h1 = h1*5+0x52dce729;
+	h1 ^= k1;
 }
 
 //----------
 
+//void MurmurHash3_x86_32 ( const void * key, int len, const void * seed, void * out )
 void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 	const int nblocks = len / 4;
 
+	//uint32_t * s = (uint32_t*)seed;
+
+	//uint32_t h1 = 0x971e137b ^ s[0];
 	uint32_t h1 = 0x971e137b ^ seed;
 
 	uint32_t c1 = 0x95543787;
@@ -86,100 +80,17 @@ void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
 
 	h1 ^= len;
 
-	h1 = fmix32(h1);
+	h1 *= 0x85ebca6b;
+	h1 ^= h1 >> 13;
+	h1 *= 0xc2b2ae35;
+	h1 ^= h1 >> 16;
+
+	//h1 ^= s[0];
+	h1 ^= seed;
 
 	*(uint32_t*)out = h1;
 } 
 
-//-----------------------------------------------------------------------------
-
-inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & k1, uint32_t & k2, uint32_t & c1, uint32_t & c2 )
-{
-	k1 *= c1; 
-	k1  = _rotl(k1,11); 
-	k1 *= c2;
-	h1 ^= k1;
-	h1 += h2;
-
-	h2 = _rotl(h2,17);
-
-	k2 *= c2; 
-	k2  = _rotl(k2,11);
-	k2 *= c1;
-	h2 ^= k2;
-	h2 += h1;
-
-	h1 = h1*3+0x52dce729;
-	h2 = h2*3+0x38495ab5;
-
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
-}
-
-//----------
-
-void MurmurHash3_x86_64 ( const void * key, const int len, const uint32_t seed, void * out )
-{
-	const uint8_t * data = (const uint8_t*)key;
-	const int nblocks = len / 8;
-
-	uint32_t h1 = 0x8de1c3ac ^ seed;
-	uint32_t h2 = 0xbab98226 ^ seed;
-
-	uint32_t c1 = 0x95543787;
-	uint32_t c2 = 0x2ad7eb25;
-
-	//----------
-	// body
-
-	const uint32_t * blocks = (const uint32_t *)(data + nblocks*8);
-
-	for(int i = -nblocks; i; i++)
-	{
-		uint32_t k1 = getblock(blocks,i*2+0);
-		uint32_t k2 = getblock(blocks,i*2+1);
-
-		bmix32(h1,h2,k1,k2,c1,c2);
-	}
-
-	//----------
-	// tail
-	
-	const uint8_t * tail = (const uint8_t*)(data + nblocks*8);
-
-	uint32_t k1 = 0;
-	uint32_t k2 = 0;
-
-	switch(len & 7)
-	{
-	case 7: k2 ^= tail[6] << 16;
-	case 6: k2 ^= tail[5] << 8;
-	case 5: k2 ^= tail[4] << 0;
-	case 4: k1 ^= tail[3] << 24;
-	case 3: k1 ^= tail[2] << 16;
-	case 2: k1 ^= tail[1] << 8;
-	case 1: k1 ^= tail[0] << 0;
-	        bmix32(h1,h2,k1,k2,c1,c2);
-	};
-
-	//----------
-	// finalization
-
-	h2 ^= len;
-
-	h1 += h2;
-	h2 += h1;
-
-	h1 = fmix32(h1);
-	h2 = fmix32(h2);
-
-	h1 += h2;
-	h2 += h1;
-
-	((uint32_t*)out)[0] = h1;
-	((uint32_t*)out)[1] = h2;
-}
-
 //-----------------------------------------------------------------------------
 // This mix is large enough that VC++ refuses to inline it unless we use
 // __forceinline. It's also not all that fast due to register spillage.
@@ -230,12 +141,34 @@ __forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_
 }
 
 //----------
+// Finalization mix - force all bits of a hash block to avalanche
 
-void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, void * out )
+// avalanches all bits to within 0.25% bias
+
+inline uint32_t fmix32 ( uint32_t h )
+{
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+//void MurmurHash3_x86_128 ( const void * key, const int len, const void * seed, void * out )
+void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 	const int nblocks = len / 16;
 
+	//uint32_t * s = (uint32_t*)(seed);
+
+	//uint32_t h1 = 0x8de1c3ac ^ s[0];
+	//uint32_t h2 = 0xbab98226 ^ s[1];
+	//uint32_t h3 = 0xfcba5b2d ^ s[2];
+	//uint32_t h4 = 0x32452e3e ^ s[3];
+
 	uint32_t h1 = 0x8de1c3ac ^ seed;
 	uint32_t h2 = 0xbab98226 ^ seed;
 	uint32_t h3 = 0xfcba5b2d ^ seed;
@@ -274,14 +207,17 @@ void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed,
 	case 15: k4 ^= tail[14] << 16;
 	case 14: k4 ^= tail[13] << 8;
 	case 13: k4 ^= tail[12] << 0;
+
 	case 12: k3 ^= tail[11] << 24;
 	case 11: k3 ^= tail[10] << 16;
 	case 10: k3 ^= tail[ 9] << 8;
 	case  9: k3 ^= tail[ 8] << 0;
+
 	case  8: k2 ^= tail[ 7] << 24;
 	case  7: k2 ^= tail[ 6] << 16;
 	case  6: k2 ^= tail[ 5] << 8;
 	case  5: k2 ^= tail[ 4] << 0;
+
 	case  4: k1 ^= tail[ 3] << 24;
 	case  3: k1 ^= tail[ 2] << 16;
 	case  2: k1 ^= tail[ 1] << 8;
@@ -328,16 +264,18 @@ inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2,
 	k1 *= c1; 
 	k1  = _rotl64(k1,23); 
 	k1 *= c2;
-	h1 ^= k1;
-	h1 += h2;
-
-	h2 = _rotl64(h2,41);
 
 	k2 *= c2; 
 	k2  = _rotl64(k2,23);
 	k2 *= c1;
-	h2 ^= k2;
+
+	h1 = _rotl64(h1,17);
+	h1 += h2;
+	h1 ^= k1;
+
+	h2 = _rotl64(h2,41);
 	h2 += h1;
+	h2 ^= k2;
 
 	h1 = h1*3+0x52dce729;
 	h2 = h2*3+0x38495ab5;
@@ -434,27 +372,3 @@ void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed,
 }
 
 //-----------------------------------------------------------------------------
-// If we need a smaller hash value, it's faster to just use a portion of the 
-// 128-bit hash
-
-void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out )
-{
-	uint32_t temp[4];
-
-	MurmurHash3_x64_128(key,len,seed,temp);
-
-	*(uint32_t*)out = temp[0];
-}
-
-//----------
-
-void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out )
-{
-	uint64_t temp[2];
-
-	MurmurHash3_x64_128(key,len,seed,temp);
-
-	*(uint64_t*)out = temp[0];
-} 
-
-//-----------------------------------------------------------------------------
diff --git a/MurmurHashAligned.cpp b/MurmurHashAligned.cpp
index 716dda6..63dfe61 100644
--- a/MurmurHashAligned.cpp
+++ b/MurmurHashAligned.cpp
@@ -1,2 +1,2 @@
-#include "stdafx.h"
+//#include "stdafx.h"
 
diff --git a/MurmurHashAligned2.cpp b/MurmurHashAligned2.cpp
index 23dced4..83e9723 100644
--- a/MurmurHashAligned2.cpp
+++ b/MurmurHashAligned2.cpp
@@ -1,4 +1,4 @@
-#include "stdafx.h"
+//#include "stdafx.h"
 
 #pragma warning(disable:4311)
 
diff --git a/MurmurHashNeutral2.cpp b/MurmurHashNeutral2.cpp
index 716dda6..63dfe61 100644
--- a/MurmurHashNeutral2.cpp
+++ b/MurmurHashNeutral2.cpp
@@ -1,2 +1,2 @@
-#include "stdafx.h"
+//#include "stdafx.h"
 
diff --git a/MurmurHashTest.cpp b/MurmurHashTest.cpp
index 6b18f53..3bc6a96 100644
--- a/MurmurHashTest.cpp
+++ b/MurmurHashTest.cpp
@@ -7,20 +7,3 @@ uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
 uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
 uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
 uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
-
-
-void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
-{
-	*(uint32_t*)out = MurmurHash1(key,len,seed);
-}
-
-void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
-{
-	*(uint32_t*)out = MurmurHash2(key,len,seed);
-}
-
-void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
-{
-	*(uint32_t*)out = MurmurHash2A(key,len,seed);
-}
-
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index dbfadcb..d95acd4 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -48,3 +48,60 @@ void BulkSpeedTest ( pfHash hash )
 
 	delete [] block;
 }
+
+//-----------------------------------------------------------------------------
+
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles )
+{
+	const int trials = 100000;
+
+	if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+
+	uint8_t * h = new uint8_t[hashsize];
+	uint8_t * k = new uint8_t[keysize];
+
+	double bestcycles = 1e9;
+
+	for(int itrial = 0; itrial < trials; itrial++)
+	{
+		__int64 begin,end;
+
+		rand_p(k,keysize);
+
+		begin = __rdtsc();
+		
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		end = __rdtsc();
+
+		//blackhole(*(uint32_t*)(&h));
+
+		double cycles = double(end-begin) / 64;
+		if(cycles < bestcycles) bestcycles = cycles;
+	}
+
+	double bestbpc = double(keysize) / bestcycles;
+	if(verbose) printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
+
+	outCycles = bestcycles;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.h b/SpeedTest.h
index 5a5ed54..a8f0086 100644
--- a/SpeedTest.h
+++ b/SpeedTest.h
@@ -3,59 +3,6 @@
 #include "Types.h"
 
 void BulkSpeedTest ( pfHash hash );
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype, int keysize >
-void TinySpeedTest ( pfHash hash )
-{
-	const int trials = 100000;
-
-	printf("Small key speed test - %4d-byte keys - ",keysize);
-
-	uint8_t k[keysize];
-	hashtype h;
-
-	double bestcycles = 1e9;
-
-	for(int itrial = 0; itrial < trials; itrial++)
-	{
-		__int64 begin,end;
-
-		rand_p(k,keysize);
-
-		begin = __rdtsc();
-		
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		end = __rdtsc();
-
-		blackhole(*(uint32_t*)(&h));
-
-		double cycles = double(end-begin) / 64;
-		if(cycles < bestcycles) bestcycles = cycles;
-	}
-
-	double bestbpc = double(keysize) / bestcycles;
-	printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
-}
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles );
 
 //-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
index eb2a309..d1eae9f 100644
--- a/Types.h
+++ b/Types.h
@@ -233,5 +233,6 @@ private:
 };
 
 typedef Blob<128> uint128_t;
+typedef Blob<256> uint256_t;
 
 //-----------------------------------------------------------------------------
diff --git a/main.cpp b/main.cpp
index f5fcf04..36636ea 100644
--- a/main.cpp
+++ b/main.cpp
@@ -81,6 +81,7 @@ HashInfo g_hashes[] =
 	{ FNV,                  32, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
 	{ lookup3_test,         32, "lookup3",     "Bob Jenkins' lookup3" },
 	{ SuperFastHash,        32, "superfast",   "Paul Hsieh's SuperFastHash" },
+	{ MurmurOAAT,           32, "MurmurOAAT",  "Murmur one-at-a-time" },
 	
 	// MurmurHash2
 
@@ -92,11 +93,7 @@ HashInfo g_hashes[] =
 	// MurmurHash3
 
 	{ MurmurHash3_x86_32,   32, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-	{ MurmurHash3_x86_64,   64, "Murmur3B",    "MurmurHash3 for x86, 64-bit" },
 	{ MurmurHash3_x86_128, 128, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-
-	{ MurmurHash3_x64_32,   32, "Murmur3D",    "MurmurHash3 for x64, 32-bit" },
-	{ MurmurHash3_x64_64,   64, "Murmur3E",    "MurmurHash3 for x64, 64-bit" },
 	{ MurmurHash3_x64_128, 128, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
@@ -130,7 +127,6 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 
 		QuickBrownFox(hash,hashbits);
 		SanityTest(hash,hashbits);
-		AlignmentTest(hash,hashbits);
 		AppendedZeroesTest(hash,hashbits);
 		printf("\n");
 	}
@@ -145,12 +141,21 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 		BulkSpeedTest(hash);
 		printf("\n");
 
-		TinySpeedTest<hashtype,4>(hash);
-		TinySpeedTest<hashtype,8>(hash);
-		TinySpeedTest<hashtype,16>(hash);
-		TinySpeedTest<hashtype,32>(hash);
-		TinySpeedTest<hashtype,64>(hash);
-		TinySpeedTest<hashtype,128>(hash);
+		for(int i = 1; i < 32; i++)
+		{
+			double cycles;
+
+			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+		}
+
+		for(int i = 32; i <= 2048; i += 32)
+		{
+			double cycles;
+
+
+			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+		}
+
 		printf("\n");
 	}
 
@@ -181,10 +186,44 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 	{
 		printf("[[[ Avalanche Tests ]]]\n\n");
 
-		const int hashbits = sizeof(hashtype) * 8;
+		//const int hashbits = sizeof(hashtype) * 8;
 		bool result = true;
 
-		result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,2000000);
+		result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
+
+		result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+
+		result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<160>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<168>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<176>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<184>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<192>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<200>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<208>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<216>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<224>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<232>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<240>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<248>, hashtype > (hash,300000);
+
+		result &= AvalancheTest< Blob<256>, hashtype > (hash,300000);
+
+		//result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,200000);
+		//result &= AvalancheTest< Blob<768>, hashtype > (hash,2000000);
 
 		// The bit independence test is slow and not particularly useful...
 		//result &= BicTest < Blob<hashbits * 2>, hashtype > ( hash, 1000000 );
@@ -241,34 +280,139 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 
 	if(g_testPermutation || g_testAll)
 	{
-		printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
+		{
+			// This one breaks lookup3, surprisingly
 
-		bool result = true;
-		bool drawDiagram = false;
+			printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
 
-		// This very sparse set of blocks is particularly hard for SuperFastHash
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
 
-		uint32_t blocks[] =
 		{
-			0x00000000,
-			0x00000001,
-			0x00000002,
-			
-			0x00000400,
-			0x00008000,
-			
-			0x00080000,
-			0x00200000,
-
-			0x20000000,
-			0x40000000,
-			0x80000000,
-		};
-
-		result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+			printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
 
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		{
+			printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x80000000,
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		{
+			printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x00000001,
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		{
+			printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+
+				0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		//----------
+
+		/*
+		{
+			printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			// This very sparse set of blocks is particularly hard for SuperFastHash
+
+			uint32_t blocks[] =
+			{
+				0x00000000,
+				0x00000001,
+				0x00000002,
+				
+				0x00000400,
+				0x00008000,
+				
+				0x00080000,
+				0x00200000,
+
+				0x20000000,
+				0x40000000,
+				0x80000000,
+			};
+
+			result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+		*/
 	}
 
 	//-----------------------------------------------------------------------------
@@ -370,6 +514,10 @@ void testHash ( const char * name )
 		{
 			test<uint128_t>( pInfo->hash, pInfo->desc );
 		}
+		else if(pInfo->hashbits == 256)
+		{
+			test<uint256_t>( pInfo->hash, pInfo->desc );
+		}
 		else
 		{
 			printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
@@ -393,9 +541,11 @@ int main ( int argc, char ** argv )
 	//g_testSanity = true;
 	//g_testSpeed = true;
 	//g_testAvalanche = true;
+	//g_testCyclic = true;
 	//g_testDiff = true;
 	//g_testSparse = true;
 	//g_testPermutation = true;
+	//g_testZeroes = true;
 
 	//testHash("rand32");
 	//testHash("rand64");
@@ -408,22 +558,24 @@ int main ( int argc, char ** argv )
 	//printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount);
 
 	//testHash("crc32");
+	//testHash("rand128");
 
 	//testHash("fnv");
 	//testHash("superfast");
 	//testHash("lookup3");
+	//testHash("MurmurOAAT");
 
 	//testHash("murmur2");
 	//testHash("murmur2B");
 	//testHash("murmur2C");
 
-	testHash("murmur3a");
-	testHash("murmur3b");
-	testHash("murmur3c");
+	//testHash("murmur3a");
+	//testHash("murmur3b");
+	//testHash("murmur3c");
 
-	testHash("murmur3d");
-	testHash("murmur3e");
-	testHash("murmur3f");
+	//testHash("murmur3d");
+	//testHash("murmur3e");
+	//testHash("murmur3f");
 
 	//----------
 
@@ -432,4 +584,4 @@ int main ( int argc, char ** argv )
 	printf("time %d\n",b-a);
 
 	return 0;
-}
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 1d160f0d4e28fd329f36dee1998073b0ef4e9a2e Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 28 Feb 2011 06:06:41 +0000
Subject: test murmur3a by default

git-svn-id: http://smhasher.googlecode.com/svn/trunk@77 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.cpp b/main.cpp
index 36636ea..4fe99cf 100644
--- a/main.cpp
+++ b/main.cpp
@@ -569,7 +569,7 @@ int main ( int argc, char ** argv )
 	//testHash("murmur2B");
 	//testHash("murmur2C");
 
-	//testHash("murmur3a");
+	testHash("murmur3a");
 	//testHash("murmur3b");
 	//testHash("murmur3c");
 
-- 
cgit v1.2.3


From b0f11814aac5f47821471c79785b1f6b11966ab8 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 28 Feb 2011 06:40:16 +0000
Subject: Remove a few bits of commented-out code.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@78 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 75e0209..88d08cb 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -34,15 +34,11 @@ inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2
 
 //----------
 
-//void MurmurHash3_x86_32 ( const void * key, int len, const void * seed, void * out )
 void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 	const int nblocks = len / 4;
 
-	//uint32_t * s = (uint32_t*)seed;
-
-	//uint32_t h1 = 0x971e137b ^ s[0];
 	uint32_t h1 = 0x971e137b ^ seed;
 
 	uint32_t c1 = 0x95543787;
@@ -85,7 +81,6 @@ void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
 	h1 *= 0xc2b2ae35;
 	h1 ^= h1 >> 16;
 
-	//h1 ^= s[0];
 	h1 ^= seed;
 
 	*(uint32_t*)out = h1;
@@ -156,19 +151,11 @@ inline uint32_t fmix32 ( uint32_t h )
 	return h;
 }
 
-//void MurmurHash3_x86_128 ( const void * key, const int len, const void * seed, void * out )
 void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 	const int nblocks = len / 16;
 
-	//uint32_t * s = (uint32_t*)(seed);
-
-	//uint32_t h1 = 0x8de1c3ac ^ s[0];
-	//uint32_t h2 = 0xbab98226 ^ s[1];
-	//uint32_t h3 = 0xfcba5b2d ^ s[2];
-	//uint32_t h4 = 0x32452e3e ^ s[3];
-
 	uint32_t h1 = 0x8de1c3ac ^ seed;
 	uint32_t h2 = 0xbab98226 ^ seed;
 	uint32_t h3 = 0xfcba5b2d ^ seed;
-- 
cgit v1.2.3


From f67ce942f6432ceb7ced0c3d12370b6376c05c09 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 14 Mar 2011 09:11:18 +0000
Subject: Add beginnings of bitslice test Add initial cross-platform support
 header De-tabulate MurmurHash3, and make it compile under GCC w/ Platform.h

git-svn-id: http://smhasher.googlecode.com/svn/trunk@86 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Bitslice.cpp    | 127 +++++++++++++
 Bitvec.cpp      |  11 ++
 Bitvec.h        |   8 +
 MurmurHash3.cpp | 568 +++++++++++++++++++++++++++++++-------------------------
 MurmurHash3.h   |   2 +-
 Platform.cpp    |  18 ++
 Platform.h      |  48 +++++
 SMHasher.vcproj |  12 ++
 main.cpp        |  59 +-----
 9 files changed, 549 insertions(+), 304 deletions(-)
 create mode 100644 Bitslice.cpp
 create mode 100644 Platform.cpp
 create mode 100644 Platform.h

diff --git a/Bitslice.cpp b/Bitslice.cpp
new file mode 100644
index 0000000..bce8eba
--- /dev/null
+++ b/Bitslice.cpp
@@ -0,0 +1,127 @@
+#include "Bitvec.h"
+#include <vector>
+#include <assert.h>
+
+// handle xnor
+
+typedef std::vector<uint32_t> slice;
+typedef std::vector<slice> slice_vec;
+
+int countbits ( slice & v )
+{
+	int c = 0;
+
+	for(size_t i = 0; i < v.size(); i++)
+	{
+		int d = countbits(v[i]);
+
+		c += d;
+	}
+
+	return c;
+}
+
+int countxor ( slice & a, slice & b )
+{
+	assert(a.size() == b.size());
+
+	int c = 0;
+
+	for(size_t i = 0; i < a.size(); i++)
+	{
+		int d = countbits(a[i] ^ b[i]);
+
+		c += d;
+	}
+
+	return c;
+}
+
+void xoreq ( slice & a, slice & b )
+{
+	assert(a.size() == b.size());
+
+	for(size_t i = 0; i < a.size(); i++)
+	{
+		a[i] ^= b[i];
+	}
+}
+
+//-----------------------------------------------------------------------------
+// Bitslice a hash set
+
+template< typename hashtype >
+void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
+{
+	const int hashbytes = sizeof(hashtype);
+	const int hashbits = hashbytes * 8;
+	const int slicelen = (hashes.size() + 31) / 32;
+
+	slices.clear();
+	slices.resize(hashbits);
+
+	for(size_t i = 0; i < slices.size(); i++)
+	{
+		slices[i].resize(slicelen,0);
+	}
+
+	for(size_t j = 0; j < hashbits; j++)
+	{
+		void * sliceblob = &(slices[j][0]);
+
+		for(size_t i = 0; i < hashes.size(); i++)
+		{
+			int b = getbit(hashes[i],j);
+
+			setbit(sliceblob,slicelen*4,i,b);
+		}
+	}
+}
+
+void FactorSlices ( slice_vec & slices )
+{
+	std::vector<int> counts(slices.size(),0);
+
+	for(size_t i = 0; i < slices.size(); i++)
+	{
+		counts[i] = countbits(slices[i]);
+	}
+
+	bool changed = true;
+
+	while(changed)
+	{
+		int bestA = -1;
+		int bestB = -1;
+
+		for(size_t j = 0; j < slices.size()-1; j++)
+		{
+			for(size_t i = j+1; i < slices.size(); i++)
+			{
+				int d = countxor(slices[i],slices[j]);
+
+				if((d < counts[i]) && (d < counts[j]))
+				{
+					if(counts[i] < counts[j])
+					{
+						bestA = j;
+						bestB = i;
+					}
+				}
+				else if(d < counts[i])
+				{
+					//bestA = 
+				}
+			}
+		}
+	}
+}
+
+
+void foo ( void )
+{
+	slice a;
+	slice_vec b;
+
+	Bitslice(a,b);
+}
\ No newline at end of file
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 6939980..58f0aca 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -146,6 +146,17 @@ void flipbit ( void * block, int len, uint32_t bit )
 	if(byte < len) b[byte] ^= (1 << bit);
 }
 
+// from the "Bit Twiddling Hacks" webpage
+
+int countbits ( uint32_t v )
+{
+	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
+	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
+	int c = ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
+
+	return c;
+}
+
 //-----------------------------------------------------------------------------
 
 void lshift1 ( void * blob, int len, int c )
diff --git a/Bitvec.h b/Bitvec.h
index 1a475d1..76797c8 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -3,6 +3,7 @@
 #include "pstdint.h"
 
 #include <stdlib.h> // for _rotl, _rotr, etc.
+#include <vector>
 
 //-----------------------------------------------------------------------------
 
@@ -20,6 +21,13 @@ void     clearbit    ( void * blob, int len, uint32_t bit );
 
 void     flipbit     ( void * blob, int len, uint32_t bit );
 
+int      countbits   ( uint32_t v );
+int      countbits   ( std::vector<uint32_t> & v );
+
+int      countbits   ( void * blob, int len );
+
+void     invert      ( std::vector<uint32_t> & v );
+
 //----------
 
 template< typename T >
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 88d08cb..fa2dafc 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -1,5 +1,4 @@
 #include "MurmurHash3.h"
-#include <stdlib.h>    // for _rotl
 
 // Note - The x86 and x64 versions do _not_ produce the same results, as the
 // algorithms are optimized for their respective platforms. You can still
@@ -11,128 +10,132 @@
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
 
-inline uint32_t getblock ( const uint32_t * p, int i )
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
 {
-	return p[i];
+  return p[i];
 }
 
 //----------
 
-inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
+FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & k1, 
+                           uint32_t & c1, uint32_t & c2 )
 {
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
+  c1 = c1*5+0x7b7d159c;
+  c2 = c2*5+0x6bce6396;
 
-	k1 *= c1; 
-	k1 = _rotl(k1,11); 
-	k1 *= c2;
+  k1 *= c1; 
+  k1 = ROTL32(k1,11); 
+  k1 *= c2;
 
-	h1 = _rotl(h1,13);
-	h1 = h1*5+0x52dce729;
-	h1 ^= k1;
+  h1 = ROTL32(h1,13);
+  h1 = h1*5+0x52dce729;
+  h1 ^= k1;
 }
 
 //----------
 
-void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
 {
-	const uint8_t * data = (const uint8_t*)key;
-	const int nblocks = len / 4;
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
 
-	uint32_t h1 = 0x971e137b ^ seed;
+  uint32_t h1 = 0x971e137b ^ seed;
 
-	uint32_t c1 = 0x95543787;
-	uint32_t c2 = 0x2ad7eb25;
+  uint32_t c1 = 0x95543787;
+  uint32_t c2 = 0x2ad7eb25;
 
-	//----------
-	// body
+  //----------
+  // body
 
-	const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
 
-	for(int i = -nblocks; i; i++)
-	{
-		uint32_t k1 = getblock(blocks,i);
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i);
 
-		bmix32(h1,k1,c1,c2);
-	}
+    bmix32(h1,k1,c1,c2);
+  }
 
-	//----------
-	// tail
+  //----------
+  // tail
 
-	const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
 
-	uint32_t k1 = 0;
+  uint32_t k1 = 0;
 
-	switch(len & 3)
-	{
-	case 3: k1 ^= tail[2] << 16;
-	case 2: k1 ^= tail[1] << 8;
-	case 1: k1 ^= tail[0];
-			bmix32(h1,k1,c1,c2);
-	};
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+      bmix32(h1,k1,c1,c2);
+  };
 
-	//----------
-	// finalization
+  //----------
+  // finalization
 
-	h1 ^= len;
+  h1 ^= len;
 
-	h1 *= 0x85ebca6b;
-	h1 ^= h1 >> 13;
-	h1 *= 0xc2b2ae35;
-	h1 ^= h1 >> 16;
+  h1 *= 0x85ebca6b;
+  h1 ^= h1 >> 13;
+  h1 *= 0xc2b2ae35;
+  h1 ^= h1 >> 16;
 
-	h1 ^= seed;
+  h1 ^= seed;
 
-	*(uint32_t*)out = h1;
+  *(uint32_t*)out = h1;
 } 
 
 //-----------------------------------------------------------------------------
 // This mix is large enough that VC++ refuses to inline it unless we use
 // __forceinline. It's also not all that fast due to register spillage.
 
-__forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_t & h4, 
-						    uint32_t & k1, uint32_t & k2, uint32_t & k3, uint32_t & k4, 
-						    uint32_t & c1, uint32_t & c2 )
+FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & h2,
+                           uint32_t & h3, uint32_t & h4, 
+                           uint32_t & k1, uint32_t & k2, 
+                           uint32_t & k3, uint32_t & k4, 
+                           uint32_t & c1, uint32_t & c2 )
 {
-	k1 *= c1; 
-	k1  = _rotl(k1,11); 
-	k1 *= c2;
-	h1 ^= k1;
-	h1 += h2;
-	h1 += h3;
-	h1 += h4;
-
-	h1 = _rotl(h1,17);
-
-	k2 *= c2; 
-	k2  = _rotl(k2,11);
-	k2 *= c1;
-	h2 ^= k2;
-	h2 += h1;
-
-	h1 = h1*3+0x52dce729;
-	h2 = h2*3+0x38495ab5;
-
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
-
-	k3 *= c1; 
-	k3  = _rotl(k3,11); 
-	k3 *= c2;
-	h3 ^= k3;
-	h3 += h1;
-
-	k4 *= c2; 
-	k4  = _rotl(k4,11);
-	k4 *= c1;
-	h4 ^= k4;
-	h4 += h1;
-
-	h3 = h3*3+0x52dce729;
-	h4 = h4*3+0x38495ab5;
-
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
+  k1 *= c1; 
+  k1  = ROTL32(k1,11); 
+  k1 *= c2;
+  h1 ^= k1;
+  h1 += h2;
+  h1 += h3;
+  h1 += h4;
+
+  h1 = ROTL32(h1,17);
+
+  k2 *= c2; 
+  k2  = ROTL32(k2,11);
+  k2 *= c1;
+  h2 ^= k2;
+  h2 += h1;
+
+  h1 = h1*3+0x52dce729;
+  h2 = h2*3+0x38495ab5;
+
+  c1 = c1*5+0x7b7d159c;
+  c2 = c2*5+0x6bce6396;
+
+  k3 *= c1; 
+  k3  = ROTL32(k3,11); 
+  k3 *= c2;
+  h3 ^= k3;
+  h3 += h1;
+
+  k4 *= c2; 
+  k4  = ROTL32(k4,11);
+  k4 *= c1;
+  h4 ^= k4;
+  h4 += h1;
+
+  h3 = h3*3+0x52dce729;
+  h4 = h4*3+0x38495ab5;
+
+  c1 = c1*5+0x7b7d159c;
+  c2 = c2*5+0x6bce6396;
 }
 
 //----------
@@ -140,222 +143,289 @@ __forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_
 
 // avalanches all bits to within 0.25% bias
 
-inline uint32_t fmix32 ( uint32_t h )
+FORCE_INLINE uint32_t fmix32 ( uint32_t h )
 {
-	h ^= h >> 16;
-	h *= 0x85ebca6b;
-	h ^= h >> 13;
-	h *= 0xc2b2ae35;
-	h ^= h >> 16;
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
 
-	return h;
+  return h;
 }
 
-void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
 {
-	const uint8_t * data = (const uint8_t*)key;
-	const int nblocks = len / 16;
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
 
-	uint32_t h1 = 0x8de1c3ac ^ seed;
-	uint32_t h2 = 0xbab98226 ^ seed;
-	uint32_t h3 = 0xfcba5b2d ^ seed;
-	uint32_t h4 = 0x32452e3e ^ seed;
+  uint32_t h1 = 0x8de1c3ac ^ seed;
+  uint32_t h2 = 0xbab98226 ^ seed;
+  uint32_t h3 = 0xfcba5b2d ^ seed;
+  uint32_t h4 = 0x32452e3e ^ seed;
 
-	uint32_t c1 = 0x95543787;
-	uint32_t c2 = 0x2ad7eb25;
+  uint32_t c1 = 0x95543787;
+  uint32_t c2 = 0x2ad7eb25;
 
-	//----------
-	// body
+  //----------
+  // body
 
-	const uint32_t * blocks = (const uint32_t *)(data);
+  const uint32_t * blocks = (const uint32_t *)(data);
 
-	for(int i = 0; i < nblocks; i++)
-	{
-		uint32_t k1 = getblock(blocks,i*4+0);
-		uint32_t k2 = getblock(blocks,i*4+1);
-		uint32_t k3 = getblock(blocks,i*4+2);
-		uint32_t k4 = getblock(blocks,i*4+3);
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint32_t k1 = getblock(blocks,i*4+0);
+    uint32_t k2 = getblock(blocks,i*4+1);
+    uint32_t k3 = getblock(blocks,i*4+2);
+    uint32_t k4 = getblock(blocks,i*4+3);
 
-		bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
-	}
+    bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
+  }
 
-	//----------
-	// tail
+  //----------
+  // tail
 
-	const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
 
-	uint32_t k1 = 0;
-	uint32_t k2 = 0;
-	uint32_t k3 = 0;
-	uint32_t k4 = 0;
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
 
-	switch(len & 15)
-	{
-	case 15: k4 ^= tail[14] << 16;
-	case 14: k4 ^= tail[13] << 8;
-	case 13: k4 ^= tail[12] << 0;
-
-	case 12: k3 ^= tail[11] << 24;
-	case 11: k3 ^= tail[10] << 16;
-	case 10: k3 ^= tail[ 9] << 8;
-	case  9: k3 ^= tail[ 8] << 0;
-
-	case  8: k2 ^= tail[ 7] << 24;
-	case  7: k2 ^= tail[ 6] << 16;
-	case  6: k2 ^= tail[ 5] << 8;
-	case  5: k2 ^= tail[ 4] << 0;
-
-	case  4: k1 ^= tail[ 3] << 24;
-	case  3: k1 ^= tail[ 2] << 16;
-	case  2: k1 ^= tail[ 1] << 8;
-	case  1: k1 ^= tail[ 0] << 0;
-	         bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
-	};
-
-	//----------
-	// finalization
-
-	h4 ^= len;
-
-	h1 += h2; h1 += h3; h1 += h4;
-	h2 += h1; h3 += h1; h4 += h1;
-
-	h1 = fmix32(h1);
-	h2 = fmix32(h2);
-	h3 = fmix32(h3);
-	h4 = fmix32(h4);
-
-	h1 += h2; h1 += h3; h1 += h4;
-	h2 += h1; h3 += h1; h4 += h1;
-
-	((uint32_t*)out)[0] = h1;
-	((uint32_t*)out)[1] = h2;
-	((uint32_t*)out)[2] = h3;
-	((uint32_t*)out)[3] = h4;
+  switch(len & 15)
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
+  };
+
+  //----------
+  // finalization
+
+  h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix32(h1);
+  h2 = fmix32(h2);
+  h3 = fmix32(h3);
+  h4 = fmix32(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
 }
 
 //-----------------------------------------------------------------------------
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
 
-inline uint64_t getblock ( const uint64_t * p, int i )
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
 {
-	return p[i];
+  return p[i];
 }
 
 //----------
 // Block mix - combine the key bits with the hash bits and scramble everything
 
-inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, uint64_t & c1, uint64_t & c2 )
+FORCE_INLINE void bmix64 ( uint64_t & h1, uint64_t & h2, 
+                           uint64_t & k1, uint64_t & k2, 
+                           uint64_t & c1, uint64_t & c2 )
 {
-	k1 *= c1; 
-	k1  = _rotl64(k1,23); 
-	k1 *= c2;
+  k1 *= c1; 
+  k1  = ROTL64(k1,23); 
+  k1 *= c2;
 
-	k2 *= c2; 
-	k2  = _rotl64(k2,23);
-	k2 *= c1;
+  k2 *= c2; 
+  k2  = ROTL64(k2,23);
+  k2 *= c1;
 
-	h1 = _rotl64(h1,17);
-	h1 += h2;
-	h1 ^= k1;
+  h1 = ROTL64(h1,17);
+  h1 += h2;
+  h1 ^= k1;
 
-	h2 = _rotl64(h2,41);
-	h2 += h1;
-	h2 ^= k2;
+  h2 = ROTL64(h2,41);
+  h2 += h1;
+  h2 ^= k2;
 
-	h1 = h1*3+0x52dce729;
-	h2 = h2*3+0x38495ab5;
+  h1 = h1*3+0x52dce729;
+  h2 = h2*3+0x38495ab5;
 
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
+  c1 = c1*5+0x7b7d159c;
+  c2 = c2*5+0x6bce6396;
 }
 
 //----------
 // Finalization mix - avalanches all bits to within 0.05% bias
 
-inline uint64_t fmix64 ( uint64_t k )
+FORCE_INLINE uint64_t fmix64 ( uint64_t k )
 {
-	k ^= k >> 33;
-	k *= 0xff51afd7ed558ccd;
-	k ^= k >> 33;
-	k *= 0xc4ceb9fe1a85ec53;
-	k ^= k >> 33;
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
 
-	return k;
+  return k;
 }
 
 //----------
 
-void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, void * out )
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
 {
-	const uint8_t * data = (const uint8_t*)key;
-	const int nblocks = len / 16;
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
 
-	uint64_t h1 = 0x9368e53c2f6af274 ^ seed;
-	uint64_t h2 = 0x586dcd208f7cd3fd ^ seed;
+  uint64_t h1 = BIG_CONSTANT(0x9368e53c2f6af274) ^ seed;
+  uint64_t h2 = BIG_CONSTANT(0x586dcd208f7cd3fd) ^ seed;
 
-	uint64_t c1 = 0x87c37b91114253d5;
-	uint64_t c2 = 0x4cf5ad432745937f;
+  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
 
-	//----------
-	// body
+  //----------
+  // body
 
-	const uint64_t * blocks = (const uint64_t *)(data);
+  const uint64_t * blocks = (const uint64_t *)(data);
 
-	for(int i = 0; i < nblocks; i++)
-	{
-		uint64_t k1 = getblock(blocks,i*2+0);
-		uint64_t k2 = getblock(blocks,i*2+1);
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
 
-		bmix64(h1,h2,k1,k2,c1,c2);
-	}
+    bmix64(h1,h2,k1,k2,c1,c2);
+  }
 
-	//----------
-	// tail
+  //----------
+  // tail
 
-	const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
 
-	uint64_t k1 = 0;
-	uint64_t k2 = 0;
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
 
-	switch(len & 15)
-	{
-	case 15: k2 ^= uint64_t(tail[14]) << 48;
-	case 14: k2 ^= uint64_t(tail[13]) << 40;
-	case 13: k2 ^= uint64_t(tail[12]) << 32;
-	case 12: k2 ^= uint64_t(tail[11]) << 24;
-	case 11: k2 ^= uint64_t(tail[10]) << 16;
-	case 10: k2 ^= uint64_t(tail[ 9]) << 8;
-	case  9: k2 ^= uint64_t(tail[ 8]) << 0;
-
-	case  8: k1 ^= uint64_t(tail[ 7]) << 56;
-	case  7: k1 ^= uint64_t(tail[ 6]) << 48;
-	case  6: k1 ^= uint64_t(tail[ 5]) << 40;
-	case  5: k1 ^= uint64_t(tail[ 4]) << 32;
-	case  4: k1 ^= uint64_t(tail[ 3]) << 24;
-	case  3: k1 ^= uint64_t(tail[ 2]) << 16;
-	case  2: k1 ^= uint64_t(tail[ 1]) << 8;
-	case  1: k1 ^= uint64_t(tail[ 0]) << 0;
-	         bmix64(h1,h2,k1,k2,c1,c2);
-	};
-
-	//----------
-	// finalization
-
-	h2 ^= len;
-
-	h1 += h2;
-	h2 += h1;
-
-	h1 = fmix64(h1);
-	h2 = fmix64(h2);
-
-	h1 += h2;
-	h2 += h1;
-
-	((uint64_t*)out)[0] = h1;
-	((uint64_t*)out)[1] = h2;
+  switch(len & 15)
+  {
+  case 15: k2 ^= uint64_t(tail[14]) << 48;
+  case 14: k2 ^= uint64_t(tail[13]) << 40;
+  case 13: k2 ^= uint64_t(tail[12]) << 32;
+  case 12: k2 ^= uint64_t(tail[11]) << 24;
+  case 11: k2 ^= uint64_t(tail[10]) << 16;
+  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+
+  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
+  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
+  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
+  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
+  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
+  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
+  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
+  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
+           bmix64(h1,h2,k1,k2,c1,c2);
+  };
+
+  //----------
+  // finalization
+
+  h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix64(h1);
+  h2 = fmix64(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
 }
 
 //-----------------------------------------------------------------------------
+// Quick copy-pasted test code for GCC build
+
+// This should print -
+
+// "The quick brown fox jumps over the lazy dog" => { 0x38585ecf, 0x5f6d752a, 0x0157c98a, 0x8c686b9b, }
+// "The quick brown fox jumps over the lazy cog" => { 0x6d3fd6f0, 0xc86a98a0, 0x4d6fac1c, 0x8f3e52b4, }
+
+#ifndef _MSC_VER
+
+/*
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
+
+void printhex32 ( void * blob, int len )
+{
+	assert((len & 3) == 0);
+
+	uint32_t * d = (uint32_t*)blob;
+
+	printf("{ ");
+
+	for(int i = 0; i < len/4; i++) 
+	{
+		printf("0x%08x, ",d[i]);
+	}
+
+	printf("}");
+}
+
+void QuickBrownFox ( pfHash hash, const int hashbits )
+{
+	const int hashbytes = hashbits / 8;
+
+	const char * text1 = "The quick brown fox jumps over the lazy dog";
+	const char * text2 = "The quick brown fox jumps over the lazy cog";
+
+	uint8_t h1[128];
+	uint8_t h2[128];
+
+	hash(text1,(int)strlen(text1),0,h1);
+	hash(text2,(int)strlen(text2),0,h2);
+
+	printf("\"%s\" => ",text1);
+	printhex32(h1,hashbytes);
+	printf("\n");
+
+	printf("\"%s\" => ",text2);
+	printhex32(h2,hashbytes);
+	printf("\n");
+
+	printf("\n");
+}
+
+int main ( int argc, char** argv )
+{
+  QuickBrownFox(&MurmurHash3_x64_128,128);
+}
+*/
+
+#endif
diff --git a/MurmurHash3.h b/MurmurHash3.h
index a65faa8..a547f0f 100644
--- a/MurmurHash3.h
+++ b/MurmurHash3.h
@@ -1,4 +1,4 @@
-#include "pstdint.h"
+#include "Platform.h"
 
 //-----------------------------------------------------------------------------
 
diff --git a/Platform.cpp b/Platform.cpp
new file mode 100644
index 0000000..42d0575
--- /dev/null
+++ b/Platform.cpp
@@ -0,0 +1,18 @@
+#include "Platform.h"
+
+#if defined(_MSC_VER)
+
+void SetAffinity ( int /*cpu*/ )
+{
+}
+
+#else
+
+#include <windows.h>
+
+void SetAffinity ( int cpu )
+{
+	SetProcessAffinityMask(GetCurrentProcess(),cpu);
+}
+
+#endif
\ No newline at end of file
diff --git a/Platform.h b/Platform.h
new file mode 100644
index 0000000..4c8fa4b
--- /dev/null
+++ b/Platform.h
@@ -0,0 +1,48 @@
+// Platform-specific functions and macros
+
+void SetAffinity ( int cpu );
+
+//-----------------------------------------------------------------------------
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+#include "pstdint.h"
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4702)
+
+#define BIG_CONSTANT(x) (x)
+
+//-----------------------------------------------------------------------------
+// Other compilers
+
+#else	//	defined(_MSC_VER)
+
+#include <stdint.h>
+
+#define	FORCE_INLINE __attribute__((always_inline))
+
+uint32_t inline rotl32 ( uint32_t x, int8_t r )
+{
+	return (x << r) | (x >> (32 - r));
+}
+
+uint64_t inline rotl64 ( uint64_t x, int8_t r )
+{
+	return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif	//	!defined(_MSC_VER)
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index f64135a..51ba878 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -394,6 +394,10 @@
 				RelativePath=".\AvalancheTest.h"
 				>
 			</File>
+			<File
+				RelativePath=".\Bitslice.cpp"
+				>
+			</File>
 			<File
 				RelativePath=".\DifferentialTest.cpp"
 				>
@@ -430,6 +434,14 @@
 				RelativePath=".\Bitvec.h"
 				>
 			</File>
+			<File
+				RelativePath=".\Platform.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Platform.h"
+				>
+			</File>
 			<File
 				RelativePath=".\pstdint.h"
 				>
diff --git a/main.cpp b/main.cpp
index 4fe99cf..8b873a3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,34 +1,15 @@
-#include <stdio.h>
-
+#include "Platform.h"
 #include "hashes.h"
 #include "KeysetTest.h"
 #include "SpeedTest.h"
 #include "AvalancheTest.h"
 #include "DifferentialTest.h"
 
+#include <stdio.h>
 #include <time.h>
-#include <intrin.h>
-#include <windows.h>
-
-#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
 
 bool g_testAll = false;
 
-/*
-bool g_testSanity      = true;
-bool g_testSpeed       = true;
-bool g_testDiff        = true;
-bool g_testAvalanche   = true;
-bool g_testCyclic      = true;
-bool g_testSparse      = true;
-bool g_testPermutation = true;
-bool g_testWindow      = true;
-bool g_testText        = true;
-bool g_testZeroes      = true;
-bool g_testSeed        = true;
-*/
-
-//*
 bool g_testSanity      = false;
 bool g_testSpeed       = false;
 bool g_testDiff        = false;
@@ -40,8 +21,8 @@ bool g_testWindow      = false;
 bool g_testText        = false;
 bool g_testZeroes      = false;
 bool g_testSeed        = false;
-//*/
 
+//-----------------------------------------------------------------------------
 
 int64_t g_hashcount = 0;
 int64_t g_bytecount = 0;
@@ -54,7 +35,6 @@ void counterhash ( const void * , const int len, const uint32_t , void * out )
 	*(uint32_t*)out = rand_u32();
 }
 
-
 //-----------------------------------------------------------------------------
 
 struct HashInfo
@@ -526,18 +506,14 @@ void testHash ( const char * name )
 }
 //-----------------------------------------------------------------------------
 
-#pragma warning(disable : 4100)
-#pragma warning(disable : 4702)
-
 int main ( int argc, char ** argv )
 {
-	SetProcessAffinityMask(GetCurrentProcess(),2);
+	SetAffinity(2);
 
 	int a = clock();
 
 	g_testAll = true;
 
-	//g_testWindow = true;
 	//g_testSanity = true;
 	//g_testSpeed = true;
 	//g_testAvalanche = true;
@@ -547,35 +523,10 @@ int main ( int argc, char ** argv )
 	//g_testPermutation = true;
 	//g_testZeroes = true;
 
-	//testHash("rand32");
-	//testHash("rand64");
-	//testHash("rand128");
-
-	//testHash("donothing");
-
 	//testHash("count");
-
 	//printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount);
 
-	//testHash("crc32");
-	//testHash("rand128");
-
-	//testHash("fnv");
-	//testHash("superfast");
-	//testHash("lookup3");
-	//testHash("MurmurOAAT");
-
-	//testHash("murmur2");
-	//testHash("murmur2B");
-	//testHash("murmur2C");
-
-	testHash("murmur3a");
-	//testHash("murmur3b");
-	//testHash("murmur3c");
-
-	//testHash("murmur3d");
-	//testHash("murmur3e");
-	//testHash("murmur3f");
+	testHash("murmur3f");
 
 	//----------
 
-- 
cgit v1.2.3


From 2aa29c327f897ef89eea7cceac42bfe76088522e Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sat, 19 Mar 2011 08:53:53 +0000
Subject: Cross-platform compilation fixes. Everything should compile under GCC
 now, but no guarantees that it runs correctly yet...

Thanks to McKay Davis for help with the cross-platform testing.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@87 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 AvalancheTest.cpp  |  4 ---
 AvalancheTest.h    |  3 +++
 Bitslice.cpp       | 12 ++++-----
 Bitvec.cpp         | 14 +++++-----
 Bitvec.h           | 15 +++++------
 DifferentialTest.h |  5 ++++
 Hashes.cpp         |  6 ++---
 KeysetTest.h       |  1 +
 MurmurHash1.h      |  2 +-
 MurmurHash2.cpp    |  2 +-
 MurmurHash2.h      |  2 +-
 Platform.cpp       | 17 +++++++++---
 Platform.h         | 39 +++++++++++++++++++++++++--
 SMHasher.vcproj    |  4 ---
 SpeedTest.cpp      | 13 +++++----
 Stats.h            | 15 ++++++-----
 Types.h            |  2 +-
 crc.cpp            |  5 ++--
 crc.h              | 77 ------------------------------------------------------
 main.cpp           | 24 +++++++++++------
 sha1.cpp           |  4 +--
 sha1.h             |  2 +-
 22 files changed, 122 insertions(+), 146 deletions(-)
 delete mode 100644 crc.h

diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp
index bb68b2f..5cdebd8 100644
--- a/AvalancheTest.cpp
+++ b/AvalancheTest.cpp
@@ -1,9 +1,5 @@
 #include "AvalancheTest.h"
 
-#include "Random.h"
-
-#include <math.h>
-
 //-----------------------------------------------------------------------------
 
 void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
diff --git a/AvalancheTest.h b/AvalancheTest.h
index b8f693a..1fbf095 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -11,7 +11,10 @@
 
 #include "Types.h"
 #include "Random.h"
+
 #include <vector>
+#include <stdio.h>
+#include <math.h>
 
 // Avalanche fails if a bit is biased by more than 1%
 
diff --git a/Bitslice.cpp b/Bitslice.cpp
index bce8eba..7e9edd3 100644
--- a/Bitslice.cpp
+++ b/Bitslice.cpp
@@ -55,21 +55,21 @@ void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
 {
 	const int hashbytes = sizeof(hashtype);
 	const int hashbits = hashbytes * 8;
-	const int slicelen = (hashes.size() + 31) / 32;
+	const int slicelen = ((int)hashes.size() + 31) / 32;
 
 	slices.clear();
 	slices.resize(hashbits);
 
-	for(size_t i = 0; i < slices.size(); i++)
+	for(int i = 0; i < (int)slices.size(); i++)
 	{
 		slices[i].resize(slicelen,0);
 	}
 
-	for(size_t j = 0; j < hashbits; j++)
+	for(int j = 0; j < hashbits; j++)
 	{
 		void * sliceblob = &(slices[j][0]);
 
-		for(size_t i = 0; i < hashes.size(); i++)
+		for(int i = 0; i < (int)hashes.size(); i++)
 		{
 			int b = getbit(hashes[i],j);
 
@@ -94,9 +94,9 @@ void FactorSlices ( slice_vec & slices )
 		int bestA = -1;
 		int bestB = -1;
 
-		for(size_t j = 0; j < slices.size()-1; j++)
+		for(int j = 0; j < (int)slices.size()-1; j++)
 		{
-			for(size_t i = j+1; i < slices.size(); i++)
+			for(int i = j+1; i < (int)slices.size(); i++)
 			{
 				int d = countxor(slices[i],slices[j]);
 
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 58f0aca..a8a3007 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -615,13 +615,13 @@ bool test_shift ( void )
 			b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
 			b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
 
-			b = a; lrot1    (&b,nbytes,i);  assert(b == _rotl64(a,i));
-			b = a; lrot8    (&b,nbytes,i);  assert(b == _rotl64(a,i));
-			b = a; lrot32   (&b,nbytes,i);  assert(b == _rotl64(a,i));
+			b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+			b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+			b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));
 
-			b = a; rrot1    (&b,nbytes,i);  assert(b == _rotr64(a,i));
-			b = a; rrot8    (&b,nbytes,i);  assert(b == _rotr64(a,i));
-			b = a; rrot32   (&b,nbytes,i);  assert(b == _rotr64(a,i));
+			b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+			b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+			b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));
 		}
 	}
 
@@ -686,7 +686,7 @@ bool test_window ( void )
 		{
 			for(int count = 0; count < 32; count++)
 			{
-				uint32_t a = (uint32_t)_rotr64(x,start);
+				uint32_t a = (uint32_t)ROTR64(x,start);
 				a &= ((1 << count)-1);
 				
 				uint32_t b = window1 (&x,nbytes,start,count);
diff --git a/Bitvec.h b/Bitvec.h
index 76797c8..b06fd10 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -1,8 +1,7 @@
 #pragma once
 
-#include "pstdint.h"
+#include "Platform.h"
 
-#include <stdlib.h> // for _rotl, _rotr, etc.
 #include <vector>
 
 //-----------------------------------------------------------------------------
@@ -190,10 +189,10 @@ inline void rrot ( T & blob, int c )
 	}
 }
 
-template<> inline void lrot ( uint32_t & blob, int c ) { blob = _rotl(blob,c); }
-template<> inline void lrot ( uint64_t & blob, int c ) { blob = _rotl64(blob,c); }
-template<> inline void rrot ( uint32_t & blob, int c ) { blob = _rotr(blob,c); }
-template<> inline void rrot ( uint64_t & blob, int c ) { blob = _rotr64(blob,c); }
+template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }
+template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }
+template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }
+template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }
 
 //-----------------------------------------------------------------------------
 // Bit-windowing functions - select some N-bit subset of the input blob
@@ -230,13 +229,13 @@ inline uint32_t window ( T & blob, int start, int count )
 template<> 
 inline uint32_t window ( uint32_t & blob, int start, int count )
 {
-	return _rotr(blob,start) & ((1<<count)-1);
+	return ROTR32(blob,start) & ((1<<count)-1);
 }
 
 template<> 
 inline uint32_t window ( uint64_t & blob, int start, int count )
 {
-	return (uint32_t)_rotr64(blob,start) & ((1<<count)-1);
+	return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
index 69e158b..16d2049 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -5,7 +5,12 @@
 #pragma once
 
 #include "Types.h"
+#include "Stats.h"      // for chooseUpToK
+#include "KeysetTest.h" // for SparseKeygenRecurse
+
 #include <vector>
+#include <algorithm>
+#include <stdio.h>
 
 //-----------------------------------------------------------------------------
 // Sort through the differentials, ignoring collisions that only occured once 
diff --git a/Hashes.cpp b/Hashes.cpp
index cebd37f..943847c 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -6,8 +6,8 @@
 #include <stdlib.h>
 //#include <stdint.h>
 #include <assert.h>
-#include <emmintrin.h>
-#include <xmmintrin.h>
+//#include <emmintrin.h>
+//#include <xmmintrin.h>
 
 //----------------------------------------------------------------------------
 // fake / bad hashes
@@ -110,7 +110,7 @@ void FNV ( const void * key, int len, uint32_t seed, void * out )
 
 	const uint8_t * data = (const uint8_t*)key;
 
-	h ^= 2166136261;
+	h ^= BIG_CONSTANT(2166136261);
 
 	for(int i = 0; i < len; i++)
 	{
diff --git a/KeysetTest.h b/KeysetTest.h
index 7ef398c..17ded7b 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -9,6 +9,7 @@
 
 #include "Types.h"
 #include "Stats.h"
+#include "Random.h"   // for rand_p
 
 #include <algorithm>  // for std::swap
 
diff --git a/MurmurHash1.h b/MurmurHash1.h
index e297035..eff8d11 100644
--- a/MurmurHash1.h
+++ b/MurmurHash1.h
@@ -1,4 +1,4 @@
-#include "pstdint.h"
+#include "Platform.h"
 
 //-----------------------------------------------------------------------------
 
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
index 349ed8e..5e98330 100644
--- a/MurmurHash2.cpp
+++ b/MurmurHash2.cpp
@@ -75,7 +75,7 @@ uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
 
 uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
 {
-	const uint64_t m = 0xc6a4a7935bd1e995;
+	const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
 	const int r = 47;
 
 	uint64_t h = seed ^ (len * m);
diff --git a/MurmurHash2.h b/MurmurHash2.h
index e3b00da..f119653 100644
--- a/MurmurHash2.h
+++ b/MurmurHash2.h
@@ -1,4 +1,4 @@
-#include "pstdint.h"
+#include "Platform.h"
 
 //-----------------------------------------------------------------------------
 
diff --git a/Platform.cpp b/Platform.cpp
index 42d0575..5f38872 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -1,12 +1,15 @@
 #include "Platform.h"
 
-#if defined(_MSC_VER)
+#include <stdio.h>
 
-void SetAffinity ( int /*cpu*/ )
+void testRDTSC ( void )
 {
+  int64_t temp = rdtsc();
+
+  printf("%d",(int)temp);
 }
 
-#else
+#if defined(_MSC_VER)
 
 #include <windows.h>
 
@@ -15,4 +18,10 @@ void SetAffinity ( int cpu )
 	SetProcessAffinityMask(GetCurrentProcess(),cpu);
 }
 
-#endif
\ No newline at end of file
+#else
+
+void SetAffinity ( int /*cpu*/ )
+{
+}
+
+#endif
diff --git a/Platform.h b/Platform.h
index 4c8fa4b..92ae44a 100644
--- a/Platform.h
+++ b/Platform.h
@@ -1,5 +1,8 @@
+//-----------------------------------------------------------------------------
 // Platform-specific functions and macros
 
+#pragma once
+
 void SetAffinity ( int cpu );
 
 //-----------------------------------------------------------------------------
@@ -10,10 +13,14 @@ void SetAffinity ( int cpu );
 #define FORCE_INLINE	__forceinline
 
 #include <stdlib.h>
+#include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'
+#include <intrin.h> // for __rdtsc
 #include "pstdint.h"
 
 #define ROTL32(x,y)	_rotl(x,y)
 #define ROTL64(x,y)	_rotl64(x,y)
+#define ROTR32(x,y)	_rotr(x,y)
+#define ROTR64(x,y)	_rotr64(x,y)
 
 #pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
 #pragma warning(disable : 4100)
@@ -21,6 +28,10 @@ void SetAffinity ( int cpu );
 
 #define BIG_CONSTANT(x) (x)
 
+// RDTSC == Read Time Stamp Counter
+
+#define rdtsc() __rdtsc()
+
 //-----------------------------------------------------------------------------
 // Other compilers
 
@@ -30,19 +41,43 @@ void SetAffinity ( int cpu );
 
 #define	FORCE_INLINE __attribute__((always_inline))
 
-uint32_t inline rotl32 ( uint32_t x, int8_t r )
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
 {
 	return (x << r) | (x >> (32 - r));
 }
 
-uint64_t inline rotl64 ( uint64_t x, int8_t r )
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
 {
 	return (x << r) | (x >> (64 - r));
 }
 
+inline uint32_t rotr32 ( uint32_t x, int8_t r )
+{
+	return (x >> r) | (x << (32 - r));
+}
+
+inline uint64_t rotr64 ( uint64_t x, int8_t r )
+{
+	return (x >> r) | (x << (64 - r));
+}
+
 #define	ROTL32(x,y)	rotl32(x,y)
 #define ROTL64(x,y)	rotl64(x,y)
+#define	ROTR32(x,y)	rotr32(x,y)
+#define ROTR64(x,y)	rotr64(x,y)
 
 #define BIG_CONSTANT(x) (x##LLU)
 
+__inline__ unsigned long long int rdtsc()
+{
+    unsigned long long int x;
+    __asm__ volatile ("rdtsc" : "=A" (x));
+    return x;
+}
+
+#include <strings.h>
+#define _stricmp strcasecmp
+
 #endif	//	!defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index 51ba878..05586f7 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -326,10 +326,6 @@
 				RelativePath=".\crc.cpp"
 				>
 			</File>
-			<File
-				RelativePath=".\crc.h"
-				>
-			</File>
 			<File
 				RelativePath=".\Hashes.cpp"
 				>
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index d95acd4..c7c742b 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -3,7 +3,6 @@
 #include "Random.h"
 
 #include <stdio.h>  // for printf
-#include <intrin.h> // for __rdtsc
 
 //-----------------------------------------------------------------------------
 // 256k blocks seem to give the best results.
@@ -27,13 +26,13 @@ void BulkSpeedTest ( pfHash hash )
 
 		for(int itrial = 0; itrial < trials; itrial++)
 		{
-			__int64 begin,end;
+			int64_t begin,end;
 
-			begin = __rdtsc();
+			begin = rdtsc();
 
 			hash(block + align,blocksize,itrial,temp);
 
-			end = __rdtsc();
+			end = rdtsc();
 
 			blackhole(temp[0]);
 
@@ -64,11 +63,11 @@ void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, doubl
 
 	for(int itrial = 0; itrial < trials; itrial++)
 	{
-		__int64 begin,end;
+		int64_t begin,end;
 
 		rand_p(k,keysize);
 
-		begin = __rdtsc();
+		begin = rdtsc();
 		
 		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
 		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
@@ -90,7 +89,7 @@ void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, doubl
 		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
 		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
 
-		end = __rdtsc();
+		end = rdtsc();
 
 		//blackhole(*(uint32_t*)(&h));
 
diff --git a/Stats.h b/Stats.h
index b4afe2c..dd0188c 100644
--- a/Stats.h
+++ b/Stats.h
@@ -4,7 +4,10 @@
 
 #include <math.h>
 #include <vector>
+#include <map>
 #include <algorithm>   // for std::sort
+#include <string.h>    // for memset
+#include <stdio.h>     // for printf
 
 double calcScore ( const int * bins, const int bincount, const int ballcount );
 
@@ -63,22 +66,22 @@ int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
 	{
 		keytype & k1 = keys[i];
 
-		hashtype h = hash(&k1,sizeof(k),0);
+		hashtype h = hash(&k1,sizeof(keytype),0);
 
-		htab::iterator it = tab.find(h);
+		typename htab::iterator it = tab.find(h);
 
 		if(it != tab.end())
 		{
 			keytype & k2 = (*it).second;
 
 			printf("A: ");
-			printbits(&k1,sizeof(k1));
+			printbits(&k1,sizeof(keytype));
 			printf("B: ");
-			printbits(&k2,sizeof(k2));
+			printbits(&k2,sizeof(keytype));
 		}
 		else
 		{
-			htab.insert( htab::value_type(h,k) );
+      tab.insert( std::make_pair(h,k1) );
 		}
 	}
 
@@ -338,7 +341,7 @@ void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, dou
 			bins[index]++;
 		}
 
-		double n = calcScore(bins,(int)hashes.size());
+		double n = calcScore((int*)bins.begin(),(int)hashes.size(),(int)bins.size());
 		
 		davg += n;
 
diff --git a/Types.h b/Types.h
index d1eae9f..9e4ae10 100644
--- a/Types.h
+++ b/Types.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "pstdint.h"
+#include "Platform.h"
 #include "Bitvec.h"
 
 //-----------------------------------------------------------------------------
diff --git a/crc.cpp b/crc.cpp
index 7e65d3a..97d84db 100644
--- a/crc.cpp
+++ b/crc.cpp
@@ -1,3 +1,5 @@
+#include "Platform.h"
+
 /*
  * This file is derived from crc32.c from the zlib-1.1.3 distribution
  * by Jean-loup Gailly and Mark Adler.
@@ -8,9 +10,6 @@
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-typedef unsigned long uint32_t;
-typedef unsigned char uint8_t;
-
 
 /* ========================================================================
  * Table of CRC-32's of all single-byte values (made by make_crc_table)
diff --git a/crc.h b/crc.h
deleted file mode 100644
index 893fa61..0000000
--- a/crc.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/**********************************************************************
- *
- * Filename:    crc.h
- * 
- * Description: A header file describing the various CRC standards.
- *
- * Notes:       
- *
- * 
- * Copyright (c) 2000 by Michael Barr.  This software is placed into
- * the public domain and may be used for any purpose.  However, this
- * notice must not be changed or removed and no warranty is either
- * expressed or implied by its publication or distribution.
- **********************************************************************/
-
-#ifndef _crc_h
-#define _crc_h
-
-
-#define FALSE	0
-#define TRUE	!FALSE
-
-/*
- * Select the CRC standard from the list that follows.
- */
-#define CRC32
-
-
-#if defined(CRC_CCITT)
-
-typedef unsigned short  crc;
-
-#define CRC_NAME			"CRC-CCITT"
-#define POLYNOMIAL			0x1021
-#define INITIAL_REMAINDER	0xFFFF
-#define FINAL_XOR_VALUE		0x0000
-#define REFLECT_DATA		FALSE
-#define REFLECT_REMAINDER	FALSE
-#define CHECK_VALUE			0x29B1
-
-#elif defined(CRC16)
-
-typedef unsigned short  crc;
-
-#define CRC_NAME			"CRC-16"
-#define POLYNOMIAL			0x8005
-#define INITIAL_REMAINDER	0x0000
-#define FINAL_XOR_VALUE		0x0000
-#define REFLECT_DATA		TRUE
-#define REFLECT_REMAINDER	TRUE
-#define CHECK_VALUE			0xBB3D
-
-#elif defined(CRC32)
-
-typedef unsigned long  crc;
-
-#define CRC_NAME			"CRC-32"
-#define POLYNOMIAL			0x04C11DB7
-#define INITIAL_REMAINDER	0xFFFFFFFF
-#define FINAL_XOR_VALUE		0xFFFFFFFF
-#define REFLECT_DATA		TRUE
-#define REFLECT_REMAINDER	TRUE
-#define CHECK_VALUE			0xCBF43926
-
-#else
-
-#error "One of CRC_CCITT, CRC16, or CRC32 must be #define'd."
-
-#endif
-
-
-void  crcInit(void);
-crc   crcSlow(unsigned char const message[], int nBytes);
-crc   crcFast(unsigned char const message[], int nBytes);
-
-
-#endif /* _crc_h */
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index 8b873a3..21d8041 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,5 +1,5 @@
 #include "Platform.h"
-#include "hashes.h"
+#include "Hashes.h"
 #include "KeysetTest.h"
 #include "SpeedTest.h"
 #include "AvalancheTest.h"
@@ -128,6 +128,7 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
 		}
 
+    /*
 		for(int i = 32; i <= 2048; i += 32)
 		{
 			double cycles;
@@ -135,6 +136,7 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 
 			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
 		}
+    */
 
 		printf("\n");
 	}
@@ -169,6 +171,7 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 		//const int hashbits = sizeof(hashtype) * 8;
 		bool result = true;
 
+    /*
 		result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
@@ -182,13 +185,16 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 		result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+    */
 
 		result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<160>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<168>, hashtype > (hash,300000);
+
+    /*
+    result &= AvalancheTest< Blob<168>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<176>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<184>, hashtype > (hash,300000);
 		result &= AvalancheTest< Blob<192>, hashtype > (hash,300000);
@@ -201,6 +207,7 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 		result &= AvalancheTest< Blob<248>, hashtype > (hash,300000);
 
 		result &= AvalancheTest< Blob<256>, hashtype > (hash,300000);
+    */
 
 		//result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,200000);
 		//result &= AvalancheTest< Blob<768>, hashtype > (hash,2000000);
@@ -510,7 +517,7 @@ int main ( int argc, char ** argv )
 {
 	SetAffinity(2);
 
-	int a = clock();
+	int timeBegin = clock();
 
 	g_testAll = true;
 
@@ -526,13 +533,14 @@ int main ( int argc, char ** argv )
 	//testHash("count");
 	//printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount);
 
-	testHash("murmur3f");
+	testHash("murmur3a");
 
 	//----------
 
-	int b = clock();
-
-	printf("time %d\n",b-a);
+	int timeEnd = clock();
 
+  printf("\n");
+	printf("Testing took %f seconds\n",double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
+  printf("-------------------------------------------------------------------------------\n");
 	return 0;
-}
\ No newline at end of file
+}
diff --git a/sha1.cpp b/sha1.cpp
index 03894d6..f663d23 100644
--- a/sha1.cpp
+++ b/sha1.cpp
@@ -88,7 +88,7 @@ A million repetitions of "a"
 
 void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
 
-#define rol _rotl
+#define rol ROTL32
 
 /* blk0() and blk() perform the initial expand. */
 /* I got the idea of expanding during the round function from SSLeay */
@@ -320,4 +320,4 @@ int main(int argc, char** argv)
     fprintf(stdout, "ok\n");
     return(0);
 }
-#endif /* TEST */
+#endif /* TEST */
\ No newline at end of file
diff --git a/sha1.h b/sha1.h
index a866aac..b81088f 100644
--- a/sha1.h
+++ b/sha1.h
@@ -3,7 +3,7 @@
 
 #pragma once
 
-#include "pstdint.h"
+#include "Platform.h"
 
 struct SHA1_CTX
 {
-- 
cgit v1.2.3


From 0eed080e82389d157b2b841dc9062d2ce45b4269 Mon Sep 17 00:00:00 2001
From: "McKay.Davis@gmail.com"
 <McKay.Davis@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sat, 19 Mar 2011 17:48:10 +0000
Subject: Initial commit of CMake file.  Builds SMHasherSupport lib and
 SMHasher executable.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@88 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 CMakeLists.txt | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..3b3cca6
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,42 @@
+project(SMHasher)
+
+cmake_minimum_required(VERSION 2.4)
+
+add_library(
+  SMHasherSupport
+  AvalancheTest.cpp
+  Bitslice.cpp
+  Bitvec.cpp
+  crc.cpp
+  DifferentialTest.cpp
+  Hashes.cpp
+  KeysetTest.cpp
+  lookup3.cpp
+  md5.cpp
+  MurmurHash1.cpp
+  MurmurHash2.cpp
+  MurmurHash2_test.cpp
+  MurmurHash3.cpp
+  MurmurHash64.cpp
+  MurmurHashAligned2.cpp
+  MurmurHashAligned.cpp
+  MurmurHashNeutral2.cpp
+  MurmurHashTest.cpp
+  Platform.cpp
+  Random.cpp
+  sha1.cpp
+  SpeedTest.cpp
+  Stats.cpp
+  SuperFastHash.cpp
+  Types.cpp
+)
+
+add_executable(
+  SMHasher
+  main.cpp
+)
+
+target_link_libraries(
+  SMHasher
+  SMHasherSupport
+)
-- 
cgit v1.2.3


From 3ef87613ed6bd520128d5ba169f1cb99b86b4018 Mon Sep 17 00:00:00 2001
From: "McKay.Davis@gmail.com"
 <McKay.Davis@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sat, 19 Mar 2011 17:52:18 +0000
Subject: Fix x86_64 gcc 4.4.1 build errors:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

/smhasher/MurmurHash1.cpp: In function ‘unsigned int MurmurHash1Aligned(const void*, int, unsigned int)’:
/smhasher/MurmurHash1.cpp:82: error: cast from ‘const unsigned char*’ to ‘int’ loses precision

/smhasher/MurmurHash2.cpp: In function ‘uint32_t MurmurHashAligned2(const void*, int, uint32_t)’:
/smhasher/MurmurHash2.cpp:382: error: cast from ‘const unsigned char*’ to ‘int’ loses precision


git-svn-id: http://smhasher.googlecode.com/svn/trunk@89 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash1.cpp | 2 +-
 MurmurHash2.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp
index 4322bb8..6499a3d 100644
--- a/MurmurHash1.cpp
+++ b/MurmurHash1.cpp
@@ -79,7 +79,7 @@ unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
 
 	unsigned int h = seed ^ (len * m);
 
-	int align = (int)data & 3;
+	int align = (uint64_t)data & 3;
 
 	if(align && (len >= 4))
 	{
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
index 5e98330..e020628 100644
--- a/MurmurHash2.cpp
+++ b/MurmurHash2.cpp
@@ -379,7 +379,7 @@ uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
 
 	uint32_t h = seed ^ len;
 
-	int align = (int)data & 3;
+	int align = (uint64_t)data & 3;
 
 	if(align && (len >= 4))
 	{
-- 
cgit v1.2.3


From 6ffe01004546290235d45b2d8c436180947395a7 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sat, 19 Mar 2011 21:28:26 +0000
Subject: Add startup self-test. Remove randhash (will fail self-test). Remove
 QuickBrownFox (replaced by VerificationTest). De-tabulate all files.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@90 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 AvalancheTest.cpp  |  60 ++--
 AvalancheTest.h    | 294 +++++++++---------
 Bitslice.cpp       | 152 +++++-----
 Bitvec.cpp         | 850 ++++++++++++++++++++++++++---------------------------
 Bitvec.h           | 170 +++++------
 DifferentialTest.h | 276 ++++++++---------
 Hashes.cpp         | 120 ++++----
 Hashes.h           |  10 +-
 KeysetTest.cpp     | 250 +++++++++-------
 KeysetTest.h       | 401 +++++++++++++------------
 MurmurHash1.cpp    | 272 ++++++++---------
 MurmurHash2.cpp    | 704 ++++++++++++++++++++++----------------------
 MurmurHash3.cpp    |  63 ----
 Platform.cpp       |   2 +-
 Platform.h         |   8 +-
 Random.h           | 178 +++++------
 SpeedTest.cpp      | 118 ++++----
 Stats.cpp          |  78 ++---
 Stats.h            | 368 +++++++++++------------
 SuperFastHash.cpp  |  70 ++---
 Types.cpp          |   2 +-
 Types.h            | 376 ++++++++++++------------
 crc.cpp            |  26 +-
 lookup3.cpp        |  64 ++--
 main.cpp           | 800 +++++++++++++++++++++++--------------------------
 md5.cpp            |  14 +-
 pstdint.h          |  80 ++---
 sha1.cpp           |  24 +-
 28 files changed, 2857 insertions(+), 2973 deletions(-)

diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp
index 5cdebd8..38aa452 100644
--- a/AvalancheTest.cpp
+++ b/AvalancheTest.cpp
@@ -4,53 +4,53 @@
 
 void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
 {
-	const char * symbols = ".123456789X";
+  const char * symbols = ".123456789X";
 
-	for(int i = 0; i < y; i++)
-	{
-		printf("[");
-		for(int j = 0; j < x; j++)
-		{
-			int k = (y - i) -1;
+  for(int i = 0; i < y; i++)
+  {
+    printf("[");
+    for(int j = 0; j < x; j++)
+    {
+      int k = (y - i) -1;
 
-			int bin = bins[k + (j*y)];
+      int bin = bins[k + (j*y)];
 
-			double b = double(bin) / double(reps);
-			b = fabs(b*2 - 1);
+      double b = double(bin) / double(reps);
+      b = fabs(b*2 - 1);
 
-			b *= scale;
+      b *= scale;
 
-			int s = (int)floor(b*10);
+      int s = (int)floor(b*10);
 
-			if(s > 10) s = 10;
-			if(s < 0) s = 0;
+      if(s > 10) s = 10;
+      if(s < 0) s = 0;
 
-			printf("%c",symbols[s]);
-		}
+      printf("%c",symbols[s]);
+    }
 
-		printf("]\n");
-	}
+    printf("]\n");
+  }
 }
 
 //----------------------------------------------------------------------------
 
 double maxBias ( std::vector<int> & counts, int reps )
 {
-	double worst = 0;
+  double worst = 0;
 
-	for(int i = 0; i < (int)counts.size(); i++)
-	{
-		double c = double(counts[i]) / double(reps);
+  for(int i = 0; i < (int)counts.size(); i++)
+  {
+    double c = double(counts[i]) / double(reps);
 
-		double d = fabs(c * 2 - 1);
-			
-		if(d > worst)
-		{
-			worst = d;
-		}
-	}
+    double d = fabs(c * 2 - 1);
+      
+    if(d > worst)
+    {
+      worst = d;
+    }
+  }
 
-	return worst;
+  return worst;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/AvalancheTest.h b/AvalancheTest.h
index 1fbf095..966f8b0 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -27,40 +27,40 @@ double maxBias ( std::vector<int> & counts, int reps );
 template < typename keytype, typename hashtype >
 void calcBias ( pfHash hash, std::vector<int> & counts, int reps )
 {
-	const int keybytes = sizeof(keytype);
-	const int hashbytes = sizeof(hashtype);
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
 
-	const int keybits = keybytes * 8;
-	const int hashbits = hashbytes * 8;
+  const int keybits = keybytes * 8;
+  const int hashbits = hashbytes * 8;
 
-	keytype K;
-	hashtype A,B;
+  keytype K;
+  hashtype A,B;
 
-	for(int irep = 0; irep < reps; irep++)
-	{
-		if(irep % (reps/10) == 0) printf(".");
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % (reps/10) == 0) printf(".");
 
-		rand_p(&K,keybytes);
+    rand_p(&K,keybytes);
 
-		hash(&K,keybytes,0,&A);
+    hash(&K,keybytes,0,&A);
 
-		int * cursor = &counts[0];
+    int * cursor = &counts[0];
 
-		for(int iBit = 0; iBit < keybits; iBit++)
-		{
-			flipbit(&K,keybytes,iBit);
-			hash(&K,keybytes,0,&B);
-			flipbit(&K,keybytes,iBit);
+    for(int iBit = 0; iBit < keybits; iBit++)
+    {
+      flipbit(&K,keybytes,iBit);
+      hash(&K,keybytes,0,&B);
+      flipbit(&K,keybytes,iBit);
 
-			for(int iOut = 0; iOut < hashbits; iOut++)
-			{
-				int bitA = getbit(&A,hashbytes,iOut);
-				int bitB = getbit(&B,hashbytes,iOut);
+      for(int iOut = 0; iOut < hashbits; iOut++)
+      {
+        int bitA = getbit(&A,hashbytes,iOut);
+        int bitB = getbit(&B,hashbytes,iOut);
 
-				(*cursor++) += (bitA ^ bitB);
-			}
-		}
-	}
+        (*cursor++) += (bitA ^ bitB);
+      }
+    }
+  }
 }
 
 //-----------------------------------------------------------------------------
@@ -68,37 +68,37 @@ void calcBias ( pfHash hash, std::vector<int> & counts, int reps )
 template < typename keytype, typename hashtype >
 bool AvalancheTest ( pfHash hash, const int reps )
 {
-	const int keybytes = sizeof(keytype);
-	const int hashbytes = sizeof(hashtype);
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
 
-	const int keybits = keybytes * 8;
-	const int hashbits = hashbytes * 8;
+  const int keybits = keybytes * 8;
+  const int hashbits = hashbytes * 8;
 
-	printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
+  printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
 
-	//----------
+  //----------
 
-	std::vector<int> bins(keybits*hashbits,0);
+  std::vector<int> bins(keybits*hashbits,0);
 
-	calcBias<keytype,hashtype>(hash,bins,reps);
-	
-	//----------
+  calcBias<keytype,hashtype>(hash,bins,reps);
+  
+  //----------
 
-	bool result = true;
+  bool result = true;
 
-	double b = maxBias(bins,reps);
+  double b = maxBias(bins,reps);
 
-	printf(" worst bias is %f%%",b * 100.0);
+  printf(" worst bias is %f%%",b * 100.0);
 
-	if(b > AVALANCHE_FAIL)
-	{
-		printf(" !!!!! ");
-		result = false;
-	}
+  if(b > AVALANCHE_FAIL)
+  {
+    printf(" !!!!! ");
+    result = false;
+  }
 
-	printf("\n");
+  printf("\n");
 
-	return result;
+  return result;
 }
 
 //----------------------------------------------------------------------------
@@ -108,83 +108,83 @@ bool AvalancheTest ( pfHash hash, const int reps )
 template< typename keytype, typename hashtype >
 void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
 {
-	const int keybytes = sizeof(keytype);
-	const int hashbytes = sizeof(hashtype);
-	const int hashbits = hashbytes * 8;
-
-	std::vector<int> bins(hashbits*hashbits*4,0);
-
-	keytype key;
-	hashtype h1,h2;
-
-	for(int irep = 0; irep < reps; irep++)
-	{
-		if(verbose)
-		{
-			if(irep % (reps/10) == 0) printf(".");
-		}
-
-		rand_p(&key,keybytes);
-		hash(&key,keybytes,0,&h1);
-
-		flipbit(key,keybit);
-		hash(&key,keybytes,0,&h2);
-
-		keytype d = h1 ^ h2;
-
-		for(int out1 = 0; out1 < hashbits; out1++)
-		for(int out2 = 0; out2 < hashbits; out2++)
-		{
-			if(out1 == out2) continue;
-
-			uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
-
-			bins[(out1 * hashbits + out2) * 4 + b]++;
-		}
-	}
-
-	if(verbose) printf("\n");
-
-	maxBias = 0;
-
-	for(int out1 = 0; out1 < hashbits; out1++)
-	{
-		for(int out2 = 0; out2 < hashbits; out2++)
-		{
-			if(out1 == out2)
-			{
-				if(verbose) printf("\\");
-				continue;
-			}
-
-			double bias = 0;
-
-			for(int b = 0; b < 4; b++)
-			{
-				double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
-				b2 = fabs(b2 * 2 - 1);
-
-				if(b2 > bias) bias = b2;
-			}
-
-			if(bias > maxBias)
-			{
-				maxBias = bias;
-				maxA = out1;
-				maxB = out2;
-			}
-
-			if(verbose) 
-			{
-				if     (bias < 0.01) printf(".");
-				else if(bias < 0.05) printf("o");
-				else if(bias < 0.33) printf("O");
-				else                 printf("X");
-			}
-		}
-
-		if(verbose) printf("\n");
-	}
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  std::vector<int> bins(hashbits*hashbits*4,0);
+
+  keytype key;
+  hashtype h1,h2;
+
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(verbose)
+    {
+      if(irep % (reps/10) == 0) printf(".");
+    }
+
+    rand_p(&key,keybytes);
+    hash(&key,keybytes,0,&h1);
+
+    flipbit(key,keybit);
+    hash(&key,keybytes,0,&h2);
+
+    keytype d = h1 ^ h2;
+
+    for(int out1 = 0; out1 < hashbits; out1++)
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2) continue;
+
+      uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+      bins[(out1 * hashbits + out2) * 4 + b]++;
+    }
+  }
+
+  if(verbose) printf("\n");
+
+  maxBias = 0;
+
+  for(int out1 = 0; out1 < hashbits; out1++)
+  {
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2)
+      {
+        if(verbose) printf("\\");
+        continue;
+      }
+
+      double bias = 0;
+
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.01) printf(".");
+        else if(bias < 0.05) printf("o");
+        else if(bias < 0.33) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    if(verbose) printf("\n");
+  }
 }
 
 //----------
@@ -192,39 +192,39 @@ void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias,
 template< typename keytype, typename hashtype >
 bool BicTest ( pfHash hash, const int reps )
 {
-	const int keybytes = sizeof(keytype);
-	const int keybits = keybytes * 8;
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
 
-	double maxBias = 0;
-	int maxK = 0;
-	int maxA = 0;
-	int maxB = 0;
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
 
-	for(int i = 0; i < keybits; i++)
-	{
-		if(i % (keybits/10) == 0) printf(".");
+  for(int i = 0; i < keybits; i++)
+  {
+    if(i % (keybits/10) == 0) printf(".");
 
-		double bias;
-		int a,b;
-		
-		BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,false);
+    double bias;
+    int a,b;
+    
+    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,false);
 
-		if(bias > maxBias)
-		{
-			maxBias = bias;
-			maxK = i;
-			maxA = a;
-			maxB = b;
-		}
-	}
+    if(bias > maxBias)
+    {
+      maxBias = bias;
+      maxK = i;
+      maxA = a;
+      maxB = b;
+    }
+  }
 
-	printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
 
-	// Bit independence is harder to pass than avalanche, so we're a bit more lax here.
+  // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
 
-	bool result = (maxBias < 0.05);
+  bool result = (maxBias < 0.05);
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Bitslice.cpp b/Bitslice.cpp
index 7e9edd3..428e355 100644
--- a/Bitslice.cpp
+++ b/Bitslice.cpp
@@ -9,42 +9,42 @@ typedef std::vector<slice> slice_vec;
 
 int countbits ( slice & v )
 {
-	int c = 0;
+  int c = 0;
 
-	for(size_t i = 0; i < v.size(); i++)
-	{
-		int d = countbits(v[i]);
+  for(size_t i = 0; i < v.size(); i++)
+  {
+    int d = countbits(v[i]);
 
-		c += d;
-	}
+    c += d;
+  }
 
-	return c;
+  return c;
 }
 
 int countxor ( slice & a, slice & b )
 {
-	assert(a.size() == b.size());
+  assert(a.size() == b.size());
 
-	int c = 0;
+  int c = 0;
 
-	for(size_t i = 0; i < a.size(); i++)
-	{
-		int d = countbits(a[i] ^ b[i]);
+  for(size_t i = 0; i < a.size(); i++)
+  {
+    int d = countbits(a[i] ^ b[i]);
 
-		c += d;
-	}
+    c += d;
+  }
 
-	return c;
+  return c;
 }
 
 void xoreq ( slice & a, slice & b )
 {
-	assert(a.size() == b.size());
+  assert(a.size() == b.size());
 
-	for(size_t i = 0; i < a.size(); i++)
-	{
-		a[i] ^= b[i];
-	}
+  for(size_t i = 0; i < a.size(); i++)
+  {
+    a[i] ^= b[i];
+  }
 }
 
 //-----------------------------------------------------------------------------
@@ -53,75 +53,75 @@ void xoreq ( slice & a, slice & b )
 template< typename hashtype >
 void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
 {
-	const int hashbytes = sizeof(hashtype);
-	const int hashbits = hashbytes * 8;
-	const int slicelen = ((int)hashes.size() + 31) / 32;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int slicelen = ((int)hashes.size() + 31) / 32;
 
-	slices.clear();
-	slices.resize(hashbits);
+  slices.clear();
+  slices.resize(hashbits);
 
-	for(int i = 0; i < (int)slices.size(); i++)
-	{
-		slices[i].resize(slicelen,0);
-	}
+  for(int i = 0; i < (int)slices.size(); i++)
+  {
+    slices[i].resize(slicelen,0);
+  }
 
-	for(int j = 0; j < hashbits; j++)
-	{
-		void * sliceblob = &(slices[j][0]);
+  for(int j = 0; j < hashbits; j++)
+  {
+    void * sliceblob = &(slices[j][0]);
 
-		for(int i = 0; i < (int)hashes.size(); i++)
-		{
-			int b = getbit(hashes[i],j);
+    for(int i = 0; i < (int)hashes.size(); i++)
+    {
+      int b = getbit(hashes[i],j);
 
-			setbit(sliceblob,slicelen*4,i,b);
-		}
-	}
+      setbit(sliceblob,slicelen*4,i,b);
+    }
+  }
 }
 
 void FactorSlices ( slice_vec & slices )
 {
-	std::vector<int> counts(slices.size(),0);
-
-	for(size_t i = 0; i < slices.size(); i++)
-	{
-		counts[i] = countbits(slices[i]);
-	}
-
-	bool changed = true;
-
-	while(changed)
-	{
-		int bestA = -1;
-		int bestB = -1;
-
-		for(int j = 0; j < (int)slices.size()-1; j++)
-		{
-			for(int i = j+1; i < (int)slices.size(); i++)
-			{
-				int d = countxor(slices[i],slices[j]);
-
-				if((d < counts[i]) && (d < counts[j]))
-				{
-					if(counts[i] < counts[j])
-					{
-						bestA = j;
-						bestB = i;
-					}
-				}
-				else if(d < counts[i])
-				{
-					//bestA = 
-				}
-			}
-		}
-	}
+  std::vector<int> counts(slices.size(),0);
+
+  for(size_t i = 0; i < slices.size(); i++)
+  {
+    counts[i] = countbits(slices[i]);
+  }
+
+  bool changed = true;
+
+  while(changed)
+  {
+    int bestA = -1;
+    int bestB = -1;
+
+    for(int j = 0; j < (int)slices.size()-1; j++)
+    {
+      for(int i = j+1; i < (int)slices.size(); i++)
+      {
+        int d = countxor(slices[i],slices[j]);
+
+        if((d < counts[i]) && (d < counts[j]))
+        {
+          if(counts[i] < counts[j])
+          {
+            bestA = j;
+            bestB = i;
+          }
+        }
+        else if(d < counts[i])
+        {
+          //bestA = 
+        }
+      }
+    }
+  }
 }
 
 
 void foo ( void )
 {
-	slice a;
-	slice_vec b;
+  slice a;
+  slice_vec b;
 
-	Bitslice(a,b);
+  Bitslice(a,b);
 }
\ No newline at end of file
diff --git a/Bitvec.cpp b/Bitvec.cpp
index a8a3007..667902d 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -16,617 +16,617 @@ void assert ( bool )
 
 void printbits ( void * blob, int len )
 {
-	uint8_t * data = (uint8_t*)blob;
+  uint8_t * data = (uint8_t*)blob;
 
-	printf("[");
-	for(int i = 0; i < len; i++)
-	{
-		unsigned char byte = data[i];
+  printf("[");
+  for(int i = 0; i < len; i++)
+  {
+    unsigned char byte = data[i];
 
-		int hi = (byte >> 4);
-		int lo = (byte & 0xF);
+    int hi = (byte >> 4);
+    int lo = (byte & 0xF);
 
-		if(hi) printf("%01x",hi);
-		else   printf(".");
+    if(hi) printf("%01x",hi);
+    else   printf(".");
 
-		if(lo) printf("%01x",lo);
-		else   printf(".");
+    if(lo) printf("%01x",lo);
+    else   printf(".");
 
-		if(i != len-1) printf(" ");
-	}
-	printf("]");
+    if(i != len-1) printf(" ");
+  }
+  printf("]");
 }
 
 void printbits2 ( uint8_t * k, int nbytes )
 {
-	printf("[");
+  printf("[");
 
-	for(int i = nbytes-1; i >= 0; i--)
-	{
-		uint8_t b = k[i];
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t b = k[i];
 
-		for(int j = 7; j >= 0; j--)
-		{
-			uint8_t c = (b & (1 << j)) ? '#' : ' ';
+    for(int j = 7; j >= 0; j--)
+    {
+      uint8_t c = (b & (1 << j)) ? '#' : ' ';
 
-			putc(c,stdout);
-		}
-	}
-	printf("]");
+      putc(c,stdout);
+    }
+  }
+  printf("]");
 }
 
 void printhex32 ( void * blob, int len )
 {
-	assert((len & 3) == 0);
+  assert((len & 3) == 0);
 
-	uint32_t * d = (uint32_t*)blob;
+  uint32_t * d = (uint32_t*)blob;
 
-	printf("{ ");
+  printf("{ ");
 
-	for(int i = 0; i < len/4; i++) 
-	{
-		printf("0x%08x, ",d[i]);
-	}
+  for(int i = 0; i < len/4; i++) 
+  {
+    printf("0x%08x, ",d[i]);
+  }
 
-	printf("}");
+  printf("}");
 }
 
 void printbytes ( void * blob, int len )
 {
-	uint8_t * d = (uint8_t*)blob;
+  uint8_t * d = (uint8_t*)blob;
 
-	printf("{ ");
+  printf("{ ");
 
-	for(int i = 0; i < len; i++)
-	{
-		printf("0x%02x, ",d[i]);
-	}
+  for(int i = 0; i < len; i++)
+  {
+    printf("0x%02x, ",d[i]);
+  }
 
-	printf(" };");
+  printf(" };");
 }
 
 //-----------------------------------------------------------------------------
 
 uint32_t getbit ( void * block, int len, uint32_t bit )
 {
-	uint8_t * b = (uint8_t*)block;
+  uint8_t * b = (uint8_t*)block;
 
-	int byte = bit >> 3;
-	bit = bit & 0x7;
-	
-	if(byte < len) return (b[byte] >> bit) & 1;
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) return (b[byte] >> bit) & 1;
 
-	return 0;
+  return 0;
 }
 
 uint32_t getbit_wrap ( void * block, int len, uint32_t bit )
 {
-	uint8_t * b = (uint8_t*)block;
-
-	int byte = bit >> 3;
-	bit = bit & 0x7;
-	
-	byte %= len;
-		
-	return (b[byte] >> bit) & 1;
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  byte %= len;
+    
+  return (b[byte] >> bit) & 1;
 }
 
 void setbit ( void * block, int len, uint32_t bit )
 {
-	uint8_t * b = (uint8_t*)block;
+  uint8_t * b = (uint8_t*)block;
 
-	int byte = bit >> 3;
-	bit = bit & 0x7;
-	
-	if(byte < len) b[byte] |= (1 << bit);
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] |= (1 << bit);
 }
 
 void setbit ( void * block, int len, uint32_t bit, uint32_t val )
 {
-	val ? setbit(block,len,bit) : clearbit(block,len,bit);
+  val ? setbit(block,len,bit) : clearbit(block,len,bit);
 }
 
 void clearbit ( void * block, int len, uint32_t bit )
 {
-	uint8_t * b = (uint8_t*)block;
+  uint8_t * b = (uint8_t*)block;
 
-	int byte = bit >> 3;
-	bit = bit & 0x7;
-	
-	if(byte < len) b[byte] &= ~(1 << bit);
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] &= ~(1 << bit);
 }
 
 void flipbit ( void * block, int len, uint32_t bit )
 {
-	uint8_t * b = (uint8_t*)block;
+  uint8_t * b = (uint8_t*)block;
 
-	int byte = bit >> 3;
-	bit = bit & 0x7;
-	
-	if(byte < len) b[byte] ^= (1 << bit);
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] ^= (1 << bit);
 }
 
 // from the "Bit Twiddling Hacks" webpage
 
 int countbits ( uint32_t v )
 {
-	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
-	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-	int c = ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
+  v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
+  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
+  int c = ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
 
-	return c;
+  return c;
 }
 
 //-----------------------------------------------------------------------------
 
 void lshift1 ( void * blob, int len, int c )
 {
-	int nbits = len*8;
+  int nbits = len*8;
 
-	for(int i = nbits-1; i >= 0; i--)
-	{
-		setbit(blob,len,i,getbit(blob,len,i-c));
-	}
+  for(int i = nbits-1; i >= 0; i--)
+  {
+    setbit(blob,len,i,getbit(blob,len,i-c));
+  }
 }
 
 
 void lshift8 ( void * blob, int nbytes, int c )
 {
-	uint8_t * k = (uint8_t*)blob;
+  uint8_t * k = (uint8_t*)blob;
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	int b = c >> 3;
-	c &= 7;
+  int b = c >> 3;
+  c &= 7;
 
-	for(int i = nbytes-1; i >= b; i--)
-	{
-		k[i] = k[i-b];
-	}
+  for(int i = nbytes-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
 
-	for(int i = b-1; i >= 0; i--)
-	{
-		k[i] = 0;
-	}
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	for(int i = nbytes-1; i >= 0; i--)
-	{
-		uint8_t a = k[i];
-		uint8_t b = (i == 0) ? 0 : k[i-1];
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? 0 : k[i-1];
 
-		k[i] = (a << c) | (b >> (8-c));
-	}
+    k[i] = (a << c) | (b >> (8-c));
+  }
 }
 
 void lshift32 ( void * blob, int len, int c )
 {
-	assert((len & 3) == 0);
+  assert((len & 3) == 0);
 
-	int nbytes  = len;
-	int ndwords = nbytes / 4;
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
 
-	uint32_t * k = (uint32_t*)blob;
+  uint32_t * k = (uint32_t*)blob;
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	//----------
+  //----------
 
-	int b = c / 32;
-	c &= (32-1);
+  int b = c / 32;
+  c &= (32-1);
 
-	for(int i = ndwords-1; i >= b; i--)
-	{
-		k[i] = k[i-b];
-	}
+  for(int i = ndwords-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
 
-	for(int i = b-1; i >= 0; i--)
-	{
-		k[i] = 0;
-	}
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	for(int i = ndwords-1; i >= 0; i--)
-	{
-		uint32_t a = k[i];
-		uint32_t b = (i == 0) ? 0 : k[i-1];
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? 0 : k[i-1];
 
-		k[i] = (a << c) | (b >> (32-c));
-	}
+    k[i] = (a << c) | (b >> (32-c));
+  }
 }
 
 //-----------------------------------------------------------------------------
 
 void rshift1 ( void * blob, int len, int c )
 {
-	int nbits = len*8;
+  int nbits = len*8;
 
-	for(int i = 0; i < nbits; i++)
-	{
-		setbit(blob,len,i,getbit(blob,len,i+c));
-	}
+  for(int i = 0; i < nbits; i++)
+  {
+    setbit(blob,len,i,getbit(blob,len,i+c));
+  }
 }
 
 void rshift8 ( void * blob, int nbytes, int c )
 {
-	uint8_t * k = (uint8_t*)blob;
+  uint8_t * k = (uint8_t*)blob;
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	int b = c >> 3;
-	c &= 7;
+  int b = c >> 3;
+  c &= 7;
 
-	for(int i = 0; i < nbytes-b; i++)
-	{
-		k[i] = k[i+b];
-	}
+  for(int i = 0; i < nbytes-b; i++)
+  {
+    k[i] = k[i+b];
+  }
 
-	for(int i = nbytes-b; i < nbytes; i++)
-	{
-		k[i] = 0;
-	}
+  for(int i = nbytes-b; i < nbytes; i++)
+  {
+    k[i] = 0;
+  }
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	for(int i = 0; i < nbytes; i++)
-	{
-		uint8_t a = (i == nbytes-1) ? 0 : k[i+1];
-		uint8_t b = k[i];
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? 0 : k[i+1];
+    uint8_t b = k[i];
 
-		k[i] = (a << (8-c) ) | (b >> c);
-	}
+    k[i] = (a << (8-c) ) | (b >> c);
+  }
 }
 
 void rshift32 ( void * blob, int len, int c )
 {
-	assert((len & 3) == 0);
+  assert((len & 3) == 0);
 
-	int nbytes  = len;
-	int ndwords = nbytes / 4;
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
 
-	uint32_t * k = (uint32_t*)blob;
+  uint32_t * k = (uint32_t*)blob;
 
-	//----------
+  //----------
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	int b = c / 32;
-	c &= (32-1);
+  int b = c / 32;
+  c &= (32-1);
 
-	for(int i = 0; i < ndwords-b; i++)
-	{
-		k[i] = k[i+b];
-	}
+  for(int i = 0; i < ndwords-b; i++)
+  {
+    k[i] = k[i+b];
+  }
 
-	for(int i = ndwords-b; i < ndwords; i++)
-	{
-		k[i] = 0;
-	}
+  for(int i = ndwords-b; i < ndwords; i++)
+  {
+    k[i] = 0;
+  }
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	for(int i = 0; i < ndwords; i++)
-	{
-		uint32_t a = (i == ndwords-1) ? 0 : k[i+1];
-		uint32_t b = k[i];
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? 0 : k[i+1];
+    uint32_t b = k[i];
 
-		k[i] = (a << (32-c) ) | (b >> c);
-	}
+    k[i] = (a << (32-c) ) | (b >> c);
+  }
 }
 
 //-----------------------------------------------------------------------------
 
 void lrot1 ( void * blob, int len, int c )
 {
-	int nbits = len * 8;
+  int nbits = len * 8;
 
-	for(int i = 0; i < c; i++)
-	{
-		uint32_t bit = getbit(blob,len,nbits-1);
+  for(int i = 0; i < c; i++)
+  {
+    uint32_t bit = getbit(blob,len,nbits-1);
 
-		lshift1(blob,len,1);
+    lshift1(blob,len,1);
 
-		setbit(blob,len,0,bit);
-	}
+    setbit(blob,len,0,bit);
+  }
 }
 
 void lrot8 ( void * blob, int len, int c )
 {
-	int nbytes  = len;
+  int nbytes  = len;
 
-	uint8_t * k = (uint8_t*)blob;
+  uint8_t * k = (uint8_t*)blob;
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	//----------
+  //----------
 
-	int b = c / 8;
-	c &= (8-1);
+  int b = c / 8;
+  c &= (8-1);
 
-	for(int j = 0; j < b; j++)
-	{
-		uint8_t t = k[nbytes-1];
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[nbytes-1];
 
-		for(int i = nbytes-1; i > 0; i--)
-		{
-			k[i] = k[i-1];
-		}
+    for(int i = nbytes-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
 
-		k[0] = t;
-	}
+    k[0] = t;
+  }
 
-	uint8_t t = k[nbytes-1];
+  uint8_t t = k[nbytes-1];
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	for(int i = nbytes-1; i >= 0; i--)
-	{
-		uint8_t a = k[i];
-		uint8_t b = (i == 0) ? t : k[i-1];
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? t : k[i-1];
 
-		k[i] = (a << c) | (b >> (8-c));
-	}
+    k[i] = (a << c) | (b >> (8-c));
+  }
 }
 
 void lrot32 ( void * blob, int len, int c )
 {
-	assert((len & 3) == 0);
+  assert((len & 3) == 0);
 
-	int nbytes  = len;
-	int ndwords = nbytes/4;
+  int nbytes  = len;
+  int ndwords = nbytes/4;
 
-	uint32_t * k = (uint32_t*)blob;
+  uint32_t * k = (uint32_t*)blob;
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	//----------
+  //----------
 
-	int b = c / 32;
-	c &= (32-1);
+  int b = c / 32;
+  c &= (32-1);
 
-	for(int j = 0; j < b; j++)
-	{
-		uint32_t t = k[ndwords-1];
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[ndwords-1];
 
-		for(int i = ndwords-1; i > 0; i--)
-		{
-			k[i] = k[i-1];
-		}
+    for(int i = ndwords-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
 
-		k[0] = t;
-	}
+    k[0] = t;
+  }
 
-	uint32_t t = k[ndwords-1];
+  uint32_t t = k[ndwords-1];
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	for(int i = ndwords-1; i >= 0; i--)
-	{
-		uint32_t a = k[i];
-		uint32_t b = (i == 0) ? t : k[i-1];
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? t : k[i-1];
 
-		k[i] = (a << c) | (b >> (32-c));
-	}
+    k[i] = (a << c) | (b >> (32-c));
+  }
 }
 
 //-----------------------------------------------------------------------------
 
 void rrot1 ( void * blob, int len, int c )
 {
-	int nbits = len * 8;
+  int nbits = len * 8;
 
-	for(int i = 0; i < c; i++)
-	{
-		uint32_t bit = getbit(blob,len,0);
+  for(int i = 0; i < c; i++)
+  {
+    uint32_t bit = getbit(blob,len,0);
 
-		rshift1(blob,len,1);
+    rshift1(blob,len,1);
 
-		setbit(blob,len,nbits-1,bit);
-	}
+    setbit(blob,len,nbits-1,bit);
+  }
 }
 
 void rrot8 ( void * blob, int len, int c )
 {
-	int nbytes  = len;
+  int nbytes  = len;
 
-	uint8_t * k = (uint8_t*)blob;
+  uint8_t * k = (uint8_t*)blob;
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	//----------
+  //----------
 
-	int b = c / 8;
-	c &= (8-1);
+  int b = c / 8;
+  c &= (8-1);
 
-	for(int j = 0; j < b; j++)
-	{
-		uint8_t t = k[0];
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[0];
 
-		for(int i = 0; i < nbytes-1; i++)
-		{
-			k[i] = k[i+1];
-		}
+    for(int i = 0; i < nbytes-1; i++)
+    {
+      k[i] = k[i+1];
+    }
 
-		k[nbytes-1] = t;
-	}
+    k[nbytes-1] = t;
+  }
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	//----------
+  //----------
 
-	uint8_t t = k[0];
+  uint8_t t = k[0];
 
-	for(int i = 0; i < nbytes; i++)
-	{
-		uint8_t a = (i == nbytes-1) ? t : k[i+1];
-		uint8_t b = k[i];
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? t : k[i+1];
+    uint8_t b = k[i];
 
-		k[i] = (a << (8-c)) | (b >> c);
-	}
+    k[i] = (a << (8-c)) | (b >> c);
+  }
 }
 
 void rrot32 ( void * blob, int len, int c )
 {
-	assert((len & 3) == 0);
+  assert((len & 3) == 0);
 
-	int nbytes  = len;
-	int ndwords = nbytes/4;
+  int nbytes  = len;
+  int ndwords = nbytes/4;
 
-	uint32_t * k = (uint32_t*)blob;
+  uint32_t * k = (uint32_t*)blob;
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	//----------
+  //----------
 
-	int b = c / 32;
-	c &= (32-1);
+  int b = c / 32;
+  c &= (32-1);
 
-	for(int j = 0; j < b; j++)
-	{
-		uint32_t t = k[0];
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[0];
 
-		for(int i = 0; i < ndwords-1; i++)
-		{
-			k[i] = k[i+1];
-		}
+    for(int i = 0; i < ndwords-1; i++)
+    {
+      k[i] = k[i+1];
+    }
 
-		k[ndwords-1] = t;
-	}
+    k[ndwords-1] = t;
+  }
 
-	if(c == 0) return;
+  if(c == 0) return;
 
-	//----------
+  //----------
 
-	uint32_t t = k[0];
+  uint32_t t = k[0];
 
-	for(int i = 0; i < ndwords; i++)
-	{
-		uint32_t a = (i == ndwords-1) ? t : k[i+1];
-		uint32_t b = k[i];
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? t : k[i+1];
+    uint32_t b = k[i];
 
-		k[i] = (a << (32-c)) | (b >> c);
-	}
+    k[i] = (a << (32-c)) | (b >> c);
+  }
 }
 
 //-----------------------------------------------------------------------------
 
 uint32_t window1 ( void * blob, int len, int start, int count )
 {
-	int nbits = len*8;
-	start %= nbits;
+  int nbits = len*8;
+  start %= nbits;
 
-	uint32_t t = 0;
+  uint32_t t = 0;
 
-	for(int i = 0; i < count; i++)
-	{
-		setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i));
-	}
+  for(int i = 0; i < count; i++)
+  {
+    setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i));
+  }
 
-	return t;
+  return t;
 }
 
 uint32_t window8 ( void * blob, int len, int start, int count )
 {
-	int nbits = len*8;
-	start %= nbits;
+  int nbits = len*8;
+  start %= nbits;
 
-	uint32_t t = 0;
-	uint8_t * k = (uint8_t*)blob;
+  uint32_t t = 0;
+  uint8_t * k = (uint8_t*)blob;
 
-	if(count == 0) return 0;
+  if(count == 0) return 0;
 
-	int c = start & (8-1);
-	int d = start / 8;
+  int c = start & (8-1);
+  int d = start / 8;
 
-	for(int i = 0; i < 4; i++)
-	{
-		int ia = (i + d + 1) % len;
-		int ib = (i + d + 0) % len;
+  for(int i = 0; i < 4; i++)
+  {
+    int ia = (i + d + 1) % len;
+    int ib = (i + d + 0) % len;
 
-		uint32_t a = k[ia];
-		uint32_t b = k[ib];
-		
-		uint32_t m = (a << (8-c)) | (b >> c);
+    uint32_t a = k[ia];
+    uint32_t b = k[ib];
+    
+    uint32_t m = (a << (8-c)) | (b >> c);
 
-		t |= (m << (8*i));
+    t |= (m << (8*i));
 
-	}
+  }
 
-	t &= ((1 << count)-1);
+  t &= ((1 << count)-1);
 
-	return t;
+  return t;
 }
 
 uint32_t window32 ( void * blob, int len, int start, int count )
 {
-	int nbits = len*8;
-	start %= nbits;
+  int nbits = len*8;
+  start %= nbits;
 
-	assert((len & 3) == 0);
+  assert((len & 3) == 0);
 
-	int ndwords = len / 4;
+  int ndwords = len / 4;
 
-	uint32_t * k = (uint32_t*)blob;
+  uint32_t * k = (uint32_t*)blob;
 
-	if(count == 0) return 0;
+  if(count == 0) return 0;
 
-	int c = start & (32-1);
-	int d = start / 32;
+  int c = start & (32-1);
+  int d = start / 32;
 
-	if(c == 0) return (k[d] & ((1 << count) - 1));
+  if(c == 0) return (k[d] & ((1 << count) - 1));
 
-	int ia = (d + 1) % ndwords;
-	int ib = (d + 0) % ndwords;
+  int ia = (d + 1) % ndwords;
+  int ib = (d + 0) % ndwords;
 
-	uint32_t a = k[ia];
-	uint32_t b = k[ib];
-	
-	uint32_t t = (a << (32-c)) | (b >> c);
+  uint32_t a = k[ia];
+  uint32_t b = k[ib];
+  
+  uint32_t t = (a << (32-c)) | (b >> c);
 
-	t &= ((1 << count)-1);
+  t &= ((1 << count)-1);
 
-	return t;
+  return t;
 }
 
 //-----------------------------------------------------------------------------
 
 bool test_shift ( void )
 {
-	int nbits   = 64;
-	int nbytes  = nbits / 8;
-	int reps = 10000;
-
-	for(int j = 0; j < reps; j++)
-	{
-		if(j % (reps/10) == 0) printf(".");
-
-		uint64_t a = rand_u64();
-		uint64_t b;
-
-		for(int i = 0; i < nbits; i++)
-		{
-			b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));
-			b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));
-			b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));
-
-			b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));
-			b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
-			b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
-
-			b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));
-			b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));
-			b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));
-
-			b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));
-			b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));
-			b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));
-		}
-	}
-
-	printf("PASS\n");
-	return true;
+  int nbits   = 64;
+  int nbytes  = nbits / 8;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    uint64_t a = rand_u64();
+    uint64_t b;
+
+    for(int i = 0; i < nbits; i++)
+    {
+      b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));
+
+      b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
+
+      b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));
+
+      b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));
+    }
+  }
+
+  printf("PASS\n");
+  return true;
 }
 
 //-----------------------------------------------------------------------------
@@ -634,86 +634,86 @@ bool test_shift ( void )
 template < int nbits >
 bool test_window2 ( void )
 {
-	struct keytype
-	{
-		uint8_t bytes[nbits/8];
-	};
+  struct keytype
+  {
+    uint8_t bytes[nbits/8];
+  };
 
-	int nbytes = nbits / 8;
-	int reps = 10000;
+  int nbytes = nbits / 8;
+  int reps = 10000;
 
-	for(int j = 0; j < reps; j++)
-	{
-		if(j % (reps/10) == 0) printf(".");
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
 
-		keytype k;
+    keytype k;
 
-		rand_p(&k,nbytes);
+    rand_p(&k,nbytes);
 
-		for(int start = 0; start < nbits; start++)
-		{
-			for(int count = 0; count < 32; count++)
-			{
-				uint32_t a = window1(&k,nbytes,start,count);
-				uint32_t b = window8(&k,nbytes,start,count);
-				uint32_t c = window(&k,nbytes,start,count);
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = window1(&k,nbytes,start,count);
+        uint32_t b = window8(&k,nbytes,start,count);
+        uint32_t c = window(&k,nbytes,start,count);
 
-				assert(a == b);
-				assert(a == c);
-			}
-		}
-	}
+        assert(a == b);
+        assert(a == c);
+      }
+    }
+  }
 
-	printf("PASS %d\n",nbits);
+  printf("PASS %d\n",nbits);
 
-	return true;
+  return true;
 }
 
 bool test_window ( void )
 {
-	int reps = 10000;
-
-	for(int j = 0; j < reps; j++)
-	{
-		if(j % (reps/10) == 0) printf(".");
-
-		int nbits   = 64;
-		int nbytes  = nbits / 8;
-
-		uint64_t x = rand_u64();
-
-		for(int start = 0; start < nbits; start++)
-		{
-			for(int count = 0; count < 32; count++)
-			{
-				uint32_t a = (uint32_t)ROTR64(x,start);
-				a &= ((1 << count)-1);
-				
-				uint32_t b = window1 (&x,nbytes,start,count);
-				uint32_t c = window8 (&x,nbytes,start,count);
-				uint32_t d = window32(&x,nbytes,start,count);
-				uint32_t e = window  (x,start,count);
-
-				assert(a == b);
-				assert(a == c);
-				assert(a == d);
-				assert(a == e);
-			}
-		}
-	}
-
-	printf("PASS 64\n");
-
-	test_window2<8>();
-	test_window2<16>();
-	test_window2<24>();
-	test_window2<32>();
-	test_window2<40>();
-	test_window2<48>();
-	test_window2<56>();
-	test_window2<64>();
-
-	return true;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    int nbits   = 64;
+    int nbytes  = nbits / 8;
+
+    uint64_t x = rand_u64();
+
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = (uint32_t)ROTR64(x,start);
+        a &= ((1 << count)-1);
+        
+        uint32_t b = window1 (&x,nbytes,start,count);
+        uint32_t c = window8 (&x,nbytes,start,count);
+        uint32_t d = window32(&x,nbytes,start,count);
+        uint32_t e = window  (x,start,count);
+
+        assert(a == b);
+        assert(a == c);
+        assert(a == d);
+        assert(a == e);
+      }
+    }
+  }
+
+  printf("PASS 64\n");
+
+  test_window2<8>();
+  test_window2<16>();
+  test_window2<24>();
+  test_window2<32>();
+  test_window2<40>();
+  test_window2<48>();
+  test_window2<56>();
+  test_window2<64>();
+
+  return true;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Bitvec.h b/Bitvec.h
index b06fd10..ac91c36 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -32,7 +32,7 @@ void     invert      ( std::vector<uint32_t> & v );
 template< typename T >
 inline uint32_t getbit ( T & blob, uint32_t bit )
 {
-	return getbit(&blob,sizeof(blob),bit);
+  return getbit(&blob,sizeof(blob),bit);
 }
 
 template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }
@@ -43,7 +43,7 @@ template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (bl
 template< typename T >
 inline void setbit ( T & blob, uint32_t bit )
 {
-	return setbit(&blob,sizeof(blob),bit);
+  return setbit(&blob,sizeof(blob),bit);
 }
 
 template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }
@@ -54,7 +54,7 @@ template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64
 template< typename T >
 inline void flipbit ( T & blob, uint32_t bit )
 {
-	flipbit(&blob,sizeof(blob),bit);
+  flipbit(&blob,sizeof(blob),bit);
 }
 
 template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }
@@ -74,52 +74,52 @@ void rshift32 ( void * blob, int len, int c );
 
 inline void lshift ( void * blob, int len, int c )
 {
-	if((len & 3) == 0)
-	{
-		lshift32(blob,len,c);
-	}
-	else
-	{
-		lshift8(blob,len,c);
-	}
+  if((len & 3) == 0)
+  {
+    lshift32(blob,len,c);
+  }
+  else
+  {
+    lshift8(blob,len,c);
+  }
 }
 
 inline void rshift ( void * blob, int len, int c )
 {
-	if((len & 3) == 0)
-	{
-		rshift32(blob,len,c);
-	}
-	else
-	{
-		rshift8(blob,len,c);
-	}
+  if((len & 3) == 0)
+  {
+    rshift32(blob,len,c);
+  }
+  else
+  {
+    rshift8(blob,len,c);
+  }
 }
 
 template < typename T >
 inline void lshift ( T & blob, int c )
 {
-	if((sizeof(T) & 3) == 0)
-	{
-		lshift32(&blob,sizeof(T),c);
-	}
-	else
-	{
-		lshift8(&blob,sizeof(T),c);
-	}
+  if((sizeof(T) & 3) == 0)
+  {
+    lshift32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    lshift8(&blob,sizeof(T),c);
+  }
 }
 
 template < typename T >
 inline void rshift ( T & blob, int c )
 {
-	if((sizeof(T) & 3) == 0)
-	{
-		lshift32(&blob,sizeof(T),c);
-	}
-	else
-	{
-		lshift8(&blob,sizeof(T),c);
-	}
+  if((sizeof(T) & 3) == 0)
+  {
+    lshift32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    lshift8(&blob,sizeof(T),c);
+  }
 }
 
 template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }
@@ -141,52 +141,52 @@ void rrot32   ( void * blob, int len, int c );
 
 inline void lrot ( void * blob, int len, int c )
 {
-	if((len & 3) == 0)
-	{
-		return lrot32(blob,len,c);
-	}
-	else
-	{
-		return lrot8(blob,len,c);
-	}
+  if((len & 3) == 0)
+  {
+    return lrot32(blob,len,c);
+  }
+  else
+  {
+    return lrot8(blob,len,c);
+  }
 }
 
 inline void rrot ( void * blob, int len, int c )
 {
-	if((len & 3) == 0)
-	{
-		return rrot32(blob,len,c);
-	}
-	else
-	{
-		return rrot8(blob,len,c);
-	}
+  if((len & 3) == 0)
+  {
+    return rrot32(blob,len,c);
+  }
+  else
+  {
+    return rrot8(blob,len,c);
+  }
 }
 
 template < typename T >
 inline void lrot ( T & blob, int c )
 {
-	if((sizeof(T) & 3) == 0)
-	{
-		return lrot32(&blob,sizeof(T),c);
-	}
-	else
-	{
-		return lrot8(&blob,sizeof(T),c);
-	}
+  if((sizeof(T) & 3) == 0)
+  {
+    return lrot32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    return lrot8(&blob,sizeof(T),c);
+  }
 }
 
 template < typename T >
 inline void rrot ( T & blob, int c )
 {
-	if((sizeof(T) & 3) == 0)
-	{
-		return rrot32(&blob,sizeof(T),c);
-	}
-	else
-	{
-		return rrot8(&blob,sizeof(T),c);
-	}
+  if((sizeof(T) & 3) == 0)
+  {
+    return rrot32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    return rrot8(&blob,sizeof(T),c);
+  }
 }
 
 template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }
@@ -203,39 +203,39 @@ uint32_t window32 ( void * blob, int len, int start, int count );
 
 inline uint32_t window ( void * blob, int len, int start, int count )
 {
-	if(len & 3)
-	{
-		return window8(blob,len,start,count);
-	}
-	else
-	{
-		return window32(blob,len,start,count);
-	}
+  if(len & 3)
+  {
+    return window8(blob,len,start,count);
+  }
+  else
+  {
+    return window32(blob,len,start,count);
+  }
 }
 
 template < typename T >
 inline uint32_t window ( T & blob, int start, int count )
 {
-	if((sizeof(T) & 3) == 0)
-	{
-		return window32(&blob,sizeof(T),start,count);
-	}
-	else
-	{
-		return window8(&blob,sizeof(T),start,count);
-	}
+  if((sizeof(T) & 3) == 0)
+  {
+    return window32(&blob,sizeof(T),start,count);
+  }
+  else
+  {
+    return window8(&blob,sizeof(T),start,count);
+  }
 }
 
 template<> 
 inline uint32_t window ( uint32_t & blob, int start, int count )
 {
-	return ROTR32(blob,start) & ((1<<count)-1);
+  return ROTR32(blob,start) & ((1<<count)-1);
 }
 
 template<> 
 inline uint32_t window ( uint64_t & blob, int start, int count )
 {
-	return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);
+  return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
index 16d2049..0894c0a 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -20,75 +20,75 @@
 template < class keytype >
 bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
 {
-	std::sort(diffs.begin(), diffs.end());
-
-	int count = 1;
-	int ignore = 0;
-
-	bool result = true;
-
-	if(diffs.size())
-	{
-		keytype kp = diffs[0];
-
-		for(int i = 1; i < (int)diffs.size(); i++)
-		{
-			if(diffs[i] == kp)
-			{
-				count++;
-				continue;
-			}
-			else
-			{
-				if(count > 1)
-				{
-					result = false;
-
-					double pct = 100 * (double(count) / double(reps));
-
-					if(dumpCollisions)
-					{
-						printbits((unsigned char*)&kp,sizeof(kp));
-						printf(" - %4.2f%%\n", pct );
-					}
-				}
-				else 
-				{
-					ignore++;
-				}
-
-				kp = diffs[i];
-				count = 1;
-			}
-		}
-
-		if(count > 1)
-		{
-			double pct = 100 * (double(count) / double(reps));
-
-			if(dumpCollisions)
-			{
-				printbits((unsigned char*)&kp,sizeof(kp));
-				printf(" - %4.2f%%\n", pct );
-			}
-		}
-		else 
-		{
-			ignore++;
-		}
-	}
-
-	printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
-
-	if(result == false)
-	{
-		printf(" !!!!! ");
-	}
-
-	printf("\n");
-	printf("\n");
-
-	return result;
+  std::sort(diffs.begin(), diffs.end());
+
+  int count = 1;
+  int ignore = 0;
+
+  bool result = true;
+
+  if(diffs.size())
+  {
+    keytype kp = diffs[0];
+
+    for(int i = 1; i < (int)diffs.size(); i++)
+    {
+      if(diffs[i] == kp)
+      {
+        count++;
+        continue;
+      }
+      else
+      {
+        if(count > 1)
+        {
+          result = false;
+
+          double pct = 100 * (double(count) / double(reps));
+
+          if(dumpCollisions)
+          {
+            printbits((unsigned char*)&kp,sizeof(kp));
+            printf(" - %4.2f%%\n", pct );
+          }
+        }
+        else 
+        {
+          ignore++;
+        }
+
+        kp = diffs[i];
+        count = 1;
+      }
+    }
+
+    if(count > 1)
+    {
+      double pct = 100 * (double(count) / double(reps));
+
+      if(dumpCollisions)
+      {
+        printbits((unsigned char*)&kp,sizeof(kp));
+        printf(" - %4.2f%%\n", pct );
+      }
+    }
+    else 
+    {
+      ignore++;
+    }
+  }
+
+  printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
+
+  if(result == false)
+  {
+    printf(" !!!!! ");
+  }
+
+  printf("\n");
+  printf("\n");
+
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -102,28 +102,28 @@ bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCol
 template < typename keytype, typename hashtype >
 void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
 {
-	const int bits = sizeof(keytype)*8;
+  const int bits = sizeof(keytype)*8;
 
-	for(int i = start; i < bits; i++)
-	{
-		flipbit(&k2,sizeof(k2),i);
-		bitsleft--;
+  for(int i = start; i < bits; i++)
+  {
+    flipbit(&k2,sizeof(k2),i);
+    bitsleft--;
 
-		hash(&k2,sizeof(k2),0,&h2);
+    hash(&k2,sizeof(k2),0,&h2);
 
-		if(h1 == h2)
-		{
-			diffs.push_back(k1 ^ k2);
-		}
+    if(h1 == h2)
+    {
+      diffs.push_back(k1 ^ k2);
+    }
 
-		if(bitsleft)
-		{
-			DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
-		}
+    if(bitsleft)
+    {
+      DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
+    }
 
-		flipbit(&k2,sizeof(k2),i);
-		bitsleft++;
-	}
+    flipbit(&k2,sizeof(k2),i);
+    bitsleft++;
+  }
 }
 
 //----------
@@ -131,39 +131,39 @@ void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, h
 template < typename keytype, typename hashtype >
 bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
 {
-	const int keybits = sizeof(keytype) * 8;
-	const int hashbits = sizeof(hashtype) * 8;
+  const int keybits = sizeof(keytype) * 8;
+  const int hashbits = sizeof(hashtype) * 8;
 
-	double diffcount = chooseUpToK(keybits,diffbits);
-	double testcount = (diffcount * double(reps));
-	double expected  = testcount / pow(2.0,double(hashbits));
+  double diffcount = chooseUpToK(keybits,diffbits);
+  double testcount = (diffcount * double(reps));
+  double expected  = testcount / pow(2.0,double(hashbits));
 
-	std::vector<keytype> diffs;
+  std::vector<keytype> diffs;
 
-	keytype k1,k2;
-	hashtype h1,h2;
+  keytype k1,k2;
+  hashtype h1,h2;
 
-	printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
-	printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
+  printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
+  printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
 
-	for(int i = 0; i < reps; i++)
-	{
-		if(i % (reps/10) == 0) printf(".");
+  for(int i = 0; i < reps; i++)
+  {
+    if(i % (reps/10) == 0) printf(".");
 
-		rand_p(&k1,sizeof(k1));
-		k2 = k1;
+    rand_p(&k1,sizeof(k1));
+    k2 = k1;
 
-		hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
+    hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
 
-		DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
-	}
-	printf("\n");
+    DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
+  }
+  printf("\n");
 
-	bool result = true;
+  bool result = true;
 
-	result &= ProcessDifferentials(diffs,reps,dumpCollisions);
+  result &= ProcessDifferentials(diffs,reps,dumpCollisions);
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -185,53 +185,53 @@ bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
 template < typename keytype, typename hashtype >
 void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
 {
-	std::vector<keytype>  keys(trials);
-	std::vector<hashtype> A(trials),B(trials);
+  std::vector<keytype>  keys(trials);
+  std::vector<hashtype> A(trials),B(trials);
 
-	for(int i = 0; i < trials; i++)
-	{
-		rand_p(&keys[i],sizeof(keytype));
+  for(int i = 0; i < trials; i++)
+  {
+    rand_p(&keys[i],sizeof(keytype));
 
-		hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
-	}
+    hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
+  }
 
-	//----------
+  //----------
 
-	std::vector<keytype> diffs;
+  std::vector<keytype> diffs;
 
-	keytype temp(0);
+  keytype temp(0);
 
-	SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
+  SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
 
-	//----------
+  //----------
 
-	worst = 0;
-	avg = 0;
+  worst = 0;
+  avg = 0;
 
-	hashtype h2;
+  hashtype h2;
 
-	for(size_t j = 0; j < diffs.size(); j++)
-	{
-		keytype & d = diffs[j];
+  for(size_t j = 0; j < diffs.size(); j++)
+  {
+    keytype & d = diffs[j];
 
-		for(int i = 0; i < trials; i++)
-		{
-			keytype k2 = keys[i] ^ d;
+    for(int i = 0; i < trials; i++)
+    {
+      keytype k2 = keys[i] ^ d;
 
-			hash(&k2,sizeof(k2),0,&h2);
+      hash(&k2,sizeof(k2),0,&h2);
 
-			B[i] = A[i] ^ h2;
-		}
+      B[i] = A[i] ^ h2;
+    }
 
-		double dworst,davg;
+    double dworst,davg;
 
-		TestDistributionFast(B,dworst,davg);
+    TestDistributionFast(B,dworst,davg);
 
-		avg += davg;
-		worst = (dworst > worst) ? dworst : worst;
-	}
+    avg += davg;
+    worst = (dworst > worst) ? dworst : worst;
+  }
 
-	avg /= double(diffs.size());
+  avg /= double(diffs.size());
 }
 
 //----------------------------------------------------------------------------
diff --git a/Hashes.cpp b/Hashes.cpp
index 943847c..890aeb0 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -12,72 +12,52 @@
 //----------------------------------------------------------------------------
 // fake / bad hashes
 
-void randhash_32 ( const void *, int, uint32_t, void * out )
-{
-	((uint32_t*)out)[0] = rand_u32();
-}
-
-void randhash_64 ( const void *, int, uint32_t, void * out )
-{
-	((uint32_t*)out)[0] = rand_u32();
-	((uint32_t*)out)[1] = rand_u32();
-}
-
-void randhash_128 ( const void *, int, uint32_t, void * out )
-{
-	((uint32_t*)out)[0] = rand_u32();
-	((uint32_t*)out)[1] = rand_u32();
-	((uint32_t*)out)[2] = rand_u32();
-	((uint32_t*)out)[3] = rand_u32();
-}
-
 void BadHash ( const void * key, int len, uint32_t seed, void * out )
 {
-	uint32_t h = seed;
+  uint32_t h = seed;
 
-	const uint8_t * data = (const uint8_t*)key;
+  const uint8_t * data = (const uint8_t*)key;
 
-	for(int i = 0; i < len; i++)
-	{
-		h ^= h >> 3;
-		h ^= h << 5;
-		h ^= data[i];
-	}
+  for(int i = 0; i < len; i++)
+  {
+    h ^= h >> 3;
+    h ^= h << 5;
+    h ^= data[i];
+  }
 
-	*(uint32_t*)out = h;
+  *(uint32_t*)out = h;
 }
 
 void sumhash ( const void * key, int len, uint32_t seed, void * out )
 {
-	uint32_t h = seed;
+  uint32_t h = seed;
 
-	const uint8_t * data = (const uint8_t*)key;
+  const uint8_t * data = (const uint8_t*)key;
 
-	for(int i = 0; i < len; i++)
-	{
-		h += data[i];
-	}
+  for(int i = 0; i < len; i++)
+  {
+    h += data[i];
+  }
 
-	*(uint32_t*)out = h;
+  *(uint32_t*)out = h;
 }
 
 void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
 {
-	uint32_t h = seed;
+  uint32_t h = seed;
 
-	const uint32_t * data = (const uint32_t*)key;
+  const uint32_t * data = (const uint32_t*)key;
 
-	for(int i = 0; i < len/4; i++)
-	{
-		h += data[i];
-	}
+  for(int i = 0; i < len/4; i++)
+  {
+    h += data[i];
+  }
 
-	*(uint32_t*)out = h;
+  *(uint32_t*)out = h;
 }
 
 void DoNothingHash ( const void *, int, uint32_t, void * )
 {
-	return;
 }
 
 //-----------------------------------------------------------------------------
@@ -85,50 +65,50 @@ void DoNothingHash ( const void *, int, uint32_t, void * )
 
 void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out )
 {
-	const uint8_t * data = (const uint8_t*)key;
+  const uint8_t * data = (const uint8_t*)key;
 
-	uint32_t h = seed ^ len;
+  uint32_t h = seed ^ len;
 
-	for(int i = 0; i < len; i++)
-	{
-		h ^= data[i];
-		h *= 0x5bd1e995;
-		h ^= h >> 15;
-	}
+  for(int i = 0; i < len; i++)
+  {
+    h ^= data[i];
+    h *= 0x5bd1e995;
+    h ^= h >> 15;
+  }
 
-	h *= 0x5bd1e995;
-	h ^= h >> 15;
+  h *= 0x5bd1e995;
+  h ^= h >> 15;
 
-	*(uint32_t*)out = h;
+  *(uint32_t*)out = h;
 }
 
 //----------------------------------------------------------------------------
 
 void FNV ( const void * key, int len, uint32_t seed, void * out )
 {
-	unsigned int h = seed;
+  unsigned int h = seed;
 
-	const uint8_t * data = (const uint8_t*)key;
+  const uint8_t * data = (const uint8_t*)key;
 
-	h ^= BIG_CONSTANT(2166136261);
+  h ^= BIG_CONSTANT(2166136261);
 
-	for(int i = 0; i < len; i++)
-	{
-		h ^= data[i];
-		h *= 16777619;
-	}
+  for(int i = 0; i < len; i++)
+  {
+    h ^= data[i];
+    h *= 16777619;
+  }
 
-	*(uint32_t*)out = h;
+  *(uint32_t*)out = h;
 }
 
 //-----------------------------------------------------------------------------
 
 uint32_t x17 ( const void * key, int len, uint32_t h ) 
 {
-	const uint8_t * data = (const uint8_t*)key;
+  const uint8_t * data = (const uint8_t*)key;
     
-	for(int i = 0; i < len; ++i) 
-	{
+  for(int i = 0; i < len; ++i) 
+  {
         h = 17 * h + (data[i] - ' ');
     }
 
@@ -139,14 +119,14 @@ uint32_t x17 ( const void * key, int len, uint32_t h )
 
 uint32_t Bernstein ( const void * key, int len, uint32_t h ) 
 {
-	const uint8_t * data = (const uint8_t*)key;
+  const uint8_t * data = (const uint8_t*)key;
     
-	for(int i = 0; i < len; ++i) 
-	{
+  for(int i = 0; i < len; ++i) 
+  {
         h = 33 * h + data[i];
     }
 
-	return h;
+  return h;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Hashes.h b/Hashes.h
index a580113..8f39f76 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -44,25 +44,25 @@ void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * ou
 
 inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
 {
-	*(uint32_t*)out = MurmurHash1(key,len,seed);
+  *(uint32_t*)out = MurmurHash1(key,len,seed);
 }
 
 inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
 {
-	*(uint32_t*)out = MurmurHash2(key,len,seed);
+  *(uint32_t*)out = MurmurHash2(key,len,seed);
 }
 
 inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
 {
-	*(uint32_t*)out = MurmurHash2A(key,len,seed);
+  *(uint32_t*)out = MurmurHash2A(key,len,seed);
 }
 
 inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )
 {
-	*(uint64_t*)out = MurmurHash64A(key,len,seed);
+  *(uint64_t*)out = MurmurHash64A(key,len,seed);
 }
 
 inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )
 {
-	*(uint64_t*)out = MurmurHash64B(key,len,seed);
+  *(uint64_t*)out = MurmurHash64B(key,len,seed);
 }
\ No newline at end of file
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 6436826..a59fda4 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -3,29 +3,50 @@
 #include "Random.h"
 
 //-----------------------------------------------------------------------------
+// This should hopefully be a thorough and unambiguous test of whether a hash
+// is correctly implemented on a given platform
 
-void QuickBrownFox ( pfHash hash, const int hashbits )
+bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
 {
-	const int hashbytes = hashbits / 8;
+  const int hashbytes = hashbits / 8;
 
-	const char * text1 = "The quick brown fox jumps over the lazy dog";
-	const char * text2 = "The quick brown fox jumps over the lazy cog";
+  uint8_t * key    = new uint8_t[256];
+  uint8_t * hashes = new uint8_t[hashbytes * 256];
+  uint8_t * final  = new uint8_t[hashbytes];
 
-	uint8_t h1[128];
-	uint8_t h2[128];
+  memset(key,0,256);
+  memset(hashes,0,hashbytes*256);
+  memset(final,0,hashbytes);
 
-	hash(text1,(int)strlen(text1),0,h1);
-	hash(text2,(int)strlen(text2),0,h2);
+  for(int i = 0; i < 256; i++)
+  {
+    key[i] = (uint8_t)i;
+    
+    hash(key,i,0,&hashes[i*hashbytes]);
+  }
 
-	printf("\"%s\" => ",text1);
-	printhex32(h1,hashbytes);
-	printf("\n");
+  //----------
 
-	printf("\"%s\" => ",text2);
-	printhex32(h2,hashbytes);
-	printf("\n");
+  hash(hashes,hashbytes*256,0,final);
 
-	printf("\n");
+  uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
+
+  delete [] key;
+  delete [] hashes;
+  delete [] final;
+
+  //----------
+
+  if(expected != verification)
+  {
+    if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
+    return false;
+  }
+  else
+  {
+    if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
+    return true;
+  }
 }
 
 //----------------------------------------------------------------------------
@@ -41,81 +62,82 @@ void QuickBrownFox ( pfHash hash, const int hashbits )
 // The memory alignment of the key should not affect the hash result.
 
 bool SanityTest ( pfHash hash, const int hashbits )
-{	printf("Testing bit twiddling");
-
-	bool result = true;
-
-	const int hashbytes = hashbits/8;
-	const int reps = 10;
-	const int keymax = 128;
-	const int pad = 16;
-	const int buflen = keymax + pad*3;
-	
-	uint8_t * buffer1 = new uint8_t[buflen];
-	uint8_t * buffer2 = new uint8_t[buflen];
-
-	uint8_t * hash1 = new uint8_t[hashbytes];
-	uint8_t * hash2 = new uint8_t[hashbytes];
-
-	//----------
-	
-	for(int irep = 0; irep < reps; irep++)
-	{
-		if(irep % (reps/10) == 0) printf(".");
-
-		for(int len = 4; len <= keymax; len++)
-		{
-			for(int offset = pad; offset < pad*2; offset++)
-			{
-				uint8_t * key1 = &buffer1[pad];
-				uint8_t * key2 = &buffer2[pad+offset];
-
-				rand_p(buffer1,buflen);
-				rand_p(buffer2,buflen);
-
-				memcpy(key2,key1,len);
-
-				hash(key1,len,0,hash1);
-
-				for(int bit = 0; bit < (len * 8); bit++)
-				{
-					// Flip a bit, hash the key -> we should get a different result.
-
-					flipbit(key2,len,bit);
-					hash(key2,len,0,hash2);
-
-					if(memcmp(hash1,hash2,hashbytes) == 0)
-					{
-						result = false;
-					}
-
-					// Flip it back, hash again -> we should get the original result.
-
-					flipbit(key2,len,bit);
-					hash(key2,len,0,hash2);
-
-					if(memcmp(hash1,hash2,hashbytes) != 0)
-					{
-						result = false;
-					}
-				}
-			}
-		}
-	}
-
-	if(result == false)
-	{
-		printf("*********FAIL*********\n");
-	}
-	else
-	{
-		printf("PASS\n");
-	}
-
-	delete [] hash1;
-	delete [] hash2;
-
-	return result;
+{
+  printf("Running sanity check 1");
+
+  bool result = true;
+
+  const int hashbytes = hashbits/8;
+  const int reps = 10;
+  const int keymax = 128;
+  const int pad = 16;
+  const int buflen = keymax + pad*3;
+  
+  uint8_t * buffer1 = new uint8_t[buflen];
+  uint8_t * buffer2 = new uint8_t[buflen];
+
+  uint8_t * hash1 = new uint8_t[hashbytes];
+  uint8_t * hash2 = new uint8_t[hashbytes];
+
+  //----------
+  
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % (reps/10) == 0) printf(".");
+
+    for(int len = 4; len <= keymax; len++)
+    {
+      for(int offset = pad; offset < pad*2; offset++)
+      {
+        uint8_t * key1 = &buffer1[pad];
+        uint8_t * key2 = &buffer2[pad+offset];
+
+        rand_p(buffer1,buflen);
+        rand_p(buffer2,buflen);
+
+        memcpy(key2,key1,len);
+
+        hash(key1,len,0,hash1);
+
+        for(int bit = 0; bit < (len * 8); bit++)
+        {
+          // Flip a bit, hash the key -> we should get a different result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          if(memcmp(hash1,hash2,hashbytes) == 0)
+          {
+            result = false;
+          }
+
+          // Flip it back, hash again -> we should get the original result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          if(memcmp(hash1,hash2,hashbytes) != 0)
+          {
+            result = false;
+          }
+        }
+      }
+    }
+  }
+
+  if(result == false)
+  {
+    printf("*********FAIL*********\n");
+  }
+  else
+  {
+    printf("PASS\n");
+  }
+
+  delete [] hash1;
+  delete [] hash2;
+
+  return result;
 }
 
 //----------------------------------------------------------------------------
@@ -124,41 +146,41 @@ bool SanityTest ( pfHash hash, const int hashbits )
 
 void AppendedZeroesTest ( pfHash hash, const int hashbits )
 {
-	const int hashbytes = hashbits/8;
+  printf("Running sanity check 2");
 
-	printf("Testing zero-appending");
+  const int hashbytes = hashbits/8;
 
-	for(int rep = 0; rep < 100; rep++)
-	{
-		if(rep % 10 == 0) printf(".");
+  for(int rep = 0; rep < 100; rep++)
+  {
+    if(rep % 10 == 0) printf(".");
 
-		unsigned char key[256];
+    unsigned char key[256];
 
-		memset(key,0,sizeof(key));
+    memset(key,0,sizeof(key));
 
-		rand_p(key,32);
+    rand_p(key,32);
 
-		uint32_t h1[16];
-		uint32_t h2[16];
+    uint32_t h1[16];
+    uint32_t h2[16];
 
-		memset(h1,0,hashbytes);
-		memset(h2,0,hashbytes);
+    memset(h1,0,hashbytes);
+    memset(h2,0,hashbytes);
 
-		for(int i = 0; i < 32; i++)
-		{
-			hash(key,32+i,0,h1);
+    for(int i = 0; i < 32; i++)
+    {
+      hash(key,32+i,0,h1);
 
-			if(memcmp(h1,h2,hashbytes) == 0)
-			{
-				printf("\n*********FAIL*********\n");
-				return;
-			}
+      if(memcmp(h1,h2,hashbytes) == 0)
+      {
+        printf("\n*********FAIL*********\n");
+        return;
+      }
 
-			memcpy(h2,h1,hashbytes);
-		}
-	}
+      memcpy(h2,h1,hashbytes);
+    }
+  }
 
-	printf("PASS\n");
+  printf("PASS\n");
 }
 
 //-----------------------------------------------------------------------------
diff --git a/KeysetTest.h b/KeysetTest.h
index 17ded7b..c2a5c33 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -16,9 +16,8 @@
 //-----------------------------------------------------------------------------
 // Sanity tests
 
+bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
 bool SanityTest         ( pfHash hash, const int hashbits );
-void QuickBrownFox      ( pfHash hash, const int hashbits );
-void AlignmentTest      ( pfHash hash, const int hashbits );
 void AppendedZeroesTest ( pfHash hash, const int hashbits );
 
 //-----------------------------------------------------------------------------
@@ -26,55 +25,55 @@ void AppendedZeroesTest ( pfHash hash, const int hashbits );
 
 template< typename hashtype >
 void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 
-							    uint32_t * blocks, int blockcount, 
-								pfHash hash, std::vector<hashtype> & hashes )
+                  uint32_t * blocks, int blockcount, 
+                pfHash hash, std::vector<hashtype> & hashes )
 {
-	if(len == maxlen) return;
-
-	for(int i = 0; i < blockcount; i++)
-	{
-		key[len] = blocks[i];
-	
-		//if(len == maxlen-1)
-		{
-			hashtype h;
-			hash(key,(len+1) * sizeof(uint32_t),0,&h);
-			hashes.push_back(h);
-		}
-
-		//else
-		{
-			CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
-		}
-	}
+  if(len == maxlen) return;
+
+  for(int i = 0; i < blockcount; i++)
+  {
+    key[len] = blocks[i];
+  
+    //if(len == maxlen-1)
+    {
+      hashtype h;
+      hash(key,(len+1) * sizeof(uint32_t),0,&h);
+      hashes.push_back(h);
+    }
+
+    //else
+    {
+      CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
+    }
+  }
 }
 
 template< typename hashtype >
 bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
 {
-	printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
+  printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
 
-	//----------
+  //----------
 
-	std::vector<hashtype> hashes;
+  std::vector<hashtype> hashes;
 
-	uint32_t * key = new uint32_t[maxlen];
+  uint32_t * key = new uint32_t[maxlen];
 
-	CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
+  CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
 
-	delete [] key;
+  delete [] key;
 
-	printf("%d keys\n",(int)hashes.size());
+  printf("%d keys\n",(int)hashes.size());
 
-	//----------
+  //----------
 
-	bool result = true;
+  bool result = true;
 
-	result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-	
-	printf("\n");
+  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+  
+  printf("\n");
 
-	return result;
+  return result;
 }
 
 //----------------------------------------------------------------------------
@@ -84,49 +83,49 @@ bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks
 template< typename hashtype >
 void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
 {
-	if(k == blockcount-1)
-	{
-		hashtype h;
+  if(k == blockcount-1)
+  {
+    hashtype h;
 
-		hash(blocks,blockcount * sizeof(uint32_t),0,&h);
+    hash(blocks,blockcount * sizeof(uint32_t),0,&h);
 
-		hashes.push_back(h);
+    hashes.push_back(h);
 
-		return;
-	}
+    return;
+  }
 
-	for(int i = k; i < blockcount; i++)
-	{
-		std::swap(blocks[k],blocks[i]);
+  for(int i = k; i < blockcount; i++)
+  {
+    std::swap(blocks[k],blocks[i]);
 
-		PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
+    PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
 
-		std::swap(blocks[k],blocks[i]);
-	}
+    std::swap(blocks[k],blocks[i]);
+  }
 }
 
 template< typename hashtype >
 bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
 {
-	printf("Keyset 'Permutation' - %d blocks - ",blockcount);
+  printf("Keyset 'Permutation' - %d blocks - ",blockcount);
 
-	//----------
+  //----------
 
-	std::vector<hashtype> hashes;
+  std::vector<hashtype> hashes;
 
-	PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
+  PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
 
-	printf("%d keys\n",(int)hashes.size());
+  printf("%d keys\n",(int)hashes.size());
 
-	//----------
+  //----------
 
-	bool result = true;
+  bool result = true;
 
-	result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-	
-	printf("\n");
+  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+  
+  printf("\n");
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -135,28 +134,28 @@ bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockc
 template < typename keytype, typename hashtype >
 void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
 {
-	const int nbytes = sizeof(keytype);
-	const int nbits = nbytes * 8;
+  const int nbytes = sizeof(keytype);
+  const int nbits = nbytes * 8;
 
-	hashtype h;
+  hashtype h;
 
-	for(int i = start; i < nbits; i++)
-	{
-		flipbit(&k,nbytes,i);
+  for(int i = start; i < nbits; i++)
+  {
+    flipbit(&k,nbytes,i);
 
-		if(inclusive || (bitsleft == 1))
-		{
-			hash(&k,sizeof(keytype),0,&h);
-			hashes.push_back(h);
-		}
+    if(inclusive || (bitsleft == 1))
+    {
+      hash(&k,sizeof(keytype),0,&h);
+      hashes.push_back(h);
+    }
 
-		if(bitsleft > 1)
-		{
-			SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
-		}
+    if(bitsleft > 1)
+    {
+      SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
+    }
 
-		flipbit(&k,nbytes,i);
-	}
+    flipbit(&k,nbytes,i);
+  }
 }
 
 //----------
@@ -164,35 +163,35 @@ void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive,
 template < int keybits, typename hashtype >
 bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
 {
-	printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
+  printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
 
-	typedef Blob<keybits> keytype;
+  typedef Blob<keybits> keytype;
 
-	std::vector<hashtype> hashes;
+  std::vector<hashtype> hashes;
 
-	keytype k;
-	memset(&k,0,sizeof(k));
+  keytype k;
+  memset(&k,0,sizeof(k));
 
-	if(inclusive)
-	{
-		hashtype h;
+  if(inclusive)
+  {
+    hashtype h;
 
-		hash(&k,sizeof(keytype),0,&h);
+    hash(&k,sizeof(keytype),0,&h);
 
-		hashes.push_back(h);
-	}
+    hashes.push_back(h);
+  }
 
-	SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
+  SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
 
-	printf("%d keys\n",(int)hashes.size());
+  printf("%d keys\n",(int)hashes.size());
 
-	bool result = true;
-	
-	result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+  bool result = true;
+  
+  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
 
-	printf("\n");
+  printf("\n");
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -202,40 +201,40 @@ bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive,
 template < typename keytype, typename hashtype >
 bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
 {
-	const int keybits = sizeof(keytype) * 8;
-	const int keycount = 1 << windowbits;
+  const int keybits = sizeof(keytype) * 8;
+  const int keycount = 1 << windowbits;
 
-	std::vector<hashtype> hashes;
-	hashes.resize(keycount);
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
 
-	bool result = true;
+  bool result = true;
 
-	int testcount = (keybits-windowbits);
+  int testcount = (keybits-windowbits);
 
-	printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
+  printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
 
-	for(int j = 0; j <= testcount; j++)
-	{
-		int minbit = j;
+  for(int j = 0; j <= testcount; j++)
+  {
+    int minbit = j;
 
-		keytype key;
+    keytype key;
 
-		for(int i = 0; i < keycount; i++)
-		{
-			key = i;
-			key = key << minbit;
+    for(int i = 0; i < keycount; i++)
+    {
+      key = i;
+      key = key << minbit;
 
-			hash(&key,sizeof(keytype),0,&hashes[i]);
-		}
+      hash(&key,sizeof(keytype),0,&hashes[i]);
+    }
 
-		printf("Window at %3d - ",j);
+    printf("Window at %3d - ",j);
 
-		result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
+    result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
 
-		//printf("\n");
-	}
+    //printf("\n");
+  }
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -247,43 +246,43 @@ bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testC
 template < typename hashtype >
 bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
 {
-	printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
+  printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
 
-	std::vector<hashtype> hashes;
-	hashes.resize(keycount);
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
 
-	int keyLen = cycleLen * cycleReps;
+  int keyLen = cycleLen * cycleReps;
 
-	uint8_t * cycle = new uint8_t[cycleLen + 16];
-	uint8_t * key = new uint8_t[keyLen];
+  uint8_t * cycle = new uint8_t[cycleLen + 16];
+  uint8_t * key = new uint8_t[keyLen];
 
-	//----------
+  //----------
 
-	for(int i = 0; i < keycount; i++)
-	{
-		rand_p(cycle,cycleLen);
+  for(int i = 0; i < keycount; i++)
+  {
+    rand_p(cycle,cycleLen);
 
-		*(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
+    *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
 
-		for(int j = 0; j < keyLen; j++)
-		{
-			key[j] = cycle[j % cycleLen];
-		}
+    for(int j = 0; j < keyLen; j++)
+    {
+      key[j] = cycle[j % cycleLen];
+    }
 
-		hash(key,keyLen,0,&hashes[i]);
-	}
+    hash(key,keyLen,0,&hashes[i]);
+  }
 
-	//----------
-	
-	bool result = true;
+  //----------
+  
+  bool result = true;
 
-	result &= TestHashList(hashes,true,true,drawDiagram);
-	printf("\n");
+  result &= TestHashList(hashes,true,true,drawDiagram);
+  printf("\n");
 
-	delete [] cycle;
-	delete [] key;
+  delete [] cycle;
+  delete [] key;
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -294,52 +293,52 @@ bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycoun
 template < typename hashtype >
 bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
 {
-	const int prefixlen = (int)strlen(prefix);
-	const int suffixlen = (int)strlen(suffix);
-	const int corecount = (int)strlen(coreset);
+  const int prefixlen = (int)strlen(prefix);
+  const int suffixlen = (int)strlen(suffix);
+  const int corecount = (int)strlen(coreset);
 
-	const int keybytes = prefixlen + corelen + suffixlen;
-	const int keycount = (int)pow(double(corecount),double(corelen));
+  const int keybytes = prefixlen + corelen + suffixlen;
+  const int keycount = (int)pow(double(corecount),double(corelen));
 
-	printf("Keyset 'Text' - keys of form \"%s[",prefix);
-	for(int i = 0; i < corelen; i++) printf("X");		
-	printf("]%s\" - %d keys\n",suffix,keycount);
+  printf("Keyset 'Text' - keys of form \"%s[",prefix);
+  for(int i = 0; i < corelen; i++) printf("X");		
+  printf("]%s\" - %d keys\n",suffix,keycount);
 
-	uint8_t * key = new uint8_t[keybytes+1];
+  uint8_t * key = new uint8_t[keybytes+1];
 
-	key[keybytes] = 0;
+  key[keybytes] = 0;
 
-	memcpy(key,prefix,prefixlen);
-	memcpy(key+prefixlen+corelen,suffix,suffixlen);
+  memcpy(key,prefix,prefixlen);
+  memcpy(key+prefixlen+corelen,suffix,suffixlen);
 
-	//----------
+  //----------
 
-	std::vector<hashtype> hashes;
-	hashes.resize(keycount);
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
 
-	for(int i = 0; i < keycount; i++)
-	{
-		int t = i;
+  for(int i = 0; i < keycount; i++)
+  {
+    int t = i;
 
-		for(int j = 0; j < corelen; j++)
-		{
-			key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
-		}
+    for(int j = 0; j < corelen; j++)
+    {
+      key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
+    }
 
-		hash(key,keybytes,0,&hashes[i]);
-	}
+    hash(key,keybytes,0,&hashes[i]);
+  }
 
-	//----------
+  //----------
 
-	bool result = true;
+  bool result = true;
 
-	result &= TestHashList(hashes,true,true,drawDiagram);
+  result &= TestHashList(hashes,true,true,drawDiagram);
 
-	printf("\n");
+  printf("\n");
 
-	delete [] key;
+  delete [] key;
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -350,33 +349,33 @@ bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * co
 template < typename hashtype >
 bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
 {
-	int keycount = 64*1024;
+  int keycount = 64*1024;
 
-	printf("Keyset 'Zeroes' - %d keys\n",keycount);
+  printf("Keyset 'Zeroes' - %d keys\n",keycount);
 
-	unsigned char * nullblock = new unsigned char[keycount];
-	memset(nullblock,0,keycount);
+  unsigned char * nullblock = new unsigned char[keycount];
+  memset(nullblock,0,keycount);
 
-	//----------
+  //----------
 
-	std::vector<hashtype> hashes;
+  std::vector<hashtype> hashes;
 
-	hashes.resize(keycount);
+  hashes.resize(keycount);
 
-	for(int i = 0; i < keycount; i++)
-	{
-		hash(nullblock,i,0,&hashes[i]);
-	}
+  for(int i = 0; i < keycount; i++)
+  {
+    hash(nullblock,i,0,&hashes[i]);
+  }
 
-	bool result = true;
+  bool result = true;
 
-	result &= TestHashList(hashes,true,true,drawDiagram);
+  result &= TestHashList(hashes,true,true,drawDiagram);
 
-	printf("\n");
+  printf("\n");
 
-	delete [] nullblock;
+  delete [] nullblock;
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -385,29 +384,29 @@ bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
 template < typename hashtype >
 bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
 {
-	printf("Keyset 'Seed' - %d keys\n",keycount);
+  printf("Keyset 'Seed' - %d keys\n",keycount);
 
-	const char * text = "The quick brown fox jumps over the lazy dog";
-	const int len = (int)strlen(text);
+  const char * text = "The quick brown fox jumps over the lazy dog";
+  const int len = (int)strlen(text);
 
-	//----------
+  //----------
 
-	std::vector<hashtype> hashes;
+  std::vector<hashtype> hashes;
 
-	hashes.resize(keycount);
+  hashes.resize(keycount);
 
-	for(int i = 0; i < keycount; i++)
-	{
-		hash(text,len,i,&hashes[i]);
-	}
+  for(int i = 0; i < keycount; i++)
+  {
+    hash(text,len,i,&hashes[i]);
+  }
 
-	bool result = true;
+  bool result = true;
 
-	result &= TestHashList(hashes,true,true,drawDiagram);
+  result &= TestHashList(hashes,true,true,drawDiagram);
 
-	printf("\n");
+  printf("\n");
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp
index 6499a3d..ed23e6f 100644
--- a/MurmurHash1.cpp
+++ b/MurmurHash1.cpp
@@ -16,50 +16,50 @@
 
 uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
 {
-	const unsigned int m = 0xc6a4a793;
-
-	const int r = 16;
-
-	unsigned int h = seed ^ (len * m);
-
-	//----------
-	
-	const unsigned char * data = (const unsigned char *)key;
-
-	while(len >= 4)
-	{
-		unsigned int k = *(unsigned int *)data;
-
-		h += k;
-		h *= m;
-		h ^= h >> 16;
-
-		data += 4;
-		len -= 4;
-	}
-	
-	//----------
-	
-	switch(len)
-	{
-	case 3:
-		h += data[2] << 16;
-	case 2:
-		h += data[1] << 8;
-	case 1:
-		h += data[0];
-		h *= m;
-		h ^= h >> r;
-	};
+  const unsigned int m = 0xc6a4a793;
+
+  const int r = 16;
+
+  unsigned int h = seed ^ (len * m);
+
+  //----------
+  
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    unsigned int k = *(unsigned int *)data;
+
+    h += k;
+    h *= m;
+    h ^= h >> 16;
+
+    data += 4;
+    len -= 4;
+  }
+  
+  //----------
+  
+  switch(len)
+  {
+  case 3:
+    h += data[2] << 16;
+  case 2:
+    h += data[1] << 8;
+  case 1:
+    h += data[0];
+    h *= m;
+    h ^= h >> r;
+  };
  
-	//----------
+  //----------
 
-	h *= m;
-	h ^= h >> 10;
-	h *= m;
-	h ^= h >> 17;
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
 
-	return h;
+  return h;
 } 
 
 //-----------------------------------------------------------------------------
@@ -72,100 +72,100 @@ uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
 
 unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
 {
-	const unsigned int m = 0xc6a4a793;
-	const int r = 16;
-
-	const unsigned char * data = (const unsigned char *)key;
-
-	unsigned int h = seed ^ (len * m);
-
-	int align = (uint64_t)data & 3;
-
-	if(align && (len >= 4))
-	{
-		// Pre-load the temp registers
-
-		unsigned int t = 0, d = 0;
-
-		switch(align)
-		{
-			case 1: t |= data[2] << 16;
-			case 2: t |= data[1] << 8;
-			case 3: t |= data[0];
-		}
-
-		t <<= (8 * align);
-
-		data += 4-align;
-		len -= 4-align;
-
-		int sl = 8 * (4-align);
-		int sr = 8 * align;
-
-		// Mix
-
-		while(len >= 4)
-		{
-			d = *(unsigned int *)data;
-			t = (t >> sr) | (d << sl);
-			h += t;
-			h *= m;
-			h ^= h >> r;
-			t = d;
-
-			data += 4;
-			len -= 4;
-		}
-
-		// Handle leftover data in temp registers
-
-		int pack = len < align ? len : align;
-
-		d = 0;
-
-		switch(pack)
-		{
-		case 3: d |= data[2] << 16;
-		case 2: d |= data[1] << 8;
-		case 1: d |= data[0];
-		case 0: h += (t >> sr) | (d << sl);
-				h *= m;
-				h ^= h >> r;
-		}
-
-		data += pack;
-		len -= pack;
-	}
-	else
-	{
-		while(len >= 4)
-		{
-			h += *(unsigned int *)data;
-			h *= m;
-			h ^= h >> r;
-
-			data += 4;
-			len -= 4;
-		}
-	}
-
-	//----------
-	// Handle tail bytes
-
-	switch(len)
-	{
-	case 3: h += data[2] << 16;
-	case 2: h += data[1] << 8;
-	case 1: h += data[0];
-			h *= m;
-			h ^= h >> r;
-	};
-
-	h *= m;
-	h ^= h >> 10;
-	h *= m;
-	h ^= h >> 17;
-
-	return h;
+  const unsigned int m = 0xc6a4a793;
+  const int r = 16;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  unsigned int h = seed ^ (len * m);
+
+  int align = (uint64_t)data & 3;
+
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers
+
+    unsigned int t = 0, d = 0;
+
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4)
+    {
+      d = *(unsigned int *)data;
+      t = (t >> sr) | (d << sl);
+      h += t;
+      h *= m;
+      h ^= h >> r;
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    int pack = len < align ? len : align;
+
+    d = 0;
+
+    switch(pack)
+    {
+    case 3: d |= data[2] << 16;
+    case 2: d |= data[1] << 8;
+    case 1: d |= data[0];
+    case 0: h += (t >> sr) | (d << sl);
+        h *= m;
+        h ^= h >> r;
+    }
+
+    data += pack;
+    len -= pack;
+  }
+  else
+  {
+    while(len >= 4)
+    {
+      h += *(unsigned int *)data;
+      h *= m;
+      h ^= h >> r;
+
+      data += 4;
+      len -= 4;
+    }
+  }
+
+  //----------
+  // Handle tail bytes
+
+  switch(len)
+  {
+  case 3: h += data[2] << 16;
+  case 2: h += data[1] << 8;
+  case 1: h += data[0];
+      h *= m;
+      h ^= h >> r;
+  };
+
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
+
+  return h;
 }
 
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
index e020628..cc94f79 100644
--- a/MurmurHash2.cpp
+++ b/MurmurHash2.cpp
@@ -16,53 +16,53 @@
 
 uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
 {
-	// 'm' and 'r' are mixing constants generated offline.
-	// They're not really 'magic', they just happen to work well.
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
 
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
 
-	// Initialize the hash to a 'random' value
+  // Initialize the hash to a 'random' value
 
-	uint32_t h = seed ^ len;
+  uint32_t h = seed ^ len;
 
-	// Mix 4 bytes at a time into the hash
+  // Mix 4 bytes at a time into the hash
 
-	const unsigned char * data = (const unsigned char *)key;
+  const unsigned char * data = (const unsigned char *)key;
 
-	while(len >= 4)
-	{
-		uint32_t k = *(uint32_t*)data;
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;
 
-		k *= m;
-		k ^= k >> r;
-		k *= m;
+    k *= m;
+    k ^= k >> r;
+    k *= m;
 
-		h *= m;
-		h ^= k;
+    h *= m;
+    h ^= k;
 
-		data += 4;
-		len -= 4;
-	}
+    data += 4;
+    len -= 4;
+  }
 
-	// Handle the last few bytes of the input array
+  // Handle the last few bytes of the input array
 
-	switch(len)
-	{
-	case 3: h ^= data[2] << 16;
-	case 2: h ^= data[1] << 8;
-	case 1: h ^= data[0];
-			h *= m;
-	};
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+      h *= m;
+  };
 
-	// Do a few final mixes of the hash to ensure the last few
-	// bytes are well-incorporated.
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
 
-	h ^= h >> 13;
-	h *= m;
-	h ^= h >> 15;
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
 
-	return h;
+  return h;
 } 
 
 //-----------------------------------------------------------------------------
@@ -75,45 +75,45 @@ uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
 
 uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
 {
-	const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
-	const int r = 47;
-
-	uint64_t h = seed ^ (len * m);
-
-	const uint64_t * data = (const uint64_t *)key;
-	const uint64_t * end = data + (len/8);
-
-	while(data != end)
-	{
-		uint64_t k = *data++;
-
-		k *= m; 
-		k ^= k >> r; 
-		k *= m; 
-		
-		h ^= k;
-		h *= m; 
-	}
-
-	const unsigned char * data2 = (const unsigned char*)data;
-
-	switch(len & 7)
-	{
-	case 7: h ^= uint64_t(data2[6]) << 48;
-	case 6: h ^= uint64_t(data2[5]) << 40;
-	case 5: h ^= uint64_t(data2[4]) << 32;
-	case 4: h ^= uint64_t(data2[3]) << 24;
-	case 3: h ^= uint64_t(data2[2]) << 16;
-	case 2: h ^= uint64_t(data2[1]) << 8;
-	case 1: h ^= uint64_t(data2[0]);
-	        h *= m;
-	};
+  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+  const int r = 47;
+
+  uint64_t h = seed ^ (len * m);
+
+  const uint64_t * data = (const uint64_t *)key;
+  const uint64_t * end = data + (len/8);
+
+  while(data != end)
+  {
+    uint64_t k = *data++;
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m; 
+    
+    h ^= k;
+    h *= m; 
+  }
+
+  const unsigned char * data2 = (const unsigned char*)data;
+
+  switch(len & 7)
+  {
+  case 7: h ^= uint64_t(data2[6]) << 48;
+  case 6: h ^= uint64_t(data2[5]) << 40;
+  case 5: h ^= uint64_t(data2[4]) << 32;
+  case 4: h ^= uint64_t(data2[3]) << 24;
+  case 3: h ^= uint64_t(data2[2]) << 16;
+  case 2: h ^= uint64_t(data2[1]) << 8;
+  case 1: h ^= uint64_t(data2[0]);
+          h *= m;
+  };
  
-	h ^= h >> r;
-	h *= m;
-	h ^= h >> r;
+  h ^= h >> r;
+  h *= m;
+  h ^= h >> r;
 
-	return h;
+  return h;
 } 
 
 
@@ -121,53 +121,53 @@ uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
 
 uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
 {
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
-
-	uint32_t h1 = uint32_t(seed) ^ len;
-	uint32_t h2 = uint32_t(seed >> 32);
-
-	const uint32_t * data = (const uint32_t *)key;
-
-	while(len >= 8)
-	{
-		uint32_t k1 = *data++;
-		k1 *= m; k1 ^= k1 >> r; k1 *= m;
-		h1 *= m; h1 ^= k1;
-		len -= 4;
-
-		uint32_t k2 = *data++;
-		k2 *= m; k2 ^= k2 >> r; k2 *= m;
-		h2 *= m; h2 ^= k2;
-		len -= 4;
-	}
-
-	if(len >= 4)
-	{
-		uint32_t k1 = *data++;
-		k1 *= m; k1 ^= k1 >> r; k1 *= m;
-		h1 *= m; h1 ^= k1;
-		len -= 4;
-	}
-
-	switch(len)
-	{
-	case 3: h2 ^= ((unsigned char*)data)[2] << 16;
-	case 2: h2 ^= ((unsigned char*)data)[1] << 8;
-	case 1: h2 ^= ((unsigned char*)data)[0];
-			h2 *= m;
-	};
-
-	h1 ^= h2 >> 18; h1 *= m;
-	h2 ^= h1 >> 22; h2 *= m;
-	h1 ^= h2 >> 17; h1 *= m;
-	h2 ^= h1 >> 19; h2 *= m;
-
-	uint64_t h = h1;
-
-	h = (h << 32) | h2;
-
-	return h;
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h1 = uint32_t(seed) ^ len;
+  uint32_t h2 = uint32_t(seed >> 32);
+
+  const uint32_t * data = (const uint32_t *)key;
+
+  while(len >= 8)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+
+    uint32_t k2 = *data++;
+    k2 *= m; k2 ^= k2 >> r; k2 *= m;
+    h2 *= m; h2 ^= k2;
+    len -= 4;
+  }
+
+  if(len >= 4)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+  }
+
+  switch(len)
+  {
+  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+  case 1: h2 ^= ((unsigned char*)data)[0];
+      h2 *= m;
+  };
+
+  h1 ^= h2 >> 18; h1 *= m;
+  h2 ^= h1 >> 22; h2 *= m;
+  h1 ^= h2 >> 17; h1 *= m;
+  h2 ^= h1 >> 19; h2 *= m;
+
+  uint64_t h = h1;
+
+  h = (h << 32) | h2;
+
+  return h;
 } 
 
 //-----------------------------------------------------------------------------
@@ -185,41 +185,41 @@ uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
 
 uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
 {
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
-	uint32_t l = len;
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+  uint32_t l = len;
 
-	const unsigned char * data = (const unsigned char *)key;
+  const unsigned char * data = (const unsigned char *)key;
 
-	uint32_t h = seed;
+  uint32_t h = seed;
 
-	while(len >= 4)
-	{
-		uint32_t k = *(uint32_t*)data;
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;
 
-		mmix(h,k);
+    mmix(h,k);
 
-		data += 4;
-		len -= 4;
-	}
+    data += 4;
+    len -= 4;
+  }
 
-	uint32_t t = 0;
+  uint32_t t = 0;
 
-	switch(len)
-	{
-	case 3: t ^= data[2] << 16;
-	case 2: t ^= data[1] << 8;
-	case 1: t ^= data[0];
-	};
+  switch(len)
+  {
+  case 3: t ^= data[2] << 16;
+  case 2: t ^= data[1] << 8;
+  case 1: t ^= data[0];
+  };
 
-	mmix(h,t);
-	mmix(h,l);
+  mmix(h,t);
+  mmix(h,l);
 
-	h ^= h >> 13;
-	h *= m;
-	h ^= h >> 15;
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
 
-	return h;
+  return h;
 }
 
 //-----------------------------------------------------------------------------
@@ -242,72 +242,72 @@ class CMurmurHash2A
 {
 public:
 
-	void Begin ( uint32_t seed = 0 )
-	{
-		m_hash  = seed;
-		m_tail  = 0;
-		m_count = 0;
-		m_size  = 0;
-	}
+  void Begin ( uint32_t seed = 0 )
+  {
+    m_hash  = seed;
+    m_tail  = 0;
+    m_count = 0;
+    m_size  = 0;
+  }
 
-	void Add ( const unsigned char * data, int len )
-	{
-		m_size += len;
+  void Add ( const unsigned char * data, int len )
+  {
+    m_size += len;
 
-		MixTail(data,len);
+    MixTail(data,len);
 
-		while(len >= 4)
-		{
-			uint32_t k = *(uint32_t*)data;
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t*)data;
 
-			mmix(m_hash,k);
+      mmix(m_hash,k);
 
-			data += 4;
-			len -= 4;
-		}
+      data += 4;
+      len -= 4;
+    }
 
-		MixTail(data,len);
-	}
+    MixTail(data,len);
+  }
 
-	uint32_t End ( void )
-	{
-		mmix(m_hash,m_tail);
-		mmix(m_hash,m_size);
+  uint32_t End ( void )
+  {
+    mmix(m_hash,m_tail);
+    mmix(m_hash,m_size);
 
-		m_hash ^= m_hash >> 13;
-		m_hash *= m;
-		m_hash ^= m_hash >> 15;
+    m_hash ^= m_hash >> 13;
+    m_hash *= m;
+    m_hash ^= m_hash >> 15;
 
-		return m_hash;
-	}
+    return m_hash;
+  }
 
 private:
 
-	static const uint32_t m = 0x5bd1e995;
-	static const int r = 24;
-
-	void MixTail ( const unsigned char * & data, int & len )
-	{
-		while( len && ((len<4) || m_count) )
-		{
-			m_tail |= (*data++) << (m_count * 8);
-
-			m_count++;
-			len--;
-
-			if(m_count == 4)
-			{
-				mmix(m_hash,m_tail);
-				m_tail = 0;
-				m_count = 0;
-			}
-		}
-	}
-
-	uint32_t m_hash;
-	uint32_t m_tail;
-	uint32_t m_count;
-	uint32_t m_size;
+  static const uint32_t m = 0x5bd1e995;
+  static const int r = 24;
+
+  void MixTail ( const unsigned char * & data, int & len )
+  {
+    while( len && ((len<4) || m_count) )
+    {
+      m_tail |= (*data++) << (m_count * 8);
+
+      m_count++;
+      len--;
+
+      if(m_count == 4)
+      {
+        mmix(m_hash,m_tail);
+        m_tail = 0;
+        m_count = 0;
+      }
+    }
+  }
+
+  uint32_t m_hash;
+  uint32_t m_tail;
+  uint32_t m_count;
+  uint32_t m_size;
 };
 
 //-----------------------------------------------------------------------------
@@ -318,46 +318,46 @@ private:
 
 uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
 {
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
-
-	uint32_t h = seed ^ len;
-
-	const unsigned char * data = (const unsigned char *)key;
-
-	while(len >= 4)
-	{
-		uint32_t k;
-
-		k  = data[0];
-		k |= data[1] << 8;
-		k |= data[2] << 16;
-		k |= data[3] << 24;
-
-		k *= m; 
-		k ^= k >> r; 
-		k *= m;
-
-		h *= m;
-		h ^= k;
-
-		data += 4;
-		len -= 4;
-	}
-	
-	switch(len)
-	{
-	case 3: h ^= data[2] << 16;
-	case 2: h ^= data[1] << 8;
-	case 1: h ^= data[0];
-	        h *= m;
-	};
-
-	h ^= h >> 13;
-	h *= m;
-	h ^= h >> 15;
-
-	return h;
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h = seed ^ len;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k;
+
+    k  = data[0];
+    k |= data[1] << 8;
+    k |= data[2] << 16;
+    k |= data[3] << 24;
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+  
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+          h *= m;
+  };
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
 } 
 
 //-----------------------------------------------------------------------------
@@ -372,130 +372,130 @@ uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
 
 uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
 {
-	const uint32_t m = 0x5bd1e995;
-	const int r = 24;
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
 
-	const unsigned char * data = (const unsigned char *)key;
+  const unsigned char * data = (const unsigned char *)key;
 
-	uint32_t h = seed ^ len;
+  uint32_t h = seed ^ len;
 
-	int align = (uint64_t)data & 3;
+  int align = (uint64_t)data & 3;
 
-	if(align && (len >= 4))
-	{
-		// Pre-load the temp registers
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers
 
-		uint32_t t = 0, d = 0;
+    uint32_t t = 0, d = 0;
 
-		switch(align)
-		{
-			case 1: t |= data[2] << 16;
-			case 2: t |= data[1] << 8;
-			case 3: t |= data[0];
-		}
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
 
-		t <<= (8 * align);
-
-		data += 4-align;
-		len -= 4-align;
-
-		int sl = 8 * (4-align);
-		int sr = 8 * align;
-
-		// Mix
-
-		while(len >= 4)
-		{
-			d = *(uint32_t *)data;
-			t = (t >> sr) | (d << sl);
-
-			uint32_t k = t;
-
-			MIX(h,k,m);
-
-			t = d;
-
-			data += 4;
-			len -= 4;
-		}
-
-		// Handle leftover data in temp registers
-
-		d = 0;
-
-		if(len >= align)
-		{
-			switch(align)
-			{
-			case 3: d |= data[2] << 16;
-			case 2: d |= data[1] << 8;
-			case 1: d |= data[0];
-			}
-
-			uint32_t k = (t >> sr) | (d << sl);
-			MIX(h,k,m);
-
-			data += align;
-			len -= align;
-
-			//----------
-			// Handle tail bytes
-
-			switch(len)
-			{
-			case 3: h ^= data[2] << 16;
-			case 2: h ^= data[1] << 8;
-			case 1: h ^= data[0];
-					h *= m;
-			};
-		}
-		else
-		{
-			switch(len)
-			{
-			case 3: d |= data[2] << 16;
-			case 2: d |= data[1] << 8;
-			case 1: d |= data[0];
-			case 0: h ^= (t >> sr) | (d << sl);
-					h *= m;
-			}
-		}
-
-		h ^= h >> 13;
-		h *= m;
-		h ^= h >> 15;
-
-		return h;
-	}
-	else
-	{
-		while(len >= 4)
-		{
-			uint32_t k = *(uint32_t *)data;
-
-			MIX(h,k,m);
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4)
+    {
+      d = *(uint32_t *)data;
+      t = (t >> sr) | (d << sl);
+
+      uint32_t k = t;
+
+      MIX(h,k,m);
+
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    d = 0;
+
+    if(len >= align)
+    {
+      switch(align)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      }
+
+      uint32_t k = (t >> sr) | (d << sl);
+      MIX(h,k,m);
+
+      data += align;
+      len -= align;
+
+      //----------
+      // Handle tail bytes
+
+      switch(len)
+      {
+      case 3: h ^= data[2] << 16;
+      case 2: h ^= data[1] << 8;
+      case 1: h ^= data[0];
+          h *= m;
+      };
+    }
+    else
+    {
+      switch(len)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      case 0: h ^= (t >> sr) | (d << sl);
+          h *= m;
+      }
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+  else
+  {
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t *)data;
+
+      MIX(h,k,m);
 
-			data += 4;
-			len -= 4;
-		}
-
-		//----------
-		// Handle tail bytes
-
-		switch(len)
-		{
-		case 3: h ^= data[2] << 16;
-		case 2: h ^= data[1] << 8;
-		case 1: h ^= data[0];
-				h *= m;
-		};
-
-		h ^= h >> 13;
-		h *= m;
-		h ^= h >> 15;
-
-		return h;
-	}
+      data += 4;
+      len -= 4;
+    }
+
+    //----------
+    // Handle tail bytes
+
+    switch(len)
+    {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0];
+        h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
 }
 
 //-----------------------------------------------------------------------------
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index fa2dafc..7a6e435 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -366,66 +366,3 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
 }
 
 //-----------------------------------------------------------------------------
-// Quick copy-pasted test code for GCC build
-
-// This should print -
-
-// "The quick brown fox jumps over the lazy dog" => { 0x38585ecf, 0x5f6d752a, 0x0157c98a, 0x8c686b9b, }
-// "The quick brown fox jumps over the lazy cog" => { 0x6d3fd6f0, 0xc86a98a0, 0x4d6fac1c, 0x8f3e52b4, }
-
-#ifndef _MSC_VER
-
-/*
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
-
-void printhex32 ( void * blob, int len )
-{
-	assert((len & 3) == 0);
-
-	uint32_t * d = (uint32_t*)blob;
-
-	printf("{ ");
-
-	for(int i = 0; i < len/4; i++) 
-	{
-		printf("0x%08x, ",d[i]);
-	}
-
-	printf("}");
-}
-
-void QuickBrownFox ( pfHash hash, const int hashbits )
-{
-	const int hashbytes = hashbits / 8;
-
-	const char * text1 = "The quick brown fox jumps over the lazy dog";
-	const char * text2 = "The quick brown fox jumps over the lazy cog";
-
-	uint8_t h1[128];
-	uint8_t h2[128];
-
-	hash(text1,(int)strlen(text1),0,h1);
-	hash(text2,(int)strlen(text2),0,h2);
-
-	printf("\"%s\" => ",text1);
-	printhex32(h1,hashbytes);
-	printf("\n");
-
-	printf("\"%s\" => ",text2);
-	printhex32(h2,hashbytes);
-	printf("\n");
-
-	printf("\n");
-}
-
-int main ( int argc, char** argv )
-{
-  QuickBrownFox(&MurmurHash3_x64_128,128);
-}
-*/
-
-#endif
diff --git a/Platform.cpp b/Platform.cpp
index 5f38872..3561379 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -15,7 +15,7 @@ void testRDTSC ( void )
 
 void SetAffinity ( int cpu )
 {
-	SetProcessAffinityMask(GetCurrentProcess(),cpu);
+  SetProcessAffinityMask(GetCurrentProcess(),cpu);
 }
 
 #else
diff --git a/Platform.h b/Platform.h
index 92ae44a..f15580e 100644
--- a/Platform.h
+++ b/Platform.h
@@ -43,22 +43,22 @@ void SetAffinity ( int cpu );
 
 inline uint32_t rotl32 ( uint32_t x, int8_t r )
 {
-	return (x << r) | (x >> (32 - r));
+  return (x << r) | (x >> (32 - r));
 }
 
 inline uint64_t rotl64 ( uint64_t x, int8_t r )
 {
-	return (x << r) | (x >> (64 - r));
+  return (x << r) | (x >> (64 - r));
 }
 
 inline uint32_t rotr32 ( uint32_t x, int8_t r )
 {
-	return (x >> r) | (x << (32 - r));
+  return (x >> r) | (x << (32 - r));
 }
 
 inline uint64_t rotr64 ( uint64_t x, int8_t r )
 {
-	return (x >> r) | (x << (64 - r));
+  return (x >> r) | (x << (64 - r));
 }
 
 #define	ROTL32(x,y)	rotl32(x,y)
diff --git a/Random.h b/Random.h
index 033e5f8..619c453 100644
--- a/Random.h
+++ b/Random.h
@@ -8,84 +8,84 @@
 
 struct Rand
 {
-	uint32_t x;
-	uint32_t y;
-	uint32_t z;
-	uint32_t w;
-
-	Rand()
-	{
-		reseed(uint32_t(0));
-	}
-
-	Rand( uint32_t seed )
-	{
-		reseed(seed);
-	}
-
-	void reseed ( uint32_t seed )
-	{
-		x = 0x498b3bc5 ^ seed;
-		y = 0;
-		z = 0;
-		w = 0;
-
-		for(int i = 0; i < 10; i++) mix();
-	}
-
-	void reseed ( uint64_t seed )
-	{
-		x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
-		y = 0x5a05089a ^ (uint32_t)(seed >> 32);
-		z = 0;
-		w = 0;
-
-		for(int i = 0; i < 10; i++) mix();
-	}
-
-	//-----------------------------------------------------------------------------
-
-	void mix ( void )
-	{
-		uint32_t t = x ^ (x << 11);
-		x = y; y = z; z = w;
-		w = w ^ (w >> 19) ^ t ^ (t >> 8); 
-	}
-
-	uint32_t rand_u32 ( void )
-	{
-		mix();
-
-		return x;
-	}
-
-	uint64_t rand_u64 ( void ) 
-	{
-		mix();
-
-		uint64_t a = x;
-		uint64_t b = y;
-
-		return (a << 32) | b;
-	}
-
-	void rand_p ( void * blob, int bytes )
-	{
-		uint32_t * blocks = (uint32_t*)blob;
-
-		while(bytes >= 4)
-		{
-			*blocks++ = rand_u32();
-			bytes -= 4;
-		}
-
-		uint8_t * tail = (uint8_t*)blocks;
-
-		for(int i = 0; i < bytes; i++)
-		{
-			tail[i] = (uint8_t)rand_u32();
-		}
-	}
+  uint32_t x;
+  uint32_t y;
+  uint32_t z;
+  uint32_t w;
+
+  Rand()
+  {
+    reseed(uint32_t(0));
+  }
+
+  Rand( uint32_t seed )
+  {
+    reseed(seed);
+  }
+
+  void reseed ( uint32_t seed )
+  {
+    x = 0x498b3bc5 ^ seed;
+    y = 0;
+    z = 0;
+    w = 0;
+
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  void reseed ( uint64_t seed )
+  {
+    x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
+    y = 0x5a05089a ^ (uint32_t)(seed >> 32);
+    z = 0;
+    w = 0;
+
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  //-----------------------------------------------------------------------------
+
+  void mix ( void )
+  {
+    uint32_t t = x ^ (x << 11);
+    x = y; y = z; z = w;
+    w = w ^ (w >> 19) ^ t ^ (t >> 8); 
+  }
+
+  uint32_t rand_u32 ( void )
+  {
+    mix();
+
+    return x;
+  }
+
+  uint64_t rand_u64 ( void ) 
+  {
+    mix();
+
+    uint64_t a = x;
+    uint64_t b = y;
+
+    return (a << 32) | b;
+  }
+
+  void rand_p ( void * blob, int bytes )
+  {
+    uint32_t * blocks = (uint32_t*)blob;
+
+    while(bytes >= 4)
+    {
+      *blocks++ = rand_u32();
+      bytes -= 4;
+    }
+
+    uint8_t * tail = (uint8_t*)blocks;
+
+    for(int i = 0; i < bytes; i++)
+    {
+      tail[i] = (uint8_t)rand_u32();
+    }
+  }
 };
 
 //-----------------------------------------------------------------------------
@@ -97,20 +97,20 @@ inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); }
 
 inline void rand_p ( void * blob, int bytes )
 {
-	uint32_t * blocks = (uint32_t*)blob;
+  uint32_t * blocks = (uint32_t*)blob;
 
-	while(bytes >= 4)
-	{
-		*blocks++ = rand_u32();
-		bytes -= 4;
-	}
+  while(bytes >= 4)
+  {
+    *blocks++ = rand_u32();
+    bytes -= 4;
+  }
 
-	uint8_t * tail = (uint8_t*)blocks;
+  uint8_t * tail = (uint8_t*)blocks;
 
-	for(int i = 0; i < bytes; i++)
-	{
-		tail[i] = (uint8_t)rand_u32();
-	}
+  for(int i = 0; i < bytes; i++)
+  {
+    tail[i] = (uint8_t)rand_u32();
+  }
 }
 
 //-----------------------------------------------------------------------------
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index c7c742b..a4ed8a7 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -9,98 +9,98 @@
 
 void BulkSpeedTest ( pfHash hash )
 {
-	const int trials = 9999;
-	const int blocksize = 256 * 1024;
+  const int trials = 9999;
+  const int blocksize = 256 * 1024;
 
-	printf("Bulk speed test - %d-byte keys\n",blocksize);
+  printf("Bulk speed test - %d-byte keys\n",blocksize);
 
-	char * block = new char[blocksize + 16];
+  char * block = new char[blocksize + 16];
 
-	rand_p(block,blocksize+16);
+  rand_p(block,blocksize+16);
 
-	uint32_t temp[16];
+  uint32_t temp[16];
 
-	for(int align = 0; align < 8; align++)
-	{
-		double bestbpc = 0;
+  for(int align = 0; align < 8; align++)
+  {
+    double bestbpc = 0;
 
-		for(int itrial = 0; itrial < trials; itrial++)
-		{
-			int64_t begin,end;
+    for(int itrial = 0; itrial < trials; itrial++)
+    {
+      int64_t begin,end;
 
-			begin = rdtsc();
+      begin = rdtsc();
 
-			hash(block + align,blocksize,itrial,temp);
+      hash(block + align,blocksize,itrial,temp);
 
-			end = rdtsc();
+      end = rdtsc();
 
-			blackhole(temp[0]);
+      blackhole(temp[0]);
 
-			double cycles = double(end-begin);
-			double bpc = double(blocksize) / cycles;
-			if(bpc > bestbpc) bestbpc = bpc;
-		}
+      double cycles = double(end-begin);
+      double bpc = double(blocksize) / cycles;
+      if(bpc > bestbpc) bestbpc = bpc;
+    }
 
-		double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
-		printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
-	}
+    double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
+    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
+  }
 
-	delete [] block;
+  delete [] block;
 }
 
 //-----------------------------------------------------------------------------
 
 void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles )
 {
-	const int trials = 100000;
+  const int trials = 100000;
 
-	if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
 
-	uint8_t * h = new uint8_t[hashsize];
-	uint8_t * k = new uint8_t[keysize];
+  uint8_t * h = new uint8_t[hashsize];
+  uint8_t * k = new uint8_t[keysize];
 
-	double bestcycles = 1e9;
+  double bestcycles = 1e9;
 
-	for(int itrial = 0; itrial < trials; itrial++)
-	{
-		int64_t begin,end;
+  for(int itrial = 0; itrial < trials; itrial++)
+  {
+    int64_t begin,end;
 
-		rand_p(k,keysize);
+    rand_p(k,keysize);
 
-		begin = rdtsc();
-		
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    begin = rdtsc();
+    
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
 
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
 
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
 
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
 
-		end = rdtsc();
+    end = rdtsc();
 
-		//blackhole(*(uint32_t*)(&h));
+    //blackhole(*(uint32_t*)(&h));
 
-		double cycles = double(end-begin) / 64;
-		if(cycles < bestcycles) bestcycles = cycles;
-	}
+    double cycles = double(end-begin) / 64;
+    if(cycles < bestcycles) bestcycles = cycles;
+  }
 
-	double bestbpc = double(keysize) / bestcycles;
-	if(verbose) printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
+  double bestbpc = double(keysize) / bestcycles;
+  if(verbose) printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
 
-	outCycles = bestcycles;
+  outCycles = bestcycles;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Stats.cpp b/Stats.cpp
index ec51c8a..4b722c8 100644
--- a/Stats.cpp
+++ b/Stats.cpp
@@ -6,28 +6,28 @@ double chooseK ( int n, int k )
 {
     if(k > (n - k)) k = n - k;
 
-	double c = 1;
+  double c = 1;
 
-	for(int i = 0; i < k; i++)
-	{
-		double t = double(n-i) / double(i+1);
+  for(int i = 0; i < k; i++)
+  {
+    double t = double(n-i) / double(i+1);
 
-		c *= t;
-	}
+    c *= t;
+  }
 
     return c;
 }
 
 double chooseUpToK ( int n, int k )
 {
-	double c = 0;
+  double c = 0;
 
-	for(int i = 1; i <= k; i++)
-	{
-		c += chooseK(n,i);
-	}
+  for(int i = 1; i <= k; i++)
+  {
+    c += chooseK(n,i);
+  }
 
-	return c;
+  return c;
 }
 
 //-----------------------------------------------------------------------------
@@ -44,29 +44,29 @@ double chooseUpToK ( int n, int k )
 
 double calcScore ( const int * bins, const int bincount, const int keycount )
 {
-	double n = bincount;
-	double k = keycount;
+  double n = bincount;
+  double k = keycount;
 
-	// compute rms value
+  // compute rms value
 
-	double r = 0;
+  double r = 0;
 
-	for(int i = 0; i < bincount; i++)
-	{
-		double b = bins[i];
+  for(int i = 0; i < bincount; i++)
+  {
+    double b = bins[i];
 
-		r += b*b;
-	}
+    r += b*b;
+  }
 
-	r = sqrt(r / n);
+  r = sqrt(r / n);
 
-	// compute fill factor
+  // compute fill factor
 
-	double f = (k*k - 1) / (n*r*r - k);
+  double f = (k*k - 1) / (n*r*r - k);
 
-	// rescale to (0,1) with 0 = good, 1 = bad
+  // rescale to (0,1) with 0 = good, 1 = bad
 
-	return 1 - (f / n);
+  return 1 - (f / n);
 }
 
 
@@ -74,26 +74,26 @@ double calcScore ( const int * bins, const int bincount, const int keycount )
 
 void plot ( double n )
 {
-	double n2 = n * 1;
+  double n2 = n * 1;
 
-	if(n2 < 0) n2 = 0;
+  if(n2 < 0) n2 = 0;
 
-	n2 *= 100;
+  n2 *= 100;
 
-	if(n2 > 64) n2 = 64;
+  if(n2 > 64) n2 = 64;
 
-	int n3 = (int)n2;
+  int n3 = (int)n2;
 
-	if(n3 == 0)
-		printf(".");
-	else
-	{
-		char x = '0' + char(n3);
+  if(n3 == 0)
+    printf(".");
+  else
+  {
+    char x = '0' + char(n3);
 
-		if(x > '9') x = 'X';
+    if(x > '9') x = 'X';
 
-		printf("%c",x);
-	}
+    printf("%c",x);
+  }
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Stats.h b/Stats.h
index dd0188c..3246373 100644
--- a/Stats.h
+++ b/Stats.h
@@ -15,7 +15,7 @@ void plot ( double n );
 
 inline double ExpectedCollisions ( double balls, double bins )
 {
-	return balls - bins + bins * pow(1 - 1/bins,balls);
+  return balls - bins + bins * pow(1 - 1/bins,balls);
 }
 
 double chooseK ( int b, int k );
@@ -25,13 +25,13 @@ double chooseUpToK ( int n, int k );
 
 inline uint32_t f3mix ( uint32_t k )
 {
-	k ^= k >> 16;
-	k *= 0x85ebca6b;
-	k ^= k >> 13;
-	k *= 0xc2b2ae35;
-	k ^= k >> 16;
+  k ^= k >> 16;
+  k *= 0x85ebca6b;
+  k ^= k >> 13;
+  k *= 0xc2b2ae35;
+  k ^= k >> 16;
 
-	return k;
+  return k;
 }
 
 //-----------------------------------------------------------------------------
@@ -39,17 +39,17 @@ inline uint32_t f3mix ( uint32_t k )
 template< typename hashtype >
 int CountCollisions ( std::vector<hashtype> const & hashes )
 {
-	int collcount = 0;
+  int collcount = 0;
 
-	std::vector<hashtype> temp = hashes;
-	std::sort(temp.begin(),temp.end());
+  std::vector<hashtype> temp = hashes;
+  std::sort(temp.begin(),temp.end());
 
-	for(size_t i = 1; i < hashes.size(); i++)
-	{
-		if(temp[i] == temp[i-1]) collcount++;
-	}
+  for(size_t i = 1; i < hashes.size(); i++)
+  {
+    if(temp[i] == temp[i-1]) collcount++;
+  }
 
-	return collcount;
+  return collcount;
 }
 
 //-----------------------------------------------------------------------------
@@ -57,35 +57,35 @@ int CountCollisions ( std::vector<hashtype> const & hashes )
 template < class keytype, typename hashtype >
 int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
 {
-	int collcount = 0;
+  int collcount = 0;
 
-	typedef std::map<hashtype,keytype> htab;
-	htab tab;
+  typedef std::map<hashtype,keytype> htab;
+  htab tab;
 
-	for(size_t i = 1; i < keys.size(); i++)
-	{
-		keytype & k1 = keys[i];
+  for(size_t i = 1; i < keys.size(); i++)
+  {
+    keytype & k1 = keys[i];
 
-		hashtype h = hash(&k1,sizeof(keytype),0);
+    hashtype h = hash(&k1,sizeof(keytype),0);
 
-		typename htab::iterator it = tab.find(h);
+    typename htab::iterator it = tab.find(h);
 
-		if(it != tab.end())
-		{
-			keytype & k2 = (*it).second;
+    if(it != tab.end())
+    {
+      keytype & k2 = (*it).second;
 
-			printf("A: ");
-			printbits(&k1,sizeof(keytype));
-			printf("B: ");
-			printbits(&k2,sizeof(keytype));
-		}
-		else
-		{
+      printf("A: ");
+      printbits(&k1,sizeof(keytype));
+      printf("B: ");
+      printbits(&k2,sizeof(keytype));
+    }
+    else
+    {
       tab.insert( std::make_pair(h,k1) );
-		}
-	}
+    }
+  }
 
-	return collcount;
+  return collcount;
 }
 
 //----------------------------------------------------------------------------
@@ -93,45 +93,45 @@ int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
 template < typename hashtype >
 bool TestHashList ( std::vector<hashtype> & hashes, bool testColl, bool testDist, bool drawDiagram )
 {
-	bool result = true;
+  bool result = true;
 
-	if(testColl)
-	{
-		size_t count = hashes.size();
+  if(testColl)
+  {
+    size_t count = hashes.size();
 
-		double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
+    double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
 
-		printf("Testing collisions   - Expected %8.2f, ",expected);
+    printf("Testing collisions   - Expected %8.2f, ",expected);
 
-		double collcount = 0;
+    double collcount = 0;
 
-		collcount = CountCollisions(hashes);
+    collcount = CountCollisions(hashes);
 
-		printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
+    printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
 
-		// 2x expected collisions = fail
+    // 2x expected collisions = fail
 
-		// #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
-		// of a scale factor, otherwise we fail erroneously if there are a small expected number
-		// of collisions
+    // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
+    // of a scale factor, otherwise we fail erroneously if there are a small expected number
+    // of collisions
 
-		if(double(collcount) / double(expected) > 2.0)
-		{
-			printf(" !!!!! ");
-			result = false;
-		}
+    if(double(collcount) / double(expected) > 2.0)
+    {
+      printf(" !!!!! ");
+      result = false;
+    }
 
-		printf("\n");
-	}
+    printf("\n");
+  }
 
-	//----------
+  //----------
 
-	if(testDist)
-	{
-		TestDistribution(hashes,drawDiagram);
-	}
+  if(testDist)
+  {
+    TestDistribution(hashes,drawDiagram);
+  }
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -139,30 +139,30 @@ bool TestHashList ( std::vector<hashtype> & hashes, bool testColl, bool testDist
 template < class keytype, typename hashtype >
 bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
 {
-	int keycount = (int)keys.size();
+  int keycount = (int)keys.size();
 
-	std::vector<hashtype> hashes;
+  std::vector<hashtype> hashes;
 
-	hashes.resize(keycount);
+  hashes.resize(keycount);
 
-	printf("Hashing");
+  printf("Hashing");
 
-	for(int i = 0; i < keycount; i++)
-	{
-		if(i % (keycount / 10) == 0) printf(".");
+  for(int i = 0; i < keycount; i++)
+  {
+    if(i % (keycount / 10) == 0) printf(".");
 
-		keytype & k = keys[i];
+    keytype & k = keys[i];
 
-		hash(&k,sizeof(k),0,&hashes[i]);
-	}
+    hash(&k,sizeof(k),0,&hashes[i]);
+  }
 
-	printf("\n");
+  printf("\n");
 
-	bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
+  bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
 
-	printf("\n");
+  printf("\n");
 
-	return result;
+  return result;
 }
 
 //-----------------------------------------------------------------------------
@@ -178,52 +178,52 @@ bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool te
 template < typename hashtype >
 double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
 {
-	const int nbytes = sizeof(hashtype);
-	const int hashbits = nbytes * 8;
-	
-	const int nbins = 65536;
+  const int nbytes = sizeof(hashtype);
+  const int hashbits = nbytes * 8;
+  
+  const int nbins = 65536;
 
-	std::vector<int> bins(nbins,0);
+  std::vector<int> bins(nbins,0);
 
-	double worst = 0;
+  double worst = 0;
 
-	for(int a = 0; a < hashbits; a++)
-	{
-		if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
+  for(int a = 0; a < hashbits; a++)
+  {
+    if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
 
-		if(drawDiagram) printf("[");
+    if(drawDiagram) printf("[");
 
-		for(int b = 0; b < hashbits; b++)
-		{
-			if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
+    for(int b = 0; b < hashbits; b++)
+    {
+      if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
 
-			bins.clear();
-			bins.resize(nbins,0);
+      bins.clear();
+      bins.resize(nbins,0);
 
-			for(size_t i = 0; i < hashes.size(); i++)
-			{
-				hashtype & hash = hashes[i];
+      for(size_t i = 0; i < hashes.size(); i++)
+      {
+        hashtype & hash = hashes[i];
 
-				uint32_t pa = window(&hash,sizeof(hash),a,8);
-				uint32_t pb = window(&hash,sizeof(hash),b,8);
+        uint32_t pa = window(&hash,sizeof(hash),a,8);
+        uint32_t pb = window(&hash,sizeof(hash),b,8);
 
-				bins[pa | (pb << 8)]++;
-			}
+        bins[pa | (pb << 8)]++;
+      }
 
-			double s = calcScore(bins,bins.size(),hashes.size());
+      double s = calcScore(bins,bins.size(),hashes.size());
 
-			if(drawDiagram) plot(s);
+      if(drawDiagram) plot(s);
 
-			if(s > worst)
-			{
-				worst = s;
-			}
-		}
+      if(s > worst)
+      {
+        worst = s;
+      }
+    }
 
-		if(drawDiagram) printf("]\n");
-	}
+    if(drawDiagram) printf("]\n");
+  }
 
-	return worst;
+  return worst;
 }
 
 
@@ -233,84 +233,84 @@ double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiag
 template< typename hashtype >
 double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
 {
-	printf("Testing distribution - ");
+  printf("Testing distribution - ");
 
-	if(drawDiagram) printf("\n");
+  if(drawDiagram) printf("\n");
 
-	const int hashbits = sizeof(hashtype) * 8;
+  const int hashbits = sizeof(hashtype) * 8;
 
-	int maxwidth = 20;
+  int maxwidth = 20;
 
-	// We need at least 5 keys per bin to reliably test distribution biases
-	// down to 1%, so don't bother to test sparser distributions than that
+  // We need at least 5 keys per bin to reliably test distribution biases
+  // down to 1%, so don't bother to test sparser distributions than that
 
-	while(double(hashes.size()) / double(1 << maxwidth) < 5.0)
-	{
-		maxwidth--;
-	}
+  while(double(hashes.size()) / double(1 << maxwidth) < 5.0)
+  {
+    maxwidth--;
+  }
 
-	std::vector<int> bins;
-	bins.resize(1 << maxwidth);
+  std::vector<int> bins;
+  bins.resize(1 << maxwidth);
 
-	double worst = 0;
-	int worstStart = -1;
-	int worstWidth = -1;
+  double worst = 0;
+  int worstStart = -1;
+  int worstWidth = -1;
 
-	for(int start = 0; start < hashbits; start++)
-	{
-		int width = maxwidth;
-		int bincount = (1 << width);
+  for(int start = 0; start < hashbits; start++)
+  {
+    int width = maxwidth;
+    int bincount = (1 << width);
 
-		memset(&bins[0],0,sizeof(int)*bincount);
+    memset(&bins[0],0,sizeof(int)*bincount);
 
-		for(size_t j = 0; j < hashes.size(); j++)
-		{
-			hashtype & hash = hashes[j];
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
 
-			uint32_t index = window(&hash,sizeof(hash),start,width);
+      uint32_t index = window(&hash,sizeof(hash),start,width);
 
-			bins[index]++;
-		}
+      bins[index]++;
+    }
 
-		// Test the distribution, then fold the bins in half,
-		// repeat until we're down to 256 bins
+    // Test the distribution, then fold the bins in half,
+    // repeat until we're down to 256 bins
 
-		if(drawDiagram) printf("[");
+    if(drawDiagram) printf("[");
 
-		while(bincount >= 256)
-		{
-			double n = calcScore(&bins[0],bincount,(int)hashes.size());
+    while(bincount >= 256)
+    {
+      double n = calcScore(&bins[0],bincount,(int)hashes.size());
 
-			if(drawDiagram) plot(n);
+      if(drawDiagram) plot(n);
 
-			if(n > worst)
-			{
-				worst = n;
-				worstStart = start;
-				worstWidth = width;
-			}
+      if(n > worst)
+      {
+        worst = n;
+        worstStart = start;
+        worstWidth = width;
+      }
 
-			width--;
-			bincount /= 2;
+      width--;
+      bincount /= 2;
 
-			if(width < 8) break;
+      if(width < 8) break;
 
-			for(int i = 0; i < bincount; i++)
-			{
-				bins[i] += bins[i+bincount];
-			}
-		}
+      for(int i = 0; i < bincount; i++)
+      {
+        bins[i] += bins[i+bincount];
+      }
+    }
 
-		if(drawDiagram) printf("]\n");
-	}
+    if(drawDiagram) printf("]\n");
+  }
 
-	double pct = worst * 100.0;
+  double pct = worst * 100.0;
 
-	printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
-	if(pct >= 1.0) printf(" !!!!! ");
-	printf("\n");
+  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
+  if(pct >= 1.0) printf(" !!!!! ");
+  printf("\n");
 
-	return worst;
+  return worst;
 }
 
 //-----------------------------------------------------------------------------
@@ -319,36 +319,36 @@ double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
 template < typename hashtype >
 void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
 {
-	const int hashbits = sizeof(hashtype) * 8;
-	const int nbins = 65536;
-	
-	std::vector<int> bins(nbins,0);
+  const int hashbits = sizeof(hashtype) * 8;
+  const int nbins = 65536;
+  
+  std::vector<int> bins(nbins,0);
 
-	dworst = -1.0e90;
-	davg = 0;
+  dworst = -1.0e90;
+  davg = 0;
 
-	for(int start = 0; start < hashbits; start += 8)
-	{
-		bins.clear();
-		bins.resize(nbins,0);
+  for(int start = 0; start < hashbits; start += 8)
+  {
+    bins.clear();
+    bins.resize(nbins,0);
 
-		for(size_t j = 0; j < hashes.size(); j++)
-		{
-			hashtype & hash = hashes[j];
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
 
-			uint32_t index = window(&hash,sizeof(hash),start,16);
+      uint32_t index = window(&hash,sizeof(hash),start,16);
 
-			bins[index]++;
-		}
+      bins[index]++;
+    }
 
-		double n = calcScore((int*)bins.begin(),(int)hashes.size(),(int)bins.size());
-		
-		davg += n;
+    double n = calcScore((int*)bins.begin(),(int)bins.size(),(int)hashes.size());
+    
+    davg += n;
 
-		if(n > dworst) dworst = n;
-	}
+    if(n > dworst) dworst = n;
+  }
 
-	davg /= double(hashbits/8);
+  davg /= double(hashbits/8);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
index 3425634..b7094d3 100644
--- a/SuperFastHash.cpp
+++ b/SuperFastHash.cpp
@@ -21,48 +21,48 @@ uint32_t SuperFastHash (const char * data, int len) {
 uint32_t hash = 0, tmp;
 int rem;
 
-	if (len <= 0 || data == NULL) return 0;
+  if (len <= 0 || data == NULL) return 0;
 
-	rem = len & 3;
-	len >>= 2;
+  rem = len & 3;
+  len >>= 2;
 
-	/* Main loop */
-	for (;len > 0; len--) {
-		hash  += get16bits (data);
-		tmp    = (get16bits (data+2) << 11) ^ hash;
-		hash   = (hash << 16) ^ tmp;
-		data  += 2*sizeof (uint16_t);
-		hash  += hash >> 11;
-	}
+  /* Main loop */
+  for (;len > 0; len--) {
+    hash  += get16bits (data);
+    tmp    = (get16bits (data+2) << 11) ^ hash;
+    hash   = (hash << 16) ^ tmp;
+    data  += 2*sizeof (uint16_t);
+    hash  += hash >> 11;
+  }
 
-	/* Handle end cases */
-	switch (rem) {
-		case 3:	hash += get16bits (data);
-				hash ^= hash << 16;
-				hash ^= data[sizeof (uint16_t)] << 18;
-				hash += hash >> 11;
-				break;
-		case 2:	hash += get16bits (data);
-				hash ^= hash << 11;
-				hash += hash >> 17;
-				break;
-		case 1: hash += *data;
-				hash ^= hash << 10;
-				hash += hash >> 1;
-	}
+  /* Handle end cases */
+  switch (rem) {
+    case 3:	hash += get16bits (data);
+        hash ^= hash << 16;
+        hash ^= data[sizeof (uint16_t)] << 18;
+        hash += hash >> 11;
+        break;
+    case 2:	hash += get16bits (data);
+        hash ^= hash << 11;
+        hash += hash >> 17;
+        break;
+    case 1: hash += *data;
+        hash ^= hash << 10;
+        hash += hash >> 1;
+  }
 
-	/* Force "avalanching" of final 127 bits */
-	hash ^= hash << 3;
-	hash += hash >> 5;
-	hash ^= hash << 4;
-	hash += hash >> 17;
-	hash ^= hash << 25;
-	hash += hash >> 6;
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
 
-	return hash;
+  return hash;
 }
 
 void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out )
 {
-	*(uint32_t*)out = SuperFastHash((const char*)key,len);
+  *(uint32_t*)out = SuperFastHash((const char*)key,len);
 }
\ No newline at end of file
diff --git a/Types.cpp b/Types.cpp
index 04d5c6e..44876e2 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -10,7 +10,7 @@ void blackhole ( uint32_t )
 
 uint32_t whitehole ( void )
 {
-	return 0;
+  return 0;
 }
 
 #pragma optimize( "", on ) 
diff --git a/Types.h b/Types.h
index 9e4ae10..db1fc8b 100644
--- a/Types.h
+++ b/Types.h
@@ -24,30 +24,30 @@ class hashfunc
 {
 public:
 
-	hashfunc ( pfHash h ) : m_hash(h)
-	{
-	}
+  hashfunc ( pfHash h ) : m_hash(h)
+  {
+  }
 
-	inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
-	{
-		m_hash(key,len,seed,out);
-	}
+  inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
+  {
+    m_hash(key,len,seed,out);
+  }
 
-	inline operator pfHash ( void ) const
-	{
-		return m_hash;
-	}
+  inline operator pfHash ( void ) const
+  {
+    return m_hash;
+  }
 
-	inline T operator () ( const void * key, const int len, const uint32_t seed ) 
-	{
-		T result;
+  inline T operator () ( const void * key, const int len, const uint32_t seed ) 
+  {
+    T result;
 
-		m_hash(key,len,seed,(uint32_t*)&result);
+    m_hash(key,len,seed,(uint32_t*)&result);
 
-		return result;
-	}
+    return result;
+  }
 
-	pfHash m_hash;
+  pfHash m_hash;
 };
 
 //-----------------------------------------------------------------------------
@@ -57,179 +57,179 @@ class Blob
 {
 public:
 
-	Blob()
-	{
-	}
-
-	Blob ( int x )
-	{
-		for(int i = 0; i < nbytes; i++)
-		{
-			bytes[i] = 0;
-		}
-
-		*(int*)bytes = x;
-	}
-
-	Blob ( const Blob & k )
-	{
-		for(int i = 0; i < nbytes; i++)
-		{
-			bytes[i] = k.bytes[i];
-		}
-	}
-
-	Blob & operator = ( const Blob & k )
-	{
-		for(int i = 0; i < nbytes; i++)
-		{
-			bytes[i] = k.bytes[i];
-		}
-
-		return *this;
-	}
-
-	void set ( const void * blob, int len )
-	{
-		const uint8_t * k = (const uint8_t*)blob;
-
-		len = len > nbytes ? nbytes : len;
-
-		for(int i = 0; i < len; i++)
-		{
-			bytes[i] = k[i];
-		}
-
-		for(int i = len; i < nbytes; i++)
-		{
-			bytes[i] = 0;
-		}
-	}
-
-	uint8_t & operator [] ( int i )
-	{
-		return bytes[i];
-	}
-
-	const uint8_t & operator [] ( int i ) const
-	{
-		return bytes[i];
-	}
-
-	//----------
-	// boolean operations
-	
-	bool operator < ( const Blob & k ) const
-	{
-		for(int i = 0; i < nbytes; i++)
-		{
-			if(bytes[i] < k.bytes[i]) return true;
-			if(bytes[i] > k.bytes[i]) return false;
-		}
-
-		return false;
-	}
-
-	bool operator == ( const Blob & k ) const
-	{
-		for(int i = 0; i < nbytes; i++)
-		{
-			if(bytes[i] != k.bytes[i]) return false;
-		}
-
-		return true;
-	}
-
-	bool operator != ( const Blob & k ) const
-	{
-		return !(*this == k);
-	}
-
-	//----------
-	// bitwise operations
-
-	Blob operator ^ ( const Blob & k ) const 
-	{
-		Blob t;
-
-		for(int i = 0; i < nbytes; i++)
-		{
-			t.bytes[i] = bytes[i] ^ k.bytes[i];
-		}
-
-		return t;
-	}
-
-	Blob & operator ^= ( const Blob & k )
-	{
-		for(int i = 0; i < nbytes; i++)
-		{
-			bytes[i] ^= k.bytes[i];
-		}
-
-		return *this;
-	}
-
-	int operator & ( int x )
-	{
-		return (*(int*)bytes) & x;
-	}
-
-	Blob & operator &= ( const Blob & k )
-	{
-		for(int i = 0; i < nbytes; i++)
-		{
-			bytes[i] &= k.bytes[i];
-		}
-	}
-
-	Blob operator << ( int c )
-	{
-		Blob t = *this;
-
-		lshift(&t.bytes[0],nbytes,c);
-
-		return t;
-	}
-
-	Blob operator >> ( int c )
-	{
-		Blob t = *this;
-
-		rshift(&t.bytes[0],nbytes,c);
-
-		return t;
-	}
-
-	Blob & operator <<= ( int c )
-	{
-		lshift(&bytes[0],nbytes,c);
-
-		return *this;
-	}
-
-	Blob & operator >>= ( int c )
-	{
-		rshift(&bytes[0],nbytes,c);
-
-		return *this;
-	}
-
-	//----------
-	
-	enum
-	{
-		nbits = _bits,
-		nbytes = (_bits+7)/8,
-
-		align4  = (nbytes & 2) ? 0 : 1,
-		align8  = (nbytes & 3) ? 0 : 1,
-		align16 = (nbytes & 4) ? 0 : 1,
-	};
+  Blob()
+  {
+  }
+
+  Blob ( int x )
+  {
+    for(int i = 0; i < nbytes; i++)
+    {
+      bytes[i] = 0;
+    }
+
+    *(int*)bytes = x;
+  }
+
+  Blob ( const Blob & k )
+  {
+    for(int i = 0; i < nbytes; i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+  }
+
+  Blob & operator = ( const Blob & k )
+  {
+    for(int i = 0; i < nbytes; i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+
+    return *this;
+  }
+
+  void set ( const void * blob, int len )
+  {
+    const uint8_t * k = (const uint8_t*)blob;
+
+    len = len > nbytes ? nbytes : len;
+
+    for(int i = 0; i < len; i++)
+    {
+      bytes[i] = k[i];
+    }
+
+    for(int i = len; i < nbytes; i++)
+    {
+      bytes[i] = 0;
+    }
+  }
+
+  uint8_t & operator [] ( int i )
+  {
+    return bytes[i];
+  }
+
+  const uint8_t & operator [] ( int i ) const
+  {
+    return bytes[i];
+  }
+
+  //----------
+  // boolean operations
+  
+  bool operator < ( const Blob & k ) const
+  {
+    for(int i = 0; i < nbytes; i++)
+    {
+      if(bytes[i] < k.bytes[i]) return true;
+      if(bytes[i] > k.bytes[i]) return false;
+    }
+
+    return false;
+  }
+
+  bool operator == ( const Blob & k ) const
+  {
+    for(int i = 0; i < nbytes; i++)
+    {
+      if(bytes[i] != k.bytes[i]) return false;
+    }
+
+    return true;
+  }
+
+  bool operator != ( const Blob & k ) const
+  {
+    return !(*this == k);
+  }
+
+  //----------
+  // bitwise operations
+
+  Blob operator ^ ( const Blob & k ) const 
+  {
+    Blob t;
+
+    for(int i = 0; i < nbytes; i++)
+    {
+      t.bytes[i] = bytes[i] ^ k.bytes[i];
+    }
+
+    return t;
+  }
+
+  Blob & operator ^= ( const Blob & k )
+  {
+    for(int i = 0; i < nbytes; i++)
+    {
+      bytes[i] ^= k.bytes[i];
+    }
+
+    return *this;
+  }
+
+  int operator & ( int x )
+  {
+    return (*(int*)bytes) & x;
+  }
+
+  Blob & operator &= ( const Blob & k )
+  {
+    for(int i = 0; i < nbytes; i++)
+    {
+      bytes[i] &= k.bytes[i];
+    }
+  }
+
+  Blob operator << ( int c )
+  {
+    Blob t = *this;
+
+    lshift(&t.bytes[0],nbytes,c);
+
+    return t;
+  }
+
+  Blob operator >> ( int c )
+  {
+    Blob t = *this;
+
+    rshift(&t.bytes[0],nbytes,c);
+
+    return t;
+  }
+
+  Blob & operator <<= ( int c )
+  {
+    lshift(&bytes[0],nbytes,c);
+
+    return *this;
+  }
+
+  Blob & operator >>= ( int c )
+  {
+    rshift(&bytes[0],nbytes,c);
+
+    return *this;
+  }
+
+  //----------
+  
+  enum
+  {
+    nbits = _bits,
+    nbytes = (_bits+7)/8,
+
+    align4  = (nbytes & 2) ? 0 : 1,
+    align8  = (nbytes & 3) ? 0 : 1,
+    align16 = (nbytes & 4) ? 0 : 1,
+  };
 
 private:
 
-	uint8_t bytes[nbytes];
+  uint8_t bytes[nbytes];
 };
 
 typedef Blob<128> uint128_t;
diff --git a/crc.cpp b/crc.cpp
index 97d84db..76fcfa0 100644
--- a/crc.cpp
+++ b/crc.cpp
@@ -80,21 +80,21 @@ static const uint32_t crc_table[256] = {
 
 void crc32 ( const void * key, int len, uint32_t seed, void * out )
 {
-	uint8_t * buf = (uint8_t*)key;
-	uint32_t crc = seed ^ 0xffffffffL;
+  uint8_t * buf = (uint8_t*)key;
+  uint32_t crc = seed ^ 0xffffffffL;
 
-	while (len >= 8)
-	{
-		DO8(buf);
-		len -= 8;
-	}
+  while (len >= 8)
+  {
+    DO8(buf);
+    len -= 8;
+  }
 
-	while(len--)
-	{
-		DO1(buf);
-	} 
+  while(len--)
+  {
+    DO1(buf);
+  } 
 
-	crc ^= 0xffffffffL;
+  crc ^= 0xffffffffL;
 
-	*(uint32_t*)out = crc;
+  *(uint32_t*)out = crc;
 }
diff --git a/lookup3.cpp b/lookup3.cpp
index 5dd3a42..edf1c9a 100644
--- a/lookup3.cpp
+++ b/lookup3.cpp
@@ -27,46 +27,46 @@
 
 uint32_t lookup3 ( const void * key, int length, uint32_t initval )
 {
-	uint32_t a,b,c;                                          /* internal state */
+  uint32_t a,b,c;                                          /* internal state */
 
-	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
 
-	const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+  const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
 
-	/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
-	while (length > 12)
-	{
-		a += k[0];
-		b += k[1];
-		c += k[2];
-		mix(a,b,c);
-		length -= 12;
-		k += 3;
-	}
+  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+  while (length > 12)
+  {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 12;
+    k += 3;
+  }
 
-	switch(length)
-	{
-		case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-		case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
-		case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
-		case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
-		case 8 : b+=k[1]; a+=k[0]; break;
-		case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
-		case 6 : b+=k[1]&0xffff; a+=k[0]; break;
-		case 5 : b+=k[1]&0xff; a+=k[0]; break;
-		case 4 : a+=k[0]; break;
-		case 3 : a+=k[0]&0xffffff; break;
-		case 2 : a+=k[0]&0xffff; break;
-		case 1 : a+=k[0]&0xff; break;
-		case 0 : { return c; }              /* zero length strings require no mixing */
-	}
+  switch(length)
+  {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+    case 5 : b+=k[1]&0xff; a+=k[0]; break;
+    case 4 : a+=k[0]; break;
+    case 3 : a+=k[0]&0xffffff; break;
+    case 2 : a+=k[0]&0xffff; break;
+    case 1 : a+=k[0]&0xff; break;
+    case 0 : { return c; }              /* zero length strings require no mixing */
+  }
 
-	final(a,b,c);
+  final(a,b,c);
 
-	return c;
+  return c;
 }
 
 void lookup3_test ( const void * key, int len, uint32_t seed, void * out )
 {
-	*(uint32_t*)out = lookup3(key,len,seed);
+  *(uint32_t*)out = lookup3(key,len,seed);
 }
diff --git a/main.cpp b/main.cpp
index 21d8041..13974e6 100644
--- a/main.cpp
+++ b/main.cpp
@@ -8,6 +8,9 @@
 #include <stdio.h>
 #include <time.h>
 
+//-----------------------------------------------------------------------------
+// Configuration. TODO - move these to command-line flags
+
 bool g_testAll = false;
 
 bool g_testSanity      = false;
@@ -23,524 +26,467 @@ bool g_testZeroes      = false;
 bool g_testSeed        = false;
 
 //-----------------------------------------------------------------------------
-
-int64_t g_hashcount = 0;
-int64_t g_bytecount = 0;
-
-void counterhash ( const void * , const int len, const uint32_t , void * out )
-{
-	g_hashcount++;
-	g_bytecount += len;
-
-	*(uint32_t*)out = rand_u32();
-}
-
-//-----------------------------------------------------------------------------
+// This is the list of all hashes that SMHasher can test.
 
 struct HashInfo
 {
-	pfHash hash;
-	int hashbits;
-	const char * name;
-	const char * desc;
+  pfHash hash;
+  int hashbits;
+  uint32_t verification;
+  const char * name;
+  const char * desc;
 };
 
 HashInfo g_hashes[] = 
 {
-	{ counterhash,          32, "count",       "Counts how many times the hash function is called" },
-	{ randhash_32,          32, "rand32",      "Random number generator, 32-bit" },
-	{ randhash_64,          64, "rand64",      "Random number generator, 64-bit" },
-	{ randhash_128,        128, "rand128",     "Random number generator, 128-bit" },
+  { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
 
-	{ crc32,                32, "crc32",       "CRC-32" },
-	{ DoNothingHash,        32, "donothing32", "Do-Nothing Function (only valid for speed test comparison)" },
+  { crc32,                32, 0x5C7DDD1F, "crc32",       "CRC-32" },
 
-	{ md5_32,               32, "md5_32a",     "MD5, first 32 bits of result" },
-	{ sha1_32a,             32, "sha1_32a",    "SHA1, first 32 bits of result" },
+  { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },
+  { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },
 
-	{ FNV,                  32, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
-	{ lookup3_test,         32, "lookup3",     "Bob Jenkins' lookup3" },
-	{ SuperFastHash,        32, "superfast",   "Paul Hsieh's SuperFastHash" },
-	{ MurmurOAAT,           32, "MurmurOAAT",  "Murmur one-at-a-time" },
-	
-	// MurmurHash2
+  { FNV,                  32, 0x2B377407, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
+  { lookup3_test,         32, 0xDEC6FD2F, "lookup3",     "Bob Jenkins' lookup3" },
+  { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
+  { MurmurOAAT,           32, 0x5F424541, "MurmurOAAT",  "Murmur one-at-a-time" },
+  
+  // MurmurHash2
 
-	{ MurmurHash2_test,     32, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
-	{ MurmurHash2A_test,    32, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
-	{ MurmurHash64A_test,   64, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
-	{ MurmurHash64B_test,   64, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
+  { MurmurHash2_test,     32, 0xA6D95DE6, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
+  { MurmurHash2A_test,    32, 0xB79DC030, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
+  { MurmurHash64A_test,   64, 0xDBD7FF4B, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
+  { MurmurHash64B_test,   64, 0x3B861F71, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
 
-	// MurmurHash3
+  // MurmurHash3
 
-	{ MurmurHash3_x86_32,   32, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-	{ MurmurHash3_x86_128, 128, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-	{ MurmurHash3_x64_128, 128, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+  { MurmurHash3_x86_32,   32, 0x3B75AFFD, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_128, 128, 0x78C7F0DB, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x54667393, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
 
 HashInfo * findHash ( const char * name ) 
 {
-	for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
-	{
-		if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
-	}
+  for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  {
+    if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
+  }
+
+  return NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Self-test on startup - verify that all installed hashes work correctly.
+
+void SelfTest ( void )
+{
+  bool pass = true;
+
+  for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  {
+    HashInfo * info = & g_hashes[i];
+
+    pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);
+  }
+
+  if(!pass)
+  {
+    printf("Self-test FAILED!\n");
+
+    for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+    {
+      HashInfo * info = & g_hashes[i];
+
+      pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
+    }
 
-	return NULL;
+    exit(1);
+  }
 }
 
 //----------------------------------------------------------------------------
 
 template < typename hashtype >
-void test ( hashfunc<hashtype> hash, const char * hashname )
+void test ( hashfunc<hashtype> hash, HashInfo * info )
 {
-	const int hashbits = sizeof(hashtype) * 8;
-
-	printf("-------------------------------------------------------------------------------\n");
-	printf("--- Testing %s\n\n",hashname);
-
-	//-----------------------------------------------------------------------------
-	// Sanity tests
-
-	if(g_testSanity || g_testAll)
-	{
-		printf("[[[ Sanity Tests ]]]\n\n");
-
-		QuickBrownFox(hash,hashbits);
-		SanityTest(hash,hashbits);
-		AppendedZeroesTest(hash,hashbits);
-		printf("\n");
-	}
-
-	//-----------------------------------------------------------------------------
-	// Speed tests
-
-	if(g_testSpeed || g_testAll)
-	{
-		printf("[[[ Speed Tests ]]]\n\n");
-
-		BulkSpeedTest(hash);
-		printf("\n");
-
-		for(int i = 1; i < 32; i++)
-		{
-			double cycles;
-
-			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
-		}
-
-    /*
-		for(int i = 32; i <= 2048; i += 32)
-		{
-			double cycles;
-
-
-			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
-		}
-    */
-
-		printf("\n");
-	}
-
-	//-----------------------------------------------------------------------------
-	// Differential tests
-
-	if(g_testDiff || g_testAll)
-	{
-		printf("[[[ Differential Tests ]]]\n\n");
-
-		bool result = true;
-		bool dumpCollisions = false;
-
-		result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
-		result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
-		result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
-
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
-
-	//-----------------------------------------------------------------------------
-	// Avalanche tests.
-	
-	// 2 million reps is enough to measure bias down to ~0.25%
-	
-	if(g_testAvalanche || g_testAll)
-	{
-		printf("[[[ Avalanche Tests ]]]\n\n");
-
-		//const int hashbits = sizeof(hashtype) * 8;
-		bool result = true;
-
-    /*
-		result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
-
-		result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
-    */
-
-		result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<160>, hashtype > (hash,300000);
-
-    /*
-    result &= AvalancheTest< Blob<168>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<176>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<184>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<192>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<200>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<208>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<216>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<224>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<232>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<240>, hashtype > (hash,300000);
-		result &= AvalancheTest< Blob<248>, hashtype > (hash,300000);
-
-		result &= AvalancheTest< Blob<256>, hashtype > (hash,300000);
-    */
-
-		//result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,200000);
-		//result &= AvalancheTest< Blob<768>, hashtype > (hash,2000000);
-
-		// The bit independence test is slow and not particularly useful...
-		//result &= BicTest < Blob<hashbits * 2>, hashtype > ( hash, 1000000 );
-
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
-
-	//-----------------------------------------------------------------------------
-	// Keyset 'Cyclic'
-
-	if(g_testCyclic || g_testAll)
-	{
-		printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
-
-		bool result = true;
-		bool drawDiagram = false;
-
-		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
-		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
-		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
-		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
-		result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
-		
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
-
-	//-----------------------------------------------------------------------------
-	// Keyset 'Sparse'
-
-	if(g_testSparse || g_testAll)
-	{
-		printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
-
-		bool result = true;
-		bool drawDiagram = false;
-
-		result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
-		result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
-		result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
-		result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
-		result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
-		result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram); 
-		result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
-		result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
-
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
-
-	//-----------------------------------------------------------------------------
-	// Keyset 'Permutation'
-
-	if(g_testPermutation || g_testAll)
-	{
-		{
-			// This one breaks lookup3, surprisingly
-
-			printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
-
-			bool result = true;
-			bool drawDiagram = false;
-
-			uint32_t blocks[] =
-			{
-				0x00000000, 
-				
-				0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
-			};
-
-			result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
-			if(!result) printf("*********FAIL*********\n");
-			printf("\n");
-		}
-
-		{
-			printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
-
-			bool result = true;
-			bool drawDiagram = false;
-
-			uint32_t blocks[] =
-			{
-				0x00000000, 
-				
-				0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
-			};
-
-			result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+  const int hashbits = sizeof(hashtype) * 8;
+
+  printf("-------------------------------------------------------------------------------\n");
+  printf("--- Testing %s\n\n",info->name);
+
+  //-----------------------------------------------------------------------------
+  // Sanity tests
+
+  if(g_testSanity || g_testAll)
+  {
+    printf("[[[ Sanity Tests ]]]\n\n");
+
+    VerificationTest(hash,hashbits,info->verification,true);
+    SanityTest(hash,hashbits);
+    AppendedZeroesTest(hash,hashbits);
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Speed tests
+
+  if(g_testSpeed || g_testAll)
+  {
+    printf("[[[ Speed Tests ]]]\n\n");
+
+    BulkSpeedTest(hash);
+    printf("\n");
+
+    for(int i = 1; i < 32; i++)
+    {
+      double cycles;
+
+      TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+    }
+
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Differential tests
+
+  if(g_testDiff || g_testAll)
+  {
+    printf("[[[ Differential Tests ]]]\n\n");
+
+    bool result = true;
+    bool dumpCollisions = false;
+
+    result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
+    result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
+    result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Avalanche tests
+  
+  if(g_testAvalanche || g_testAll)
+  {
+    printf("[[[ Avalanche Tests ]]]\n\n");
+
+    bool result = true;
+
+    result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Cyclic'
+
+  if(g_testCyclic || g_testAll)
+  {
+    printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
+    
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Sparse'
+
+  if(g_testSparse || g_testAll)
+  {
+    printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram); 
+    result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
+    result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Permutation'
+
+  if(g_testPermutation || g_testAll)
+  {
+    {
+      // This one breaks lookup3, surprisingly
 
-			if(!result) printf("*********FAIL*********\n");
-			printf("\n");
-		}
+      printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
 
-		{
-			printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
+      uint32_t blocks[] =
+      {
+        0x00000000, 
+        
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+      };
 
-			bool result = true;
-			bool drawDiagram = false;
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
 
-			uint32_t blocks[] =
-			{
-				0x00000000, 
-				
-				0x80000000,
-			};
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
 
-			result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+    {
+      printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
 
-			if(!result) printf("*********FAIL*********\n");
-			printf("\n");
-		}
+      bool result = true;
+      bool drawDiagram = false;
 
-		{
-			printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
+      uint32_t blocks[] =
+      {
+        0x00000000, 
+        
+        0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
+      };
 
-			bool result = true;
-			bool drawDiagram = false;
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
 
-			uint32_t blocks[] =
-			{
-				0x00000000, 
-				
-				0x00000001,
-			};
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
 
-			result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+    {
+      printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
 
-			if(!result) printf("*********FAIL*********\n");
-			printf("\n");
-		}
+      bool result = true;
+      bool drawDiagram = false;
 
-		{
-			printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
+      uint32_t blocks[] =
+      {
+        0x00000000, 
+        
+        0x80000000,
+      };
 
-			bool result = true;
-			bool drawDiagram = false;
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
 
-			uint32_t blocks[] =
-			{
-				0x00000000, 
-				
-				0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
 
-				0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
-			};
+    {
+      printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
 
-			result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+      bool result = true;
+      bool drawDiagram = false;
 
-			if(!result) printf("*********FAIL*********\n");
-			printf("\n");
-		}
+      uint32_t blocks[] =
+      {
+        0x00000000, 
+        
+        0x00000001,
+      };
 
-		//----------
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
 
-		/*
-		{
-			printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
 
-			bool result = true;
-			bool drawDiagram = false;
+    {
+      printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
 
-			// This very sparse set of blocks is particularly hard for SuperFastHash
+      bool result = true;
+      bool drawDiagram = false;
 
-			uint32_t blocks[] =
-			{
-				0x00000000,
-				0x00000001,
-				0x00000002,
-				
-				0x00000400,
-				0x00008000,
-				
-				0x00080000,
-				0x00200000,
+      uint32_t blocks[] =
+      {
+        0x00000000, 
+        
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
 
-				0x20000000,
-				0x40000000,
-				0x80000000,
-			};
+        0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
+      };
 
-			result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+      result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
 
-			if(!result) printf("*********FAIL*********\n");
-			printf("\n");
-		}
-		*/
-	}
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+  }
 
-	//-----------------------------------------------------------------------------
-	// Keyset 'Window'
+  //-----------------------------------------------------------------------------
+  // Keyset 'Window'
 
-	// Skip distribution test for these - they're too easy to distribute well,
-	// and it generates a _lot_ of testing
+  // Skip distribution test for these - they're too easy to distribute well,
+  // and it generates a _lot_ of testing
 
-	if(g_testWindow || g_testAll)
-	{
-		printf("[[[ Keyset 'Window' Tests ]]]\n\n");
+  if(g_testWindow || g_testAll)
+  {
+    printf("[[[ Keyset 'Window' Tests ]]]\n\n");
 
-		bool result = true;
-		bool testCollision = true;
-		bool testDistribution = false;
-		bool drawDiagram = false;
+    bool result = true;
+    bool testCollision = true;
+    bool testDistribution = false;
+    bool drawDiagram = false;
 
-		result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
+    result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
 
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
 
-	//-----------------------------------------------------------------------------
-	// Keyset 'Text'
+  //-----------------------------------------------------------------------------
+  // Keyset 'Text'
 
-	if(g_testText || g_testAll)
-	{
-		printf("[[[ Keyset 'Text' Tests ]]]\n\n");
+  if(g_testText || g_testAll)
+  {
+    printf("[[[ Keyset 'Text' Tests ]]]\n\n");
 
-		bool result = true;
-		bool drawDiagram = false;
+    bool result = true;
+    bool drawDiagram = false;
 
-		const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
 
-		result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
-		result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
-		result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
+    result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
+    result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
+    result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
 
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
 
-	//-----------------------------------------------------------------------------
-	// Keyset 'Zeroes'
+  //-----------------------------------------------------------------------------
+  // Keyset 'Zeroes'
 
-	if(g_testZeroes || g_testAll)
-	{
-		printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
+  if(g_testZeroes || g_testAll)
+  {
+    printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
 
-		bool result = true;
-		bool drawDiagram = false;
+    bool result = true;
+    bool drawDiagram = false;
 
-		result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
+    result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
 
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
 
-	//-----------------------------------------------------------------------------
-	// Keyset 'Seed'
+  //-----------------------------------------------------------------------------
+  // Keyset 'Seed'
 
-	if(g_testSeed || g_testAll)
-	{
-		printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
+  if(g_testSeed || g_testAll)
+  {
+    printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
 
-		bool result = true;
-		bool drawDiagram = false;
+    bool result = true;
+    bool drawDiagram = false;
 
-		result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
+    result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
 
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
-	}
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
 }
 
 //-----------------------------------------------------------------------------
 
 void testHash ( const char * name )
 {
-	HashInfo * pInfo = findHash(name);
-
-	if(pInfo == NULL)
-	{
-		printf("Invalid hash '%s' specified\n",name);
-		return;
-	}
-	else
-	{
-		if(pInfo->hashbits == 32)
-		{
-			test<uint32_t>( pInfo->hash, pInfo->desc );
-		}
-		else if(pInfo->hashbits == 64)
-		{
-			test<uint64_t>( pInfo->hash, pInfo->desc );
-		}
-		else if(pInfo->hashbits == 128)
-		{
-			test<uint128_t>( pInfo->hash, pInfo->desc );
-		}
-		else if(pInfo->hashbits == 256)
-		{
-			test<uint256_t>( pInfo->hash, pInfo->desc );
-		}
-		else
-		{
-			printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
-		}
-	}
+  HashInfo * pInfo = findHash(name);
+
+  if(pInfo == NULL)
+  {
+    printf("Invalid hash '%s' specified\n",name);
+    return;
+  }
+  else
+  {
+    if(pInfo->hashbits == 32)
+    {
+      test<uint32_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 64)
+    {
+      test<uint64_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 128)
+    {
+      test<uint128_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 256)
+    {
+      test<uint256_t>( pInfo->hash, pInfo );
+    }
+    else
+    {
+      printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
+    }
+  }
 }
 //-----------------------------------------------------------------------------
 
 int main ( int argc, char ** argv )
 {
-	SetAffinity(2);
+  SetAffinity(2);
 
-	int timeBegin = clock();
+  SelfTest();
 
-	g_testAll = true;
+  int timeBegin = clock();
 
-	//g_testSanity = true;
-	//g_testSpeed = true;
-	//g_testAvalanche = true;
-	//g_testCyclic = true;
-	//g_testDiff = true;
-	//g_testSparse = true;
-	//g_testPermutation = true;
-	//g_testZeroes = true;
+  g_testAll = true;
 
-	//testHash("count");
-	//printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount);
+  //g_testSanity = true;
+  //g_testSpeed = true;
+  //g_testAvalanche = true;
+  //g_testCyclic = true;
+  //g_testDiff = true;
+  //g_testSparse = true;
+  //g_testPermutation = true;
+  //g_testZeroes = true;
 
-	testHash("murmur3a");
+  testHash("murmur3a");
 
-	//----------
+  //----------
 
-	int timeEnd = clock();
+  int timeEnd = clock();
 
   printf("\n");
-	printf("Testing took %f seconds\n",double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
+  printf("Testing took %f seconds\n",double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
   printf("-------------------------------------------------------------------------------\n");
-	return 0;
+  return 0;
 }
diff --git a/md5.cpp b/md5.cpp
index 57bcbf3..43b870a 100644
--- a/md5.cpp
+++ b/md5.cpp
@@ -363,20 +363,20 @@ void md5( unsigned char *input, int ilen, unsigned char output[16] )
 
 unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )
 {
-	unsigned int hash[4];
+  unsigned int hash[4];
 
-	md5((unsigned char *)input,len,(unsigned char *)hash);
+  md5((unsigned char *)input,len,(unsigned char *)hash);
 
-	//return hash[0] ^ hash[1] ^ hash[2] ^ hash[3];
+  //return hash[0] ^ hash[1] ^ hash[2] ^ hash[3];
 
-	return hash[0];
+  return hash[0];
 }	
 
 void md5_32            ( const void * key, int len, uint32_t /*seed*/, void * out )
 {
-	unsigned int hash[4];
+  unsigned int hash[4];
 
-	md5((unsigned char*)key,len,(unsigned char*)hash);
+  md5((unsigned char*)key,len,(unsigned char*)hash);
 
-	*(uint32_t*)out = hash[0];
+  *(uint32_t*)out = hash[0];
 }
\ No newline at end of file
diff --git a/pstdint.h b/pstdint.h
index 12c108a..3320264 100644
--- a/pstdint.h
+++ b/pstdint.h
@@ -749,51 +749,51 @@ typedef uint_least32_t uint_fast32_t;
 #define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)
  
 int main () {
-	DECL(I,8)
-	DECL(U,8)
-	DECL(I,16)
-	DECL(U,16)
-	DECL(I,32)
-	DECL(U,32)
+  DECL(I,8)
+  DECL(U,8)
+  DECL(I,16)
+  DECL(U,16)
+  DECL(I,32)
+  DECL(U,32)
 #ifdef INT64_MAX
-	DECL(I,64)
-	DECL(U,64)
-#endif
-	intmax_t imax = INTMAX_C(0);
-	uintmax_t umax = UINTMAX_C(0);
-	char str0[256], str1[256];
-
-	sprintf (str0, "%d %x\n", 0, ~0);
-	
-	sprintf (str1, "%d %x\n",  i8, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
-	sprintf (str1, "%u %x\n",  u8, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
-	sprintf (str1, "%d %x\n",  i16, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
-	sprintf (str1, "%u %x\n",  u16, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);	
-	sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
-	sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
+  DECL(I,64)
+  DECL(U,64)
+#endif
+  intmax_t imax = INTMAX_C(0);
+  uintmax_t umax = UINTMAX_C(0);
+  char str0[256], str1[256];
+
+  sprintf (str0, "%d %x\n", 0, ~0);
+  
+  sprintf (str1, "%d %x\n",  i8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
+  sprintf (str1, "%d %x\n",  i16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);	
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
 #ifdef INT64_MAX	
-	sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
-#endif
-	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
-	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
-	if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);	
-	
-	TESTUMAX(8);
-	TESTUMAX(16);
-	TESTUMAX(32);
+  sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
+#endif
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);	
+  
+  TESTUMAX(8);
+  TESTUMAX(16);
+  TESTUMAX(32);
 #ifdef INT64_MAX
-	TESTUMAX(64);
+  TESTUMAX(64);
 #endif
 
-	return EXIT_SUCCESS;
+  return EXIT_SUCCESS;
 }
 
 #endif
diff --git a/sha1.cpp b/sha1.cpp
index f663d23..fceb463 100644
--- a/sha1.cpp
+++ b/sha1.cpp
@@ -10,10 +10,10 @@ Still 100% Public Domain
 
 Corrected a problem which generated improper hash values on 16 bit machines
 Routine SHA1Update changed from
-	void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
+  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
 len)
 to
-	void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
+  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
 long len)
 
 The 'len' parameter was declared an int which works fine on 32 bit machines.
@@ -172,7 +172,7 @@ void SHA1_Init(SHA1_CTX* context)
     context->state[3] = 0x10325476;
     context->state[4] = 0xC3D2E1F0;
     context->count[0] = 0;
-	context->count[1] = 0;
+  context->count[1] = 0;
 }
 
 
@@ -187,12 +187,12 @@ void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
     context->count[1] += (len >> 29);
 
     if ((j + len) > 63) 
-	{
+  {
         memcpy(&context->buffer[j], data, (i = 64-j));
         SHA1_Transform(context->state, context->buffer);
 
         for ( ; i + 63 < len; i += 64) 
-		{
+    {
             SHA1_Transform(context->state, data + i);
         }
 
@@ -235,15 +235,15 @@ void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
 
 void sha1_32a ( const void * key, int len, uint32_t seed, void * out )
 {
-	SHA1_CTX context;
+  SHA1_CTX context;
 
-	uint8_t digest[20];
+  uint8_t digest[20];
 
-	SHA1_Init(&context);
-	SHA1_Update(&context, (uint8_t*)key, len);
-	SHA1_Final(&context, digest);
+  SHA1_Init(&context);
+  SHA1_Update(&context, (uint8_t*)key, len);
+  SHA1_Final(&context, digest);
 
-	memcpy(out,&digest[0],4);
+  memcpy(out,&digest[0],4);
 }
 
 //-----------------------------------------------------------------------------
@@ -292,7 +292,7 @@ int main(int argc, char** argv)
         SHA1_Init(&context);
         SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k]));
         SHA1_Final(&context, digest);
-	digest_to_hex(digest, output);
+  digest_to_hex(digest, output);
 
         if (strcmp(output, test_results[k])) {
             fprintf(stdout, "FAIL\n");
-- 
cgit v1.2.3


From b379f34cd4dc54e1beb6f74a7ef054b76600d781 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sun, 20 Mar 2011 03:18:20 +0000
Subject: Fix rdtsc errors from core hopping causing bad timing results

git-svn-id: http://smhasher.googlecode.com/svn/trunk@91 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 SpeedTest.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index a4ed8a7..16e183d 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -37,8 +37,11 @@ void BulkSpeedTest ( pfHash hash )
       blackhole(temp[0]);
 
       double cycles = double(end-begin);
-      double bpc = double(blocksize) / cycles;
-      if(bpc > bestbpc) bestbpc = bpc;
+      if(cycles > 0)
+      {
+        double bpc = double(blocksize) / cycles;
+        if(bpc > bestbpc) bestbpc = bpc;
+      }
     }
 
     double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
@@ -94,7 +97,7 @@ void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, doubl
     //blackhole(*(uint32_t*)(&h));
 
     double cycles = double(end-begin) / 64;
-    if(cycles < bestcycles) bestcycles = cycles;
+    if((cycles > 0) && (cycles < bestcycles)) bestcycles = cycles;
   }
 
   double bestbpc = double(keysize) / bestcycles;
-- 
cgit v1.2.3


From 9808b178928a74852eff46dd572b8eaf443faa39 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sun, 20 Mar 2011 04:25:41 +0000
Subject: Remove leftover references to pstdint.h that break Ubuntu build

git-svn-id: http://smhasher.googlecode.com/svn/trunk@92 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 SuperFastHash.cpp | 2 +-
 lookup3.cpp       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
index b7094d3..598c57c 100644
--- a/SuperFastHash.cpp
+++ b/SuperFastHash.cpp
@@ -1,4 +1,4 @@
-#include "pstdint.h"
+#include "Platform.h"
 
 /* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 
    license. See: 
diff --git a/lookup3.cpp b/lookup3.cpp
index edf1c9a..60087f1 100644
--- a/lookup3.cpp
+++ b/lookup3.cpp
@@ -1,6 +1,6 @@
 // lookup3 by Bob Jekins, code is public domain.
 
-#include "pstdint.h"
+#include "Platform.h"
 
 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
 
-- 
cgit v1.2.3


From b39a5f06af1dc12b52072539ca742a14e7f8e519 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sun, 20 Mar 2011 04:29:19 +0000
Subject: And add stdio.h so SuperFastHash has a reference to NULL

git-svn-id: http://smhasher.googlecode.com/svn/trunk@93 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 SuperFastHash.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
index 598c57c..8fd74cc 100644
--- a/SuperFastHash.cpp
+++ b/SuperFastHash.cpp
@@ -1,4 +1,5 @@
 #include "Platform.h"
+#include <stdio.h> // for NULL
 
 /* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 
    license. See: 
-- 
cgit v1.2.3


From f14506757829d5d9cae7f510b71d826fecc55f3f Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sun, 20 Mar 2011 20:27:33 +0000
Subject: Remove stub file

git-svn-id: http://smhasher.googlecode.com/svn/trunk@94 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 CMakeLists.txt     | 1 -
 MurmurHashTest.cpp | 9 ---------
 2 files changed, 10 deletions(-)
 delete mode 100644 MurmurHashTest.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3b3cca6..7a5ecb9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,6 @@ add_library(
   MurmurHashAligned2.cpp
   MurmurHashAligned.cpp
   MurmurHashNeutral2.cpp
-  MurmurHashTest.cpp
   Platform.cpp
   Random.cpp
   sha1.cpp
diff --git a/MurmurHashTest.cpp b/MurmurHashTest.cpp
deleted file mode 100644
index 3bc6a96..0000000
--- a/MurmurHashTest.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "pstdint.h"
-
-uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );
-uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );
-uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
-uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
-- 
cgit v1.2.3


From 3ee4561b1347b8ad7a6cda7210c640d9288df957 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 21 Mar 2011 19:33:01 +0000
Subject: Start work on verification codes

git-svn-id: http://smhasher.googlecode.com/svn/trunk@95 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 DifferentialTest.h |  5 ++++-
 Hashes.cpp         | 12 +++++++-----
 Hashes.h           |  4 +++-
 Types.cpp          |  9 +++++++++
 Types.h            | 13 +++++++++++++
 main.cpp           |  8 ++++----
 6 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/DifferentialTest.h b/DifferentialTest.h
index 0894c0a..b0b1231 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -7,6 +7,7 @@
 #include "Types.h"
 #include "Stats.h"      // for chooseUpToK
 #include "KeysetTest.h" // for SparseKeygenRecurse
+#include "Random.h"
 
 #include <vector>
 #include <algorithm>
@@ -138,6 +139,8 @@ bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
   double testcount = (diffcount * double(reps));
   double expected  = testcount / pow(2.0,double(hashbits));
 
+  Rand r(100);
+
   std::vector<keytype> diffs;
 
   keytype k1,k2;
@@ -150,7 +153,7 @@ bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
   {
     if(i % (reps/10) == 0) printf(".");
 
-    rand_p(&k1,sizeof(k1));
+    r.rand_p(&k1,sizeof(keytype));
     k2 = k1;
 
     hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
diff --git a/Hashes.cpp b/Hashes.cpp
index 890aeb0..aae3db8 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -63,11 +63,11 @@ void DoNothingHash ( const void *, int, uint32_t, void * )
 //-----------------------------------------------------------------------------
 // One-byte-at-a-time hash based on Murmur's mix
 
-void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out )
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed )
 {
   const uint8_t * data = (const uint8_t*)key;
 
-  uint32_t h = seed ^ len;
+  uint32_t h = seed;
 
   for(int i = 0; i < len; i++)
   {
@@ -76,10 +76,12 @@ void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out )
     h ^= h >> 15;
   }
 
-  h *= 0x5bd1e995;
-  h ^= h >> 15;
+  return h;
+}
 
-  *(uint32_t*)out = h;
+void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out )
+{
+	*(uint32_t*)out = MurmurOAAT(key,len,seed);
 }
 
 //----------------------------------------------------------------------------
diff --git a/Hashes.h b/Hashes.h
index 8f39f76..d0b6778 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -31,7 +31,9 @@ void sha1_32a              ( const void * key, int len, uint32_t seed, void * ou
 void FNV                   ( const void * key, int len, uint32_t seed, void * out );
 void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
 void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
-void MurmurOAAT            ( const void * key, int len, uint32_t seed, void * out );
+void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );
+
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
 
 //----------
 // MurmurHash2
diff --git a/Types.cpp b/Types.cpp
index 44876e2..91b617c 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -1,5 +1,7 @@
 #include "Types.h"
 
+uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
+
 //-----------------------------------------------------------------------------
 
 #pragma optimize( "", off )
@@ -15,4 +17,11 @@ uint32_t whitehole ( void )
 
 #pragma optimize( "", on ) 
 
+uint32_t g_verify = 1;
+
+void MixVCode ( const void * blob, int len )
+{
+	g_verify = MurmurOAAT(blob,len,g_verify);
+}
+
 //-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
index db1fc8b..1abb352 100644
--- a/Types.h
+++ b/Types.h
@@ -13,6 +13,19 @@
 void     blackhole ( uint32_t x );
 uint32_t whitehole ( void );
 
+//-----------------------------------------------------------------------------
+// We want to verify that every test produces the same result on every platform
+// To do this, we hash the results of every test to produce an overall
+// verification value for the whole test suite. If two runs produce the same
+// verification value, then every test in both run produced the same results
+
+extern uint32_t g_verify;
+
+// Mix the given blob of data into the verification code
+
+void MixVCode ( const void * blob, int len );
+
+
 //-----------------------------------------------------------------------------
 
 typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
diff --git a/main.cpp b/main.cpp
index 13974e6..43e8c51 100644
--- a/main.cpp
+++ b/main.cpp
@@ -51,7 +51,7 @@ HashInfo g_hashes[] =
   { FNV,                  32, 0x2B377407, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
   { lookup3_test,         32, 0xDEC6FD2F, "lookup3",     "Bob Jenkins' lookup3" },
   { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
-  { MurmurOAAT,           32, 0x5F424541, "MurmurOAAT",  "Murmur one-at-a-time" },
+  { MurmurOAAT_test,      32, 0xF5AC8D0D, "MurmurOAAT",  "Murmur one-at-a-time" },
   
   // MurmurHash2
 
@@ -468,11 +468,11 @@ int main ( int argc, char ** argv )
 
   int timeBegin = clock();
 
-  g_testAll = true;
+  g_testAll = false;
 
   //g_testSanity = true;
   //g_testSpeed = true;
-  //g_testAvalanche = true;
+  g_testAvalanche = true;
   //g_testCyclic = true;
   //g_testDiff = true;
   //g_testSparse = true;
@@ -486,7 +486,7 @@ int main ( int argc, char ** argv )
   int timeEnd = clock();
 
   printf("\n");
-  printf("Testing took %f seconds\n",double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
+  printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
   printf("-------------------------------------------------------------------------------\n");
   return 0;
 }
-- 
cgit v1.2.3


From 7f20a31b739a5465934433593061eb6c64ee9997 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 21 Mar 2011 20:55:06 +0000
Subject: verification code work

git-svn-id: http://smhasher.googlecode.com/svn/trunk@96 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 AvalancheTest.h | 12 ++++++++----
 Bitvec.cpp      | 12 +++++++++---
 CMakeLists.txt  |  2 ++
 KeysetTest.cpp  | 10 +++++++---
 KeysetTest.h    |  4 +++-
 SpeedTest.cpp   | 23 +++++++++++++++++------
 SpeedTest.h     |  4 ++--
 main.cpp        | 43 +++++++++++++++++++++++++++++++++++--------
 8 files changed, 83 insertions(+), 27 deletions(-)

diff --git a/AvalancheTest.h b/AvalancheTest.h
index 966f8b0..30bc6ea 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -25,7 +25,7 @@ double maxBias ( std::vector<int> & counts, int reps );
 //-----------------------------------------------------------------------------
 
 template < typename keytype, typename hashtype >
-void calcBias ( pfHash hash, std::vector<int> & counts, int reps )
+void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
 {
   const int keybytes = sizeof(keytype);
   const int hashbytes = sizeof(hashtype);
@@ -40,7 +40,7 @@ void calcBias ( pfHash hash, std::vector<int> & counts, int reps )
   {
     if(irep % (reps/10) == 0) printf(".");
 
-    rand_p(&K,keybytes);
+    r.rand_p(&K,keybytes);
 
     hash(&K,keybytes,0,&A);
 
@@ -68,6 +68,8 @@ void calcBias ( pfHash hash, std::vector<int> & counts, int reps )
 template < typename keytype, typename hashtype >
 bool AvalancheTest ( pfHash hash, const int reps )
 {
+  Rand r(48273);
+  
   const int keybytes = sizeof(keytype);
   const int hashbytes = sizeof(hashtype);
 
@@ -80,7 +82,7 @@ bool AvalancheTest ( pfHash hash, const int reps )
 
   std::vector<int> bins(keybits*hashbits,0);
 
-  calcBias<keytype,hashtype>(hash,bins,reps);
+  calcBias<keytype,hashtype>(hash,bins,reps,r);
   
   //----------
 
@@ -108,6 +110,8 @@ bool AvalancheTest ( pfHash hash, const int reps )
 template< typename keytype, typename hashtype >
 void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
 {
+  Rand r(11938);
+  
   const int keybytes = sizeof(keytype);
   const int hashbytes = sizeof(hashtype);
   const int hashbits = hashbytes * 8;
@@ -124,7 +128,7 @@ void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias,
       if(irep % (reps/10) == 0) printf(".");
     }
 
-    rand_p(&key,keybytes);
+    r.rand_p(&key,keybytes);
     hash(&key,keybytes,0,&h1);
 
     flipbit(key,keybit);
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 667902d..932b200 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -594,6 +594,8 @@ uint32_t window32 ( void * blob, int len, int start, int count )
 
 bool test_shift ( void )
 {
+  Rand r(1123);
+
   int nbits   = 64;
   int nbytes  = nbits / 8;
   int reps = 10000;
@@ -602,7 +604,7 @@ bool test_shift ( void )
   {
     if(j % (reps/10) == 0) printf(".");
 
-    uint64_t a = rand_u64();
+    uint64_t a = r.rand_u64();
     uint64_t b;
 
     for(int i = 0; i < nbits; i++)
@@ -634,6 +636,8 @@ bool test_shift ( void )
 template < int nbits >
 bool test_window2 ( void )
 {
+  Rand r(83874);
+  
   struct keytype
   {
     uint8_t bytes[nbits/8];
@@ -648,7 +652,7 @@ bool test_window2 ( void )
 
     keytype k;
 
-    rand_p(&k,nbytes);
+    r.rand_p(&k,nbytes);
 
     for(int start = 0; start < nbits; start++)
     {
@@ -671,6 +675,8 @@ bool test_window2 ( void )
 
 bool test_window ( void )
 {
+  Rand r(48402);
+  
   int reps = 10000;
 
   for(int j = 0; j < reps; j++)
@@ -680,7 +686,7 @@ bool test_window ( void )
     int nbits   = 64;
     int nbytes  = nbits / 8;
 
-    uint64_t x = rand_u64();
+    uint64_t x = r.rand_u64();
 
     for(int start = 0; start < nbits; start++)
     {
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a5ecb9..9717f48 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,8 @@ project(SMHasher)
 
 cmake_minimum_required(VERSION 2.4)
 
+set(CMAKE_BUILD_TYPE Release)
+
 add_library(
   SMHasherSupport
   AvalancheTest.cpp
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index a59fda4..dda137c 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -64,6 +64,8 @@ bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool
 bool SanityTest ( pfHash hash, const int hashbits )
 {
   printf("Running sanity check 1");
+  
+  Rand r(883741);
 
   bool result = true;
 
@@ -92,8 +94,8 @@ bool SanityTest ( pfHash hash, const int hashbits )
         uint8_t * key1 = &buffer1[pad];
         uint8_t * key2 = &buffer2[pad+offset];
 
-        rand_p(buffer1,buflen);
-        rand_p(buffer2,buflen);
+        r.rand_p(buffer1,buflen);
+        r.rand_p(buffer2,buflen);
 
         memcpy(key2,key1,len);
 
@@ -147,6 +149,8 @@ bool SanityTest ( pfHash hash, const int hashbits )
 void AppendedZeroesTest ( pfHash hash, const int hashbits )
 {
   printf("Running sanity check 2");
+  
+  Rand r(173994);
 
   const int hashbytes = hashbits/8;
 
@@ -158,7 +162,7 @@ void AppendedZeroesTest ( pfHash hash, const int hashbits )
 
     memset(key,0,sizeof(key));
 
-    rand_p(key,32);
+    r.rand_p(key,32);
 
     uint32_t h1[16];
     uint32_t h2[16];
diff --git a/KeysetTest.h b/KeysetTest.h
index c2a5c33..936e535 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -248,6 +248,8 @@ bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycoun
 {
   printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
 
+  Rand r(483723);
+
   std::vector<hashtype> hashes;
   hashes.resize(keycount);
 
@@ -260,7 +262,7 @@ bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycoun
 
   for(int i = 0; i < keycount; i++)
   {
-    rand_p(cycle,cycleLen);
+    r.rand_p(cycle,cycleLen);
 
     *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
 
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index 16e183d..dc6d7cc 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -2,13 +2,16 @@
 
 #include "Random.h"
 
-#include <stdio.h>  // for printf
+#include <stdio.h>   // for printf
+#include <memory.h>  // for memset
 
 //-----------------------------------------------------------------------------
 // 256k blocks seem to give the best results.
 
-void BulkSpeedTest ( pfHash hash )
+void BulkSpeedTest ( pfHash hash, uint32_t seed )
 {
+  Rand r(seed);
+  
   const int trials = 9999;
   const int blocksize = 256 * 1024;
 
@@ -16,7 +19,7 @@ void BulkSpeedTest ( pfHash hash )
 
   char * block = new char[blocksize + 16];
 
-  rand_p(block,blocksize+16);
+  r.rand_p(block,blocksize+16);
 
   uint32_t temp[16];
 
@@ -53,23 +56,30 @@ void BulkSpeedTest ( pfHash hash )
 
 //-----------------------------------------------------------------------------
 
-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles )
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles )
 {
   const int trials = 100000;
 
   if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+  
+  Rand r(seed);
 
   uint8_t * h = new uint8_t[hashsize];
   uint8_t * k = new uint8_t[keysize];
+  
+  memset(h,0,hashsize);
+  memset(k,0,keysize);
 
   double bestcycles = 1e9;
 
   for(int itrial = 0; itrial < trials; itrial++)
   {
-    int64_t begin,end;
+    volatile int64_t begin,end;
 
     rand_p(k,keysize);
 
+    MixVCode(h,4);
+    
     begin = rdtsc();
     
     hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
@@ -94,7 +104,8 @@ void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, doubl
 
     end = rdtsc();
 
-    //blackhole(*(uint32_t*)(&h));
+    MixVCode(h,4);
+    //printf("0x%08x\n",g_verify);
 
     double cycles = double(end-begin) / 64;
     if((cycles > 0) && (cycles < bestcycles)) bestcycles = cycles;
diff --git a/SpeedTest.h b/SpeedTest.h
index a8f0086..b881a78 100644
--- a/SpeedTest.h
+++ b/SpeedTest.h
@@ -2,7 +2,7 @@
 
 #include "Types.h"
 
-void BulkSpeedTest ( pfHash hash );
-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles );
+void BulkSpeedTest ( pfHash hash, uint32_t seed );
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles );
 
 //-----------------------------------------------------------------------------
diff --git a/main.cpp b/main.cpp
index 43e8c51..77cb771 100644
--- a/main.cpp
+++ b/main.cpp
@@ -137,14 +137,14 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
   {
     printf("[[[ Speed Tests ]]]\n\n");
 
-    BulkSpeedTest(hash);
+    BulkSpeedTest(info->hash,info->verification);
     printf("\n");
 
     for(int i = 1; i < 32; i++)
     {
       double cycles;
 
-      TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+      TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);
     }
 
     printf("\n");
@@ -425,10 +425,28 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
 
 //-----------------------------------------------------------------------------
 
+uint32_t g_inputVCode = 1;
+uint32_t g_outputVCode = 1;
+uint32_t g_resultVCode = 1;
+
+HashInfo * g_hashUnderTest = NULL;
+
+void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
+{
+  g_inputVCode = MurmurOAAT(key,len,g_inputVCode);
+  g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);
+  
+  g_hashUnderTest->hash(key,len,seed,out);
+  
+  g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
+}
+
+//-----------------------------------------------------------------------------
+
 void testHash ( const char * name )
 {
   HashInfo * pInfo = findHash(name);
-
+  
   if(pInfo == NULL)
   {
     printf("Invalid hash '%s' specified\n",name);
@@ -436,9 +454,11 @@ void testHash ( const char * name )
   }
   else
   {
+    g_hashUnderTest = pInfo;
+
     if(pInfo->hashbits == 32)
     {
-      test<uint32_t>( pInfo->hash, pInfo );
+      test<uint32_t>( VerifyHash, pInfo );
     }
     else if(pInfo->hashbits == 64)
     {
@@ -462,6 +482,12 @@ void testHash ( const char * name )
 
 int main ( int argc, char ** argv )
 {
+  if(argc < 2)
+  {
+    printf("Bad args\n");
+    exit(1);
+  }
+  
   SetAffinity(2);
 
   SelfTest();
@@ -470,22 +496,23 @@ int main ( int argc, char ** argv )
 
   g_testAll = false;
 
-  //g_testSanity = true;
-  //g_testSpeed = true;
-  g_testAvalanche = true;
+  g_testSanity = true;
+  g_testSpeed = true;
+  //g_testAvalanche = true;
   //g_testCyclic = true;
   //g_testDiff = true;
   //g_testSparse = true;
   //g_testPermutation = true;
   //g_testZeroes = true;
 
-  testHash("murmur3a");
+  testHash(argv[1]);
 
   //----------
 
   int timeEnd = clock();
 
   printf("\n");
+  printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);
   printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
   printf("-------------------------------------------------------------------------------\n");
   return 0;
-- 
cgit v1.2.3


From 623590de821184aafc3aa6d72b9fd24791884751 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 28 Mar 2011 18:19:31 +0000
Subject: Add faster BICTest variants; add simpler differential distribution
 test; add Crap8 hash for testing; include seed value in verification test;
 test Murmur3_x86_32 by default

git-svn-id: http://smhasher.googlecode.com/svn/trunk@100 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 AvalancheTest.h    | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 DifferentialTest.h |  41 ++++++++++++
 Hashes.cpp         |  21 ++++++
 Hashes.h           |   1 +
 KeysetTest.cpp     |  12 +++-
 main.cpp           |  74 ++++++++++++++++-----
 6 files changed, 319 insertions(+), 22 deletions(-)

diff --git a/AvalancheTest.h b/AvalancheTest.h
index 30bc6ea..4c23369 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -134,7 +134,7 @@ void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias,
     flipbit(key,keybit);
     hash(&key,keybytes,0,&h2);
 
-    keytype d = h1 ^ h2;
+    hashtype d = h1 ^ h2;
 
     for(int out1 = 0; out1 < hashbits; out1++)
     for(int out2 = 0; out2 < hashbits; out2++)
@@ -211,7 +211,7 @@ bool BicTest ( pfHash hash, const int reps )
     double bias;
     int a,b;
     
-    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,false);
+    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
 
     if(bias > maxBias)
     {
@@ -232,3 +232,191 @@ bool BicTest ( pfHash hash, const int reps )
 }
 
 //-----------------------------------------------------------------------------
+// BIC test variant - store all intermediate data in a table, draw diagram
+// afterwards (much faster)
+
+template< typename keytype, typename hashtype >
+void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int pagesize = hashbits*hashbits*4;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  keytype key;
+  hashtype h1,h2;
+
+  std::vector<int> bins(keybits*pagesize,0);
+
+  for(int keybit = 0; keybit < keybits; keybit++)
+  {
+    if(keybit % (keybits/10) == 0) printf(".");
+
+    int * page = &bins[keybit*pagesize];
+
+    for(int irep = 0; irep < reps; irep++)
+    {
+      r.rand_p(&key,keybytes);
+      hash(&key,keybytes,0,&h1);
+      flipbit(key,keybit);
+      hash(&key,keybytes,0,&h2);
+
+      hashtype d = h1 ^ h2;
+
+      for(int out1 = 0; out1 < hashbits-1; out1++)
+      for(int out2 = out1+1; out2 < hashbits; out2++)
+      {
+        int * b = &page[(out1*hashbits+out2)*4];
+
+        uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        b[x]++;
+      }
+    }
+  }
+
+  printf("\n");
+
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  {
+    for(int out2 = out1+1; out2 < hashbits; out2++)
+    {
+      if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+      for(int keybit = 0; keybit < keybits; keybit++)
+      {
+        int * page = &bins[keybit*pagesize];
+        int * bins = &page[(out1*hashbits+out2)*4];
+
+        double bias = 0;
+
+        for(int b = 0; b < 4; b++)
+        {
+          double b2 = double(bins[b]) / double(reps / 2);
+          b2 = fabs(b2 * 2 - 1);
+
+          if(b2 > bias) bias = b2;
+        }
+
+        if(bias > maxBias)
+        {
+          maxBias = bias;
+          maxK = keybit;
+          maxA = out1;
+          maxB = out2;
+        }
+
+        if(verbose) 
+        {
+          if     (bias < 0.01) printf(".");
+          else if(bias < 0.05) printf("o");
+          else if(bias < 0.33) printf("O");
+          else                 printf("X");
+        }
+      }
+
+      // Finished keybit
+
+      if(verbose) printf("\n");
+    }
+
+    if(verbose)
+    {
+      for(int i = 0; i < keybits+12; i++) printf("-");
+      printf("\n");
+    }
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+
+//-----------------------------------------------------------------------------
+// BIC test variant - iterate over output bits, then key bits. No temp storage,
+// but slooooow
+
+template< typename keytype, typename hashtype >
+void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  keytype key;
+  hashtype h1,h2;
+
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  for(int out2 = out1+1; out2 < hashbits; out2++)
+  {
+    if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+    for(int keybit = 0; keybit < keybits; keybit++)
+    {
+      int bins[4] = { 0, 0, 0, 0 };
+
+      for(int irep = 0; irep < reps; irep++)
+      {
+        r.rand_p(&key,keybytes);
+        hash(&key,keybytes,0,&h1);
+        flipbit(key,keybit);
+        hash(&key,keybytes,0,&h2);
+
+        hashtype d = h1 ^ h2;
+
+        uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        bins[b]++;
+      }
+
+      double bias = 0;
+
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[b]) / double(reps / 2);
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxK = keybit;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.05) printf(".");
+        else if(bias < 0.10) printf("o");
+        else if(bias < 0.50) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    // Finished keybit
+
+    if(verbose) printf("\n");
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
index b0b1231..3136cbb 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -237,4 +237,45 @@ void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst,
   avg /= double(diffs.size());
 }
 
+//-----------------------------------------------------------------------------
+// Simpler differential-distribution test - for all 1-bit differentials,
+// generate random key pairs and run full distribution/collision tests on the
+// hash differentials
+
+template < typename keytype, typename hashtype >
+bool DiffDistTest2 ( pfHash hash  )
+{
+  Rand r(857374);
+
+  int keybits = sizeof(keytype) * 8;
+  const int keycount = 256*256*32;
+  keytype k;
+  
+  std::vector<hashtype> hashes(keycount);
+  hashtype h1,h2;
+
+  bool result = true;
+
+  for(int keybit = 0; keybit < keybits; keybit++)
+  {
+    printf("Testing bit %d\n",keybit);
+
+    for(int i = 0; i < keycount; i++)
+    {
+      r.rand_p(&k,sizeof(keytype));
+      
+      hash(&k,sizeof(keytype),0,&h1);
+      flipbit(&k,sizeof(keytype),keybit);
+      hash(&k,sizeof(keytype),0,&h2);
+
+      hashes[i] = h1 ^ h2;
+    }
+
+    result &= TestHashList<hashtype>(hashes,true,true,true);
+    printf("\n");
+  }
+
+  return result;
+}
+
 //----------------------------------------------------------------------------
diff --git a/Hashes.cpp b/Hashes.cpp
index aae3db8..1930bc5 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -132,3 +132,24 @@ uint32_t Bernstein ( const void * key, int len, uint32_t h )
 }
 
 //-----------------------------------------------------------------------------
+// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html
+
+uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {
+  #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }
+  #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }
+  // c8fold: 32x32->64-bit multiply, low half XORed into y, high half into z. c8mix: fold one input word into k/h.
+  const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;
+  uint32_t h = len + seed, k = n + len;
+  uint64_t p;
+  // Mix 8 bytes per pass, then one whole word, then a 1-3 byte tail built bytewise so nothing past key+len is read.
+  while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }
+  if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }
+  if ( len ) { const uint8_t *t = (const uint8_t *)key4; uint32_t w = 0; while ( len ) w = ( w << 8 ) | t[--len]; c8mix( w ) }
+  c8fold( h ^ k, n, k, k )
+  return k;
+}
+
+void Crap8_test ( const void * key, int len, uint32_t seed, void * out ) // adapter giving Crap8 the common (key,len,seed,out) test signature
+{
+  *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed); // the 32-bit result is stored through 'out'
+}
diff --git a/Hashes.h b/Hashes.h
index d0b6778..b5b3c1f 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -32,6 +32,7 @@ void FNV                   ( const void * key, int len, uint32_t seed, void * ou
 void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
 void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
 void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );
+void Crap8_test            ( const void * key, int len, uint32_t seed, void * out );
 
 uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
 
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index dda137c..6519f8b 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -18,17 +18,23 @@ bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool
   memset(hashes,0,hashbytes*256);
   memset(final,0,hashbytes);
 
+  // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... up to length N=255, using
+  // 256-N as the seed
+
   for(int i = 0; i < 256; i++)
   {
     key[i] = (uint8_t)i;
-    
-    hash(key,i,0,&hashes[i*hashbytes]);
+
+    hash(key,i,256-i,&hashes[i*hashbytes]);
   }
 
-  //----------
+  // Then hash the result array
 
   hash(hashes,hashbytes*256,0,final);
 
+  // The first four bytes of that hash, interpreted as a little-endian integer, is our
+  // verification value
+
   uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
 
   delete [] key;
diff --git a/main.cpp b/main.cpp
index 77cb771..ab397e7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -16,7 +16,9 @@ bool g_testAll = false;
 bool g_testSanity      = false;
 bool g_testSpeed       = false;
 bool g_testDiff        = false;
+bool g_testDiffDist    = false;
 bool g_testAvalanche   = false;
+bool g_testBIC         = false;
 bool g_testCyclic      = false;
 bool g_testSparse      = false;
 bool g_testPermutation = false;
@@ -43,28 +45,29 @@ HashInfo g_hashes[] =
   { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
   { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
 
-  { crc32,                32, 0x5C7DDD1F, "crc32",       "CRC-32" },
+  { crc32,                32, 0x3719DB20, "crc32",       "CRC-32" },
 
   { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },
   { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },
 
-  { FNV,                  32, 0x2B377407, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
-  { lookup3_test,         32, 0xDEC6FD2F, "lookup3",     "Bob Jenkins' lookup3" },
+  { FNV,                  32, 0xE3CBBE91, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
+  { lookup3_test,         32, 0x3D83917A, "lookup3",     "Bob Jenkins' lookup3" },
   { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
-  { MurmurOAAT_test,      32, 0xF5AC8D0D, "MurmurOAAT",  "Murmur one-at-a-time" },
+  { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
+  { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
   
   // MurmurHash2
 
-  { MurmurHash2_test,     32, 0xA6D95DE6, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
-  { MurmurHash2A_test,    32, 0xB79DC030, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
-  { MurmurHash64A_test,   64, 0xDBD7FF4B, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
-  { MurmurHash64B_test,   64, 0x3B861F71, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
+  { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
+  { MurmurHash2A_test,    32, 0x7FBD4396, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
+  { MurmurHash64A_test,   64, 0x1F0D3804, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
+  { MurmurHash64B_test,   64, 0xDD537C05, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
 
   // MurmurHash3
 
-  { MurmurHash3_x86_32,   32, 0x3B75AFFD, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-  { MurmurHash3_x86_128, 128, 0x78C7F0DB, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-  { MurmurHash3_x64_128, 128, 0x54667393, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+  { MurmurHash3_x86_32,   32, 0xEA5DFD02, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_128, 128, 0x411C981B, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x04D005BA, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
 
@@ -168,6 +171,20 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
     printf("\n");
   }
 
+  //-----------------------------------------------------------------------------
+  // Differential-distribution tests
+
+  if(g_testDiffDist /*|| g_testAll*/)
+  {
+    printf("[[[ Differential Distribution Tests ]]]\n\n");
+
+    bool result = true;
+
+    result &= DiffDistTest2<uint64_t,hashtype>(hash);
+
+    printf("\n");
+  }
+
   //-----------------------------------------------------------------------------
   // Avalanche tests
   
@@ -201,6 +218,22 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
     printf("\n");
   }
 
+  //-----------------------------------------------------------------------------
+  // Bit Independence Criteria
+
+  if(g_testBIC /*|| g_testAll*/)
+  {
+    printf("[[[ Bit Independence Criteria ]]]\n\n");
+
+    bool result = true;
+
+    //result &= BicTest<uint64_t,hashtype>(hash,2000000);
+    BicTest3<Blob<88>,hashtype>(hash,2000000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
   //-----------------------------------------------------------------------------
   // Keyset 'Cyclic'
 
@@ -482,10 +515,15 @@ void testHash ( const char * name )
 
 int main ( int argc, char ** argv )
 {
+  const char * hashToTest = "murmur3a";
+
   if(argc < 2)
   {
-    printf("Bad args\n");
-    exit(1);
+    printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
+  }
+  else
+  {
+    hashToTest = argv[1];
   }
   
   SetAffinity(2);
@@ -494,18 +532,20 @@ int main ( int argc, char ** argv )
 
   int timeBegin = clock();
 
-  g_testAll = false;
+  g_testAll = true;
 
-  g_testSanity = true;
-  g_testSpeed = true;
+  //g_testSanity = true;
+  //g_testSpeed = true;
   //g_testAvalanche = true;
+  //g_testBIC = true;
   //g_testCyclic = true;
   //g_testDiff = true;
+  //g_testDiffDist = true;
   //g_testSparse = true;
   //g_testPermutation = true;
   //g_testZeroes = true;
 
-  testHash(argv[1]);
+  testHash(hashToTest);
 
   //----------
 
-- 
cgit v1.2.3


From 96601f2cd5d12b4618ecd830e8a155cad18e15dc Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Thu, 31 Mar 2011 02:41:29 +0000
Subject: Add TwoBytes test (not on by default) Remove hash list duplication
 from collision test so we don't use so much RAM

git-svn-id: http://smhasher.googlecode.com/svn/trunk@101 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 KeysetTest.h    | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 SMHasher.vcproj |  4 +++
 Stats.cpp       |  2 +-
 Stats.h         |  7 +++--
 Types.h         |  6 +++++
 main.cpp        | 29 +++++++++++++++++++++
 6 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/KeysetTest.h b/KeysetTest.h
index 936e535..9e41b6f 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -12,6 +12,7 @@
 #include "Random.h"   // for rand_p
 
 #include <algorithm>  // for std::swap
+#include <assert.h>
 
 //-----------------------------------------------------------------------------
 // Sanity tests
@@ -287,6 +288,85 @@ bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycoun
   return result;
 }
 
+//-----------------------------------------------------------------------------
+// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
+
+template < typename hashtype >
+bool TwoBytesTest ( pfHash hash, int maxlen, bool drawDiagram )
+{
+  int keycount = 0;
+
+  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
+
+  keycount *= 255*255;
+
+  for(int i = 2; i <= maxlen; i++) keycount += i*255;
+
+  printf("Keyset 'TwoBytes' - %d keys of up to %d bytes\n",keycount,maxlen);
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+  int cursor = 0;
+
+  uint8_t key[256];
+
+  memset(key,0,256);
+
+  //----------
+  // Add all keys with one non-zero byte
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen; byteA++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      assert(cursor <= keycount);
+      hash(key,keylen,0,&hashes[cursor++]);
+    }
+
+    key[byteA] = 0;
+  }
+
+  //----------
+  // Add all keys with two non-zero bytes
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen-1; byteA++)
+  for(int byteB = byteA+1; byteB < keylen; byteB++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      for(int valB = 1; valB <= 255; valB++)
+      {
+        key[byteB] = (uint8_t)valB;
+        assert(cursor <= keycount);
+        hash(key,keylen,0,&hashes[cursor++]);
+      }
+
+      key[byteB] = 0;
+    }
+
+    key[byteA] = 0;
+  }
+
+  //----------
+  
+  printf("Actually %d keys\n",cursor);
+
+  assert(cursor == keycount);
+
+  bool result = true;
+
+  result &= TestHashList(hashes,true,false,drawDiagram);
+  printf("\n");
+
+  return result;
+}
+
 //-----------------------------------------------------------------------------
 // Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
 // where "core" consists of all possible combinations of the given character
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index 05586f7..bb4125e 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -322,6 +322,10 @@
 		<Filter
 			Name="Hashes"
 			>
+			<File
+				RelativePath=".\CityHash.cpp"
+				>
+			</File>
 			<File
 				RelativePath=".\crc.cpp"
 				>
diff --git a/Stats.cpp b/Stats.cpp
index 4b722c8..55e99fc 100644
--- a/Stats.cpp
+++ b/Stats.cpp
@@ -4,7 +4,7 @@
 
 double chooseK ( int n, int k )
 {
-    if(k > (n - k)) k = n - k;
+  if(k > (n - k)) k = n - k;
 
   double c = 1;
 
diff --git a/Stats.h b/Stats.h
index 3246373..5f60c61 100644
--- a/Stats.h
+++ b/Stats.h
@@ -37,16 +37,15 @@ inline uint32_t f3mix ( uint32_t k )
 //-----------------------------------------------------------------------------
 
 template< typename hashtype >
-int CountCollisions ( std::vector<hashtype> const & hashes )
+int CountCollisions ( std::vector<hashtype> & hashes )
 {
   int collcount = 0;
 
-  std::vector<hashtype> temp = hashes;
-  std::sort(temp.begin(),temp.end());
+  std::sort(hashes.begin(),hashes.end());
 
   for(size_t i = 1; i < hashes.size(); i++)
   {
-    if(temp[i] == temp[i-1]) collcount++;
+    if(hashes[i] == hashes[i-1]) collcount++;
   }
 
   return collcount;
diff --git a/Types.h b/Types.h
index 1abb352..ddb464b 100644
--- a/Types.h
+++ b/Types.h
@@ -102,6 +102,12 @@ public:
     return *this;
   }
 
+  Blob ( uint64_t a, uint64_t b )
+  {
+    const uint64_t words[2] = { a, b };   // 'a' fills the first 8 bytes in memory, 'b' the next 8
+    set(words,sizeof(words));             // hand all 16 bytes to set()
+  }
+
   void set ( const void * blob, int len )
   {
     const uint8_t * k = (const uint8_t*)blob;
diff --git a/main.cpp b/main.cpp
index ab397e7..bc4996c 100644
--- a/main.cpp
+++ b/main.cpp
@@ -20,6 +20,7 @@ bool g_testDiffDist    = false;
 bool g_testAvalanche   = false;
 bool g_testBIC         = false;
 bool g_testCyclic      = false;
+bool g_testTwoBytes    = false;
 bool g_testSparse      = false;
 bool g_testPermutation = false;
 bool g_testWindow      = false;
@@ -254,6 +255,22 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
     printf("\n");
   }
 
+  //-----------------------------------------------------------------------------
+  // Keyset 'TwoBytes'
+
+  if(g_testTwoBytes)
+  {
+    printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= TwoBytesTest<hashtype>(hash,24,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
   //-----------------------------------------------------------------------------
   // Keyset 'Sparse'
 
@@ -539,6 +556,7 @@ int main ( int argc, char ** argv )
   //g_testAvalanche = true;
   //g_testBIC = true;
   //g_testCyclic = true;
+  //g_testTwoBytes = true;
   //g_testDiff = true;
   //g_testDiffDist = true;
   //g_testSparse = true;
@@ -547,6 +565,17 @@ int main ( int argc, char ** argv )
 
   testHash(hashToTest);
 
+  /*
+  for(int i = 0; i < sizeof(g_hashes)/sizeof(HashInfo); i++)
+  {
+    testHash(g_hashes[i].name);
+  }
+  */
+
+  //testHash("murmur3a");
+  //testHash("murmur3c");
+  //testHash("murmur3f");
+
   //----------
 
   int timeEnd = clock();
-- 
cgit v1.2.3


From 603c8781c055989d0123fcb61f60871bd26e095c Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 1 Apr 2011 08:50:06 +0000
Subject: Tweak to Murmur3a (yeah, even though I said it was finalized...)
 Added key-processing callback experiments, may move all keyset tests to use
 it

git-svn-id: http://smhasher.googlecode.com/svn/trunk@102 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Bitvec.cpp      |  46 ++++++++++++++++++----
 Bitvec.h        |  16 +++++---
 KeysetTest.cpp  |  68 +++++++++++++++++++++++++++++++++
 KeysetTest.h    |  75 +++++-------------------------------
 MurmurHash3.cpp |  12 +++---
 SMHasher.vcproj |   8 ++++
 SpeedTest.cpp   |   2 +-
 Stats.h         |  46 +++++++++++++++++++---
 Types.h         | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.cpp        |  38 +++++++++----------
 10 files changed, 316 insertions(+), 111 deletions(-)

diff --git a/Bitvec.cpp b/Bitvec.cpp
index 932b200..2160060 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -14,9 +14,9 @@ void assert ( bool )
 
 //----------------------------------------------------------------------------
 
-void printbits ( void * blob, int len )
+void printbits ( const void * blob, int len )
 {
-  uint8_t * data = (uint8_t*)blob;
+  const uint8_t * data = (const uint8_t *)blob;
 
   printf("[");
   for(int i = 0; i < len; i++)
@@ -37,7 +37,7 @@ void printbits ( void * blob, int len )
   printf("]");
 }
 
-void printbits2 ( uint8_t * k, int nbytes )
+void printbits2 ( const uint8_t * k, int nbytes )
 {
   printf("[");
 
@@ -55,7 +55,7 @@ void printbits2 ( uint8_t * k, int nbytes )
   printf("]");
 }
 
-void printhex32 ( void * blob, int len )
+void printhex32 ( const void * blob, int len )
 {
   assert((len & 3) == 0);
 
@@ -71,7 +71,7 @@ void printhex32 ( void * blob, int len )
   printf("}");
 }
 
-void printbytes ( void * blob, int len )
+void printbytes ( const void * blob, int len )
 {
   uint8_t * d = (uint8_t*)blob;
 
@@ -85,9 +85,41 @@ void printbytes ( void * blob, int len )
   printf(" };");
 }
 
+// Print 'len' bytes starting at 'blob' as two-digit lowercase hex values,
+// each followed by a single space. No brackets or newline are printed.
+void printbytes2 ( const void * blob, int len )
+{
+  // Keep the const qualifier; the buffer is only ever read here.
+  const uint8_t * d = (const uint8_t *)blob;
+
+  for(int i = 0; i < len; i++) printf("%02x ",d[i]);
+}
+
+//-----------------------------------------------------------------------------
+// Bit-level manipulation
+
+// These two are from the "Bit Twiddling Hacks" webpage
+
+uint32_t popcount ( uint32_t v )
+{
+	v -= (v >> 1) & 0x55555555U;                        // each 2-bit field now holds its own bit count
+	v = (v & 0x33333333U) + ((v >> 2) & 0x33333333U);   // add neighbouring pairs into 4-bit fields
+	v = (v + (v >> 4)) & 0x0F0F0F0FU;                   // add neighbouring nibbles into per-byte counts
+
+	return (v * 0x01010101U) >> 24;                     // the multiply sums all four bytes into the top byte
+}
+
+uint32_t parity ( uint32_t v )
+{
+	v ^= (v >> 1);
+	v ^= (v >> 2);                                             // bit 0 of every nibble = parity of that nibble
+	const uint32_t summed = (v & 0x11111111U) * 0x11111111U;   // top nibble = number of odd-parity nibbles
+	return (summed >> 28) & 1U;                                // low bit of that count is the overall parity
+}
+
 //-----------------------------------------------------------------------------
 
-uint32_t getbit ( void * block, int len, uint32_t bit )
+uint32_t getbit ( const void * block, int len, uint32_t bit )
 {
   uint8_t * b = (uint8_t*)block;
 
@@ -99,7 +131,7 @@ uint32_t getbit ( void * block, int len, uint32_t bit )
   return 0;
 }
 
-uint32_t getbit_wrap ( void * block, int len, uint32_t bit )
+uint32_t getbit_wrap ( const void * block, int len, uint32_t bit )
 {
   uint8_t * b = (uint8_t*)block;
 
diff --git a/Bitvec.h b/Bitvec.h
index ac91c36..8a3a1b0 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -6,12 +6,16 @@
 
 //-----------------------------------------------------------------------------
 
-void     printbits   ( void * blob, int len );
-void     printhex32  ( void * blob, int len );
-void     printbytes  ( void * blob, int len );
+void     printbits   ( const void * blob, int len );
+void     printhex32  ( const void * blob, int len );
+void     printbytes  ( const void * blob, int len );
+void     printbytes2 ( const void * blob, int len );
 
-uint32_t getbit      ( void * blob, int len, uint32_t bit );
-uint32_t getbit_wrap ( void * blob, int len, uint32_t bit );
+uint32_t popcount    ( uint32_t v );
+uint32_t parity      ( uint32_t v );
+
+uint32_t getbit      ( const void * blob, int len, uint32_t bit );
+uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );
 
 void     setbit      ( void * blob, int len, uint32_t bit );
 void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );
@@ -23,7 +27,7 @@ void     flipbit     ( void * blob, int len, uint32_t bit );
 int      countbits   ( uint32_t v );
 int      countbits   ( std::vector<uint32_t> & v );
 
-int      countbits   ( void * blob, int len );
+int      countbits   ( const void * blob, int len );
 
 void     invert      ( std::vector<uint32_t> & v );
 
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 6519f8b..f11d512 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -1,7 +1,11 @@
 #include "KeysetTest.h"
 
+#include "Platform.h"
 #include "Random.h"
 
+#include <map>
+#include <set>
+
 //-----------------------------------------------------------------------------
 // This should hopefully be a thorough and uambiguous test of whether a hash
 // is correctly implemented on a given platform
@@ -194,3 +198,67 @@ void AppendedZeroesTest ( pfHash hash, const int hashbits )
 }
 
 //-----------------------------------------------------------------------------
+// Generate all keys of up to N bytes containing two non-zero bytes
+
+void TwoBytesKeygen ( int maxlen, KeyCallback & c )
+{
+  //----------
+  // Compute # of keys
+
+  int keycount = 0;
+
+  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2); // byte-position pairs for each key length
+
+  keycount *= 255*255; // 255 non-zero values for each of the two chosen bytes
+
+  for(int i = 2; i <= maxlen; i++) keycount += i*255; // plus the keys with a single non-zero byte
+
+  printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
+
+  c.reserve(keycount); // tell the callback how many keys to expect before any are delivered
+
+  //----------
+  // Add all keys with one non-zero byte
+
+  uint8_t key[256]; // scratch key; NOTE(review): a maxlen above 256 would index past this buffer
+
+  memset(key,0,256);
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen; byteA++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      c(key,keylen);
+    }
+
+    key[byteA] = 0; // restore the all-zero key before moving to the next position
+  }
+
+  //----------
+  // Add all keys with two non-zero bytes
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen-1; byteA++)
+  for(int byteB = byteA+1; byteB < keylen; byteB++) // byteB > byteA, so each position pair is visited once
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      for(int valB = 1; valB <= 255; valB++)
+      {
+        key[byteB] = (uint8_t)valB;
+        c(key,keylen);
+      }
+
+      key[byteB] = 0; // clear the second byte before the next valA
+    }
+
+    key[byteA] = 0; // clear the first byte before the next position pair
+  }
+}
+
+//-----------------------------------------------------------------------------
diff --git a/KeysetTest.h b/KeysetTest.h
index 9e41b6f..55d5d5f 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -210,7 +210,7 @@ bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testC
 
   bool result = true;
 
-  int testcount = (keybits-windowbits);
+  int testcount = keybits;
 
   printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
 
@@ -223,7 +223,9 @@ bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testC
     for(int i = 0; i < keycount; i++)
     {
       key = i;
-      key = key << minbit;
+      //key = key << minbit;
+
+      lrot(&key,sizeof(keytype),minbit);
 
       hash(&key,sizeof(keytype),0,&hashes[i]);
     }
@@ -291,77 +293,20 @@ bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycoun
 //-----------------------------------------------------------------------------
 // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
 
+void TwoBytesKeygen ( int maxlen, KeyCallback & c );
+
 template < typename hashtype >
-bool TwoBytesTest ( pfHash hash, int maxlen, bool drawDiagram )
+bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
 {
-  int keycount = 0;
-
-  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
-
-  keycount *= 255*255;
-
-  for(int i = 2; i <= maxlen; i++) keycount += i*255;
-
-  printf("Keyset 'TwoBytes' - %d keys of up to %d bytes\n",keycount,maxlen);
-
   std::vector<hashtype> hashes;
-  hashes.resize(keycount);
-  int cursor = 0;
-
-  uint8_t key[256];
-
-  memset(key,0,256);
-
-  //----------
-  // Add all keys with one non-zero byte
-
-  for(int keylen = 2; keylen <= maxlen; keylen++)
-  for(int byteA = 0; byteA < keylen; byteA++)
-  {
-    for(int valA = 1; valA <= 255; valA++)
-    {
-      key[byteA] = (uint8_t)valA;
-
-      assert(cursor <= keycount);
-      hash(key,keylen,0,&hashes[cursor++]);
-    }
-
-    key[byteA] = 0;
-  }
-
-  //----------
-  // Add all keys with two non-zero bytes
-
-  for(int keylen = 2; keylen <= maxlen; keylen++)
-  for(int byteA = 0; byteA < keylen-1; byteA++)
-  for(int byteB = byteA+1; byteB < keylen; byteB++)
-  {
-    for(int valA = 1; valA <= 255; valA++)
-    {
-      key[byteA] = (uint8_t)valA;
 
-      for(int valB = 1; valB <= 255; valB++)
-      {
-        key[byteB] = (uint8_t)valB;
-        assert(cursor <= keycount);
-        hash(key,keylen,0,&hashes[cursor++]);
-      }
+  HashCallback<hashtype> c(hash,hashes);
 
-      key[byteB] = 0;
-    }
-
-    key[byteA] = 0;
-  }
-
-  //----------
-  
-  printf("Actually %d keys\n",cursor);
-
-  assert(cursor == keycount);
+  TwoBytesKeygen(maxlen,c);
 
   bool result = true;
 
-  result &= TestHashList(hashes,true,false,drawDiagram);
+  result &= TestHashList(hashes,true,true,drawDiagram);
   printf("\n");
 
   return result;
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 7a6e435..28aa6a0 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -20,16 +20,16 @@ FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
 FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & k1, 
                            uint32_t & c1, uint32_t & c2 )
 {
-  c1 = c1*5+0x7b7d159c;
-  c2 = c2*5+0x6bce6396;
-
   k1 *= c1; 
-  k1 = ROTL32(k1,11); 
+  k1 = ROTL32(k1,16); 
   k1 *= c2;
 
-  h1 = ROTL32(h1,13);
-  h1 = h1*5+0x52dce729;
   h1 ^= k1;
+  h1 = h1*3+0x52dce729;
+  h1 = ROTL32(h1,15);
+
+  c1 = c1*3+0x7b7d159c;
+  c2 = c2*3+0x6bce6396;
 }
 
 //----------
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index bb4125e..dffe6dc 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -434,6 +434,14 @@
 				RelativePath=".\Bitvec.h"
 				>
 			</File>
+			<File
+				RelativePath=".\Experiments.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\Experiments.h"
+				>
+			</File>
 			<File
 				RelativePath=".\Platform.cpp"
 				>
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index dc6d7cc..a6d131e 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -17,7 +17,7 @@ void BulkSpeedTest ( pfHash hash, uint32_t seed )
 
   printf("Bulk speed test - %d-byte keys\n",blocksize);
 
-  char * block = new char[blocksize + 16];
+  uint8_t * block = new uint8_t[blocksize + 16];
 
   r.rand_p(block,blocksize+16);
 
diff --git a/Stats.h b/Stats.h
index 5f60c61..5106299 100644
--- a/Stats.h
+++ b/Stats.h
@@ -35,9 +35,13 @@ inline uint32_t f3mix ( uint32_t k )
 }
 
 //-----------------------------------------------------------------------------
+// Sort the hash list, count the total number of collisions and return
+// the first N collisions for further processing
 
 template< typename hashtype >
-int CountCollisions ( std::vector<hashtype> & hashes )
+int FindCollisions ( std::vector<hashtype> & hashes, 
+                     HashSet<hashtype> & collisions,
+                     int maxCollisions )
 {
   int collcount = 0;
 
@@ -45,7 +49,15 @@ int CountCollisions ( std::vector<hashtype> & hashes )
 
   for(size_t i = 1; i < hashes.size(); i++)
   {
-    if(hashes[i] == hashes[i-1]) collcount++;
+    if(hashes[i] == hashes[i-1])
+    {
+      collcount++;
+
+      if((int)collisions.size() < maxCollisions)
+      {
+        collisions.insert(hashes[i]);
+      }
+    }
   }
 
   return collcount;
@@ -90,11 +102,10 @@ int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
 //----------------------------------------------------------------------------
 
 template < typename hashtype >
-bool TestHashList ( std::vector<hashtype> & hashes, bool testColl, bool testDist, bool drawDiagram )
+bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )
 {
   bool result = true;
 
-  if(testColl)
   {
     size_t count = hashes.size();
 
@@ -104,10 +115,14 @@ bool TestHashList ( std::vector<hashtype> & hashes, bool testColl, bool testDist
 
     double collcount = 0;
 
-    collcount = CountCollisions(hashes);
+    HashSet<hashtype> collisions;
+
+    collcount = FindCollisions(hashes,collisions,1000);
 
     printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
 
+    if(sizeof(hashtype) == sizeof(uint32_t))
+    {
     // 2x expected collisions = fail
 
     // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
@@ -119,6 +134,17 @@ bool TestHashList ( std::vector<hashtype> & hashes, bool testColl, bool testDist
       printf(" !!!!! ");
       result = false;
     }
+    }
+    else
+    {
+      // For all hashes larger than 32 bits, _any_ collisions are a failure.
+      
+      if(collcount > 0)
+      {
+        printf(" !!!!! ");
+        result = false;
+      }
+    }
 
     printf("\n");
   }
@@ -133,6 +159,16 @@ bool TestHashList ( std::vector<hashtype> & hashes, bool testColl, bool testDist
   return result;
 }
 
+//----------
+
+template < typename hashtype >
+bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )
+{
+  std::vector<hashtype> collisions; // local placeholder; nothing stored in it reaches the caller
+  // The unnamed testColl flag is accepted but ignored; everything else is forwarded unchanged.
+  return TestHashList(hashes,collisions,testDist,drawDiagram);
+}
+
 //-----------------------------------------------------------------------------
 
 template < class keytype, typename hashtype >
diff --git a/Types.h b/Types.h
index ddb464b..bf02288 100644
--- a/Types.h
+++ b/Types.h
@@ -3,6 +3,10 @@
 #include "Platform.h"
 #include "Bitvec.h"
 
+#include <vector>
+#include <map>
+#include <set>
+
 //-----------------------------------------------------------------------------
 // If the optimizer detects that a value in a speed test is constant or unused,
 // the optimizer may remove references to it or otherwise create code that
@@ -30,6 +34,25 @@ void MixVCode ( const void * blob, int len );
 
 typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
 
+struct ByteVec : public std::vector<uint8_t> // owning byte-for-byte copy of a key
+{
+  ByteVec ( const void * key, int len ) // copies 'len' bytes starting at 'key'
+  {
+    resize(len);
+    if(len > 0) memcpy(&front(),key,len); // front() is undefined on an empty vector, so empty keys skip the copy
+  }
+};
+
+template< typename hashtype, typename keytype >
+struct CollisionMap : public std::map< hashtype, std::vector<keytype> > // hash value -> every key recorded under it
+{
+};
+
+template< typename hashtype >
+struct HashSet : public std::set<hashtype> // ordered set of unique hash values
+{
+};
+
 //-----------------------------------------------------------------------------
 
 template < class T >
@@ -63,6 +86,99 @@ public:
   pfHash m_hash;
 };
 
+//-----------------------------------------------------------------------------
+// Key-processing callback objects. Simplifies keyset testing a bit.
+
+struct KeyCallback // base for objects that are handed each key a keyset generator produces
+{
+  KeyCallback() : m_count(0)
+  {
+  }
+
+  virtual void operator() ( const uint8_t * key, int len ) // default handler: only counts the call
+  {
+    m_count++;
+  }
+
+  virtual void reserve ( int keycount ) // default: ignores the expected-key-count hint
+  {
+  };
+
+  int m_count; // calls seen by the base operator(); the overrides in this file do not update it
+};
+
+//----------
+
+template<typename hashtype>
+struct HashCallback : public KeyCallback // hashes every key with seed 0 into a caller-owned vector
+{
+  typedef std::vector<hashtype> hashvec;
+  // Initializers follow member declaration order, the order C++ actually runs them in.
+  HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
+  {
+    m_hashes.clear(); // any hashes already in the caller's vector are discarded
+  }
+
+  virtual void operator () ( const uint8_t * key, int len )
+  {
+    m_hashes.resize(m_hashes.size() + 1); // append one slot...
+
+    m_pfHash(key,len,0,&m_hashes.back()); // ...and let the hash write directly into it
+  }
+
+  virtual void reserve ( int keycount )
+  {
+    m_hashes.reserve(keycount); // pre-allocate capacity for the expected number of keys
+  }
+
+  hashvec & m_hashes;
+  pfHash m_pfHash;
+
+  //----------
+
+private:
+
+  HashCallback & operator = ( const HashCallback & );
+};
+
+//----------
+
+template<typename hashtype>
+struct CollisionCallback : public KeyCallback
+{
+  typedef HashSet<hashtype> hashset;
+  typedef CollisionMap<hashtype,ByteVec> collmap;
+
+  // Binds the hash function, the set of hashes to watch for, and the output map.
+  CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap )
+  : m_pfHash(hash), m_collisions(collisions), m_collmap(cmap)
+  {
+  }
+
+  // Hash 'key' with seed 0 and record a copy of it when its hash is being watched.
+  virtual void operator () ( const uint8_t * key, int len )
+  {
+    hashtype digest;
+    m_pfHash(key,len,0,&digest);
+
+    // Keys whose hash is not in the watched set are ignored.
+    if(m_collisions.find(digest) == m_collisions.end()) return;
+
+    m_collmap[digest].push_back( ByteVec(key,len) );
+  }
+
+  //----------
+
+  pfHash m_pfHash;
+  hashset & m_collisions;
+  collmap & m_collmap;
+
+private:
+
+  // Private: instances hold references and are not meant to be assigned.
+  CollisionCallback & operator = ( const CollisionCallback & c );
+};
+
 //-----------------------------------------------------------------------------
 
 template < int _bits >
diff --git a/main.cpp b/main.cpp
index bc4996c..3d23f21 100644
--- a/main.cpp
+++ b/main.cpp
@@ -66,7 +66,7 @@ HashInfo g_hashes[] =
 
   // MurmurHash3
 
-  { MurmurHash3_x86_32,   32, 0xEA5DFD02, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_32,   32, 0xCB75A3F6, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
   { MurmurHash3_x86_128, 128, 0x411C981B, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
   { MurmurHash3_x64_128, 128, 0x04D005BA, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
@@ -220,9 +220,10 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
   }
 
   //-----------------------------------------------------------------------------
-  // Bit Independence Criteria
+  // Bit Independence Criteria. Interesting, but doesn't tell us much about
+  // collision or distribution.
 
-  if(g_testBIC /*|| g_testAll*/)
+  if(g_testBIC)
   {
     printf("[[[ Bit Independence Criteria ]]]\n\n");
 
@@ -236,7 +237,7 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
   }
 
   //-----------------------------------------------------------------------------
-  // Keyset 'Cyclic'
+  // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."
 
   if(g_testCyclic || g_testAll)
   {
@@ -256,23 +257,28 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
   }
 
   //-----------------------------------------------------------------------------
-  // Keyset 'TwoBytes'
+  // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes
 
-  if(g_testTwoBytes)
+  // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.
+
+  if(g_testTwoBytes || g_testAll)
   {
     printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
 
     bool result = true;
     bool drawDiagram = false;
 
-    result &= TwoBytesTest<hashtype>(hash,24,drawDiagram);
+    for(int i = 4; i <= 20; i += 4)
+    {
+      result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);
+    }
 
     if(!result) printf("*********FAIL*********\n");
     printf("\n");
   }
 
   //-----------------------------------------------------------------------------
-  // Keyset 'Sparse'
+  // Keyset 'Sparse' - keys with all bits 0 except a few
 
   if(g_testSparse || g_testAll)
   {
@@ -295,7 +301,7 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
   }
 
   //-----------------------------------------------------------------------------
-  // Keyset 'Permutation'
+  // Keyset 'Permutation' - all possible combinations of a set of blocks
 
   if(g_testPermutation || g_testAll)
   {
@@ -543,7 +549,7 @@ int main ( int argc, char ** argv )
     hashToTest = argv[1];
   }
   
-  SetAffinity(2);
+  SetAffinity(3);
 
   SelfTest();
 
@@ -561,21 +567,11 @@ int main ( int argc, char ** argv )
   //g_testDiffDist = true;
   //g_testSparse = true;
   //g_testPermutation = true;
+  //g_testWindow = true;
   //g_testZeroes = true;
 
   testHash(hashToTest);
 
-  /*
-  for(int i = 0; i < sizeof(g_hashes)/sizeof(HashInfo); i++)
-  {
-    testHash(g_hashes[i].name);
-  }
-  */
-
-  //testHash("murmur3a");
-  //testHash("murmur3c");
-  //testHash("murmur3f");
-
   //----------
 
   int timeEnd = clock();
-- 
cgit v1.2.3


From b6aa510df7afc9b0a12454b64b59493e5506f4c0 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 1 Apr 2011 19:11:35 +0000
Subject: Add missing <memory.h> to fix gcc builds

git-svn-id: http://smhasher.googlecode.com/svn/trunk@103 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Types.h b/Types.h
index bf02288..a100787 100644
--- a/Types.h
+++ b/Types.h
@@ -3,6 +3,7 @@
 #include "Platform.h"
 #include "Bitvec.h"
 
+#include <memory.h>
 #include <vector>
 #include <map>
 #include <set>
-- 
cgit v1.2.3


From c365c96ae506bdb952b42703ad17558ae034d852 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 1 Apr 2011 21:34:37 +0000
Subject: simpler block mix for murmur3c; add mix-constant-generator code; tweak
 constants for 3c and 3f

git-svn-id: http://smhasher.googlecode.com/svn/trunk@104 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp |  87 +++++++++++++++++++++---------------------
 Types.cpp       | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+), 45 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 28aa6a0..8334435 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -88,54 +88,54 @@ void MurmurHash3_x86_32 ( const void * key, int len,
 } 
 
 //-----------------------------------------------------------------------------
-// This mix is large enough that VC++ refuses to inline it unless we use
-// __forceinline. It's also not all that fast due to register spillage.
+// x86 platforms don't have enough registers to do the c1/c2 mixing step
+// without spilling data onto the stack, so we use inline constants for this
+// block mix.
 
 FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & h2,
                            uint32_t & h3, uint32_t & h4, 
                            uint32_t & k1, uint32_t & k2, 
-                           uint32_t & k3, uint32_t & k4, 
-                           uint32_t & c1, uint32_t & c2 )
+                           uint32_t & k3, uint32_t & k4 )
 {
-  k1 *= c1; 
-  k1  = ROTL32(k1,11); 
-  k1 *= c2;
+  k1 *= 0x239b961b; 
+  k1  = ROTL32(k1,15); 
+  k1 *= 0xab0e9789;
+
   h1 ^= k1;
-  h1 += h2;
-  h1 += h3;
-  h1 += h4;
+  h1 = h1*3+0x561ccd1b;
+  h1 = ROTL32(h1,19);
 
-  h1 = ROTL32(h1,17);
+  k2 *= 0x38b34ae5; 
+  k2  = ROTL32(k2,16);
+  k2 *= 0xa1e38b93;
 
-  k2 *= c2; 
-  k2  = ROTL32(k2,11);
-  k2 *= c1;
   h2 ^= k2;
-  h2 += h1;
+  h2 = h2*3+0x0bcaa747;
+  h2 = ROTL32(h2,17);
 
-  h1 = h1*3+0x52dce729;
-  h2 = h2*3+0x38495ab5;
+  k3 *= 0x4b2f1cc5; 
+  k3  = ROTL32(k3,17); 
+  k3 *= 0x8cd62ad3;
 
-  c1 = c1*5+0x7b7d159c;
-  c2 = c2*5+0x6bce6396;
-
-  k3 *= c1; 
-  k3  = ROTL32(k3,11); 
-  k3 *= c2;
   h3 ^= k3;
-  h3 += h1;
+  h3 = h3*3+0x96cd1c35;
+  h3 = ROTL32(h3,15);
+
+  k4 *= 0x561ad8f1; 
+  k4  = ROTL32(k4,18);
+  k4 *= 0xaac75299;
 
-  k4 *= c2; 
-  k4  = ROTL32(k4,11);
-  k4 *= c1;
   h4 ^= k4;
-  h4 += h1;
+  h4 = h4*3+0x32ac3b17;
+  h4 = ROTL32(h4,13);
 
-  h3 = h3*3+0x52dce729;
-  h4 = h4*3+0x38495ab5;
+  h1 += h2;
+  h1 += h3;
+  h1 += h4;
 
-  c1 = c1*5+0x7b7d159c;
-  c2 = c2*5+0x6bce6396;
+  h2 += h1;
+  h3 += h1;
+  h4 += h1;
 }
 
 //----------
@@ -165,22 +165,19 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
   uint32_t h3 = 0xfcba5b2d ^ seed;
   uint32_t h4 = 0x32452e3e ^ seed;
 
-  uint32_t c1 = 0x95543787;
-  uint32_t c2 = 0x2ad7eb25;
-
   //----------
   // body
 
-  const uint32_t * blocks = (const uint32_t *)(data);
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
 
-  for(int i = 0; i < nblocks; i++)
+  for(int i = -nblocks; i; i++)
   {
     uint32_t k1 = getblock(blocks,i*4+0);
     uint32_t k2 = getblock(blocks,i*4+1);
     uint32_t k3 = getblock(blocks,i*4+2);
     uint32_t k4 = getblock(blocks,i*4+3);
 
-    bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
+    bmix32(h1,h2,h3,h4, k1,k2,k3,k4);
   }
 
   //----------
@@ -213,7 +210,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
   case  3: k1 ^= tail[ 2] << 16;
   case  2: k1 ^= tail[ 1] << 8;
   case  1: k1 ^= tail[ 0] << 0;
-           bmix32(h1,h2,h3,h4, k1,k2,k3,k4, c1,c2);
+           bmix32(h1,h2,h3,h4, k1,k2,k3,k4);
   };
 
   //----------
@@ -255,26 +252,26 @@ FORCE_INLINE void bmix64 ( uint64_t & h1, uint64_t & h2,
                            uint64_t & c1, uint64_t & c2 )
 {
   k1 *= c1; 
-  k1  = ROTL64(k1,23); 
+  k1  = ROTL64(k1,29); 
   k1 *= c2;
 
   k2 *= c2; 
-  k2  = ROTL64(k2,23);
+  k2  = ROTL64(k2,33);
   k2 *= c1;
 
-  h1 = ROTL64(h1,17);
+  h1 = ROTL64(h1,27);
   h1 += h2;
   h1 ^= k1;
 
-  h2 = ROTL64(h2,41);
+  h2 = ROTL64(h2,31);
   h2 += h1;
   h2 ^= k2;
 
   h1 = h1*3+0x52dce729;
   h2 = h2*3+0x38495ab5;
 
-  c1 = c1*5+0x7b7d159c;
-  c2 = c2*5+0x6bce6396;
+  c1 = c1*3+0x7b7d159c;
+  c2 = c2*3+0x6bce6396;
 }
 
 //----------
diff --git a/Types.cpp b/Types.cpp
index 91b617c..34f5e88 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -1,5 +1,7 @@
 #include "Types.h"
 
+#include "Random.h"
+
 uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
 
 //-----------------------------------------------------------------------------
@@ -25,3 +27,116 @@ void MixVCode ( const void * blob, int len )
 }
 
 //-----------------------------------------------------------------------------
+
+bool isprime ( uint32_t x )
+{
+  uint32_t p[] = 
+  {
+    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,
+    103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,
+    199,211,223,227,229,233,239,241,251
+  };
+
+  for(int i=0; i < sizeof(p)/sizeof(uint32_t); i++)
+  { 
+    if((x % p[i]) == 0)
+    {
+      return false;
+    }
+  } 
+
+  for(int i = 257; i < 65536; i += 2) 
+  { 
+    if((x % i) == 0)
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void GenerateMixingConstants ( void )
+{
+  Rand r(8350147);
+
+  int count = 0;
+
+  int trials = 0;
+  int bitfail = 0;
+  int popfail = 0;
+  int matchfail = 0;
+  int primefail = 0;
+
+  //for(uint32_t x = 1; x; x++)
+  while(count < 100)
+  {
+    //if(x % 100000000 == 0) printf(".");
+
+    trials++;
+    uint32_t b = r.rand_u32();
+    //uint32_t b = x;
+
+    //----------
+    // must have exactly 16 set bits
+
+    if(popcount(b) < 16) { b = 0; popfail++; }
+    if(popcount(b) > 16) { b = 0; popfail++; }
+
+    if(b == 0) continue;
+
+    //----------
+    // must have 3-5 bits set per 8-bit window
+
+    for(int i = 0; i < 32; i++)
+    {
+      uint32_t c = ROTL32(b,i) & 0xFF;
+
+      if(popcount(c) < 3) { b = 0; bitfail++; break; }
+      if(popcount(c) > 5) { b = 0; bitfail++; break; }
+    }
+
+    if(b == 0) continue;
+
+    //----------
+    // all 8-bit windows must be different
+
+    uint8_t match[256];
+
+    memset(match,0,256);
+
+    for(int i = 0; i < 32; i++)
+    {
+      uint32_t c = ROTL32(b,i) & 0xFF;
+      
+      if(match[c]) { b = 0; matchfail++; break; }
+
+      match[c] = 1;
+    }
+
+    if(b == 0) continue;
+
+    //----------
+    // must be prime
+
+    if(!isprime(b))
+    {
+      b = 0;
+      primefail++;
+    }
+
+    if(b == 0) continue;
+
+    //----------
+
+    if(b)
+    {
+      printf("0x%08x : 0x%08x\n",b,~b);
+      count++;
+    }
+  }
+
+  printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count);
+}
+
+//-----------------------------------------------------------------------------
-- 
cgit v1.2.3


From b93e1c5302173204dd30103cddfd55a60ee1ea97 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 1 Apr 2011 21:41:17 +0000
Subject: Fix self-test values; remove reference to Experiments.cpp

git-svn-id: http://smhasher.googlecode.com/svn/trunk@105 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 SMHasher.vcproj | 8 --------
 main.cpp        | 6 +++---
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index dffe6dc..bb4125e 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -434,14 +434,6 @@
 				RelativePath=".\Bitvec.h"
 				>
 			</File>
-			<File
-				RelativePath=".\Experiments.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Experiments.h"
-				>
-			</File>
 			<File
 				RelativePath=".\Platform.cpp"
 				>
diff --git a/main.cpp b/main.cpp
index 3d23f21..dea1838 100644
--- a/main.cpp
+++ b/main.cpp
@@ -67,8 +67,8 @@ HashInfo g_hashes[] =
   // MurmurHash3
 
   { MurmurHash3_x86_32,   32, 0xCB75A3F6, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-  { MurmurHash3_x86_128, 128, 0x411C981B, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-  { MurmurHash3_x64_128, 128, 0x04D005BA, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+  { MurmurHash3_x86_128, 128, 0x917EC4EF, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0xD1CAC156, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
 
@@ -538,7 +538,7 @@ void testHash ( const char * name )
 
 int main ( int argc, char ** argv )
 {
-  const char * hashToTest = "murmur3a";
+  const char * hashToTest = "murmur3f";
 
   if(argc < 2)
   {
-- 
cgit v1.2.3


From 9915597709841dcbc213cf0eb5bf8436738208f9 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sat, 2 Apr 2011 00:50:27 +0000
Subject: faster bmix64, 7.5 gigs/sec, passes all tests

make speed test do more reps

git-svn-id: http://smhasher.googlecode.com/svn/trunk@106 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 16 ++++++----------
 SpeedTest.cpp   |  2 +-
 main.cpp        |  2 +-
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 8334435..33eb2e8 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -255,23 +255,19 @@ FORCE_INLINE void bmix64 ( uint64_t & h1, uint64_t & h2,
   k1  = ROTL64(k1,29); 
   k1 *= c2;
 
+  h1 ^= k1;
+  h1 = ROTL64(h1,27);
+  h1 += h2;
+  h1 = h1*3+0x52dce729;
+
   k2 *= c2; 
   k2  = ROTL64(k2,33);
   k2 *= c1;
 
-  h1 = ROTL64(h1,27);
-  h1 += h2;
-  h1 ^= k1;
-
+  h2 ^= k2;
   h2 = ROTL64(h2,31);
   h2 += h1;
-  h2 ^= k2;
-
-  h1 = h1*3+0x52dce729;
   h2 = h2*3+0x38495ab5;
-
-  c1 = c1*3+0x7b7d159c;
-  c2 = c2*3+0x6bce6396;
 }
 
 //----------
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index a6d131e..a450c53 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -12,7 +12,7 @@ void BulkSpeedTest ( pfHash hash, uint32_t seed )
 {
   Rand r(seed);
   
-  const int trials = 9999;
+  const int trials = 29999;
   const int blocksize = 256 * 1024;
 
   printf("Bulk speed test - %d-byte keys\n",blocksize);
diff --git a/main.cpp b/main.cpp
index dea1838..5f8921f 100644
--- a/main.cpp
+++ b/main.cpp
@@ -68,7 +68,7 @@ HashInfo g_hashes[] =
 
   { MurmurHash3_x86_32,   32, 0xCB75A3F6, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
   { MurmurHash3_x86_128, 128, 0x917EC4EF, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-  { MurmurHash3_x64_128, 128, 0xD1CAC156, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x9E20536F, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
 
-- 
cgit v1.2.3


From 2ff5e9b28318d73dbf1ccaa8e03d6d270e1a8292 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Sun, 3 Apr 2011 06:30:51 +0000
Subject: Final final final Murmur3, all variants. I am tired of working on it.
 :)

Unified, simplified, optimized implementation that works well on all platforms and is easy to extend to larger/smaller hash sizes and streaming implementations if needed.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@107 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 258 +++++++++++++++++++++++---------------------------------
 SpeedTest.cpp   |   2 +-
 main.cpp        |  10 +--
 3 files changed, 113 insertions(+), 157 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 33eb2e8..52c4043 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -1,3 +1,7 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
 #include "MurmurHash3.h"
 
 // Note - The x86 and x64 versions do _not_ produce the same results, as the
@@ -15,35 +19,97 @@ FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
   return p[i];
 }
 
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Block mix - mix the key block, combine with hash block, mix the hash block,
+// repeat.
+
+FORCE_INLINE void bmix ( uint32_t & h1, uint32_t & k1, 
+                         uint32_t & c1, uint32_t & c2 )
+{
+  k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+  h1 = ROTL32(h1,13); h1 = h1*5+0xe6546b64;
+}
+
 //----------
 
-FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & k1, 
-                           uint32_t & c1, uint32_t & c2 )
+FORCE_INLINE void bmix ( uint64_t & h1, uint64_t & h2, 
+                         uint64_t & k1, uint64_t & k2, 
+                         uint64_t & c1, uint64_t & c2 )
 {
-  k1 *= c1; 
-  k1 = ROTL32(k1,16); 
-  k1 *= c2;
+  k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
 
-  h1 ^= k1;
-  h1 = h1*3+0x52dce729;
-  h1 = ROTL32(h1,15);
+  h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+  k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+}
+
+//----------
+
+FORCE_INLINE void bmix ( uint32_t & h1, uint32_t & h2,
+                         uint32_t & h3, uint32_t & h4, 
+                         uint32_t & k1, uint32_t & k2, 
+                         uint32_t & k3, uint32_t & k4,
+                         uint32_t & c1, uint32_t & c2, 
+                         uint32_t & c3, uint32_t & c4 )
+{
+  k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+  k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+  k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+  h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+  h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+  h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
 
-  c1 = c1*3+0x7b7d159c;
-  c2 = c2*3+0x6bce6396;
+  return h;
 }
 
 //----------
 
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+
 void MurmurHash3_x86_32 ( const void * key, int len,
                           uint32_t seed, void * out )
 {
   const uint8_t * data = (const uint8_t*)key;
   const int nblocks = len / 4;
 
-  uint32_t h1 = 0x971e137b ^ seed;
+  uint32_t h1 = seed;
 
-  uint32_t c1 = 0x95543787;
-  uint32_t c2 = 0x2ad7eb25;
+  uint32_t c1 = 0xcc9e2d51;
+  uint32_t c2 = 0x1b873593;
 
   //----------
   // body
@@ -54,7 +120,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
   {
     uint32_t k1 = getblock(blocks,i);
 
-    bmix32(h1,k1,c1,c2);
+    bmix(h1,k1,c1,c2);
   }
 
   //----------
@@ -69,7 +135,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
   case 3: k1 ^= tail[2] << 16;
   case 2: k1 ^= tail[1] << 8;
   case 1: k1 ^= tail[0];
-      bmix32(h1,k1,c1,c2);
+          k1 *= c1; k1 = ROTL32(k1,16); k1 *= c2; h1 ^= k1;
   };
 
   //----------
@@ -77,82 +143,12 @@ void MurmurHash3_x86_32 ( const void * key, int len,
 
   h1 ^= len;
 
-  h1 *= 0x85ebca6b;
-  h1 ^= h1 >> 13;
-  h1 *= 0xc2b2ae35;
-  h1 ^= h1 >> 16;
-
-  h1 ^= seed;
+  h1 = fmix(h1);
 
   *(uint32_t*)out = h1;
 } 
 
 //-----------------------------------------------------------------------------
-// x86 platforms don't have enough registers to do the c1/c2 mixing step
-// without spilling data onto the stack, so we use inline constants for this
-// block mix.
-
-FORCE_INLINE void bmix32 ( uint32_t & h1, uint32_t & h2,
-                           uint32_t & h3, uint32_t & h4, 
-                           uint32_t & k1, uint32_t & k2, 
-                           uint32_t & k3, uint32_t & k4 )
-{
-  k1 *= 0x239b961b; 
-  k1  = ROTL32(k1,15); 
-  k1 *= 0xab0e9789;
-
-  h1 ^= k1;
-  h1 = h1*3+0x561ccd1b;
-  h1 = ROTL32(h1,19);
-
-  k2 *= 0x38b34ae5; 
-  k2  = ROTL32(k2,16);
-  k2 *= 0xa1e38b93;
-
-  h2 ^= k2;
-  h2 = h2*3+0x0bcaa747;
-  h2 = ROTL32(h2,17);
-
-  k3 *= 0x4b2f1cc5; 
-  k3  = ROTL32(k3,17); 
-  k3 *= 0x8cd62ad3;
-
-  h3 ^= k3;
-  h3 = h3*3+0x96cd1c35;
-  h3 = ROTL32(h3,15);
-
-  k4 *= 0x561ad8f1; 
-  k4  = ROTL32(k4,18);
-  k4 *= 0xaac75299;
-
-  h4 ^= k4;
-  h4 = h4*3+0x32ac3b17;
-  h4 = ROTL32(h4,13);
-
-  h1 += h2;
-  h1 += h3;
-  h1 += h4;
-
-  h2 += h1;
-  h3 += h1;
-  h4 += h1;
-}
-
-//----------
-// Finalization mix - force all bits of a hash block to avalanche
-
-// avalanches all bits to within 0.25% bias
-
-FORCE_INLINE uint32_t fmix32 ( uint32_t h )
-{
-  h ^= h >> 16;
-  h *= 0x85ebca6b;
-  h ^= h >> 13;
-  h *= 0xc2b2ae35;
-  h ^= h >> 16;
-
-  return h;
-}
 
 void MurmurHash3_x86_128 ( const void * key, const int len,
                            uint32_t seed, void * out )
@@ -160,10 +156,15 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
   const uint8_t * data = (const uint8_t*)key;
   const int nblocks = len / 16;
 
-  uint32_t h1 = 0x8de1c3ac ^ seed;
-  uint32_t h2 = 0xbab98226 ^ seed;
-  uint32_t h3 = 0xfcba5b2d ^ seed;
-  uint32_t h4 = 0x32452e3e ^ seed;
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  uint32_t c1 = 0x239b961b; 
+  uint32_t c2 = 0xab0e9789;
+  uint32_t c3 = 0x38b34ae5; 
+  uint32_t c4 = 0xa1e38b93;
 
   //----------
   // body
@@ -177,7 +178,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
     uint32_t k3 = getblock(blocks,i*4+2);
     uint32_t k4 = getblock(blocks,i*4+3);
 
-    bmix32(h1,h2,h3,h4, k1,k2,k3,k4);
+    bmix(h1,h2,h3,h4, k1,k2,k3,k4, c1, c2, c3, c4);
   }
 
   //----------
@@ -195,22 +196,25 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
   case 15: k4 ^= tail[14] << 16;
   case 14: k4 ^= tail[13] << 8;
   case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
 
   case 12: k3 ^= tail[11] << 24;
   case 11: k3 ^= tail[10] << 16;
   case 10: k3 ^= tail[ 9] << 8;
   case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
 
   case  8: k2 ^= tail[ 7] << 24;
   case  7: k2 ^= tail[ 6] << 16;
   case  6: k2 ^= tail[ 5] << 8;
   case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
 
   case  4: k1 ^= tail[ 3] << 24;
   case  3: k1 ^= tail[ 2] << 16;
   case  2: k1 ^= tail[ 1] << 8;
   case  1: k1 ^= tail[ 0] << 0;
-           bmix32(h1,h2,h3,h4, k1,k2,k3,k4);
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
   };
 
   //----------
@@ -221,10 +225,10 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
   h1 += h2; h1 += h3; h1 += h4;
   h2 += h1; h3 += h1; h4 += h1;
 
-  h1 = fmix32(h1);
-  h2 = fmix32(h2);
-  h3 = fmix32(h3);
-  h4 = fmix32(h4);
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+  h3 = fmix(h3);
+  h4 = fmix(h4);
 
   h1 += h2; h1 += h3; h1 += h4;
   h2 += h1; h3 += h1; h4 += h1;
@@ -236,55 +240,6 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
 }
 
 //-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
-{
-  return p[i];
-}
-
-//----------
-// Block mix - combine the key bits with the hash bits and scramble everything
-
-FORCE_INLINE void bmix64 ( uint64_t & h1, uint64_t & h2, 
-                           uint64_t & k1, uint64_t & k2, 
-                           uint64_t & c1, uint64_t & c2 )
-{
-  k1 *= c1; 
-  k1  = ROTL64(k1,29); 
-  k1 *= c2;
-
-  h1 ^= k1;
-  h1 = ROTL64(h1,27);
-  h1 += h2;
-  h1 = h1*3+0x52dce729;
-
-  k2 *= c2; 
-  k2  = ROTL64(k2,33);
-  k2 *= c1;
-
-  h2 ^= k2;
-  h2 = ROTL64(h2,31);
-  h2 += h1;
-  h2 = h2*3+0x38495ab5;
-}
-
-//----------
-// Finalization mix - avalanches all bits to within 0.05% bias
-
-FORCE_INLINE uint64_t fmix64 ( uint64_t k )
-{
-  k ^= k >> 33;
-  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
-  k ^= k >> 33;
-  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
-  k ^= k >> 33;
-
-  return k;
-}
-
-//----------
 
 void MurmurHash3_x64_128 ( const void * key, const int len,
                            const uint32_t seed, void * out )
@@ -292,8 +247,8 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   const uint8_t * data = (const uint8_t*)key;
   const int nblocks = len / 16;
 
-  uint64_t h1 = BIG_CONSTANT(0x9368e53c2f6af274) ^ seed;
-  uint64_t h2 = BIG_CONSTANT(0x586dcd208f7cd3fd) ^ seed;
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
 
   uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
   uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
@@ -308,7 +263,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
     uint64_t k1 = getblock(blocks,i*2+0);
     uint64_t k2 = getblock(blocks,i*2+1);
 
-    bmix64(h1,h2,k1,k2,c1,c2);
+    bmix(h1,h2,k1,k2,c1,c2);
   }
 
   //----------
@@ -328,6 +283,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   case 11: k2 ^= uint64_t(tail[10]) << 16;
   case 10: k2 ^= uint64_t(tail[ 9]) << 8;
   case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
 
   case  8: k1 ^= uint64_t(tail[ 7]) << 56;
   case  7: k1 ^= uint64_t(tail[ 6]) << 48;
@@ -337,7 +293,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   case  3: k1 ^= uint64_t(tail[ 2]) << 16;
   case  2: k1 ^= uint64_t(tail[ 1]) << 8;
   case  1: k1 ^= uint64_t(tail[ 0]) << 0;
-           bmix64(h1,h2,k1,k2,c1,c2);
+           k1 *= c1; k1  = ROTL64(k1,29); k1 *= c2; h1 ^= k1;
   };
 
   //----------
@@ -348,8 +304,8 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   h1 += h2;
   h2 += h1;
 
-  h1 = fmix64(h1);
-  h2 = fmix64(h2);
+  h1 = fmix(h1);
+  h2 = fmix(h2);
 
   h1 += h2;
   h2 += h1;
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index a450c53..d0bba8b 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -58,7 +58,7 @@ void BulkSpeedTest ( pfHash hash, uint32_t seed )
 
 void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles )
 {
-  const int trials = 100000;
+  const int trials = 300000;
 
   if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
   
diff --git a/main.cpp b/main.cpp
index 5f8921f..4574015 100644
--- a/main.cpp
+++ b/main.cpp
@@ -66,9 +66,9 @@ HashInfo g_hashes[] =
 
   // MurmurHash3
 
-  { MurmurHash3_x86_32,   32, 0xCB75A3F6, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-  { MurmurHash3_x86_128, 128, 0x917EC4EF, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-  { MurmurHash3_x64_128, 128, 0x9E20536F, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+  { MurmurHash3_x86_32,   32, 0x3252D141, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_128, 128, 0x13C7ED69, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x33949085, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
 
@@ -119,7 +119,7 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
   const int hashbits = sizeof(hashtype) * 8;
 
   printf("-------------------------------------------------------------------------------\n");
-  printf("--- Testing %s\n\n",info->name);
+  printf("--- Testing %s (%s)\n\n",info->name,info->desc);
 
   //-----------------------------------------------------------------------------
   // Sanity tests
@@ -538,7 +538,7 @@ void testHash ( const char * name )
 
 int main ( int argc, char ** argv )
 {
-  const char * hashToTest = "murmur3f";
+  const char * hashToTest = "murmur3a";
 
   if(argc < 2)
   {
-- 
cgit v1.2.3


From a6248fc797e4a4491f58c345bad6973fe67ef4b1 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 4 Apr 2011 19:18:44 +0000
Subject: Add #include <stdio.h> to Types.cpp

git-svn-id: http://smhasher.googlecode.com/svn/trunk@118 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Types.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Types.cpp b/Types.cpp
index 34f5e88..43544f2 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -2,6 +2,8 @@
 
 #include "Random.h"
 
+#include <stdio.h>
+
 uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
 
 //-----------------------------------------------------------------------------
-- 
cgit v1.2.3


From 6bde2782cde84aea2002eed32ffd12c49d72544d Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 4 Apr 2011 22:42:08 +0000
Subject: Add improved timing code that attempts to filter out spurious timing
 results

git-svn-id: http://smhasher.googlecode.com/svn/trunk@119 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Platform.h    |   2 +
 SpeedTest.cpp | 266 ++++++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 197 insertions(+), 71 deletions(-)

diff --git a/Platform.h b/Platform.h
index f15580e..8bb0d58 100644
--- a/Platform.h
+++ b/Platform.h
@@ -11,6 +11,7 @@ void SetAffinity ( int cpu );
 #if defined(_MSC_VER)
 
 #define FORCE_INLINE	__forceinline
+#define	NEVER_INLINE  __declspec(noinline)
 
 #include <stdlib.h>
 #include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'
@@ -40,6 +41,7 @@ void SetAffinity ( int cpu );
 #include <stdint.h>
 
 #define	FORCE_INLINE __attribute__((always_inline))
+#define	NEVER_INLINE __attribute__((noinline))
 
 inline uint32_t rotl32 ( uint32_t x, int8_t r )
 {
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index d0bba8b..9012ae3 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -4,117 +4,241 @@
 
 #include <stdio.h>   // for printf
 #include <memory.h>  // for memset
+#include <math.h>    // for sqrt
+#include <algorithm> // for sort
 
 //-----------------------------------------------------------------------------
-// 256k blocks seem to give the best results.
+// We view our timing values as a series of random variables V that has been
+// contaminated with occasional outliers due to cache misses, thread
+// preemption, etcetera. To filter out the outliers, we search for the largest
+// subset of V such that all its values are within three standard deviations
+// of the mean.
 
-void BulkSpeedTest ( pfHash hash, uint32_t seed )
+double CalcMean ( std::vector<double> & v )
 {
-  Rand r(seed);
+  double mean = 0;
   
-  const int trials = 29999;
-  const int blocksize = 256 * 1024;
-
-  printf("Bulk speed test - %d-byte keys\n",blocksize);
-
-  uint8_t * block = new uint8_t[blocksize + 16];
+  for(int i = 0; i < (int)v.size(); i++)
+  {
+    mean += v[i];
+  }
+  
+  mean /= double(v.size());
+  
+  return mean;
+}
 
-  r.rand_p(block,blocksize+16);
+double CalcMean ( std::vector<double> & v, int a, int b )
+{
+  double mean = 0;
+  
+  for(int i = a; i <= b; i++)
+  {
+    mean += v[i];
+  }
+  
+  mean /= (b-a+1);
+  
+  return mean;
+}
 
-  uint32_t temp[16];
+double CalcStdv ( std::vector<double> & v, int a, int b )
+{
+  double mean = CalcMean(v,a,b);
 
-  for(int align = 0; align < 8; align++)
+  double stdv = 0;
+  
+  for(int i = a; i <= b; i++)
   {
-    double bestbpc = 0;
-
-    for(int itrial = 0; itrial < trials; itrial++)
-    {
-      int64_t begin,end;
+    double x = v[i] - mean;
+    
+    stdv += x*x;
+  }
+  
+  stdv = sqrt(stdv / (b-a+1));
+  
+  return stdv;
+}
 
-      begin = rdtsc();
+// Return true if the largest value in v[0,len) is more than three
+// standard deviations from the mean
 
-      hash(block + align,blocksize,itrial,temp);
+bool ContainsOutlier ( std::vector<double> & v, int len )
+{
+  double mean = 0;
+  
+  for(int i = 0; i < len; i++)
+  {
+    mean += v[i];
+  }
+  
+  mean /= double(len);
+  
+  double stdv = 0;
+  
+  for(int i = 0; i < len; i++)
+  {
+    double x = v[i] - mean;
+    stdv += x*x;
+  }
+  
+  stdv = sqrt(stdv / double(len));
 
-      end = rdtsc();
+  double cutoff = mean + stdv*3;
+  
+  return v[len-1] > cutoff;  
+}
 
-      blackhole(temp[0]);
+// Do a binary search to find the largest subset of v that does not contain
+// outliers.
 
-      double cycles = double(end-begin);
-      if(cycles > 0)
-      {
-        double bpc = double(blocksize) / cycles;
-        if(bpc > bestbpc) bestbpc = bpc;
-      }
+void FilterOutliers ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+  
+  int len = 0;
+  
+  for(int x = 0x40000000; x; x = x >> 1 )
+  {
+    if((len | x) >= v.size()) continue;
+    
+    if(!ContainsOutlier(v,len | x))
+    {
+      len |= x;
     }
-
-    double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
-    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
   }
+  
+  v.resize(len);
+}
 
-  delete [] block;
+// Iteratively tighten the set to find a subset that does not contain
+// outliers. I'm not positive this works correctly in all cases.
+
+void FilterOutliers2 ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+  
+  int a = 0;
+  int b = (int)(v.size() - 1);
+  
+  for(int i = 0; i < 10; i++)
+  {
+    //printf("%d %d\n",a,b);
+  
+    double mean = CalcMean(v,a,b);
+    double stdv = CalcStdv(v,a,b);
+    
+    double cutA = mean - stdv*3;  
+    double cutB = mean + stdv*3;
+    
+    while((a < b) && (v[a] < cutA)) a++;
+    while((b > a) && (v[b] > cutB)) b--;
+  }
+  
+  std::vector<double> v2;
+  
+  v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
+  
+  v.swap(v2);
 }
 
 //-----------------------------------------------------------------------------
+// We really want the rdtsc() calls to bracket the function call as tightly
+// as possible, but that's hard to do portably. We'll try and get as close as
+// possible by marking the function as NEVER_INLINE (to keep the optimizer from
+// moving it) and marking the timing variables as "volatile register".
 
-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles )
+NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
 {
-  const int trials = 300000;
-
-  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+  volatile register int64_t begin,end;
+  
+  uint32_t temp[16];
+  
+  begin = rdtsc();
   
+  hash(key,len,seed,temp);
+  
+  end = rdtsc();
+  
+  return end-begin;
+}
+
+//-----------------------------------------------------------------------------
+
+double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
+{
   Rand r(seed);
+  
+  uint8_t * buf = new uint8_t[blocksize + 64];
 
-  uint8_t * h = new uint8_t[hashsize];
-  uint8_t * k = new uint8_t[keysize];
+  uint64_t t1 = reinterpret_cast<uint64_t>(buf);
+  
+  t1 = (t1 + 15) & 0xFFFFFFFFFFFFFFF0;
+  t1 += align;
   
-  memset(h,0,hashsize);
-  memset(k,0,keysize);
+  uint8_t * block = reinterpret_cast<uint8_t*>(t1);
 
-  double bestcycles = 1e9;
+  r.rand_p(block,blocksize);
 
-  for(int itrial = 0; itrial < trials; itrial++)
-  {
-    volatile int64_t begin,end;
+  //----------
 
-    rand_p(k,keysize);
+  std::vector<double> times;
+  times.reserve(trials);
 
-    MixVCode(h,4);
+  for(int itrial = 0; itrial < trials; itrial++)
+  {
+    r.rand_p(block,blocksize);
     
-    begin = rdtsc();
+    double t = timehash(hash,block,blocksize,itrial);
     
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+    if(t > 0) times.push_back(t);
+  }
 
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+  //----------
+  
+  std::sort(times.begin(),times.end());
+  
+  FilterOutliers(times);
+  
+  delete [] buf;
+  
+  return CalcMean(times);
+}
 
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
-    hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+//-----------------------------------------------------------------------------
+// 256k blocks seem to give the best results.
 
-    end = rdtsc();
+void BulkSpeedTest ( pfHash hash, uint32_t seed )
+{
+  Rand r(seed);
+  
+  const int trials = 999;
+  const int blocksize = 256 * 1024;
 
-    MixVCode(h,4);
-    //printf("0x%08x\n",g_verify);
+  printf("Bulk speed test - %d-byte keys\n",blocksize);
 
-    double cycles = double(end-begin) / 64;
-    if((cycles > 0) && (cycles < bestcycles)) bestcycles = cycles;
+  for(int align = 0; align < 8; align++)
+  {
+    double cycles = SpeedTest(hash,seed,trials,blocksize,align);
+    
+    double bestbpc = double(blocksize)/cycles;
+    
+    double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
+    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
   }
+}
 
-  double bestbpc = double(keysize) / bestcycles;
-  if(verbose) printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
+//-----------------------------------------------------------------------------
+
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )
+{
+  const int trials = 999999;
 
-  outCycles = bestcycles;
+  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+  
+  double cycles = SpeedTest(hash,seed,trials,keysize,0);
+  
+  printf("%8.2f cycles/hash\n",cycles);  
 }
 
 //-----------------------------------------------------------------------------
-- 
cgit v1.2.3


From 0f37bbdda0d33613c84af0d1b786751c1cc99316 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 4 Apr 2011 23:05:26 +0000
Subject: some test code for collision reporting; cleanup murmur3, fix
 len-collision issue; make main thread high priority on windows; fix missing
 typecast in SpeedTest.cpp, increase bulk speed test reps; remove reference to
 old file

git-svn-id: http://smhasher.googlecode.com/svn/trunk@120 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 KeysetTest.cpp  | 63 +++++++++++++++++++++++++++++++++++++++++++
 MurmurHash3.cpp | 83 +++++++++++++++++++++------------------------------------
 Platform.cpp    |  1 +
 SMHasher.vcproj |  4 ---
 SpeedTest.cpp   |  6 ++---
 main.cpp        |  4 +--
 6 files changed, 99 insertions(+), 62 deletions(-)

diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index f11d512..1cae57d 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -262,3 +262,66 @@ void TwoBytesKeygen ( int maxlen, KeyCallback & c )
 }
 
 //-----------------------------------------------------------------------------
+
+template< typename hashtype >
+void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
+{
+  typedef CollisionMap<hashtype,ByteVec> cmap_t;
+
+  for(cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
+  {
+    const hashtype & hash = (*it).first;
+
+    printf("Hash - ");
+    printbytes(&hash,sizeof(hashtype));
+    printf("\n");
+
+    std::vector<ByteVec> & keys = (*it).second;
+
+    for(int i = 0; i < (int)keys.size(); i++)
+    {
+      ByteVec & key = keys[i];
+
+      printf("Key  - ");
+      printbytes(&key[0],(int)key.size());
+      printf("\n");
+    }
+    printf("\n");
+  }
+
+}
+
+// test code
+
+void ReportCollisions ( pfHash hash )
+{
+  printf("Hashing keyset\n");
+
+  std::vector<uint128_t> hashes;
+
+  HashCallback<uint128_t> c(hash,hashes);
+
+  TwoBytesKeygen(20,c);
+
+  printf("%d hashes\n",hashes.size());
+
+  printf("Finding collisions\n");
+
+  HashSet<uint128_t> collisions;
+
+  FindCollisions(hashes,collisions,1000);
+
+  printf("%d collisions\n",collisions.size());
+
+  printf("Mapping collisions\n");
+
+  CollisionMap<uint128_t,ByteVec> cmap;
+
+  CollisionCallback<uint128_t> c2(hash,collisions,cmap);
+
+  TwoBytesKeygen(20,c2);
+
+  printf("Dumping collisions\n");
+
+  DumpCollisionMap(cmap);
+}
\ No newline at end of file
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 52c4043..8f63772 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -24,53 +24,6 @@ FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
   return p[i];
 }
 
-//-----------------------------------------------------------------------------
-// Block mix - mix the key block, combine with hash block, mix the hash block,
-// repeat.
-
-FORCE_INLINE void bmix ( uint32_t & h1, uint32_t & k1, 
-                         uint32_t & c1, uint32_t & c2 )
-{
-  k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-
-  h1 = ROTL32(h1,13); h1 = h1*5+0xe6546b64;
-}
-
-//----------
-
-FORCE_INLINE void bmix ( uint64_t & h1, uint64_t & h2, 
-                         uint64_t & k1, uint64_t & k2, 
-                         uint64_t & c1, uint64_t & c2 )
-{
-  k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
-
-  h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
-
-  k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
-  h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
-}
-
-//----------
-
-FORCE_INLINE void bmix ( uint32_t & h1, uint32_t & h2,
-                         uint32_t & h3, uint32_t & h4, 
-                         uint32_t & k1, uint32_t & k2, 
-                         uint32_t & k3, uint32_t & k4,
-                         uint32_t & c1, uint32_t & c2, 
-                         uint32_t & c3, uint32_t & c4 )
-{
-  k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-  k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
-  k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
-  k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
-  h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
-  h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
-  h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
-  h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
-}
-
 //-----------------------------------------------------------------------------
 // Finalization mix - force all bits of a hash block to avalanche
 
@@ -120,7 +73,13 @@ void MurmurHash3_x86_32 ( const void * key, int len,
   {
     uint32_t k1 = getblock(blocks,i);
 
-    bmix(h1,k1,c1,c2);
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    
+    h1 ^= k1;
+    h1 = ROTL32(h1,13); 
+    h1 = h1*5+0xe6546b64;
   }
 
   //----------
@@ -178,7 +137,21 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
     uint32_t k3 = getblock(blocks,i*4+2);
     uint32_t k4 = getblock(blocks,i*4+3);
 
-    bmix(h1,h2,h3,h4, k1,k2,k3,k4, c1, c2, c3, c4);
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
   }
 
   //----------
@@ -220,7 +193,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
   //----------
   // finalization
 
-  h4 ^= len;
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
 
   h1 += h2; h1 += h3; h1 += h4;
   h2 += h1; h3 += h1; h4 += h1;
@@ -263,7 +236,13 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
     uint64_t k1 = getblock(blocks,i*2+0);
     uint64_t k2 = getblock(blocks,i*2+1);
 
-    bmix(h1,h2,k1,k2,c1,c2);
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
   }
 
   //----------
@@ -299,7 +278,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   //----------
   // finalization
 
-  h2 ^= len;
+  h1 ^= len; h2 ^= len;
 
   h1 += h2;
   h2 += h1;
diff --git a/Platform.cpp b/Platform.cpp
index 3561379..dff36cb 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -16,6 +16,7 @@ void testRDTSC ( void )
 void SetAffinity ( int cpu )
 {
   SetProcessAffinityMask(GetCurrentProcess(),cpu);
+  SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
 }
 
 #else
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index bb4125e..05586f7 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -322,10 +322,6 @@
 		<Filter
 			Name="Hashes"
 			>
-			<File
-				RelativePath=".\CityHash.cpp"
-				>
-			</File>
 			<File
 				RelativePath=".\crc.cpp"
 				>
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index 9012ae3..b586102 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -189,7 +189,7 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block
   {
     r.rand_p(block,blocksize);
     
-    double t = timehash(hash,block,blocksize,itrial);
+    double t = (double)timehash(hash,block,blocksize,itrial);
     
     if(t > 0) times.push_back(t);
   }
@@ -210,9 +210,7 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block
 
 void BulkSpeedTest ( pfHash hash, uint32_t seed )
 {
-  Rand r(seed);
-  
-  const int trials = 999;
+  const int trials = 2999;
   const int blocksize = 256 * 1024;
 
   printf("Bulk speed test - %d-byte keys\n",blocksize);
diff --git a/main.cpp b/main.cpp
index 4574015..8c76468 100644
--- a/main.cpp
+++ b/main.cpp
@@ -67,8 +67,8 @@ HashInfo g_hashes[] =
   // MurmurHash3
 
   { MurmurHash3_x86_32,   32, 0x3252D141, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-  { MurmurHash3_x86_128, 128, 0x13C7ED69, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-  { MurmurHash3_x64_128, 128, 0x33949085, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+  { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x833607F9, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
 
-- 
cgit v1.2.3


From 4be8e18e62864ae1f1c34ad440822ee046405ee7 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 4 Apr 2011 23:07:18 +0000
Subject: and of course gcc complains about missing "typename" specifier and
 missing typecasts

git-svn-id: http://smhasher.googlecode.com/svn/trunk@121 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 KeysetTest.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 1cae57d..5561030 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -268,7 +268,7 @@ void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
 {
   typedef CollisionMap<hashtype,ByteVec> cmap_t;
 
-  for(cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
+  for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
   {
     const hashtype & hash = (*it).first;
 
@@ -303,7 +303,7 @@ void ReportCollisions ( pfHash hash )
 
   TwoBytesKeygen(20,c);
 
-  printf("%d hashes\n",hashes.size());
+  printf("%d hashes\n",(int)hashes.size());
 
   printf("Finding collisions\n");
 
@@ -311,7 +311,7 @@ void ReportCollisions ( pfHash hash )
 
   FindCollisions(hashes,collisions,1000);
 
-  printf("%d collisions\n",collisions.size());
+  printf("%d collisions\n",(int)collisions.size());
 
   printf("Mapping collisions\n");
 
@@ -324,4 +324,4 @@ void ReportCollisions ( pfHash hash )
   printf("Dumping collisions\n");
 
   DumpCollisionMap(cmap);
-}
\ No newline at end of file
+}
-- 
cgit v1.2.3


From 9715053bffb6aa255104427c756084e815e530aa Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 4 Apr 2011 23:29:25 +0000
Subject: fix cpu affinity mask

git-svn-id: http://smhasher.googlecode.com/svn/trunk@122 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 main.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/main.cpp b/main.cpp
index 8c76468..eaab22f 100644
--- a/main.cpp
+++ b/main.cpp
@@ -549,7 +549,9 @@ int main ( int argc, char ** argv )
     hashToTest = argv[1];
   }
   
-  SetAffinity(3);
+  // Code runs on the 3rd CPU by default
+
+  SetAffinity((1 << 2));
 
   SelfTest();
 
-- 
cgit v1.2.3


From 4bbda98dc256abd514bb2011560cded111fd205b Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 4 Apr 2011 23:38:12 +0000
Subject: Add CPU affinity to gcc build

git-svn-id: http://smhasher.googlecode.com/svn/trunk@123 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Platform.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/Platform.cpp b/Platform.cpp
index dff36cb..bed3aa1 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -21,8 +21,20 @@ void SetAffinity ( int cpu )
 
 #else
 
+#include <sched.h>
+
 void SetAffinity ( int /*cpu*/ )
 {
+  cpu_set_t mask;
+    
+  CPU_ZERO(&mask);
+    
+  CPU_SET(2,&mask);
+    
+  if( sched_setaffinity(0,sizeof(mask),&mask) == -1)
+  {
+    printf("WARNING: Could not set CPU affinity\n");
+  }
 }
 
 #endif
-- 
cgit v1.2.3


From f068a580c78997431304d9dcb22ba7d89c4333e7 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Tue, 5 Apr 2011 00:15:28 +0000
Subject: Make MurmurHash3.cpp compile standalone on GCC

git-svn-id: http://smhasher.googlecode.com/svn/trunk@124 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 8f63772..5fcbe94 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -2,13 +2,51 @@
 // MurmurHash3 was written by Austin Appleby, and is placed in the public
 // domain. The author hereby disclaims copyright to this source code.
 
-#include "MurmurHash3.h"
-
 // Note - The x86 and x64 versions do _not_ produce the same results, as the
 // algorithms are optimized for their respective platforms. You can still
 // compile and run any of them on any platform, but your performance with the
 // non-native version will be less than optimal.
 
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#define	FORCE_INLINE __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
 
 //-----------------------------------------------------------------------------
 // Block read - if your platform needs to do endian-swapping or can only
@@ -294,3 +332,4 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
 }
 
 //-----------------------------------------------------------------------------
+
-- 
cgit v1.2.3


From 954fc28ca3b79ffe99d12d8c8b830d9b698d2266 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Tue, 5 Apr 2011 00:18:44 +0000
Subject: make murmurhash3 compile standalone on Visual Studio; make speedtest
 allocate 256-byte aligned blocks

git-svn-id: http://smhasher.googlecode.com/svn/trunk@125 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 4 ++++
 SpeedTest.cpp   | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 5fcbe94..1fec6a0 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -14,6 +14,10 @@
 
 #if defined(_MSC_VER)
 
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
 #define FORCE_INLINE	__forceinline
 
 #include <stdlib.h>
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index b586102..211b1e9 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -169,11 +169,11 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block
 {
   Rand r(seed);
   
-  uint8_t * buf = new uint8_t[blocksize + 64];
+  uint8_t * buf = new uint8_t[blocksize + 512];
 
   uint64_t t1 = reinterpret_cast<uint64_t>(buf);
   
-  t1 = (t1 + 15) & 0xFFFFFFFFFFFFFFF0;
+  t1 = (t1 + 255) & 0xFFFFFFFFFFFFFF00;
   t1 += align;
   
   uint8_t * block = reinterpret_cast<uint8_t*>(t1);
-- 
cgit v1.2.3


From 58dd8869da8c95f5c26ec70a6cdd243a7647c8fc Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 8 Apr 2011 19:39:16 +0000
Subject: Remove platform.h dependency from MurmurHash3.h, move platform
 #ifdefs to header

git-svn-id: http://smhasher.googlecode.com/svn/trunk@126 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 45 +----------------------------------------
 MurmurHash3.h   | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 48 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 1fec6a0..95d2e26 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -7,50 +7,7 @@
 // compile and run any of them on any platform, but your performance with the
 // non-native version will be less than optimal.
 
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-#define FORCE_INLINE	__forceinline
-
-#include <stdlib.h>
-
-#define ROTL32(x,y)	_rotl(x,y)
-#define ROTL64(x,y)	_rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#include <stdint.h>
-
-#define	FORCE_INLINE __attribute__((always_inline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
-  return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
-  return (x << r) | (x >> (64 - r));
-}
-
-#define	ROTL32(x,y)	rotl32(x,y)
-#define ROTL64(x,y)	rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
+#include "MurmurHash3.h"
 
 //-----------------------------------------------------------------------------
 // Block read - if your platform needs to do endian-swapping or can only
diff --git a/MurmurHash3.h b/MurmurHash3.h
index a547f0f..a2f26e9 100644
--- a/MurmurHash3.h
+++ b/MurmurHash3.h
@@ -1,14 +1,68 @@
-#include "Platform.h"
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#define	FORCE_INLINE __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
 
 //-----------------------------------------------------------------------------
 
 void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
-void MurmurHash3_x86_64  ( const void * key, int len, uint32_t seed, void * out );
+
 void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
 
-void MurmurHash3_x64_32  ( const void * key, int len, uint32_t seed, void * out );
-void MurmurHash3_x64_64  ( const void * key, int len, uint32_t seed, void * out );
 void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
 
 //-----------------------------------------------------------------------------
 
+#endif // _MURMURHASH3_H_
\ No newline at end of file
-- 
cgit v1.2.3


From 7af0ee099b7f8d3a5b628791951ffb9680082583 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 8 Apr 2011 19:46:54 +0000
Subject: gcc complains about multiply defined symbols, move rotl macros back
 to source file

git-svn-id: http://smhasher.googlecode.com/svn/trunk@127 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 39 +++++++++++++++++++++++++++++++++++++++
 MurmurHash3.h   | 28 +---------------------------
 2 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 95d2e26..8ce4688 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -9,6 +9,45 @@
 
 #include "MurmurHash3.h"
 
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
 //-----------------------------------------------------------------------------
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
diff --git a/MurmurHash3.h b/MurmurHash3.h
index a2f26e9..9c8425c 100644
--- a/MurmurHash3.h
+++ b/MurmurHash3.h
@@ -21,38 +21,12 @@ typedef unsigned char uint8_t;
 typedef unsigned long uint32_t;
 typedef unsigned __int64 uint64_t;
 
-#define FORCE_INLINE	__forceinline
-
-#include <stdlib.h>
-
-#define ROTL32(x,y)	_rotl(x,y)
-#define ROTL64(x,y)	_rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
 // Other compilers
 
 #else	// defined(_MSC_VER)
 
 #include <stdint.h>
 
-#define	FORCE_INLINE __attribute__((always_inline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
-  return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
-  return (x << r) | (x >> (64 - r));
-}
-
-#define	ROTL32(x,y)	rotl32(x,y)
-#define ROTL64(x,y)	rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
 #endif // !defined(_MSC_VER)
 
 //-----------------------------------------------------------------------------
@@ -65,4 +39,4 @@ void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out
 
 //-----------------------------------------------------------------------------
 
-#endif // _MURMURHASH3_H_
\ No newline at end of file
+#endif // _MURMURHASH3_H_
-- 
cgit v1.2.3


From cc592168c77f4f357d593456e7d094d786866f38 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 8 Apr 2011 20:49:43 +0000
Subject: Build fixes for clang, etc

git-svn-id: http://smhasher.googlecode.com/svn/trunk@128 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Bitvec.cpp        |   4 +-
 Stats.h           | 175 +++++++++++++++++++++++++++---------------------------
 SuperFastHash.cpp |  13 +++-
 Types.cpp         |   4 ++
 Types.h           |  60 +++++++++----------
 sha1.cpp          |   4 +-
 6 files changed, 136 insertions(+), 124 deletions(-)

diff --git a/Bitvec.cpp b/Bitvec.cpp
index 2160060..16feaa7 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -104,7 +104,7 @@ uint32_t popcount ( uint32_t v )
 {
 	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
 	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-	uint32_t c = ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
+	uint32_t c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
 
 	return c;
 }
@@ -184,7 +184,7 @@ int countbits ( uint32_t v )
 {
   v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-  int c = ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
+  int c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
 
   return c;
 }
diff --git a/Stats.h b/Stats.h
index 5106299..c80393e 100644
--- a/Stats.h
+++ b/Stats.h
@@ -99,6 +99,92 @@ int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
   return collcount;
 }
 
+//----------------------------------------------------------------------------
+// Measure the distribution "score" for each possible N-bit span up to 20 bits
+
+template< typename hashtype >
+double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+  printf("Testing distribution - ");
+
+  if(drawDiagram) printf("\n");
+
+  const int hashbits = sizeof(hashtype) * 8;
+
+  int maxwidth = 20;
+
+  // We need at least 5 keys per bin to reliably test distribution biases
+  // down to 1%, so don't bother to test sparser distributions than that
+
+  while(double(hashes.size()) / double(1 << maxwidth) < 5.0)
+  {
+    maxwidth--;
+  }
+
+  std::vector<int> bins;
+  bins.resize(1 << maxwidth);
+
+  double worst = 0;
+  int worstStart = -1;
+  int worstWidth = -1;
+
+  for(int start = 0; start < hashbits; start++)
+  {
+    int width = maxwidth;
+    int bincount = (1 << width);
+
+    memset(&bins[0],0,sizeof(int)*bincount);
+
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
+
+      uint32_t index = window(&hash,sizeof(hash),start,width);
+
+      bins[index]++;
+    }
+
+    // Test the distribution, then fold the bins in half,
+    // repeat until we're down to 256 bins
+
+    if(drawDiagram) printf("[");
+
+    while(bincount >= 256)
+    {
+      double n = calcScore(&bins[0],bincount,(int)hashes.size());
+
+      if(drawDiagram) plot(n);
+
+      if(n > worst)
+      {
+        worst = n;
+        worstStart = start;
+        worstWidth = width;
+      }
+
+      width--;
+      bincount /= 2;
+
+      if(width < 8) break;
+
+      for(int i = 0; i < bincount; i++)
+      {
+        bins[i] += bins[i+bincount];
+      }
+    }
+
+    if(drawDiagram) printf("]\n");
+  }
+
+  double pct = worst * 100.0;
+
+  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
+  if(pct >= 1.0) printf(" !!!!! ");
+  printf("\n");
+
+  return worst;
+}
+
 //----------------------------------------------------------------------------
 
 template < typename hashtype >
@@ -261,93 +347,6 @@ double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiag
   return worst;
 }
 
-
-//----------------------------------------------------------------------------
-// Measure the distribution "score" for each possible N-bit span up to 20 bits
-
-template< typename hashtype >
-double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
-{
-  printf("Testing distribution - ");
-
-  if(drawDiagram) printf("\n");
-
-  const int hashbits = sizeof(hashtype) * 8;
-
-  int maxwidth = 20;
-
-  // We need at least 5 keys per bin to reliably test distribution biases
-  // down to 1%, so don't bother to test sparser distributions than that
-
-  while(double(hashes.size()) / double(1 << maxwidth) < 5.0)
-  {
-    maxwidth--;
-  }
-
-  std::vector<int> bins;
-  bins.resize(1 << maxwidth);
-
-  double worst = 0;
-  int worstStart = -1;
-  int worstWidth = -1;
-
-  for(int start = 0; start < hashbits; start++)
-  {
-    int width = maxwidth;
-    int bincount = (1 << width);
-
-    memset(&bins[0],0,sizeof(int)*bincount);
-
-    for(size_t j = 0; j < hashes.size(); j++)
-    {
-      hashtype & hash = hashes[j];
-
-      uint32_t index = window(&hash,sizeof(hash),start,width);
-
-      bins[index]++;
-    }
-
-    // Test the distribution, then fold the bins in half,
-    // repeat until we're down to 256 bins
-
-    if(drawDiagram) printf("[");
-
-    while(bincount >= 256)
-    {
-      double n = calcScore(&bins[0],bincount,(int)hashes.size());
-
-      if(drawDiagram) plot(n);
-
-      if(n > worst)
-      {
-        worst = n;
-        worstStart = start;
-        worstWidth = width;
-      }
-
-      width--;
-      bincount /= 2;
-
-      if(width < 8) break;
-
-      for(int i = 0; i < bincount; i++)
-      {
-        bins[i] += bins[i+bincount];
-      }
-    }
-
-    if(drawDiagram) printf("]\n");
-  }
-
-  double pct = worst * 100.0;
-
-  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
-  if(pct >= 1.0) printf(" !!!!! ");
-  printf("\n");
-
-  return worst;
-}
-
 //-----------------------------------------------------------------------------
 // Simplified test - only check 64k distributions, and only on byte boundaries
 
@@ -376,7 +375,7 @@ void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, dou
       bins[index]++;
     }
 
-    double n = calcScore((int*)bins.begin(),(int)bins.size(),(int)hashes.size());
+    double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
     
     davg += n;
 
diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
index 8fd74cc..38d030d 100644
--- a/SuperFastHash.cpp
+++ b/SuperFastHash.cpp
@@ -7,6 +7,7 @@
 
    http://www.azillionmonkeys.com/qed/hash.html */
 
+/*
 #undef get16bits
 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
   || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
@@ -17,8 +18,14 @@
 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
                        +(uint32_t)(((const uint8_t *)(d))[0]) )
 #endif
+*/
 
-uint32_t SuperFastHash (const char * data, int len) {
+FORCE_INLINE uint16_t get16bits ( const void * p )
+{
+  return *(const uint16_t*)p;
+}
+
+uint32_t SuperFastHash (const signed char * data, int len) {
 uint32_t hash = 0, tmp;
 int rem;
 
@@ -65,5 +72,5 @@ int rem;
 
 void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out )
 {
-  *(uint32_t*)out = SuperFastHash((const char*)key,len);
-}
\ No newline at end of file
+  *(uint32_t*)out = SuperFastHash((const signed char*)key,len);
+}
diff --git a/Types.cpp b/Types.cpp
index 43544f2..46051a6 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -8,7 +8,9 @@ uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
 
 //-----------------------------------------------------------------------------
 
+#if defined(_MSC_VER)
 #pragma optimize( "", off )
+#endif
 
 void blackhole ( uint32_t )
 {
@@ -19,7 +21,9 @@ uint32_t whitehole ( void )
   return 0;
 }
 
+#if defined(_MSC_VER)
 #pragma optimize( "", on ) 
+#endif
 
 uint32_t g_verify = 1;
 
diff --git a/Types.h b/Types.h
index a100787..8814093 100644
--- a/Types.h
+++ b/Types.h
@@ -96,7 +96,11 @@ struct KeyCallback
   {
   }
 
-  virtual void operator() ( const uint8_t * key, int len )
+  virtual ~KeyCallback()
+  {
+  }
+
+  virtual void operator() ( const void * key, int len )
   {
     m_count++;
   }
@@ -115,14 +119,16 @@ struct HashCallback : public KeyCallback
 {
   typedef std::vector<hashtype> hashvec;
 
-  HashCallback ( pfHash hash, hashvec & hashes ) : m_pfHash(hash), m_hashes(hashes)
+  HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
   {
     m_hashes.clear();
   }
 
-  virtual void operator () ( const uint8_t * key, int len )
+  virtual void operator () ( const void * key, int len )
   {
-    m_hashes.resize(m_hashes.size() + 1);
+    size_t newsize = m_hashes.size() + 1;
+    
+    m_hashes.resize(newsize);
 
     m_pfHash(key,len,0,&m_hashes.back());
   }
@@ -157,7 +163,7 @@ struct CollisionCallback : public KeyCallback
   {
   }
 
-  virtual void operator () ( const uint8_t * key, int len )
+  virtual void operator () ( const void * key, int len )
   {
     hashtype h;
 
@@ -189,11 +195,15 @@ public:
 
   Blob()
   {
+    for(int i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
   }
 
   Blob ( int x )
   {
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] = 0;
     }
@@ -203,7 +213,7 @@ public:
 
   Blob ( const Blob & k )
   {
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] = k.bytes[i];
     }
@@ -211,7 +221,7 @@ public:
 
   Blob & operator = ( const Blob & k )
   {
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] = k.bytes[i];
     }
@@ -229,14 +239,14 @@ public:
   {
     const uint8_t * k = (const uint8_t*)blob;
 
-    len = len > nbytes ? nbytes : len;
+    len = len > sizeof(bytes) ? sizeof(bytes) : len;
 
     for(int i = 0; i < len; i++)
     {
       bytes[i] = k[i];
     }
 
-    for(int i = len; i < nbytes; i++)
+    for(int i = len; i < sizeof(bytes); i++)
     {
       bytes[i] = 0;
     }
@@ -257,7 +267,7 @@ public:
   
   bool operator < ( const Blob & k ) const
   {
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       if(bytes[i] < k.bytes[i]) return true;
       if(bytes[i] > k.bytes[i]) return false;
@@ -268,7 +278,7 @@ public:
 
   bool operator == ( const Blob & k ) const
   {
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       if(bytes[i] != k.bytes[i]) return false;
     }
@@ -288,7 +298,7 @@ public:
   {
     Blob t;
 
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       t.bytes[i] = bytes[i] ^ k.bytes[i];
     }
@@ -298,7 +308,7 @@ public:
 
   Blob & operator ^= ( const Blob & k )
   {
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] ^= k.bytes[i];
     }
@@ -313,7 +323,7 @@ public:
 
   Blob & operator &= ( const Blob & k )
   {
-    for(int i = 0; i < nbytes; i++)
+    for(int i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] &= k.bytes[i];
     }
@@ -323,7 +333,7 @@ public:
   {
     Blob t = *this;
 
-    lshift(&t.bytes[0],nbytes,c);
+    lshift(&t.bytes[0],sizeof(bytes),c);
 
     return t;
   }
@@ -332,40 +342,30 @@ public:
   {
     Blob t = *this;
 
-    rshift(&t.bytes[0],nbytes,c);
+    rshift(&t.bytes[0],sizeof(bytes),c);
 
     return t;
   }
 
   Blob & operator <<= ( int c )
   {
-    lshift(&bytes[0],nbytes,c);
+    lshift(&bytes[0],sizeof(bytes),c);
 
     return *this;
   }
 
   Blob & operator >>= ( int c )
   {
-    rshift(&bytes[0],nbytes,c);
+    rshift(&bytes[0],sizeof(bytes),c);
 
     return *this;
   }
 
   //----------
   
-  enum
-  {
-    nbits = _bits,
-    nbytes = (_bits+7)/8,
-
-    align4  = (nbytes & 2) ? 0 : 1,
-    align8  = (nbytes & 3) ? 0 : 1,
-    align16 = (nbytes & 4) ? 0 : 1,
-  };
-
 private:
 
-  uint8_t bytes[nbytes];
+  uint8_t bytes[(_bits+7)/8];
 };
 
 typedef Blob<128> uint128_t;
diff --git a/sha1.cpp b/sha1.cpp
index fceb463..9578438 100644
--- a/sha1.cpp
+++ b/sha1.cpp
@@ -82,9 +82,11 @@ A million repetitions of "a"
 
 #include "sha1.h"
 
+#if defined(_MSC_VER)
 #pragma warning(disable : 4267)
 #pragma warning(disable : 4996)
 #pragma warning(disable : 4100)
+#endif
 
 void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
 
@@ -320,4 +322,4 @@ int main(int argc, char** argv)
     fprintf(stdout, "ok\n");
     return(0);
 }
-#endif /* TEST */
\ No newline at end of file
+#endif /* TEST */
-- 
cgit v1.2.3


From c8e8bf81bc6041d6d836365a501a0a96830d2d81 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 8 Apr 2011 21:07:48 +0000
Subject: Remove dead files, make Murmur1/2/3 all have explicit public domain
 licensing & no external dependencies

git-svn-id: http://smhasher.googlecode.com/svn/trunk@129 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 CMakeLists.txt         |  5 -----
 MurmurHash1.cpp        |  9 ++++++---
 MurmurHash1.h          | 28 +++++++++++++++++++++++++++-
 MurmurHash2.cpp        | 26 +++++++++++++++++++++++---
 MurmurHash2.h          | 28 +++++++++++++++++++++++++++-
 MurmurHash2_test.cpp   |  0
 MurmurHash3.h          |  5 -----
 MurmurHash64.cpp       |  0
 MurmurHashAligned.cpp  |  2 --
 MurmurHashAligned2.cpp |  4 ----
 MurmurHashNeutral2.cpp |  2 --
 11 files changed, 83 insertions(+), 26 deletions(-)
 delete mode 100644 MurmurHash2_test.cpp
 delete mode 100644 MurmurHash64.cpp
 delete mode 100644 MurmurHashAligned.cpp
 delete mode 100644 MurmurHashAligned2.cpp
 delete mode 100644 MurmurHashNeutral2.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9717f48..fc75ddc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,12 +17,7 @@ add_library(
   md5.cpp
   MurmurHash1.cpp
   MurmurHash2.cpp
-  MurmurHash2_test.cpp
   MurmurHash3.cpp
-  MurmurHash64.cpp
-  MurmurHashAligned2.cpp
-  MurmurHashAligned.cpp
-  MurmurHashNeutral2.cpp
   Platform.cpp
   Random.cpp
   sha1.cpp
diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp
index ed23e6f..b21e9f7 100644
--- a/MurmurHash1.cpp
+++ b/MurmurHash1.cpp
@@ -1,7 +1,6 @@
-#include "MurmurHash1.h"
-
 //-----------------------------------------------------------------------------
-// MurmurHash1, by Austin Appleby
+// MurmurHash was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
 
 // Note - This code makes a few assumptions about how your machine behaves -
 
@@ -14,6 +13,10 @@
 // 2. It will not produce the same results on little-endian and big-endian
 //    machines.
 
+#include "MurmurHash1.h"
+
+//-----------------------------------------------------------------------------
+
 uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
 {
   const unsigned int m = 0xc6a4a793;
diff --git a/MurmurHash1.h b/MurmurHash1.h
index eff8d11..edbfa21 100644
--- a/MurmurHash1.h
+++ b/MurmurHash1.h
@@ -1,4 +1,28 @@
-#include "Platform.h"
+//-----------------------------------------------------------------------------
+// MurmurHash was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH1_H_
+#define _MURMURHASH1_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
 
 //-----------------------------------------------------------------------------
 
@@ -6,3 +30,5 @@ uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
 uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );
 
 //-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH1_H_
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
index cc94f79..cf15edf 100644
--- a/MurmurHash2.cpp
+++ b/MurmurHash2.cpp
@@ -1,7 +1,6 @@
-#include "MurmurHash2.h"
-
 //-----------------------------------------------------------------------------
-// MurmurHash2, by Austin Appleby
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
 
 // Note - This code makes a few assumptions about how your machine behaves -
 
@@ -14,6 +13,27 @@
 // 2. It will not produce the same results on little-endian and big-endian
 //    machines.
 
+#include "MurmurHash2.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
 uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
 {
   // 'm' and 'r' are mixing constants generated offline.
diff --git a/MurmurHash2.h b/MurmurHash2.h
index f119653..38dbbeb 100644
--- a/MurmurHash2.h
+++ b/MurmurHash2.h
@@ -1,4 +1,28 @@
-#include "Platform.h"
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH2_H_
+#define _MURMURHASH2_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
 
 //-----------------------------------------------------------------------------
 
@@ -11,3 +35,5 @@ uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
 
 //-----------------------------------------------------------------------------
 
+#endif // _MURMURHASH2_H_
+
diff --git a/MurmurHash2_test.cpp b/MurmurHash2_test.cpp
deleted file mode 100644
index e69de29..0000000
diff --git a/MurmurHash3.h b/MurmurHash3.h
index 9c8425c..58e9820 100644
--- a/MurmurHash3.h
+++ b/MurmurHash3.h
@@ -2,11 +2,6 @@
 // MurmurHash3 was written by Austin Appleby, and is placed in the public
 // domain. The author hereby disclaims copyright to this source code.
 
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
 #ifndef _MURMURHASH3_H_
 #define _MURMURHASH3_H_
 
diff --git a/MurmurHash64.cpp b/MurmurHash64.cpp
deleted file mode 100644
index e69de29..0000000
diff --git a/MurmurHashAligned.cpp b/MurmurHashAligned.cpp
deleted file mode 100644
index 63dfe61..0000000
--- a/MurmurHashAligned.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-//#include "stdafx.h"
-
diff --git a/MurmurHashAligned2.cpp b/MurmurHashAligned2.cpp
deleted file mode 100644
index 83e9723..0000000
--- a/MurmurHashAligned2.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-//#include "stdafx.h"
-
-#pragma warning(disable:4311)
-
diff --git a/MurmurHashNeutral2.cpp b/MurmurHashNeutral2.cpp
deleted file mode 100644
index 63dfe61..0000000
--- a/MurmurHashNeutral2.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-//#include "stdafx.h"
-
-- 
cgit v1.2.3


From c2b49e0d2168979b648edcc449a36292449dd5f5 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 8 Apr 2011 21:13:25 +0000
Subject: Fix typos

git-svn-id: http://smhasher.googlecode.com/svn/trunk@130 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash1.h   | 2 +-
 MurmurHash2.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/MurmurHash1.h b/MurmurHash1.h
index edbfa21..40ddbc4 100644
--- a/MurmurHash1.h
+++ b/MurmurHash1.h
@@ -1,5 +1,5 @@
 //-----------------------------------------------------------------------------
-// MurmurHash was written by Austin Appleby, and is placed in the public
+// MurmurHash1 was written by Austin Appleby, and is placed in the public
 // domain. The author hereby disclaims copyright to this source code.
 
 #ifndef _MURMURHASH1_H_
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
index cf15edf..dbb2053 100644
--- a/MurmurHash2.cpp
+++ b/MurmurHash2.cpp
@@ -1,5 +1,5 @@
 //-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
 // domain. The author hereby disclaims copyright to this source code.
 
 // Note - This code makes a few assumptions about how your machine behaves -
@@ -390,6 +390,7 @@ uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
 
 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
 
+
 uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
 {
   const uint32_t m = 0x5bd1e995;
-- 
cgit v1.2.3


From 833fd8d1baf463eeeeff29a86fa29a67588453f0 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Mon, 11 Apr 2011 20:45:44 +0000
Subject: Make verification failure print the hash names. Fix typo in murmur3f
 (rotl values didn't match)

git-svn-id: http://smhasher.googlecode.com/svn/trunk@131 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 2 +-
 main.cpp        | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 8ce4688..97883fa 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -310,7 +310,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   case  3: k1 ^= uint64_t(tail[ 2]) << 16;
   case  2: k1 ^= uint64_t(tail[ 1]) << 8;
   case  1: k1 ^= uint64_t(tail[ 0]) << 0;
-           k1 *= c1; k1  = ROTL64(k1,29); k1 *= c2; h1 ^= k1;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
   };
 
   //----------
diff --git a/main.cpp b/main.cpp
index eaab22f..973ffa6 100644
--- a/main.cpp
+++ b/main.cpp
@@ -68,7 +68,7 @@ HashInfo g_hashes[] =
 
   { MurmurHash3_x86_32,   32, 0x3252D141, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
   { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-  { MurmurHash3_x64_128, 128, 0x833607F9, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
 
@@ -103,7 +103,8 @@ void SelfTest ( void )
     for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
     {
       HashInfo * info = & g_hashes[i];
-
+      
+      printf("%16s - ",info->name);
       pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
     }
 
-- 
cgit v1.2.3


From 510b8528df1cc38c289abf5e9c3c689e979a6cc3 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Tue, 12 Apr 2011 15:36:18 +0000
Subject: Add Google's open-source CityHash. Fix build breakage on Cygwin

git-svn-id: http://smhasher.googlecode.com/svn/trunk@132 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 CMakeLists.txt  |   1 +
 City.cpp        | 321 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 City.h          |  97 +++++++++++++++++
 Hashes.h        |   2 +
 Platform.cpp    |   2 +
 SMHasher.vcproj |   8 ++
 SpeedTest.cpp   |   2 +-
 main.cpp        |   3 +
 8 files changed, 435 insertions(+), 1 deletion(-)
 create mode 100644 City.cpp
 create mode 100644 City.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc75ddc..88f9cce 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,7 @@ add_library(
   AvalancheTest.cpp
   Bitslice.cpp
   Bitvec.cpp
+  City.cpp
   crc.cpp
   DifferentialTest.cpp
   Hashes.cpp
diff --git a/City.cpp b/City.cpp
new file mode 100644
index 0000000..2f089d3
--- /dev/null
+++ b/City.cpp
@@ -0,0 +1,321 @@
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides CityHash64() and related functions.
+//
+// It's probably possible to create even faster hash functions by
+// writing a program that systematically explores some of the space of
+// possible hash functions, by using SIMD instructions, or by
+// compromising on hash quality.
+
+#include "city.h"
+
+#include <algorithm>
+
+using namespace std;
+
+#define UNALIGNED_LOAD64(p) (*(const uint64*)(p))
+#define UNALIGNED_LOAD32(p) (*(const uint32*)(p))
+
+#if !defined(LIKELY)
+#if defined(__GNUC__)
+#define LIKELY(x) (__builtin_expect(!!(x), 1))
+#else
+#define LIKELY(x) (x)
+#endif
+#endif
+
+// Some primes between 2^63 and 2^64 for various uses.
+static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
+static const uint64 k1 = 0xb492b66fbe98f273ULL;
+static const uint64 k2 = 0x9ae16a3b2f90404fULL;
+static const uint64 k3 = 0xc949d7c7509e6557ULL;
+
+// Bitwise right rotate.  Normally this will compile to a single
+// instruction, especially if the shift is a manifest constant.
+static uint64 Rotate(uint64 val, int shift) {
+  // Avoid shifting by 64: doing so yields an undefined result.
+  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
+}
+
+// Equivalent to Rotate(), but requires the second arg to be non-zero.
+// On x86-64, and probably others, it's possible for this to compile
+// to a single instruction if both args are already in registers.
+static uint64 RotateByAtLeast1(uint64 val, int shift) {
+  return (val >> shift) | (val << (64 - shift));
+}
+
+static uint64 ShiftMix(uint64 val) {
+  return val ^ (val >> 47);
+}
+
+static uint64 HashLen16(uint64 u, uint64 v) {
+  return Hash128to64(uint128(u, v));
+}
+
+static uint64 HashLen0to16(const char *s, size_t len) {
+  if (len > 8) {
+    uint64 a = UNALIGNED_LOAD64(s);
+    uint64 b = UNALIGNED_LOAD64(s + len - 8);
+    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
+  }
+  if (len >= 4) {
+    uint64 a = UNALIGNED_LOAD32(s);
+    return HashLen16(len + (a << 3), UNALIGNED_LOAD32(s + len - 4));
+  }
+  if (len > 0) {
+    uint8 a = s[0];
+    uint8 b = s[len >> 1];
+    uint8 c = s[len - 1];
+    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
+    uint32 z = len + (static_cast<uint32>(c) << 2);
+    return ShiftMix(y * k2 ^ z * k3) * k2;
+  }
+  return k2;
+}
+
+// This probably works well for 16-byte strings as well, but it may be overkill
+// in that case.
+static uint64 HashLen17to32(const char *s, size_t len) {
+  uint64 a = UNALIGNED_LOAD64(s) * k1;
+  uint64 b = UNALIGNED_LOAD64(s + 8);
+  uint64 c = UNALIGNED_LOAD64(s + len - 8) * k2;
+  uint64 d = UNALIGNED_LOAD64(s + len - 16) * k0;
+  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
+                   a + Rotate(b ^ k3, 20) - c + len);
+}
+
+// Return a 16-byte hash for 48 bytes.  Quick and dirty.
+// Callers do best to use "random-looking" values for a and b.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(
+    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
+  a += w;
+  b = Rotate(b + a + z, 21);
+  uint64 c = a;
+  a += x;
+  a += y;
+  b += Rotate(a, 44);
+  return make_pair(a + z, b + c);
+}
+
+// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(
+    const char* s, uint64 a, uint64 b) {
+  return WeakHashLen32WithSeeds(UNALIGNED_LOAD64(s),
+                                UNALIGNED_LOAD64(s + 8),
+                                UNALIGNED_LOAD64(s + 16),
+                                UNALIGNED_LOAD64(s + 24),
+                                a,
+                                b);
+}
+
+// Return an 8-byte hash for 33 to 64 bytes.
+static uint64 HashLen33to64(const char *s, size_t len) {
+  uint64 z = UNALIGNED_LOAD64(s + 24);
+  uint64 a = UNALIGNED_LOAD64(s) + (len + UNALIGNED_LOAD64(s + len - 16)) * k0;
+  uint64 b = Rotate(a + z, 52);
+  uint64 c = Rotate(a, 37);
+  a += UNALIGNED_LOAD64(s + 8);
+  c += Rotate(a, 7);
+  a += UNALIGNED_LOAD64(s + 16);
+  uint64 vf = a + z;
+  uint64 vs = b + Rotate(a, 31) + c;
+  a = UNALIGNED_LOAD64(s + 16) + UNALIGNED_LOAD64(s + len - 32);
+  z = UNALIGNED_LOAD64(s + len - 8);
+  b = Rotate(a + z, 52);
+  c = Rotate(a, 37);
+  a += UNALIGNED_LOAD64(s + len - 24);
+  c += Rotate(a, 7);
+  a += UNALIGNED_LOAD64(s + len - 16);
+  uint64 wf = a + z;
+  uint64 ws = b + Rotate(a, 31) + c;
+  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
+  return ShiftMix(r * k0 + vs) * k2;
+}
+
+uint64 CityHash64(const char *s, size_t len) {
+  if (len <= 32) {
+    if (len <= 16) {
+      return HashLen0to16(s, len);
+    } else {
+      return HashLen17to32(s, len);
+    }
+  } else if (len <= 64) {
+    return HashLen33to64(s, len);
+  }
+
+  // For strings over 64 bytes we hash the end first, and then as we
+  // loop we keep 56 bytes of state: v, w, x, y, and z.
+  uint64 x = UNALIGNED_LOAD64(s);
+  uint64 y = UNALIGNED_LOAD64(s + len - 16) ^ k1;
+  uint64 z = UNALIGNED_LOAD64(s + len - 56) ^ k0;
+  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, y);
+  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0);
+  z += ShiftMix(v.second) * k1;
+  x = Rotate(z + x, 39) * k1;
+  y = Rotate(y, 33) * k1;
+
+  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
+  len = (len - 1) & ~static_cast<size_t>(63);
+  do {
+    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
+    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
+    x ^= w.second;
+    y ^= v.first;
+    z = Rotate(z ^ w.first, 33);
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
+    std::swap(z, x);
+    s += 64;
+    len -= 64;
+  } while (len != 0);
+  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
+                   HashLen16(v.second, w.second) + x);
+}
+
+uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
+  return CityHash64WithSeeds(s, len, k2, seed);
+}
+
+uint64 CityHash64WithSeeds(const char *s, size_t len,
+                           uint64 seed0, uint64 seed1) {
+  return HashLen16(CityHash64(s, len) - seed0, seed1);
+}
+
+// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
+// of any length representable in ssize_t.  Based on City and Murmur.
+static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
+  uint64 a = Uint128Low64(seed);
+  uint64 b = Uint128High64(seed);
+  uint64 c = 0;
+  uint64 d = 0;
+  ssize_t l = len - 16;
+  if (l <= 0) {  // len <= 16
+    c = b * k1 + HashLen0to16(s, len);
+    d = Rotate(a + (len >= 8 ? UNALIGNED_LOAD64(s) : c), 32);
+  } else {  // len > 16
+    c = HashLen16(UNALIGNED_LOAD64(s + len - 8) + k1, a);
+    d = HashLen16(b + len, c + UNALIGNED_LOAD64(s + len - 16));
+    a += d;
+    do {
+      a ^= ShiftMix(UNALIGNED_LOAD64(s) * k1) * k1;
+      a *= k1;
+      b ^= a;
+      c ^= ShiftMix(UNALIGNED_LOAD64(s + 8) * k1) * k1;
+      c *= k1;
+      d ^= c;
+      s += 16;
+      l -= 16;
+    } while (l > 0);
+  }
+  a = HashLen16(a, c);
+  b = HashLen16(d, b);
+  return uint128(a ^ b, HashLen16(b, a));
+}
+
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
+  if (len < 128) {
+    return CityMurmur(s, len, seed);
+  }
+
+  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
+  // v, w, x, y, and z.
+  pair<uint64, uint64> v, w;
+  uint64 x = Uint128Low64(seed);
+  uint64 y = Uint128High64(seed);
+  uint64 z = len * k1;
+  v.first = Rotate(y ^ k1, 49) * k1 + UNALIGNED_LOAD64(s);
+  v.second = Rotate(v.first, 42) * k1 + UNALIGNED_LOAD64(s + 8);
+  w.first = Rotate(y + z, 35) * k1 + x;
+  w.second = Rotate(x + UNALIGNED_LOAD64(s + 88), 53) * k1;
+
+  // This is the same inner loop as CityHash64(), manually unrolled.
+  do {
+    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
+    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
+    x ^= w.second;
+    y ^= v.first;
+    z = Rotate(z ^ w.first, 33);
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
+    std::swap(z, x);
+    s += 64;
+    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
+    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
+    x ^= w.second;
+    y ^= v.first;
+    z = Rotate(z ^ w.first, 33);
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
+    std::swap(z, x);
+    s += 64;
+    len -= 128;
+  } while (LIKELY(len >= 128));
+  y += Rotate(w.first, 37) * k0 + z;
+  x += Rotate(v.first + z, 49) * k0;
+  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
+  for (size_t tail_done = 0; tail_done < len; ) {
+    tail_done += 32;
+    y = Rotate(y - x, 42) * k0 + v.second;
+    w.first += UNALIGNED_LOAD64(s + len - tail_done + 16);
+    x = Rotate(x, 49) * k0 + w.first;
+    w.first += v.first;
+    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second);
+  }
+  // At this point our 48 bytes of state should contain more than
+  // enough information for a strong 128-bit hash.  We use two
+  // different 48-byte-to-8-byte hashes to get a 16-byte final result.
+  x = HashLen16(x, v.first);
+  y = HashLen16(y, w.first);
+  return uint128(HashLen16(x + v.second, w.second) + y,
+                 HashLen16(x + w.second, y + v.second));
+}
+
+uint128 CityHash128(const char *s, size_t len) {
+  if (len >= 16) {
+    return CityHash128WithSeed(s + 16,
+                               len - 16,
+                               uint128(UNALIGNED_LOAD64(s) ^ k3,
+                                       UNALIGNED_LOAD64(s + 8)));
+  } else if (len >= 8) {
+    return CityHash128WithSeed(NULL,
+                               0,
+                               uint128(UNALIGNED_LOAD64(s) ^ (len * k0),
+                                       UNALIGNED_LOAD64(s + len - 8) ^ k1));
+  } else {
+    return CityHash128WithSeed(s, len, uint128(k0, k1));
+  }
+}
+
+void CityHash64_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed);
+}
+
+void CityHash128_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  uint128 s(0,0);
+
+  s.first = seed;
+
+  *(uint128*)out = CityHash128WithSeed((const char*)key,len,s);
+}
\ No newline at end of file
diff --git a/City.h b/City.h
new file mode 100644
index 0000000..171f693
--- /dev/null
+++ b/City.h
@@ -0,0 +1,97 @@
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides a few functions for hashing strings. On x86-64
+// hardware in 2011, CityHash64() is faster than other high-quality
+// hash functions, such as Murmur.  This is largely due to higher
+// instruction-level parallelism.  CityHash64() and CityHash128() also perform
+// well on hash-quality tests.
+//
+// CityHash128() is optimized for relatively long strings and returns
+// a 128-bit hash.  For strings more than about 2000 bytes it can be
+// faster than CityHash64().
+//
+// Functions in the CityHash family are not suitable for cryptography.
+//
+// WARNING: This code has not been tested on big-endian platforms!
+// It is known to work well on little-endian platforms that have a small penalty
+// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
+//
+// By the way, for some hash functions, given strings a and b, the hash
+// of a+b is easily derived from the hashes of a and b.  This property
+// doesn't hold for any hash functions in this file.
+
+#ifndef CITY_HASH_H_
+#define CITY_HASH_H_
+
+#if defined(_MSC_VER) || defined(__CYGWIN__)
+#include "pstdint.h"
+typedef int ssize_t;
+#pragma warning(disable:4267)
+#else
+#include <stdint.h>
+#endif
+
+#include <stdlib.h>  // for size_t.
+#include <utility>
+
+typedef uint8_t uint8;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+typedef std::pair<uint64, uint64> uint128;
+
+inline uint64 Uint128Low64(const uint128& x) { return x.first; }
+inline uint64 Uint128High64(const uint128& x) { return x.second; }
+
+// Hash function for a byte array.
+uint64 CityHash64(const char *buf, size_t len);
+
+// Hash function for a byte array.  For convenience, a 64-bit seed is also
+// hashed into the result.
+uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
+
+// Hash function for a byte array.  For convenience, two seeds are also
+// hashed into the result.
+uint64 CityHash64WithSeeds(const char *buf, size_t len,
+                           uint64 seed0, uint64 seed1);
+
+// Hash function for a byte array.
+uint128 CityHash128(const char *s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
+
+// Hash 128 input bits down to 64 bits of output.
+// This is intended to be a reasonably good hash function.
+inline uint64 Hash128to64(const uint128& x) {
+  // Murmur-inspired hashing.
+  const uint64 kMul = 0x9ddfea08eb382d69ULL;
+  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
+  a ^= (a >> 47);
+  uint64 b = (Uint128High64(x) ^ a) * kMul;
+  b ^= (b >> 47);
+  b *= kMul;
+  return b;
+}
+
+#endif  // CITY_HASH_H_
diff --git a/Hashes.h b/Hashes.h
index b5b3c1f..2120cd8 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -33,6 +33,8 @@ void SuperFastHash         ( const void * key, int len, uint32_t seed, void * ou
 void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
 void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );
 void Crap8_test            ( const void * key, int len, uint32_t seed, void * out );
+void CityHash128_test      ( const void * key, int len, uint32_t seed, void * out );
+void CityHash64_test       ( const void * key, int len, uint32_t seed, void * out );
 
 uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
 
diff --git a/Platform.cpp b/Platform.cpp
index bed3aa1..d90dab8 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -25,6 +25,7 @@ void SetAffinity ( int cpu )
 
 void SetAffinity ( int /*cpu*/ )
 {
+#ifndef __CYGWIN__
   cpu_set_t mask;
     
   CPU_ZERO(&mask);
@@ -35,6 +36,7 @@ void SetAffinity ( int /*cpu*/ )
   {
     printf("WARNING: Could not set CPU affinity\n");
   }
+#endif
 }
 
 #endif
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index 05586f7..e5a59da 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -322,6 +322,14 @@
 		<Filter
 			Name="Hashes"
 			>
+			<File
+				RelativePath=".\City.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\City.h"
+				>
+			</File>
 			<File
 				RelativePath=".\crc.cpp"
 				>
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index 211b1e9..5f218f8 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -173,7 +173,7 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block
 
   uint64_t t1 = reinterpret_cast<uint64_t>(buf);
   
-  t1 = (t1 + 255) & 0xFFFFFFFFFFFFFF00;
+  t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
   t1 += align;
   
   uint8_t * block = reinterpret_cast<uint8_t*>(t1);
diff --git a/main.cpp b/main.cpp
index 973ffa6..ffe04b3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -57,6 +57,9 @@ HashInfo g_hashes[] =
   { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
   { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
   
+  { CityHash64_test,      64, 0x45754A6F, "City64",      "Google CityHash128WithSeed" },
+  { CityHash128_test,    128, 0x94B0EF46, "City128",     "Google CityHash128WithSeed" },
+  
   // MurmurHash2
 
   { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
-- 
cgit v1.2.3


From a27c28138be78f35f76ea9c13c6354be47097985 Mon Sep 17 00:00:00 2001
From: "aappleby@google.com"
 <aappleby@google.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Wed, 13 Apr 2011 23:23:14 +0000
Subject: Enable all gcc warnings except strict aliasing, fix build issues

git-svn-id: http://smhasher.googlecode.com/svn/trunk@133 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Bitvec.cpp     |  2 +-
 CMakeLists.txt |  1 +
 City.cpp       |  4 ++--
 Random.h       |  7 ++++---
 SpeedTest.cpp  |  4 ++--
 Types.h        | 24 ++++++++++++------------
 main.cpp       | 49 +++++++++++++++++++++++++------------------------
 7 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/Bitvec.cpp b/Bitvec.cpp
index 16feaa7..6c74bcc 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -239,7 +239,7 @@ void lshift32 ( void * blob, int len, int c )
   int nbytes  = len;
   int ndwords = nbytes / 4;
 
-  uint32_t * k = (uint32_t*)blob;
+  uint32_t * k = reinterpret_cast<uint32_t*>(blob);
 
   if(c == 0) return;
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 88f9cce..2b5df45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ project(SMHasher)
 cmake_minimum_required(VERSION 2.4)
 
 set(CMAKE_BUILD_TYPE Release)
+set(CMAKE_CXX_FLAGS "-g -fno-strict-aliasing -Wall")
 
 add_library(
   SMHasherSupport
diff --git a/City.cpp b/City.cpp
index 2f089d3..9043440 100644
--- a/City.cpp
+++ b/City.cpp
@@ -27,7 +27,7 @@
 // possible hash functions, by using SIMD instructions, or by
 // compromising on hash quality.
 
-#include "city.h"
+#include "City.h"
 
 #include <algorithm>
 
@@ -318,4 +318,4 @@ void CityHash128_test ( const void * key, int len, uint32_t seed, void * out )
   s.first = seed;
 
   *(uint128*)out = CityHash128WithSeed((const char*)key,len,s);
-}
\ No newline at end of file
+}
diff --git a/Random.h b/Random.h
index 619c453..e5a78fb 100644
--- a/Random.h
+++ b/Random.h
@@ -71,15 +71,16 @@ struct Rand
 
   void rand_p ( void * blob, int bytes )
   {
-    uint32_t * blocks = (uint32_t*)blob;
+    uint32_t * blocks = reinterpret_cast<uint32_t*>(blob);
 
     while(bytes >= 4)
     {
-      *blocks++ = rand_u32();
+      blocks[0] = rand_u32();
+      blocks++;
       bytes -= 4;
     }
 
-    uint8_t * tail = (uint8_t*)blocks;
+    uint8_t * tail = reinterpret_cast<uint8_t*>(blocks);
 
     for(int i = 0; i < bytes; i++)
     {
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index 5f218f8..fc71a36 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -96,9 +96,9 @@ void FilterOutliers ( std::vector<double> & v )
 {
   std::sort(v.begin(),v.end());
   
-  int len = 0;
+  size_t len = 0;
   
-  for(int x = 0x40000000; x; x = x >> 1 )
+  for(size_t x = 0x40000000; x; x = x >> 1 )
   {
     if((len | x) >= v.size()) continue;
     
diff --git a/Types.h b/Types.h
index 8814093..ee7ae9d 100644
--- a/Types.h
+++ b/Types.h
@@ -195,7 +195,7 @@ public:
 
   Blob()
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] = 0;
     }
@@ -203,7 +203,7 @@ public:
 
   Blob ( int x )
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] = 0;
     }
@@ -213,7 +213,7 @@ public:
 
   Blob ( const Blob & k )
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] = k.bytes[i];
     }
@@ -221,7 +221,7 @@ public:
 
   Blob & operator = ( const Blob & k )
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] = k.bytes[i];
     }
@@ -235,18 +235,18 @@ public:
     set(&t,16);
   }
 
-  void set ( const void * blob, int len )
+  void set ( const void * blob, size_t len )
   {
     const uint8_t * k = (const uint8_t*)blob;
 
     len = len > sizeof(bytes) ? sizeof(bytes) : len;
 
-    for(int i = 0; i < len; i++)
+    for(size_t i = 0; i < len; i++)
     {
       bytes[i] = k[i];
     }
 
-    for(int i = len; i < sizeof(bytes); i++)
+    for(size_t i = len; i < sizeof(bytes); i++)
     {
       bytes[i] = 0;
     }
@@ -267,7 +267,7 @@ public:
   
   bool operator < ( const Blob & k ) const
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       if(bytes[i] < k.bytes[i]) return true;
       if(bytes[i] > k.bytes[i]) return false;
@@ -278,7 +278,7 @@ public:
 
   bool operator == ( const Blob & k ) const
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       if(bytes[i] != k.bytes[i]) return false;
     }
@@ -298,7 +298,7 @@ public:
   {
     Blob t;
 
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       t.bytes[i] = bytes[i] ^ k.bytes[i];
     }
@@ -308,7 +308,7 @@ public:
 
   Blob & operator ^= ( const Blob & k )
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] ^= k.bytes[i];
     }
@@ -323,7 +323,7 @@ public:
 
   Blob & operator &= ( const Blob & k )
   {
-    for(int i = 0; i < sizeof(bytes); i++)
+    for(size_t i = 0; i < sizeof(bytes); i++)
     {
       bytes[i] &= k.bytes[i];
     }
diff --git a/main.cpp b/main.cpp
index ffe04b3..03f3e42 100644
--- a/main.cpp
+++ b/main.cpp
@@ -7,6 +7,7 @@
 
 #include <stdio.h>
 #include <time.h>
+#include <unistd.h>
 
 //-----------------------------------------------------------------------------
 // Configuration. TODO - move these to command-line flags
@@ -40,7 +41,7 @@ struct HashInfo
   const char * desc;
 };
 
-HashInfo g_hashes[] = 
+HashInfo g_hashes[] =
 {
   { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
   { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
@@ -56,7 +57,7 @@ HashInfo g_hashes[] =
   { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
   { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
   { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
-  
+
   { CityHash64_test,      64, 0x45754A6F, "City64",      "Google CityHash128WithSeed" },
   { CityHash128_test,    128, 0x94B0EF46, "City128",     "Google CityHash128WithSeed" },
   
@@ -75,9 +76,9 @@ HashInfo g_hashes[] =
 
 };
 
-HashInfo * findHash ( const char * name ) 
+HashInfo * findHash ( const char * name )
 {
-  for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
   {
     if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
   }
@@ -92,7 +93,7 @@ void SelfTest ( void )
 {
   bool pass = true;
 
-  for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
   {
     HashInfo * info = & g_hashes[i];
 
@@ -103,10 +104,10 @@ void SelfTest ( void )
   {
     printf("Self-test FAILED!\n");
 
-    for(int i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+    for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
     {
       HashInfo * info = & g_hashes[i];
-      
+
       printf("%16s - ",info->name);
       pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
     }
@@ -192,7 +193,7 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
 
   //-----------------------------------------------------------------------------
   // Avalanche tests
-  
+
   if(g_testAvalanche || g_testAll)
   {
     printf("[[[ Avalanche Tests ]]]\n\n");
@@ -255,7 +256,7 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
     result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
     result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
     result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
-    
+
     if(!result) printf("*********FAIL*********\n");
     printf("\n");
   }
@@ -296,7 +297,7 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
     result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
     result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
     result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
-    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram); 
+    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram);
     result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
     result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
 
@@ -319,8 +320,8 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
 
       uint32_t blocks[] =
       {
-        0x00000000, 
-        
+        0x00000000,
+
         0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
       };
 
@@ -338,8 +339,8 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
 
       uint32_t blocks[] =
       {
-        0x00000000, 
-        
+        0x00000000,
+
         0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
       };
 
@@ -357,8 +358,8 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
 
       uint32_t blocks[] =
       {
-        0x00000000, 
-        
+        0x00000000,
+
         0x80000000,
       };
 
@@ -376,8 +377,8 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
 
       uint32_t blocks[] =
       {
-        0x00000000, 
-        
+        0x00000000,
+
         0x00000001,
       };
 
@@ -395,8 +396,8 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
 
       uint32_t blocks[] =
       {
-        0x00000000, 
-        
+        0x00000000,
+
         0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
 
         0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
@@ -495,9 +496,9 @@ void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
 {
   g_inputVCode = MurmurOAAT(key,len,g_inputVCode);
   g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);
-  
+
   g_hashUnderTest->hash(key,len,seed,out);
-  
+
   g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
 }
 
@@ -506,7 +507,7 @@ void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
 void testHash ( const char * name )
 {
   HashInfo * pInfo = findHash(name);
-  
+
   if(pInfo == NULL)
   {
     printf("Invalid hash '%s' specified\n",name);
@@ -552,7 +553,7 @@ int main ( int argc, char ** argv )
   {
     hashToTest = argv[1];
   }
-  
+
   // Code runs on the 3rd CPU by default
 
   SetAffinity((1 << 2));
-- 
cgit v1.2.3


From 84de9bd4f5d49fd69a478ec7905a9928e556ac32 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Wed, 13 Apr 2011 23:29:37 +0000
Subject: and fix corresponding build breakages under Windows

git-svn-id: http://smhasher.googlecode.com/svn/trunk@134 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 SpeedTest.cpp | 6 +++---
 main.cpp      | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index fc71a36..2265389 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -63,11 +63,11 @@ double CalcStdv ( std::vector<double> & v, int a, int b )
 // Return true if the largest value in v[0,len) is more than three
 // standard deviations from the mean
 
-bool ContainsOutlier ( std::vector<double> & v, int len )
+bool ContainsOutlier ( std::vector<double> & v, size_t len )
 {
   double mean = 0;
   
-  for(int i = 0; i < len; i++)
+  for(size_t i = 0; i < len; i++)
   {
     mean += v[i];
   }
@@ -76,7 +76,7 @@ bool ContainsOutlier ( std::vector<double> & v, int len )
   
   double stdv = 0;
   
-  for(int i = 0; i < len; i++)
+  for(size_t i = 0; i < len; i++)
   {
     double x = v[i] - mean;
     stdv += x*x;
diff --git a/main.cpp b/main.cpp
index 03f3e42..eca0848 100644
--- a/main.cpp
+++ b/main.cpp
@@ -7,7 +7,6 @@
 
 #include <stdio.h>
 #include <time.h>
-#include <unistd.h>
 
 //-----------------------------------------------------------------------------
 // Configuration. TODO - move these to command-line flags
-- 
cgit v1.2.3


From bb9b561f4c5b0ad049633ec880ffa16fe882fb80 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Wed, 13 Apr 2011 23:36:49 +0000
Subject: more size_t warnings

git-svn-id: http://smhasher.googlecode.com/svn/trunk@135 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 Types.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Types.cpp b/Types.cpp
index 46051a6..f4c9b05 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -43,7 +43,7 @@ bool isprime ( uint32_t x )
     199,211,223,227,229,233,239,241,251
   };
 
-  for(int i=0; i < sizeof(p)/sizeof(uint32_t); i++)
+  for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++)
   { 
     if((x % p[i]) == 0)
     {
-- 
cgit v1.2.3


From b35e562e2d80bc47a51b53ec92a305eb9a3383b4 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 20 May 2011 23:00:53 +0000
Subject: Fix typo in rotation constant for Murmur3_x86_32's tail, update
 verification value.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@136 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 2 +-
 main.cpp        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 97883fa..0bf7386 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -132,7 +132,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
   case 3: k1 ^= tail[2] << 16;
   case 2: k1 ^= tail[1] << 8;
   case 1: k1 ^= tail[0];
-          k1 *= c1; k1 = ROTL32(k1,16); k1 *= c2; h1 ^= k1;
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
   };
 
   //----------
diff --git a/main.cpp b/main.cpp
index eca0848..bf25ce0 100644
--- a/main.cpp
+++ b/main.cpp
@@ -69,7 +69,7 @@ HashInfo g_hashes[] =
 
   // MurmurHash3
 
-  { MurmurHash3_x86_32,   32, 0x3252D141, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_32,   32, 0xB0F57EE3, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
   { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
   { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
-- 
cgit v1.2.3


From f3b789787b93945c974e2cc517b7dc352b28354e Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Thu, 1 Mar 2012 03:38:55 +0000
Subject: Merge branch chandlerc_dev

git-svn-id: http://smhasher.googlecode.com/svn/trunk@144 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 AvalancheTest.cpp    |  112 ++--
 AvalancheTest.h      |  844 +++++++++++++-------------
 Bitslice.cpp         |  252 ++++----
 Bitvec.cpp           | 1514 +++++++++++++++++++++++------------------------
 Bitvec.h             |  490 ++++++++--------
 CMakeLists.txt       |    3 +
 City.cpp             |  786 +++++++++++++++----------
 City.h               |  203 ++++---
 CityTest.cpp         |   15 +
 DifferentialTest.cpp |    6 +-
 DifferentialTest.h   |  562 +++++++++---------
 Hashes.cpp           |  310 +++++-----
 Hashes.h             |  151 ++---
 KeysetTest.cpp       |  654 ++++++++++-----------
 KeysetTest.h         |  878 +++++++++++++--------------
 MurmurHash1.cpp      |  348 +++++------
 MurmurHash1.h        |   68 +--
 MurmurHash2.cpp      | 1046 ++++++++++++++++-----------------
 MurmurHash2.h        |   78 +--
 MurmurHash3.cpp      |  670 ++++++++++-----------
 MurmurHash3.h        |   74 +--
 Platform.cpp         |   84 +--
 Platform.h           |  179 +++---
 Random.cpp           |   16 +-
 Random.h             |  234 ++++----
 SpeedTest.cpp        |  484 +++++++--------
 SpeedTest.h          |   16 +-
 Spooky.cpp           |  347 +++++++++++
 Spooky.h             |  293 +++++++++
 SpookyTest.cpp       |   16 +
 Stats.cpp            |  198 +++----
 Stats.h              |  776 ++++++++++++------------
 SuperFastHash.cpp    |  152 ++---
 Types.cpp            |  296 +++++-----
 Types.h              |  748 +++++++++++------------
 crc.cpp              |  200 +++----
 lookup3.cpp          |  144 ++---
 main.cpp             | 1185 ++++++++++++++++++-------------------
 md5.cpp              |  762 ++++++++++++------------
 pstdint.h            | 1598 +++++++++++++++++++++++++-------------------------
 sha1.cpp             |  650 ++++++++++----------
 sha1.h               |   40 +-
 42 files changed, 9164 insertions(+), 8318 deletions(-)
 create mode 100644 CityTest.cpp
 create mode 100644 Spooky.cpp
 create mode 100644 Spooky.h
 create mode 100644 SpookyTest.cpp

diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp
index 38aa452..f5ea0df 100644
--- a/AvalancheTest.cpp
+++ b/AvalancheTest.cpp
@@ -1,56 +1,56 @@
-#include "AvalancheTest.h"
-
-//-----------------------------------------------------------------------------
-
-void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
-{
-  const char * symbols = ".123456789X";
-
-  for(int i = 0; i < y; i++)
-  {
-    printf("[");
-    for(int j = 0; j < x; j++)
-    {
-      int k = (y - i) -1;
-
-      int bin = bins[k + (j*y)];
-
-      double b = double(bin) / double(reps);
-      b = fabs(b*2 - 1);
-
-      b *= scale;
-
-      int s = (int)floor(b*10);
-
-      if(s > 10) s = 10;
-      if(s < 0) s = 0;
-
-      printf("%c",symbols[s]);
-    }
-
-    printf("]\n");
-  }
-}
-
-//----------------------------------------------------------------------------
-
-double maxBias ( std::vector<int> & counts, int reps )
-{
-  double worst = 0;
-
-  for(int i = 0; i < (int)counts.size(); i++)
-  {
-    double c = double(counts[i]) / double(reps);
-
-    double d = fabs(c * 2 - 1);
-      
-    if(d > worst)
-    {
-      worst = d;
-    }
-  }
-
-  return worst;
-}
-
-//-----------------------------------------------------------------------------
+#include "AvalancheTest.h"
+
+//-----------------------------------------------------------------------------
+
+void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
+{
+  const char * symbols = ".123456789X";
+
+  for(int i = 0; i < y; i++)
+  {
+    printf("[");
+    for(int j = 0; j < x; j++)
+    {
+      int k = (y - i) -1;
+
+      int bin = bins[k + (j*y)];
+
+      double b = double(bin) / double(reps);
+      b = fabs(b*2 - 1);
+
+      b *= scale;
+
+      int s = (int)floor(b*10);
+
+      if(s > 10) s = 10;
+      if(s < 0) s = 0;
+
+      printf("%c",symbols[s]);
+    }
+
+    printf("]\n");
+  }
+}
+
+//----------------------------------------------------------------------------
+
+double maxBias ( std::vector<int> & counts, int reps )
+{
+  double worst = 0;
+
+  for(int i = 0; i < (int)counts.size(); i++)
+  {
+    double c = double(counts[i]) / double(reps);
+
+    double d = fabs(c * 2 - 1);
+      
+    if(d > worst)
+    {
+      worst = d;
+    }
+  }
+
+  return worst;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/AvalancheTest.h b/AvalancheTest.h
index 4c23369..f1bfeea 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -1,422 +1,422 @@
-//-----------------------------------------------------------------------------
-// Flipping a single bit of a key should cause an "avalanche" of changes in
-// the hash function's output. Ideally, each output bits should flip 50% of
-// the time - if the probability of an output bit flipping is not 50%, that bit
-// is "biased". Too much bias means that patterns applied to the input will
-// cause "echoes" of the patterns in the output, which in turn can cause the
-// hash function to fail to create an even, random distribution of hash values.
-
-
-#pragma once
-
-#include "Types.h"
-#include "Random.h"
-
-#include <vector>
-#include <stdio.h>
-#include <math.h>
-
-// Avalanche fails if a bit is biased by more than 1%
-
-#define AVALANCHE_FAIL 0.01
-
-double maxBias ( std::vector<int> & counts, int reps );
-
-//-----------------------------------------------------------------------------
-
-template < typename keytype, typename hashtype >
-void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
-{
-  const int keybytes = sizeof(keytype);
-  const int hashbytes = sizeof(hashtype);
-
-  const int keybits = keybytes * 8;
-  const int hashbits = hashbytes * 8;
-
-  keytype K;
-  hashtype A,B;
-
-  for(int irep = 0; irep < reps; irep++)
-  {
-    if(irep % (reps/10) == 0) printf(".");
-
-    r.rand_p(&K,keybytes);
-
-    hash(&K,keybytes,0,&A);
-
-    int * cursor = &counts[0];
-
-    for(int iBit = 0; iBit < keybits; iBit++)
-    {
-      flipbit(&K,keybytes,iBit);
-      hash(&K,keybytes,0,&B);
-      flipbit(&K,keybytes,iBit);
-
-      for(int iOut = 0; iOut < hashbits; iOut++)
-      {
-        int bitA = getbit(&A,hashbytes,iOut);
-        int bitB = getbit(&B,hashbytes,iOut);
-
-        (*cursor++) += (bitA ^ bitB);
-      }
-    }
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-template < typename keytype, typename hashtype >
-bool AvalancheTest ( pfHash hash, const int reps )
-{
-  Rand r(48273);
-  
-  const int keybytes = sizeof(keytype);
-  const int hashbytes = sizeof(hashtype);
-
-  const int keybits = keybytes * 8;
-  const int hashbits = hashbytes * 8;
-
-  printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
-
-  //----------
-
-  std::vector<int> bins(keybits*hashbits,0);
-
-  calcBias<keytype,hashtype>(hash,bins,reps,r);
-  
-  //----------
-
-  bool result = true;
-
-  double b = maxBias(bins,reps);
-
-  printf(" worst bias is %f%%",b * 100.0);
-
-  if(b > AVALANCHE_FAIL)
-  {
-    printf(" !!!!! ");
-    result = false;
-  }
-
-  printf("\n");
-
-  return result;
-}
-
-//----------------------------------------------------------------------------
-// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
-// not really all that useful.
-
-template< typename keytype, typename hashtype >
-void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
-{
-  Rand r(11938);
-  
-  const int keybytes = sizeof(keytype);
-  const int hashbytes = sizeof(hashtype);
-  const int hashbits = hashbytes * 8;
-
-  std::vector<int> bins(hashbits*hashbits*4,0);
-
-  keytype key;
-  hashtype h1,h2;
-
-  for(int irep = 0; irep < reps; irep++)
-  {
-    if(verbose)
-    {
-      if(irep % (reps/10) == 0) printf(".");
-    }
-
-    r.rand_p(&key,keybytes);
-    hash(&key,keybytes,0,&h1);
-
-    flipbit(key,keybit);
-    hash(&key,keybytes,0,&h2);
-
-    hashtype d = h1 ^ h2;
-
-    for(int out1 = 0; out1 < hashbits; out1++)
-    for(int out2 = 0; out2 < hashbits; out2++)
-    {
-      if(out1 == out2) continue;
-
-      uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
-
-      bins[(out1 * hashbits + out2) * 4 + b]++;
-    }
-  }
-
-  if(verbose) printf("\n");
-
-  maxBias = 0;
-
-  for(int out1 = 0; out1 < hashbits; out1++)
-  {
-    for(int out2 = 0; out2 < hashbits; out2++)
-    {
-      if(out1 == out2)
-      {
-        if(verbose) printf("\\");
-        continue;
-      }
-
-      double bias = 0;
-
-      for(int b = 0; b < 4; b++)
-      {
-        double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
-        b2 = fabs(b2 * 2 - 1);
-
-        if(b2 > bias) bias = b2;
-      }
-
-      if(bias > maxBias)
-      {
-        maxBias = bias;
-        maxA = out1;
-        maxB = out2;
-      }
-
-      if(verbose) 
-      {
-        if     (bias < 0.01) printf(".");
-        else if(bias < 0.05) printf("o");
-        else if(bias < 0.33) printf("O");
-        else                 printf("X");
-      }
-    }
-
-    if(verbose) printf("\n");
-  }
-}
-
-//----------
-
-template< typename keytype, typename hashtype >
-bool BicTest ( pfHash hash, const int reps )
-{
-  const int keybytes = sizeof(keytype);
-  const int keybits = keybytes * 8;
-
-  double maxBias = 0;
-  int maxK = 0;
-  int maxA = 0;
-  int maxB = 0;
-
-  for(int i = 0; i < keybits; i++)
-  {
-    if(i % (keybits/10) == 0) printf(".");
-
-    double bias;
-    int a,b;
-    
-    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
-
-    if(bias > maxBias)
-    {
-      maxBias = bias;
-      maxK = i;
-      maxA = a;
-      maxB = b;
-    }
-  }
-
-  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
-
-  // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
-
-  bool result = (maxBias < 0.05);
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// BIC test variant - store all intermediate data in a table, draw diagram
-// afterwards (much faster)
-
-template< typename keytype, typename hashtype >
-void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
-{
-  const int keybytes = sizeof(keytype);
-  const int keybits = keybytes * 8;
-  const int hashbytes = sizeof(hashtype);
-  const int hashbits = hashbytes * 8;
-  const int pagesize = hashbits*hashbits*4;
-
-  Rand r(11938);
-
-  double maxBias = 0;
-  int maxK = 0;
-  int maxA = 0;
-  int maxB = 0;
-
-  keytype key;
-  hashtype h1,h2;
-
-  std::vector<int> bins(keybits*pagesize,0);
-
-  for(int keybit = 0; keybit < keybits; keybit++)
-  {
-    if(keybit % (keybits/10) == 0) printf(".");
-
-    int * page = &bins[keybit*pagesize];
-
-    for(int irep = 0; irep < reps; irep++)
-    {
-      r.rand_p(&key,keybytes);
-      hash(&key,keybytes,0,&h1);
-      flipbit(key,keybit);
-      hash(&key,keybytes,0,&h2);
-
-      hashtype d = h1 ^ h2;
-
-      for(int out1 = 0; out1 < hashbits-1; out1++)
-      for(int out2 = out1+1; out2 < hashbits; out2++)
-      {
-        int * b = &page[(out1*hashbits+out2)*4];
-
-        uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
-
-        b[x]++;
-      }
-    }
-  }
-
-  printf("\n");
-
-  for(int out1 = 0; out1 < hashbits-1; out1++)
-  {
-    for(int out2 = out1+1; out2 < hashbits; out2++)
-    {
-      if(verbose) printf("(%3d,%3d) - ",out1,out2);
-
-      for(int keybit = 0; keybit < keybits; keybit++)
-      {
-        int * page = &bins[keybit*pagesize];
-        int * bins = &page[(out1*hashbits+out2)*4];
-
-        double bias = 0;
-
-        for(int b = 0; b < 4; b++)
-        {
-          double b2 = double(bins[b]) / double(reps / 2);
-          b2 = fabs(b2 * 2 - 1);
-
-          if(b2 > bias) bias = b2;
-        }
-
-        if(bias > maxBias)
-        {
-          maxBias = bias;
-          maxK = keybit;
-          maxA = out1;
-          maxB = out2;
-        }
-
-        if(verbose) 
-        {
-          if     (bias < 0.01) printf(".");
-          else if(bias < 0.05) printf("o");
-          else if(bias < 0.33) printf("O");
-          else                 printf("X");
-        }
-      }
-
-      // Finished keybit
-
-      if(verbose) printf("\n");
-    }
-
-    if(verbose)
-    {
-      for(int i = 0; i < keybits+12; i++) printf("-");
-      printf("\n");
-    }
-  }
-
-  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
-}
-
-
-//-----------------------------------------------------------------------------
-// BIC test variant - iterate over output bits, then key bits. No temp storage,
-// but slooooow
-
-template< typename keytype, typename hashtype >
-void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
-{
-  const int keybytes = sizeof(keytype);
-  const int keybits = keybytes * 8;
-  const int hashbytes = sizeof(hashtype);
-  const int hashbits = hashbytes * 8;
-
-  Rand r(11938);
-
-  double maxBias = 0;
-  int maxK = 0;
-  int maxA = 0;
-  int maxB = 0;
-
-  keytype key;
-  hashtype h1,h2;
-
-  for(int out1 = 0; out1 < hashbits-1; out1++)
-  for(int out2 = out1+1; out2 < hashbits; out2++)
-  {
-    if(verbose) printf("(%3d,%3d) - ",out1,out2);
-
-    for(int keybit = 0; keybit < keybits; keybit++)
-    {
-      int bins[4] = { 0, 0, 0, 0 };
-
-      for(int irep = 0; irep < reps; irep++)
-      {
-        r.rand_p(&key,keybytes);
-        hash(&key,keybytes,0,&h1);
-        flipbit(key,keybit);
-        hash(&key,keybytes,0,&h2);
-
-        hashtype d = h1 ^ h2;
-
-        uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
-
-        bins[b]++;
-      }
-
-      double bias = 0;
-
-      for(int b = 0; b < 4; b++)
-      {
-        double b2 = double(bins[b]) / double(reps / 2);
-        b2 = fabs(b2 * 2 - 1);
-
-        if(b2 > bias) bias = b2;
-      }
-
-      if(bias > maxBias)
-      {
-        maxBias = bias;
-        maxK = keybit;
-        maxA = out1;
-        maxB = out2;
-      }
-
-      if(verbose) 
-      {
-        if     (bias < 0.05) printf(".");
-        else if(bias < 0.10) printf("o");
-        else if(bias < 0.50) printf("O");
-        else                 printf("X");
-      }
-    }
-
-    // Finished keybit
-
-    if(verbose) printf("\n");
-  }
-
-  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
-}
-
-//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+// Flipping a single bit of a key should cause an "avalanche" of changes in
+// the hash function's output. Ideally, each output bits should flip 50% of
+// the time - if the probability of an output bit flipping is not 50%, that bit
+// is "biased". Too much bias means that patterns applied to the input will
+// cause "echoes" of the patterns in the output, which in turn can cause the
+// hash function to fail to create an even, random distribution of hash values.
+
+
+#pragma once
+
+#include "Types.h"
+#include "Random.h"
+
+#include <vector>
+#include <stdio.h>
+#include <math.h>
+
+// Avalanche fails if a bit is biased by more than 1%
+
+#define AVALANCHE_FAIL 0.01
+
+double maxBias ( std::vector<int> & counts, int reps );
+
+//-----------------------------------------------------------------------------
+
+template < typename keytype, typename hashtype >
+void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
+{
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+
+  const int keybits = keybytes * 8;
+  const int hashbits = hashbytes * 8;
+
+  keytype K;
+  hashtype A,B;
+
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % (reps/10) == 0) printf(".");
+
+    r.rand_p(&K,keybytes);
+
+    hash(&K,keybytes,0,&A);
+
+    int * cursor = &counts[0];
+
+    for(int iBit = 0; iBit < keybits; iBit++)
+    {
+      flipbit(&K,keybytes,iBit);
+      hash(&K,keybytes,0,&B);
+      flipbit(&K,keybytes,iBit);
+
+      for(int iOut = 0; iOut < hashbits; iOut++)
+      {
+        int bitA = getbit(&A,hashbytes,iOut);
+        int bitB = getbit(&B,hashbytes,iOut);
+
+        (*cursor++) += (bitA ^ bitB);
+      }
+    }
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+template < typename keytype, typename hashtype >
+bool AvalancheTest ( pfHash hash, const int reps )
+{
+  Rand r(48273);
+  
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+
+  const int keybits = keybytes * 8;
+  const int hashbits = hashbytes * 8;
+
+  printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
+
+  //----------
+
+  std::vector<int> bins(keybits*hashbits,0);
+
+  calcBias<keytype,hashtype>(hash,bins,reps,r);
+  
+  //----------
+
+  bool result = true;
+
+  double b = maxBias(bins,reps);
+
+  printf(" worst bias is %f%%",b * 100.0);
+
+  if(b > AVALANCHE_FAIL)
+  {
+    printf(" !!!!! ");
+    result = false;
+  }
+
+  printf("\n");
+
+  return result;
+}
+
+//----------------------------------------------------------------------------
+// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
+// not really all that useful.
+
+template< typename keytype, typename hashtype >
+void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
+{
+  Rand r(11938);
+  
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  std::vector<int> bins(hashbits*hashbits*4,0);
+
+  keytype key;
+  hashtype h1,h2;
+
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(verbose)
+    {
+      if(irep % (reps/10) == 0) printf(".");
+    }
+
+    r.rand_p(&key,keybytes);
+    hash(&key,keybytes,0,&h1);
+
+    flipbit(key,keybit);
+    hash(&key,keybytes,0,&h2);
+
+    hashtype d = h1 ^ h2;
+
+    for(int out1 = 0; out1 < hashbits; out1++)
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2) continue;
+
+      uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+      bins[(out1 * hashbits + out2) * 4 + b]++;
+    }
+  }
+
+  if(verbose) printf("\n");
+
+  maxBias = 0;
+
+  for(int out1 = 0; out1 < hashbits; out1++)
+  {
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2)
+      {
+        if(verbose) printf("\\");
+        continue;
+      }
+
+      double bias = 0;
+
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.01) printf(".");
+        else if(bias < 0.05) printf("o");
+        else if(bias < 0.33) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    if(verbose) printf("\n");
+  }
+}
+
+//----------
+
+template< typename keytype, typename hashtype >
+bool BicTest ( pfHash hash, const int reps )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  for(int i = 0; i < keybits; i++)
+  {
+    if(i % (keybits/10) == 0) printf(".");
+
+    double bias;
+    int a,b;
+    
+    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
+
+    if(bias > maxBias)
+    {
+      maxBias = bias;
+      maxK = i;
+      maxA = a;
+      maxB = b;
+    }
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+
+  // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
+
+  bool result = (maxBias < 0.05);
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// BIC test variant - store all intermediate data in a table, draw diagram
+// afterwards (much faster)
+
+template< typename keytype, typename hashtype >
+void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int pagesize = hashbits*hashbits*4;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  keytype key;
+  hashtype h1,h2;
+
+  std::vector<int> bins(keybits*pagesize,0);
+
+  for(int keybit = 0; keybit < keybits; keybit++)
+  {
+    if(keybit % (keybits/10) == 0) printf(".");
+
+    int * page = &bins[keybit*pagesize];
+
+    for(int irep = 0; irep < reps; irep++)
+    {
+      r.rand_p(&key,keybytes);
+      hash(&key,keybytes,0,&h1);
+      flipbit(key,keybit);
+      hash(&key,keybytes,0,&h2);
+
+      hashtype d = h1 ^ h2;
+
+      for(int out1 = 0; out1 < hashbits-1; out1++)
+      for(int out2 = out1+1; out2 < hashbits; out2++)
+      {
+        int * b = &page[(out1*hashbits+out2)*4];
+
+        uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        b[x]++;
+      }
+    }
+  }
+
+  printf("\n");
+
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  {
+    for(int out2 = out1+1; out2 < hashbits; out2++)
+    {
+      if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+      for(int keybit = 0; keybit < keybits; keybit++)
+      {
+        int * page = &bins[keybit*pagesize];
+        int * bins = &page[(out1*hashbits+out2)*4];
+
+        double bias = 0;
+
+        for(int b = 0; b < 4; b++)
+        {
+          double b2 = double(bins[b]) / double(reps / 2);
+          b2 = fabs(b2 * 2 - 1);
+
+          if(b2 > bias) bias = b2;
+        }
+
+        if(bias > maxBias)
+        {
+          maxBias = bias;
+          maxK = keybit;
+          maxA = out1;
+          maxB = out2;
+        }
+
+        if(verbose) 
+        {
+          if     (bias < 0.01) printf(".");
+          else if(bias < 0.05) printf("o");
+          else if(bias < 0.33) printf("O");
+          else                 printf("X");
+        }
+      }
+
+      // Finished keybit
+
+      if(verbose) printf("\n");
+    }
+
+    if(verbose)
+    {
+      for(int i = 0; i < keybits+12; i++) printf("-");
+      printf("\n");
+    }
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+
+//-----------------------------------------------------------------------------
+// BIC test variant - iterate over output bits, then key bits. No temp storage,
+// but slooooow
+
+template< typename keytype, typename hashtype >
+void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  keytype key;
+  hashtype h1,h2;
+
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  for(int out2 = out1+1; out2 < hashbits; out2++)
+  {
+    if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+    for(int keybit = 0; keybit < keybits; keybit++)
+    {
+      int bins[4] = { 0, 0, 0, 0 };
+
+      for(int irep = 0; irep < reps; irep++)
+      {
+        r.rand_p(&key,keybytes);
+        hash(&key,keybytes,0,&h1);
+        flipbit(key,keybit);
+        hash(&key,keybytes,0,&h2);
+
+        hashtype d = h1 ^ h2;
+
+        uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        bins[b]++;
+      }
+
+      double bias = 0;
+
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[b]) / double(reps / 2);
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxK = keybit;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.05) printf(".");
+        else if(bias < 0.10) printf("o");
+        else if(bias < 0.50) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    // Finished keybit
+
+    if(verbose) printf("\n");
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitslice.cpp b/Bitslice.cpp
index 428e355..45a2249 100644
--- a/Bitslice.cpp
+++ b/Bitslice.cpp
@@ -1,127 +1,127 @@
-#include "Bitvec.h"
-#include <vector>
-#include <assert.h>
-
-// handle xnor
-
-typedef std::vector<uint32_t> slice;
-typedef std::vector<slice> slice_vec;
-
-int countbits ( slice & v )
-{
-  int c = 0;
-
-  for(size_t i = 0; i < v.size(); i++)
-  {
-    int d = countbits(v[i]);
-
-    c += d;
-  }
-
-  return c;
-}
-
-int countxor ( slice & a, slice & b )
-{
-  assert(a.size() == b.size());
-
-  int c = 0;
-
-  for(size_t i = 0; i < a.size(); i++)
-  {
-    int d = countbits(a[i] ^ b[i]);
-
-    c += d;
-  }
-
-  return c;
-}
-
-void xoreq ( slice & a, slice & b )
-{
-  assert(a.size() == b.size());
-
-  for(size_t i = 0; i < a.size(); i++)
-  {
-    a[i] ^= b[i];
-  }
-}
-
-//-----------------------------------------------------------------------------
-// Bitslice a hash set
-
-template< typename hashtype >
-void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
-{
-  const int hashbytes = sizeof(hashtype);
-  const int hashbits = hashbytes * 8;
-  const int slicelen = ((int)hashes.size() + 31) / 32;
-
-  slices.clear();
-  slices.resize(hashbits);
-
-  for(int i = 0; i < (int)slices.size(); i++)
-  {
-    slices[i].resize(slicelen,0);
-  }
-
-  for(int j = 0; j < hashbits; j++)
-  {
-    void * sliceblob = &(slices[j][0]);
-
-    for(int i = 0; i < (int)hashes.size(); i++)
-    {
-      int b = getbit(hashes[i],j);
-
-      setbit(sliceblob,slicelen*4,i,b);
-    }
-  }
-}
-
-void FactorSlices ( slice_vec & slices )
-{
-  std::vector<int> counts(slices.size(),0);
-
-  for(size_t i = 0; i < slices.size(); i++)
-  {
-    counts[i] = countbits(slices[i]);
-  }
-
-  bool changed = true;
-
-  while(changed)
-  {
-    int bestA = -1;
-    int bestB = -1;
-
-    for(int j = 0; j < (int)slices.size()-1; j++)
-    {
-      for(int i = j+1; i < (int)slices.size(); i++)
-      {
-        int d = countxor(slices[i],slices[j]);
-
-        if((d < counts[i]) && (d < counts[j]))
-        {
-          if(counts[i] < counts[j])
-          {
-            bestA = j;
-            bestB = i;
-          }
-        }
-        else if(d < counts[i])
-        {
-          //bestA = 
-        }
-      }
-    }
-  }
-}
-
-
-void foo ( void )
-{
-  slice a;
-  slice_vec b;
-
-  Bitslice(a,b);
+#include "Bitvec.h"
+#include <vector>
+#include <assert.h>
+
+// handle xnor
+
+typedef std::vector<uint32_t> slice;
+typedef std::vector<slice> slice_vec;
+
+int countbits ( slice & v )
+{
+  int c = 0;
+
+  for(size_t i = 0; i < v.size(); i++)
+  {
+    int d = countbits(v[i]);
+
+    c += d;
+  }
+
+  return c;
+}
+
+int countxor ( slice & a, slice & b )
+{
+  assert(a.size() == b.size());
+
+  int c = 0;
+
+  for(size_t i = 0; i < a.size(); i++)
+  {
+    int d = countbits(a[i] ^ b[i]);
+
+    c += d;
+  }
+
+  return c;
+}
+
+void xoreq ( slice & a, slice & b )
+{
+  assert(a.size() == b.size());
+
+  for(size_t i = 0; i < a.size(); i++)
+  {
+    a[i] ^= b[i];
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Bitslice a hash set
+
+template< typename hashtype >
+void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
+{
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int slicelen = ((int)hashes.size() + 31) / 32;
+
+  slices.clear();
+  slices.resize(hashbits);
+
+  for(int i = 0; i < (int)slices.size(); i++)
+  {
+    slices[i].resize(slicelen,0);
+  }
+
+  for(int j = 0; j < hashbits; j++)
+  {
+    void * sliceblob = &(slices[j][0]);
+
+    for(int i = 0; i < (int)hashes.size(); i++)
+    {
+      int b = getbit(hashes[i],j);
+
+      setbit(sliceblob,slicelen*4,i,b);
+    }
+  }
+}
+
+void FactorSlices ( slice_vec & slices )
+{
+  std::vector<int> counts(slices.size(),0);
+
+  for(size_t i = 0; i < slices.size(); i++)
+  {
+    counts[i] = countbits(slices[i]);
+  }
+
+  bool changed = true;
+
+  while(changed)
+  {
+    int bestA = -1;
+    int bestB = -1;
+
+    for(int j = 0; j < (int)slices.size()-1; j++)
+    {
+      for(int i = j+1; i < (int)slices.size(); i++)
+      {
+        int d = countxor(slices[i],slices[j]);
+
+        if((d < counts[i]) && (d < counts[j]))
+        {
+          if(counts[i] < counts[j])
+          {
+            bestA = j;
+            bestB = i;
+          }
+        }
+        else if(d < counts[i])
+        {
+          //bestA = 
+        }
+      }
+    }
+  }
+}
+
+
+void foo ( void )
+{
+  slice a;
+  slice_vec b;
+
+  Bitslice(a,b);
 }
\ No newline at end of file
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 6c74bcc..4855f8f 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -1,757 +1,757 @@
-#include "Bitvec.h"
-
-#include "Random.h"
-
-#include <assert.h>
-#include <stdio.h>
-
-#ifndef DEBUG
-#undef assert
-void assert ( bool )
-{
-}
-#endif
-
-//----------------------------------------------------------------------------
-
-void printbits ( const void * blob, int len )
-{
-  const uint8_t * data = (const uint8_t *)blob;
-
-  printf("[");
-  for(int i = 0; i < len; i++)
-  {
-    unsigned char byte = data[i];
-
-    int hi = (byte >> 4);
-    int lo = (byte & 0xF);
-
-    if(hi) printf("%01x",hi);
-    else   printf(".");
-
-    if(lo) printf("%01x",lo);
-    else   printf(".");
-
-    if(i != len-1) printf(" ");
-  }
-  printf("]");
-}
-
-void printbits2 ( const uint8_t * k, int nbytes )
-{
-  printf("[");
-
-  for(int i = nbytes-1; i >= 0; i--)
-  {
-    uint8_t b = k[i];
-
-    for(int j = 7; j >= 0; j--)
-    {
-      uint8_t c = (b & (1 << j)) ? '#' : ' ';
-
-      putc(c,stdout);
-    }
-  }
-  printf("]");
-}
-
-void printhex32 ( const void * blob, int len )
-{
-  assert((len & 3) == 0);
-
-  uint32_t * d = (uint32_t*)blob;
-
-  printf("{ ");
-
-  for(int i = 0; i < len/4; i++) 
-  {
-    printf("0x%08x, ",d[i]);
-  }
-
-  printf("}");
-}
-
-void printbytes ( const void * blob, int len )
-{
-  uint8_t * d = (uint8_t*)blob;
-
-  printf("{ ");
-
-  for(int i = 0; i < len; i++)
-  {
-    printf("0x%02x, ",d[i]);
-  }
-
-  printf(" };");
-}
-
-void printbytes2 ( const void * blob, int len )
-{
-  uint8_t * d = (uint8_t*)blob;
-
-  for(int i = 0; i < len; i++)
-  {
-    printf("%02x ",d[i]);
-  }
-}
-
-//-----------------------------------------------------------------------------
-// Bit-level manipulation
-
-// These two are from the "Bit Twiddling Hacks" webpage
-
-uint32_t popcount ( uint32_t v )
-{
-	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
-	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-	uint32_t c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
-
-	return c;
-}
-
-uint32_t parity ( uint32_t v )
-{
-	v ^= v >> 1;
-	v ^= v >> 2;
-	v = (v & 0x11111111U) * 0x11111111U;
-	return (v >> 28) & 1;
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t getbit ( const void * block, int len, uint32_t bit )
-{
-  uint8_t * b = (uint8_t*)block;
-
-  int byte = bit >> 3;
-  bit = bit & 0x7;
-  
-  if(byte < len) return (b[byte] >> bit) & 1;
-
-  return 0;
-}
-
-uint32_t getbit_wrap ( const void * block, int len, uint32_t bit )
-{
-  uint8_t * b = (uint8_t*)block;
-
-  int byte = bit >> 3;
-  bit = bit & 0x7;
-  
-  byte %= len;
-    
-  return (b[byte] >> bit) & 1;
-}
-
-void setbit ( void * block, int len, uint32_t bit )
-{
-  uint8_t * b = (uint8_t*)block;
-
-  int byte = bit >> 3;
-  bit = bit & 0x7;
-  
-  if(byte < len) b[byte] |= (1 << bit);
-}
-
-void setbit ( void * block, int len, uint32_t bit, uint32_t val )
-{
-  val ? setbit(block,len,bit) : clearbit(block,len,bit);
-}
-
-void clearbit ( void * block, int len, uint32_t bit )
-{
-  uint8_t * b = (uint8_t*)block;
-
-  int byte = bit >> 3;
-  bit = bit & 0x7;
-  
-  if(byte < len) b[byte] &= ~(1 << bit);
-}
-
-void flipbit ( void * block, int len, uint32_t bit )
-{
-  uint8_t * b = (uint8_t*)block;
-
-  int byte = bit >> 3;
-  bit = bit & 0x7;
-  
-  if(byte < len) b[byte] ^= (1 << bit);
-}
-
-// from the "Bit Twiddling Hacks" webpage
-
-int countbits ( uint32_t v )
-{
-  v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
-  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-  int c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
-
-  return c;
-}
-
-//-----------------------------------------------------------------------------
-
-void lshift1 ( void * blob, int len, int c )
-{
-  int nbits = len*8;
-
-  for(int i = nbits-1; i >= 0; i--)
-  {
-    setbit(blob,len,i,getbit(blob,len,i-c));
-  }
-}
-
-
-void lshift8 ( void * blob, int nbytes, int c )
-{
-  uint8_t * k = (uint8_t*)blob;
-
-  if(c == 0) return;
-
-  int b = c >> 3;
-  c &= 7;
-
-  for(int i = nbytes-1; i >= b; i--)
-  {
-    k[i] = k[i-b];
-  }
-
-  for(int i = b-1; i >= 0; i--)
-  {
-    k[i] = 0;
-  }
-
-  if(c == 0) return;
-
-  for(int i = nbytes-1; i >= 0; i--)
-  {
-    uint8_t a = k[i];
-    uint8_t b = (i == 0) ? 0 : k[i-1];
-
-    k[i] = (a << c) | (b >> (8-c));
-  }
-}
-
-void lshift32 ( void * blob, int len, int c )
-{
-  assert((len & 3) == 0);
-
-  int nbytes  = len;
-  int ndwords = nbytes / 4;
-
-  uint32_t * k = reinterpret_cast<uint32_t*>(blob);
-
-  if(c == 0) return;
-
-  //----------
-
-  int b = c / 32;
-  c &= (32-1);
-
-  for(int i = ndwords-1; i >= b; i--)
-  {
-    k[i] = k[i-b];
-  }
-
-  for(int i = b-1; i >= 0; i--)
-  {
-    k[i] = 0;
-  }
-
-  if(c == 0) return;
-
-  for(int i = ndwords-1; i >= 0; i--)
-  {
-    uint32_t a = k[i];
-    uint32_t b = (i == 0) ? 0 : k[i-1];
-
-    k[i] = (a << c) | (b >> (32-c));
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-void rshift1 ( void * blob, int len, int c )
-{
-  int nbits = len*8;
-
-  for(int i = 0; i < nbits; i++)
-  {
-    setbit(blob,len,i,getbit(blob,len,i+c));
-  }
-}
-
-void rshift8 ( void * blob, int nbytes, int c )
-{
-  uint8_t * k = (uint8_t*)blob;
-
-  if(c == 0) return;
-
-  int b = c >> 3;
-  c &= 7;
-
-  for(int i = 0; i < nbytes-b; i++)
-  {
-    k[i] = k[i+b];
-  }
-
-  for(int i = nbytes-b; i < nbytes; i++)
-  {
-    k[i] = 0;
-  }
-
-  if(c == 0) return;
-
-  for(int i = 0; i < nbytes; i++)
-  {
-    uint8_t a = (i == nbytes-1) ? 0 : k[i+1];
-    uint8_t b = k[i];
-
-    k[i] = (a << (8-c) ) | (b >> c);
-  }
-}
-
-void rshift32 ( void * blob, int len, int c )
-{
-  assert((len & 3) == 0);
-
-  int nbytes  = len;
-  int ndwords = nbytes / 4;
-
-  uint32_t * k = (uint32_t*)blob;
-
-  //----------
-
-  if(c == 0) return;
-
-  int b = c / 32;
-  c &= (32-1);
-
-  for(int i = 0; i < ndwords-b; i++)
-  {
-    k[i] = k[i+b];
-  }
-
-  for(int i = ndwords-b; i < ndwords; i++)
-  {
-    k[i] = 0;
-  }
-
-  if(c == 0) return;
-
-  for(int i = 0; i < ndwords; i++)
-  {
-    uint32_t a = (i == ndwords-1) ? 0 : k[i+1];
-    uint32_t b = k[i];
-
-    k[i] = (a << (32-c) ) | (b >> c);
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-void lrot1 ( void * blob, int len, int c )
-{
-  int nbits = len * 8;
-
-  for(int i = 0; i < c; i++)
-  {
-    uint32_t bit = getbit(blob,len,nbits-1);
-
-    lshift1(blob,len,1);
-
-    setbit(blob,len,0,bit);
-  }
-}
-
-void lrot8 ( void * blob, int len, int c )
-{
-  int nbytes  = len;
-
-  uint8_t * k = (uint8_t*)blob;
-
-  if(c == 0) return;
-
-  //----------
-
-  int b = c / 8;
-  c &= (8-1);
-
-  for(int j = 0; j < b; j++)
-  {
-    uint8_t t = k[nbytes-1];
-
-    for(int i = nbytes-1; i > 0; i--)
-    {
-      k[i] = k[i-1];
-    }
-
-    k[0] = t;
-  }
-
-  uint8_t t = k[nbytes-1];
-
-  if(c == 0) return;
-
-  for(int i = nbytes-1; i >= 0; i--)
-  {
-    uint8_t a = k[i];
-    uint8_t b = (i == 0) ? t : k[i-1];
-
-    k[i] = (a << c) | (b >> (8-c));
-  }
-}
-
-void lrot32 ( void * blob, int len, int c )
-{
-  assert((len & 3) == 0);
-
-  int nbytes  = len;
-  int ndwords = nbytes/4;
-
-  uint32_t * k = (uint32_t*)blob;
-
-  if(c == 0) return;
-
-  //----------
-
-  int b = c / 32;
-  c &= (32-1);
-
-  for(int j = 0; j < b; j++)
-  {
-    uint32_t t = k[ndwords-1];
-
-    for(int i = ndwords-1; i > 0; i--)
-    {
-      k[i] = k[i-1];
-    }
-
-    k[0] = t;
-  }
-
-  uint32_t t = k[ndwords-1];
-
-  if(c == 0) return;
-
-  for(int i = ndwords-1; i >= 0; i--)
-  {
-    uint32_t a = k[i];
-    uint32_t b = (i == 0) ? t : k[i-1];
-
-    k[i] = (a << c) | (b >> (32-c));
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-void rrot1 ( void * blob, int len, int c )
-{
-  int nbits = len * 8;
-
-  for(int i = 0; i < c; i++)
-  {
-    uint32_t bit = getbit(blob,len,0);
-
-    rshift1(blob,len,1);
-
-    setbit(blob,len,nbits-1,bit);
-  }
-}
-
-void rrot8 ( void * blob, int len, int c )
-{
-  int nbytes  = len;
-
-  uint8_t * k = (uint8_t*)blob;
-
-  if(c == 0) return;
-
-  //----------
-
-  int b = c / 8;
-  c &= (8-1);
-
-  for(int j = 0; j < b; j++)
-  {
-    uint8_t t = k[0];
-
-    for(int i = 0; i < nbytes-1; i++)
-    {
-      k[i] = k[i+1];
-    }
-
-    k[nbytes-1] = t;
-  }
-
-  if(c == 0) return;
-
-  //----------
-
-  uint8_t t = k[0];
-
-  for(int i = 0; i < nbytes; i++)
-  {
-    uint8_t a = (i == nbytes-1) ? t : k[i+1];
-    uint8_t b = k[i];
-
-    k[i] = (a << (8-c)) | (b >> c);
-  }
-}
-
-void rrot32 ( void * blob, int len, int c )
-{
-  assert((len & 3) == 0);
-
-  int nbytes  = len;
-  int ndwords = nbytes/4;
-
-  uint32_t * k = (uint32_t*)blob;
-
-  if(c == 0) return;
-
-  //----------
-
-  int b = c / 32;
-  c &= (32-1);
-
-  for(int j = 0; j < b; j++)
-  {
-    uint32_t t = k[0];
-
-    for(int i = 0; i < ndwords-1; i++)
-    {
-      k[i] = k[i+1];
-    }
-
-    k[ndwords-1] = t;
-  }
-
-  if(c == 0) return;
-
-  //----------
-
-  uint32_t t = k[0];
-
-  for(int i = 0; i < ndwords; i++)
-  {
-    uint32_t a = (i == ndwords-1) ? t : k[i+1];
-    uint32_t b = k[i];
-
-    k[i] = (a << (32-c)) | (b >> c);
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t window1 ( void * blob, int len, int start, int count )
-{
-  int nbits = len*8;
-  start %= nbits;
-
-  uint32_t t = 0;
-
-  for(int i = 0; i < count; i++)
-  {
-    setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i));
-  }
-
-  return t;
-}
-
-uint32_t window8 ( void * blob, int len, int start, int count )
-{
-  int nbits = len*8;
-  start %= nbits;
-
-  uint32_t t = 0;
-  uint8_t * k = (uint8_t*)blob;
-
-  if(count == 0) return 0;
-
-  int c = start & (8-1);
-  int d = start / 8;
-
-  for(int i = 0; i < 4; i++)
-  {
-    int ia = (i + d + 1) % len;
-    int ib = (i + d + 0) % len;
-
-    uint32_t a = k[ia];
-    uint32_t b = k[ib];
-    
-    uint32_t m = (a << (8-c)) | (b >> c);
-
-    t |= (m << (8*i));
-
-  }
-
-  t &= ((1 << count)-1);
-
-  return t;
-}
-
-uint32_t window32 ( void * blob, int len, int start, int count )
-{
-  int nbits = len*8;
-  start %= nbits;
-
-  assert((len & 3) == 0);
-
-  int ndwords = len / 4;
-
-  uint32_t * k = (uint32_t*)blob;
-
-  if(count == 0) return 0;
-
-  int c = start & (32-1);
-  int d = start / 32;
-
-  if(c == 0) return (k[d] & ((1 << count) - 1));
-
-  int ia = (d + 1) % ndwords;
-  int ib = (d + 0) % ndwords;
-
-  uint32_t a = k[ia];
-  uint32_t b = k[ib];
-  
-  uint32_t t = (a << (32-c)) | (b >> c);
-
-  t &= ((1 << count)-1);
-
-  return t;
-}
-
-//-----------------------------------------------------------------------------
-
-bool test_shift ( void )
-{
-  Rand r(1123);
-
-  int nbits   = 64;
-  int nbytes  = nbits / 8;
-  int reps = 10000;
-
-  for(int j = 0; j < reps; j++)
-  {
-    if(j % (reps/10) == 0) printf(".");
-
-    uint64_t a = r.rand_u64();
-    uint64_t b;
-
-    for(int i = 0; i < nbits; i++)
-    {
-      b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));
-      b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));
-      b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));
-
-      b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));
-      b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
-      b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
-
-      b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));
-      b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));
-      b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));
-
-      b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));
-      b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));
-      b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));
-    }
-  }
-
-  printf("PASS\n");
-  return true;
-}
-
-//-----------------------------------------------------------------------------
-
-template < int nbits >
-bool test_window2 ( void )
-{
-  Rand r(83874);
-  
-  struct keytype
-  {
-    uint8_t bytes[nbits/8];
-  };
-
-  int nbytes = nbits / 8;
-  int reps = 10000;
-
-  for(int j = 0; j < reps; j++)
-  {
-    if(j % (reps/10) == 0) printf(".");
-
-    keytype k;
-
-    r.rand_p(&k,nbytes);
-
-    for(int start = 0; start < nbits; start++)
-    {
-      for(int count = 0; count < 32; count++)
-      {
-        uint32_t a = window1(&k,nbytes,start,count);
-        uint32_t b = window8(&k,nbytes,start,count);
-        uint32_t c = window(&k,nbytes,start,count);
-
-        assert(a == b);
-        assert(a == c);
-      }
-    }
-  }
-
-  printf("PASS %d\n",nbits);
-
-  return true;
-}
-
-bool test_window ( void )
-{
-  Rand r(48402);
-  
-  int reps = 10000;
-
-  for(int j = 0; j < reps; j++)
-  {
-    if(j % (reps/10) == 0) printf(".");
-
-    int nbits   = 64;
-    int nbytes  = nbits / 8;
-
-    uint64_t x = r.rand_u64();
-
-    for(int start = 0; start < nbits; start++)
-    {
-      for(int count = 0; count < 32; count++)
-      {
-        uint32_t a = (uint32_t)ROTR64(x,start);
-        a &= ((1 << count)-1);
-        
-        uint32_t b = window1 (&x,nbytes,start,count);
-        uint32_t c = window8 (&x,nbytes,start,count);
-        uint32_t d = window32(&x,nbytes,start,count);
-        uint32_t e = window  (x,start,count);
-
-        assert(a == b);
-        assert(a == c);
-        assert(a == d);
-        assert(a == e);
-      }
-    }
-  }
-
-  printf("PASS 64\n");
-
-  test_window2<8>();
-  test_window2<16>();
-  test_window2<24>();
-  test_window2<32>();
-  test_window2<40>();
-  test_window2<48>();
-  test_window2<56>();
-  test_window2<64>();
-
-  return true;
-}
-
-//-----------------------------------------------------------------------------
+#include "Bitvec.h"
+
+#include "Random.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#ifndef DEBUG
+#undef assert
+void assert ( bool )
+{
+}
+#endif
+
+//----------------------------------------------------------------------------
+
+void printbits ( const void * blob, int len )
+{
+  const uint8_t * data = (const uint8_t *)blob;
+
+  printf("[");
+  for(int i = 0; i < len; i++)
+  {
+    unsigned char byte = data[i];
+
+    int hi = (byte >> 4);
+    int lo = (byte & 0xF);
+
+    if(hi) printf("%01x",hi);
+    else   printf(".");
+
+    if(lo) printf("%01x",lo);
+    else   printf(".");
+
+    if(i != len-1) printf(" ");
+  }
+  printf("]");
+}
+
+void printbits2 ( const uint8_t * k, int nbytes )
+{
+  printf("[");
+
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t b = k[i];
+
+    for(int j = 7; j >= 0; j--)
+    {
+      uint8_t c = (b & (1 << j)) ? '#' : ' ';
+
+      putc(c,stdout);
+    }
+  }
+  printf("]");
+}
+
+void printhex32 ( const void * blob, int len )
+{
+  assert((len & 3) == 0);
+
+  uint32_t * d = (uint32_t*)blob;
+
+  printf("{ ");
+
+  for(int i = 0; i < len/4; i++) 
+  {
+    printf("0x%08x, ",d[i]);
+  }
+
+  printf("}");
+}
+
+void printbytes ( const void * blob, int len )
+{
+  uint8_t * d = (uint8_t*)blob;
+
+  printf("{ ");
+
+  for(int i = 0; i < len; i++)
+  {
+    printf("0x%02x, ",d[i]);
+  }
+
+  printf(" };");
+}
+
+void printbytes2 ( const void * blob, int len )
+{
+  uint8_t * d = (uint8_t*)blob;
+
+  for(int i = 0; i < len; i++)
+  {
+    printf("%02x ",d[i]);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Bit-level manipulation
+
+// These two are from the "Bit Twiddling Hacks" webpage
+
+uint32_t popcount ( uint32_t v )
+{
+	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
+	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
+	uint32_t c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
+
+	return c;
+}
+
+uint32_t parity ( uint32_t v )
+{
+	v ^= v >> 1;
+	v ^= v >> 2;
+	v = (v & 0x11111111U) * 0x11111111U;
+	return (v >> 28) & 1;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t getbit ( const void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) return (b[byte] >> bit) & 1;
+
+  return 0;
+}
+
+uint32_t getbit_wrap ( const void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  byte %= len;
+    
+  return (b[byte] >> bit) & 1;
+}
+
+void setbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] |= (1 << bit);
+}
+
+void setbit ( void * block, int len, uint32_t bit, uint32_t val )
+{
+  val ? setbit(block,len,bit) : clearbit(block,len,bit);
+}
+
+void clearbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] &= ~(1 << bit);
+}
+
+void flipbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] ^= (1 << bit);
+}
+
+// from the "Bit Twiddling Hacks" webpage
+
+int countbits ( uint32_t v )
+{
+  v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
+  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
+  int c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
+
+  return c;
+}
+
+//-----------------------------------------------------------------------------
+
+void lshift1 ( void * blob, int len, int c )
+{
+  int nbits = len*8;
+
+  for(int i = nbits-1; i >= 0; i--)
+  {
+    setbit(blob,len,i,getbit(blob,len,i-c));
+  }
+}
+
+
+void lshift8 ( void * blob, int nbytes, int c )
+{
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  int b = c >> 3;
+  c &= 7;
+
+  for(int i = nbytes-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
+
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? 0 : k[i-1];
+
+    k[i] = (a << c) | (b >> (8-c));
+  }
+}
+
+void lshift32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
+
+  uint32_t * k = reinterpret_cast<uint32_t*>(blob);
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int i = ndwords-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
+
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? 0 : k[i-1];
+
+    k[i] = (a << c) | (b >> (32-c));
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void rshift1 ( void * blob, int len, int c )
+{
+  int nbits = len*8;
+
+  for(int i = 0; i < nbits; i++)
+  {
+    setbit(blob,len,i,getbit(blob,len,i+c));
+  }
+}
+
+void rshift8 ( void * blob, int nbytes, int c )
+{
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  int b = c >> 3;
+  c &= 7;
+
+  for(int i = 0; i < nbytes-b; i++)
+  {
+    k[i] = k[i+b];
+  }
+
+  for(int i = nbytes-b; i < nbytes; i++)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? 0 : k[i+1];
+    uint8_t b = k[i];
+
+    k[i] = (a << (8-c) ) | (b >> c);
+  }
+}
+
+void rshift32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  //----------
+
+  if(c == 0) return;
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int i = 0; i < ndwords-b; i++)
+  {
+    k[i] = k[i+b];
+  }
+
+  for(int i = ndwords-b; i < ndwords; i++)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? 0 : k[i+1];
+    uint32_t b = k[i];
+
+    k[i] = (a << (32-c) ) | (b >> c);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void lrot1 ( void * blob, int len, int c )
+{
+  int nbits = len * 8;
+
+  for(int i = 0; i < c; i++)
+  {
+    uint32_t bit = getbit(blob,len,nbits-1);
+
+    lshift1(blob,len,1);
+
+    setbit(blob,len,0,bit);
+  }
+}
+
+void lrot8 ( void * blob, int len, int c )
+{
+  int nbytes  = len;
+
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 8;
+  c &= (8-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[nbytes-1];
+
+    for(int i = nbytes-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
+
+    k[0] = t;
+  }
+
+  uint8_t t = k[nbytes-1];
+
+  if(c == 0) return;
+
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? t : k[i-1];
+
+    k[i] = (a << c) | (b >> (8-c));
+  }
+}
+
+void lrot32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes/4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[ndwords-1];
+
+    for(int i = ndwords-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
+
+    k[0] = t;
+  }
+
+  uint32_t t = k[ndwords-1];
+
+  if(c == 0) return;
+
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? t : k[i-1];
+
+    k[i] = (a << c) | (b >> (32-c));
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void rrot1 ( void * blob, int len, int c )
+{
+  int nbits = len * 8;
+
+  for(int i = 0; i < c; i++)
+  {
+    uint32_t bit = getbit(blob,len,0);
+
+    rshift1(blob,len,1);
+
+    setbit(blob,len,nbits-1,bit);
+  }
+}
+
+void rrot8 ( void * blob, int len, int c )
+{
+  int nbytes  = len;
+
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 8;
+  c &= (8-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[0];
+
+    for(int i = 0; i < nbytes-1; i++)
+    {
+      k[i] = k[i+1];
+    }
+
+    k[nbytes-1] = t;
+  }
+
+  if(c == 0) return;
+
+  //----------
+
+  uint8_t t = k[0];
+
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? t : k[i+1];
+    uint8_t b = k[i];
+
+    k[i] = (a << (8-c)) | (b >> c);
+  }
+}
+
+void rrot32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes/4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[0];
+
+    for(int i = 0; i < ndwords-1; i++)
+    {
+      k[i] = k[i+1];
+    }
+
+    k[ndwords-1] = t;
+  }
+
+  if(c == 0) return;
+
+  //----------
+
+  uint32_t t = k[0];
+
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? t : k[i+1];
+    uint32_t b = k[i];
+
+    k[i] = (a << (32-c)) | (b >> c);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t window1 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  uint32_t t = 0;
+
+  for(int i = 0; i < count; i++)
+  {
+    setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i));
+  }
+
+  return t;
+}
+
+uint32_t window8 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  uint32_t t = 0;
+  uint8_t * k = (uint8_t*)blob;
+
+  if(count == 0) return 0;
+
+  int c = start & (8-1);
+  int d = start / 8;
+
+  for(int i = 0; i < 4; i++)
+  {
+    int ia = (i + d + 1) % len;
+    int ib = (i + d + 0) % len;
+
+    uint32_t a = k[ia];
+    uint32_t b = k[ib];
+    
+    uint32_t m = (a << (8-c)) | (b >> c);
+
+    t |= (m << (8*i));
+
+  }
+
+  t &= ((1 << count)-1);
+
+  return t;
+}
+
+uint32_t window32 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  assert((len & 3) == 0);
+
+  int ndwords = len / 4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(count == 0) return 0;
+
+  int c = start & (32-1);
+  int d = start / 32;
+
+  if(c == 0) return (k[d] & ((1 << count) - 1));
+
+  int ia = (d + 1) % ndwords;
+  int ib = (d + 0) % ndwords;
+
+  uint32_t a = k[ia];
+  uint32_t b = k[ib];
+  
+  uint32_t t = (a << (32-c)) | (b >> c);
+
+  t &= ((1 << count)-1);
+
+  return t;
+}
+
+//-----------------------------------------------------------------------------
+
+bool test_shift ( void )
+{
+  Rand r(1123);
+
+  int nbits   = 64;
+  int nbytes  = nbits / 8;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    uint64_t a = r.rand_u64();
+    uint64_t b;
+
+    for(int i = 0; i < nbits; i++)
+    {
+      b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));
+
+      b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
+
+      b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));
+
+      b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));
+    }
+  }
+
+  printf("PASS\n");
+  return true;
+}
+
+//-----------------------------------------------------------------------------
+
+template < int nbits >
+bool test_window2 ( void )
+{
+  Rand r(83874);
+  
+  struct keytype
+  {
+    uint8_t bytes[nbits/8];
+  };
+
+  int nbytes = nbits / 8;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    keytype k;
+
+    r.rand_p(&k,nbytes);
+
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = window1(&k,nbytes,start,count);
+        uint32_t b = window8(&k,nbytes,start,count);
+        uint32_t c = window(&k,nbytes,start,count);
+
+        assert(a == b);
+        assert(a == c);
+      }
+    }
+  }
+
+  printf("PASS %d\n",nbits);
+
+  return true;
+}
+
+bool test_window ( void )
+{
+  Rand r(48402);
+  
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    int nbits   = 64;
+    int nbytes  = nbits / 8;
+
+    uint64_t x = r.rand_u64();
+
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = (uint32_t)ROTR64(x,start);
+        a &= ((1 << count)-1);
+        
+        uint32_t b = window1 (&x,nbytes,start,count);
+        uint32_t c = window8 (&x,nbytes,start,count);
+        uint32_t d = window32(&x,nbytes,start,count);
+        uint32_t e = window  (x,start,count);
+
+        assert(a == b);
+        assert(a == c);
+        assert(a == d);
+        assert(a == e);
+      }
+    }
+  }
+
+  printf("PASS 64\n");
+
+  test_window2<8>();
+  test_window2<16>();
+  test_window2<24>();
+  test_window2<32>();
+  test_window2<40>();
+  test_window2<48>();
+  test_window2<56>();
+  test_window2<64>();
+
+  return true;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitvec.h b/Bitvec.h
index 8a3a1b0..4d61979 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -1,245 +1,245 @@
-#pragma once
-
-#include "Platform.h"
-
-#include <vector>
-
-//-----------------------------------------------------------------------------
-
-void     printbits   ( const void * blob, int len );
-void     printhex32  ( const void * blob, int len );
-void     printbytes  ( const void * blob, int len );
-void     printbytes2 ( const void * blob, int len );
-
-uint32_t popcount    ( uint32_t v );
-uint32_t parity      ( uint32_t v );
-
-uint32_t getbit      ( const void * blob, int len, uint32_t bit );
-uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );
-
-void     setbit      ( void * blob, int len, uint32_t bit );
-void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );
-
-void     clearbit    ( void * blob, int len, uint32_t bit );
-
-void     flipbit     ( void * blob, int len, uint32_t bit );
-
-int      countbits   ( uint32_t v );
-int      countbits   ( std::vector<uint32_t> & v );
-
-int      countbits   ( const void * blob, int len );
-
-void     invert      ( std::vector<uint32_t> & v );
-
-//----------
-
-template< typename T >
-inline uint32_t getbit ( T & blob, uint32_t bit )
-{
-  return getbit(&blob,sizeof(blob),bit);
-}
-
-template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }
-template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }
-
-//----------
-
-template< typename T >
-inline void setbit ( T & blob, uint32_t bit )
-{
-  return setbit(&blob,sizeof(blob),bit);
-}
-
-template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }
-template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }
-
-//----------
-
-template< typename T >
-inline void flipbit ( T & blob, uint32_t bit )
-{
-  flipbit(&blob,sizeof(blob),bit);
-}
-
-template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }
-template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }
-
-//-----------------------------------------------------------------------------
-// Left and right shift of blobs. The shift(N) versions work on chunks of N
-// bits at a time (faster)
-
-void lshift1  ( void * blob, int len, int c );
-void lshift8  ( void * blob, int len, int c );
-void lshift32 ( void * blob, int len, int c );
-
-void rshift1  ( void * blob, int len, int c );
-void rshift8  ( void * blob, int len, int c );
-void rshift32 ( void * blob, int len, int c );
-
-inline void lshift ( void * blob, int len, int c )
-{
-  if((len & 3) == 0)
-  {
-    lshift32(blob,len,c);
-  }
-  else
-  {
-    lshift8(blob,len,c);
-  }
-}
-
-inline void rshift ( void * blob, int len, int c )
-{
-  if((len & 3) == 0)
-  {
-    rshift32(blob,len,c);
-  }
-  else
-  {
-    rshift8(blob,len,c);
-  }
-}
-
-template < typename T >
-inline void lshift ( T & blob, int c )
-{
-  if((sizeof(T) & 3) == 0)
-  {
-    lshift32(&blob,sizeof(T),c);
-  }
-  else
-  {
-    lshift8(&blob,sizeof(T),c);
-  }
-}
-
-template < typename T >
-inline void rshift ( T & blob, int c )
-{
-  if((sizeof(T) & 3) == 0)
-  {
-    lshift32(&blob,sizeof(T),c);
-  }
-  else
-  {
-    lshift8(&blob,sizeof(T),c);
-  }
-}
-
-template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }
-template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }
-template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }
-template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }
-
-//-----------------------------------------------------------------------------
-// Left and right rotate of blobs. The rot(N) versions work on chunks of N
-// bits at a time (faster)
-
-void lrot1    ( void * blob, int len, int c );
-void lrot8    ( void * blob, int len, int c );
-void lrot32   ( void * blob, int len, int c );
-
-void rrot1    ( void * blob, int len, int c );
-void rrot8    ( void * blob, int len, int c );
-void rrot32   ( void * blob, int len, int c );
-
-inline void lrot ( void * blob, int len, int c )
-{
-  if((len & 3) == 0)
-  {
-    return lrot32(blob,len,c);
-  }
-  else
-  {
-    return lrot8(blob,len,c);
-  }
-}
-
-inline void rrot ( void * blob, int len, int c )
-{
-  if((len & 3) == 0)
-  {
-    return rrot32(blob,len,c);
-  }
-  else
-  {
-    return rrot8(blob,len,c);
-  }
-}
-
-template < typename T >
-inline void lrot ( T & blob, int c )
-{
-  if((sizeof(T) & 3) == 0)
-  {
-    return lrot32(&blob,sizeof(T),c);
-  }
-  else
-  {
-    return lrot8(&blob,sizeof(T),c);
-  }
-}
-
-template < typename T >
-inline void rrot ( T & blob, int c )
-{
-  if((sizeof(T) & 3) == 0)
-  {
-    return rrot32(&blob,sizeof(T),c);
-  }
-  else
-  {
-    return rrot8(&blob,sizeof(T),c);
-  }
-}
-
-template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }
-template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }
-template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }
-template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }
-
-//-----------------------------------------------------------------------------
-// Bit-windowing functions - select some N-bit subset of the input blob
-
-uint32_t window1  ( void * blob, int len, int start, int count );
-uint32_t window8  ( void * blob, int len, int start, int count );
-uint32_t window32 ( void * blob, int len, int start, int count );
-
-inline uint32_t window ( void * blob, int len, int start, int count )
-{
-  if(len & 3)
-  {
-    return window8(blob,len,start,count);
-  }
-  else
-  {
-    return window32(blob,len,start,count);
-  }
-}
-
-template < typename T >
-inline uint32_t window ( T & blob, int start, int count )
-{
-  if((sizeof(T) & 3) == 0)
-  {
-    return window32(&blob,sizeof(T),start,count);
-  }
-  else
-  {
-    return window8(&blob,sizeof(T),start,count);
-  }
-}
-
-template<> 
-inline uint32_t window ( uint32_t & blob, int start, int count )
-{
-  return ROTR32(blob,start) & ((1<<count)-1);
-}
-
-template<> 
-inline uint32_t window ( uint64_t & blob, int start, int count )
-{
-  return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);
-}
-
-//-----------------------------------------------------------------------------
+#pragma once
+
+#include "Platform.h"
+
+#include <vector>
+
+//-----------------------------------------------------------------------------
+
+void     printbits   ( const void * blob, int len );
+void     printhex32  ( const void * blob, int len );
+void     printbytes  ( const void * blob, int len );
+void     printbytes2 ( const void * blob, int len );
+
+uint32_t popcount    ( uint32_t v );
+uint32_t parity      ( uint32_t v );
+
+uint32_t getbit      ( const void * blob, int len, uint32_t bit );
+uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );
+
+void     setbit      ( void * blob, int len, uint32_t bit );
+void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );
+
+void     clearbit    ( void * blob, int len, uint32_t bit );
+
+void     flipbit     ( void * blob, int len, uint32_t bit );
+
+int      countbits   ( uint32_t v );
+int      countbits   ( std::vector<uint32_t> & v );
+
+int      countbits   ( const void * blob, int len );
+
+void     invert      ( std::vector<uint32_t> & v );
+
+//----------
+
+template< typename T >
+inline uint32_t getbit ( T & blob, uint32_t bit )
+{
+  return getbit(&blob,sizeof(blob),bit);
+}
+
+template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }
+template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }
+
+//----------
+
+template< typename T >
+inline void setbit ( T & blob, uint32_t bit )
+{
+  return setbit(&blob,sizeof(blob),bit);
+}
+
+template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }
+template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }
+
+//----------
+
+template< typename T >
+inline void flipbit ( T & blob, uint32_t bit )
+{
+  flipbit(&blob,sizeof(blob),bit);
+}
+
+template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }
+template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }
+
+//-----------------------------------------------------------------------------
+// Left and right shift of blobs. The shift(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lshift1  ( void * blob, int len, int c );
+void lshift8  ( void * blob, int len, int c );
+void lshift32 ( void * blob, int len, int c );
+
+void rshift1  ( void * blob, int len, int c );
+void rshift8  ( void * blob, int len, int c );
+void rshift32 ( void * blob, int len, int c );
+
+inline void lshift ( void * blob, int len, int c )
+{
+  if((len & 3) == 0)
+  {
+    lshift32(blob,len,c);
+  }
+  else
+  {
+    lshift8(blob,len,c);
+  }
+}
+
+inline void rshift ( void * blob, int len, int c )
+{
+  if((len & 3) == 0)
+  {
+    rshift32(blob,len,c);
+  }
+  else
+  {
+    rshift8(blob,len,c);
+  }
+}
+
+template < typename T >
+inline void lshift ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)
+  {
+    lshift32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    lshift8(&blob,sizeof(T),c);
+  }
+}
+
+template < typename T >
+inline void rshift ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)
+  {
+    lshift32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    lshift8(&blob,sizeof(T),c);
+  }
+}
+
+template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }
+template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }
+template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }
+template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }
+
+//-----------------------------------------------------------------------------
+// Left and right rotate of blobs. The rot(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lrot1    ( void * blob, int len, int c );
+void lrot8    ( void * blob, int len, int c );
+void lrot32   ( void * blob, int len, int c );
+
+void rrot1    ( void * blob, int len, int c );
+void rrot8    ( void * blob, int len, int c );
+void rrot32   ( void * blob, int len, int c );
+
+inline void lrot ( void * blob, int len, int c )
+{
+  if((len & 3) == 0)
+  {
+    return lrot32(blob,len,c);
+  }
+  else
+  {
+    return lrot8(blob,len,c);
+  }
+}
+
+inline void rrot ( void * blob, int len, int c )
+{
+  if((len & 3) == 0)
+  {
+    return rrot32(blob,len,c);
+  }
+  else
+  {
+    return rrot8(blob,len,c);
+  }
+}
+
+template < typename T >
+inline void lrot ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)
+  {
+    return lrot32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    return lrot8(&blob,sizeof(T),c);
+  }
+}
+
+template < typename T >
+inline void rrot ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)
+  {
+    return rrot32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    return rrot8(&blob,sizeof(T),c);
+  }
+}
+
+template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }
+template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }
+template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }
+template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }
+
+//-----------------------------------------------------------------------------
+// Bit-windowing functions - select some N-bit subset of the input blob
+
+uint32_t window1  ( void * blob, int len, int start, int count );
+uint32_t window8  ( void * blob, int len, int start, int count );
+uint32_t window32 ( void * blob, int len, int start, int count );
+
+inline uint32_t window ( void * blob, int len, int start, int count )
+{
+  if(len & 3)
+  {
+    return window8(blob,len,start,count);
+  }
+  else
+  {
+    return window32(blob,len,start,count);
+  }
+}
+
+template < typename T >
+inline uint32_t window ( T & blob, int start, int count )
+{
+  if((sizeof(T) & 3) == 0)
+  {
+    return window32(&blob,sizeof(T),start,count);
+  }
+  else
+  {
+    return window8(&blob,sizeof(T),start,count);
+  }
+}
+
+template<> 
+inline uint32_t window ( uint32_t & blob, int start, int count )
+{
+  return ROTR32(blob,start) & ((1<<count)-1);
+}
+
+template<> 
+inline uint32_t window ( uint64_t & blob, int start, int count )
+{
+  return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b5df45..d04afdf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ add_library(
   AvalancheTest.cpp
   Bitslice.cpp
   Bitvec.cpp
+  CityTest.cpp
   City.cpp
   crc.cpp
   DifferentialTest.cpp
@@ -24,6 +25,8 @@ add_library(
   Random.cpp
   sha1.cpp
   SpeedTest.cpp
+  Spooky.cpp
+  SpookyTest.cpp
   Stats.cpp
   SuperFastHash.cpp
   Types.cpp
diff --git a/City.cpp b/City.cpp
index 9043440..4d70dd2 100644
--- a/City.cpp
+++ b/City.cpp
@@ -1,321 +1,465 @@
-// Copyright (c) 2011 Google, Inc.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-//
-// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
-//
-// This file provides CityHash64() and related functions.
-//
-// It's probably possible to create even faster hash functions by
-// writing a program that systematically explores some of the space of
-// possible hash functions, by using SIMD instructions, or by
-// compromising on hash quality.
-
-#include "City.h"
-
-#include <algorithm>
-
-using namespace std;
-
-#define UNALIGNED_LOAD64(p) (*(const uint64*)(p))
-#define UNALIGNED_LOAD32(p) (*(const uint32*)(p))
-
-#if !defined(LIKELY)
-#if defined(__GNUC__)
-#define LIKELY(x) (__builtin_expect(!!(x), 1))
-#else
-#define LIKELY(x) (x)
-#endif
-#endif
-
-// Some primes between 2^63 and 2^64 for various uses.
-static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
-static const uint64 k1 = 0xb492b66fbe98f273ULL;
-static const uint64 k2 = 0x9ae16a3b2f90404fULL;
-static const uint64 k3 = 0xc949d7c7509e6557ULL;
-
-// Bitwise right rotate.  Normally this will compile to a single
-// instruction, especially if the shift is a manifest constant.
-static uint64 Rotate(uint64 val, int shift) {
-  // Avoid shifting by 64: doing so yields an undefined result.
-  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
-}
-
-// Equivalent to Rotate(), but requires the second arg to be non-zero.
-// On x86-64, and probably others, it's possible for this to compile
-// to a single instruction if both args are already in registers.
-static uint64 RotateByAtLeast1(uint64 val, int shift) {
-  return (val >> shift) | (val << (64 - shift));
-}
-
-static uint64 ShiftMix(uint64 val) {
-  return val ^ (val >> 47);
-}
-
-static uint64 HashLen16(uint64 u, uint64 v) {
-  return Hash128to64(uint128(u, v));
-}
-
-static uint64 HashLen0to16(const char *s, size_t len) {
-  if (len > 8) {
-    uint64 a = UNALIGNED_LOAD64(s);
-    uint64 b = UNALIGNED_LOAD64(s + len - 8);
-    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
-  }
-  if (len >= 4) {
-    uint64 a = UNALIGNED_LOAD32(s);
-    return HashLen16(len + (a << 3), UNALIGNED_LOAD32(s + len - 4));
-  }
-  if (len > 0) {
-    uint8 a = s[0];
-    uint8 b = s[len >> 1];
-    uint8 c = s[len - 1];
-    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
-    uint32 z = len + (static_cast<uint32>(c) << 2);
-    return ShiftMix(y * k2 ^ z * k3) * k2;
-  }
-  return k2;
-}
-
-// This probably works well for 16-byte strings as well, but it may be overkill
-// in that case.
-static uint64 HashLen17to32(const char *s, size_t len) {
-  uint64 a = UNALIGNED_LOAD64(s) * k1;
-  uint64 b = UNALIGNED_LOAD64(s + 8);
-  uint64 c = UNALIGNED_LOAD64(s + len - 8) * k2;
-  uint64 d = UNALIGNED_LOAD64(s + len - 16) * k0;
-  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
-                   a + Rotate(b ^ k3, 20) - c + len);
-}
-
-// Return a 16-byte hash for 48 bytes.  Quick and dirty.
-// Callers do best to use "random-looking" values for a and b.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(
-    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
-  a += w;
-  b = Rotate(b + a + z, 21);
-  uint64 c = a;
-  a += x;
-  a += y;
-  b += Rotate(a, 44);
-  return make_pair(a + z, b + c);
-}
-
-// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(
-    const char* s, uint64 a, uint64 b) {
-  return WeakHashLen32WithSeeds(UNALIGNED_LOAD64(s),
-                                UNALIGNED_LOAD64(s + 8),
-                                UNALIGNED_LOAD64(s + 16),
-                                UNALIGNED_LOAD64(s + 24),
-                                a,
-                                b);
-}
-
-// Return an 8-byte hash for 33 to 64 bytes.
-static uint64 HashLen33to64(const char *s, size_t len) {
-  uint64 z = UNALIGNED_LOAD64(s + 24);
-  uint64 a = UNALIGNED_LOAD64(s) + (len + UNALIGNED_LOAD64(s + len - 16)) * k0;
-  uint64 b = Rotate(a + z, 52);
-  uint64 c = Rotate(a, 37);
-  a += UNALIGNED_LOAD64(s + 8);
-  c += Rotate(a, 7);
-  a += UNALIGNED_LOAD64(s + 16);
-  uint64 vf = a + z;
-  uint64 vs = b + Rotate(a, 31) + c;
-  a = UNALIGNED_LOAD64(s + 16) + UNALIGNED_LOAD64(s + len - 32);
-  z = UNALIGNED_LOAD64(s + len - 8);
-  b = Rotate(a + z, 52);
-  c = Rotate(a, 37);
-  a += UNALIGNED_LOAD64(s + len - 24);
-  c += Rotate(a, 7);
-  a += UNALIGNED_LOAD64(s + len - 16);
-  uint64 wf = a + z;
-  uint64 ws = b + Rotate(a, 31) + c;
-  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
-  return ShiftMix(r * k0 + vs) * k2;
-}
-
-uint64 CityHash64(const char *s, size_t len) {
-  if (len <= 32) {
-    if (len <= 16) {
-      return HashLen0to16(s, len);
-    } else {
-      return HashLen17to32(s, len);
-    }
-  } else if (len <= 64) {
-    return HashLen33to64(s, len);
-  }
-
-  // For strings over 64 bytes we hash the end first, and then as we
-  // loop we keep 56 bytes of state: v, w, x, y, and z.
-  uint64 x = UNALIGNED_LOAD64(s);
-  uint64 y = UNALIGNED_LOAD64(s + len - 16) ^ k1;
-  uint64 z = UNALIGNED_LOAD64(s + len - 56) ^ k0;
-  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, y);
-  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0);
-  z += ShiftMix(v.second) * k1;
-  x = Rotate(z + x, 39) * k1;
-  y = Rotate(y, 33) * k1;
-
-  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
-  len = (len - 1) & ~static_cast<size_t>(63);
-  do {
-    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
-    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
-    x ^= w.second;
-    y ^= v.first;
-    z = Rotate(z ^ w.first, 33);
-    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
-    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
-    std::swap(z, x);
-    s += 64;
-    len -= 64;
-  } while (len != 0);
-  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
-                   HashLen16(v.second, w.second) + x);
-}
-
-uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
-  return CityHash64WithSeeds(s, len, k2, seed);
-}
-
-uint64 CityHash64WithSeeds(const char *s, size_t len,
-                           uint64 seed0, uint64 seed1) {
-  return HashLen16(CityHash64(s, len) - seed0, seed1);
-}
-
-// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
-// of any length representable in ssize_t.  Based on City and Murmur.
-static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
-  uint64 a = Uint128Low64(seed);
-  uint64 b = Uint128High64(seed);
-  uint64 c = 0;
-  uint64 d = 0;
-  ssize_t l = len - 16;
-  if (l <= 0) {  // len <= 16
-    c = b * k1 + HashLen0to16(s, len);
-    d = Rotate(a + (len >= 8 ? UNALIGNED_LOAD64(s) : c), 32);
-  } else {  // len > 16
-    c = HashLen16(UNALIGNED_LOAD64(s + len - 8) + k1, a);
-    d = HashLen16(b + len, c + UNALIGNED_LOAD64(s + len - 16));
-    a += d;
-    do {
-      a ^= ShiftMix(UNALIGNED_LOAD64(s) * k1) * k1;
-      a *= k1;
-      b ^= a;
-      c ^= ShiftMix(UNALIGNED_LOAD64(s + 8) * k1) * k1;
-      c *= k1;
-      d ^= c;
-      s += 16;
-      l -= 16;
-    } while (l > 0);
-  }
-  a = HashLen16(a, c);
-  b = HashLen16(d, b);
-  return uint128(a ^ b, HashLen16(b, a));
-}
-
-uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
-  if (len < 128) {
-    return CityMurmur(s, len, seed);
-  }
-
-  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
-  // v, w, x, y, and z.
-  pair<uint64, uint64> v, w;
-  uint64 x = Uint128Low64(seed);
-  uint64 y = Uint128High64(seed);
-  uint64 z = len * k1;
-  v.first = Rotate(y ^ k1, 49) * k1 + UNALIGNED_LOAD64(s);
-  v.second = Rotate(v.first, 42) * k1 + UNALIGNED_LOAD64(s + 8);
-  w.first = Rotate(y + z, 35) * k1 + x;
-  w.second = Rotate(x + UNALIGNED_LOAD64(s + 88), 53) * k1;
-
-  // This is the same inner loop as CityHash64(), manually unrolled.
-  do {
-    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
-    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
-    x ^= w.second;
-    y ^= v.first;
-    z = Rotate(z ^ w.first, 33);
-    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
-    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
-    std::swap(z, x);
-    s += 64;
-    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
-    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
-    x ^= w.second;
-    y ^= v.first;
-    z = Rotate(z ^ w.first, 33);
-    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
-    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
-    std::swap(z, x);
-    s += 64;
-    len -= 128;
-  } while (LIKELY(len >= 128));
-  y += Rotate(w.first, 37) * k0 + z;
-  x += Rotate(v.first + z, 49) * k0;
-  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
-  for (size_t tail_done = 0; tail_done < len; ) {
-    tail_done += 32;
-    y = Rotate(y - x, 42) * k0 + v.second;
-    w.first += UNALIGNED_LOAD64(s + len - tail_done + 16);
-    x = Rotate(x, 49) * k0 + w.first;
-    w.first += v.first;
-    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second);
-  }
-  // At this point our 48 bytes of state should contain more than
-  // enough information for a strong 128-bit hash.  We use two
-  // different 48-byte-to-8-byte hashes to get a 16-byte final result.
-  x = HashLen16(x, v.first);
-  y = HashLen16(y, w.first);
-  return uint128(HashLen16(x + v.second, w.second) + y,
-                 HashLen16(x + w.second, y + v.second));
-}
-
-uint128 CityHash128(const char *s, size_t len) {
-  if (len >= 16) {
-    return CityHash128WithSeed(s + 16,
-                               len - 16,
-                               uint128(UNALIGNED_LOAD64(s) ^ k3,
-                                       UNALIGNED_LOAD64(s + 8)));
-  } else if (len >= 8) {
-    return CityHash128WithSeed(NULL,
-                               0,
-                               uint128(UNALIGNED_LOAD64(s) ^ (len * k0),
-                                       UNALIGNED_LOAD64(s + len - 8) ^ k1));
-  } else {
-    return CityHash128WithSeed(s, len, uint128(k0, k1));
-  }
-}
-
-void CityHash64_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed);
-}
-
-void CityHash128_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  uint128 s(0,0);
-
-  s.first = seed;
-
-  *(uint128*)out = CityHash128WithSeed((const char*)key,len,s);
-}
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides CityHash64() and related functions.
+//
+// It's probably possible to create even faster hash functions by
+// writing a program that systematically explores some of the space of
+// possible hash functions, by using SIMD instructions, or by
+// compromising on hash quality.
+
+#include "City.h"
+
+#include <algorithm>
+#include <string.h>  // for memcpy and memset
+
+using namespace std;
+
+static uint64 UNALIGNED_LOAD64(const char *p) {  // read sizeof(uint64) bytes at p, in host byte order
+  uint64 result;
+  memcpy(&result, p, sizeof(result));  // memcpy rather than a pointer cast, so p needs no particular alignment
+  return result;
+}
+
+static uint32 UNALIGNED_LOAD32(const char *p) {  // read sizeof(uint32) bytes at p, in host byte order
+  uint32 result;
+  memcpy(&result, p, sizeof(result));  // memcpy rather than a pointer cast, so p needs no particular alignment
+  return result;
+}
+
+#ifndef __BIG_ENDIAN__  // __BIG_ENDIAN__ not defined: loaded words are used unchanged
+
+#define uint32_in_expected_order(x) (x)
+#define uint64_in_expected_order(x) (x)
+
+#else  // __BIG_ENDIAN__ defined: every loaded word is byte-swapped
+
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#else  // any other toolchain: bswap_32 / bswap_64 are expected to come from <byteswap.h>
+#include <byteswap.h>
+#endif
+
+#define uint32_in_expected_order(x) (bswap_32(x))
+#define uint64_in_expected_order(x) (bswap_64(x))
+
+#endif  // __BIG_ENDIAN__
+
+#if !defined(LIKELY)  // keep an existing LIKELY definition if one is already in scope
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#define LIKELY(x) (__builtin_expect(!!(x), 1))
+#else  // no __builtin_expect: LIKELY(x) is just the condition itself
+#define LIKELY(x) (x)
+#endif
+#endif
+
+static uint64 Fetch64(const char *p) {  // unaligned 64-bit load, byte-swapped when __BIG_ENDIAN__ is defined
+  return uint64_in_expected_order(UNALIGNED_LOAD64(p));
+}
+
+static uint32 Fetch32(const char *p) {  // unaligned 32-bit load, byte-swapped when __BIG_ENDIAN__ is defined
+  return uint32_in_expected_order(UNALIGNED_LOAD32(p));
+}
+
+// Some primes between 2^63 and 2^64 for various uses.
+static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
+static const uint64 k1 = 0xb492b66fbe98f273ULL;
+static const uint64 k2 = 0x9ae16a3b2f90404fULL;
+static const uint64 k3 = 0xc949d7c7509e6557ULL;
+
+// Bitwise right rotate of a 64-bit value; shift must be in [0, 63].  Normally this
+// compiles to a single instruction, especially if the shift is a manifest constant.
+static uint64 Rotate(uint64 val, int shift) {
+  // Avoid shifting by 64: doing so yields an undefined result.
+  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
+}
+
+// Equivalent to Rotate(), but requires the second arg to be non-zero.
+// On x86-64, and probably others, it's possible for this to compile
+// to a single instruction if both args are already in registers.
+static uint64 RotateByAtLeast1(uint64 val, int shift) {
+  return (val >> shift) | (val << (64 - shift));  // shift must be in [1, 63]: shift == 0 would shift left by 64
+}
+
+static uint64 ShiftMix(uint64 val) {  // xor the bits above bit 46 (val >> 47) into the low end of val
+  return val ^ (val >> 47);
+}
+
+static uint64 HashLen16(uint64 u, uint64 v) {  // reduce two 64-bit words to one via Hash128to64 (defined outside this file, presumably City.h)
+  return Hash128to64(uint128(u, v));
+}
+
+static uint64 HashLen0to16(const char *s, size_t len) {  // 64-bit hash for inputs of 0 to 16 bytes
+  if (len > 8) {  // 9..16 bytes: first and last 8 bytes (they overlap when len < 16)
+    uint64 a = Fetch64(s);
+    uint64 b = Fetch64(s + len - 8);
+    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;  // len is 9..16 here, so the non-zero shift requirement holds
+  }
+  if (len >= 4) {  // 4..8 bytes: first and last 4 bytes (they overlap when len < 8)
+    uint64 a = Fetch32(s);
+    return HashLen16(len + (a << 3), Fetch32(s + len - 4));
+  }
+  if (len > 0) {  // 1..3 bytes: first, middle and last byte
+    uint8 a = s[0];
+    uint8 b = s[len >> 1];
+    uint8 c = s[len - 1];
+    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
+    uint32 z = len + (static_cast<uint32>(c) << 2);
+    return ShiftMix(y * k2 ^ z * k3) * k2;
+  }
+  return k2;  // empty input: s is never read
+}
+
+// This probably works well for 16-byte strings as well, but it may be overkill
+// in that case.
+static uint64 HashLen17to32(const char *s, size_t len) {
+  uint64 a = Fetch64(s) * k1;  // a, b: the first 16 bytes
+  uint64 b = Fetch64(s + 8);
+  uint64 c = Fetch64(s + len - 8) * k2;  // c, d: the last 16 bytes (overlapping a, b when len < 32)
+  uint64 d = Fetch64(s + len - 16) * k0;
+  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
+                   a + Rotate(b ^ k3, 20) - c + len);
+}
+
+// Return a 16-byte hash for 48 bytes.  Quick and dirty.
+// Callers do best to use "random-looking" values for a and b.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(
+    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
+  a += w;
+  b = Rotate(b + a + z, 21);
+  uint64 c = a;  // snapshot of (incoming a) + w, folded into the second half below
+  a += x;
+  a += y;
+  b += Rotate(a, 44);
+  return make_pair(a + z, b + c);  // first == (incoming a) + w + x + y + z, modulo 2^64
+}
+
+// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(
+    const char* s, uint64 a, uint64 b) {
+  return WeakHashLen32WithSeeds(Fetch64(s),  // four consecutive 64-bit words: s must have 32 readable bytes
+                                Fetch64(s + 8),
+                                Fetch64(s + 16),
+                                Fetch64(s + 24),
+                                a,
+                                b);
+}
+
+// Return an 8-byte hash for 33 to 64 bytes.
+static uint64 HashLen33to64(const char *s, size_t len) {
+  uint64 z = Fetch64(s + 24);  // first pass (vf, vs): words at offsets 0, 8, 16, 24 and len - 16
+  uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
+  uint64 b = Rotate(a + z, 52);
+  uint64 c = Rotate(a, 37);
+  a += Fetch64(s + 8);
+  c += Rotate(a, 7);
+  a += Fetch64(s + 16);
+  uint64 vf = a + z;
+  uint64 vs = b + Rotate(a, 31) + c;
+  a = Fetch64(s + 16) + Fetch64(s + len - 32);  // second pass (wf, ws): word at offset 16 plus the last four words
+  z = Fetch64(s + len - 8);
+  b = Rotate(a + z, 52);
+  c = Rotate(a, 37);
+  a += Fetch64(s + len - 24);
+  c += Rotate(a, 7);
+  a += Fetch64(s + len - 16);
+  uint64 wf = a + z;
+  uint64 ws = b + Rotate(a, 31) + c;
+  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);  // cross-combine the two passes
+  return ShiftMix(r * k0 + vs) * k2;
+}
+
+uint64 CityHash64(const char *s, size_t len) {  // 64-bit hash of the len bytes at s
+  if (len <= 32) {  // inputs of up to 64 bytes go to the dedicated length-bucket helpers
+    if (len <= 16) {
+      return HashLen0to16(s, len);
+    } else {
+      return HashLen17to32(s, len);
+    }
+  } else if (len <= 64) {
+    return HashLen33to64(s, len);
+  }
+
+  // For strings over 64 bytes we hash the end first, and then as we
+  // loop we keep 56 bytes of state: v, w, x, y, and z.
+  uint64 x = Fetch64(s + len - 40);
+  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
+  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
+  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
+  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
+  x = x * k1 + Fetch64(s);
+
+  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
+  len = (len - 1) & ~static_cast<size_t>(63);  // len - 1: an exact multiple of 64 leaves its final chunk to the tail reads above
+  do {
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;  // advance to the next 64-byte chunk
+    len -= 64;
+  } while (len != 0);  // len is a non-zero multiple of 64 on entry (input is longer than 64 bytes)
+  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
+                   HashLen16(v.second, w.second) + x);
+}
+
+uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {  // single-seed variant: k2 is the first seed, the caller's seed the second
+  return CityHash64WithSeeds(s, len, k2, seed);
+}
+
+uint64 CityHash64WithSeeds(const char *s, size_t len,
+                           uint64 seed0, uint64 seed1) {
+  return HashLen16(CityHash64(s, len) - seed0, seed1);  // seeds are applied after the unseeded hash, not mixed into it
+}
+
+// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
+// of any length representable in signed long.  Based on City and Murmur.
+static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
+  uint64 a = Uint128Low64(seed);
+  uint64 b = Uint128High64(seed);
+  uint64 c = 0;
+  uint64 d = 0;
+  signed long l = len - 16;  // signed so that len <= 16 shows up as l <= 0
+  if (l <= 0) {  // len <= 16
+    a = ShiftMix(a * k1) * k1;
+    c = b * k1 + HashLen0to16(s, len);
+    d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));  // s is only read directly when at least 8 bytes exist
+  } else {  // len > 16
+    c = HashLen16(Fetch64(s + len - 8) + k1, a);  // the last 16 bytes are mixed in first
+    d = HashLen16(b + len, c + Fetch64(s + len - 16));
+    a += d;
+    do {  // then 16 bytes per pass from the front
+      a ^= ShiftMix(Fetch64(s) * k1) * k1;
+      a *= k1;
+      b ^= a;
+      c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
+      c *= k1;
+      d ^= c;
+      s += 16;
+      l -= 16;
+    } while (l > 0);
+  }
+  a = HashLen16(a, c);
+  b = HashLen16(d, b);
+  return uint128(a ^ b, HashLen16(b, a));
+}
+
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
+  if (len < 128) {  // short inputs are delegated to CityMurmur
+    return CityMurmur(s, len, seed);
+  }
+
+  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
+  // v, w, x, y, and z.
+  pair<uint64, uint64> v, w;
+  uint64 x = Uint128Low64(seed);
+  uint64 y = Uint128High64(seed);
+  uint64 z = len * k1;
+  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
+  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
+  w.first = Rotate(y + z, 35) * k1 + x;
+  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
+
+  // This is the same inner loop as CityHash64(), manually unrolled.
+  do {
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;  // second, identical round on the next 64-byte chunk
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;
+    len -= 128;  // each pass consumes two 64-byte chunks
+  } while (LIKELY(len >= 128));
+  x += Rotate(v.first + z, 49) * k0;
+  z += Rotate(w.first, 37) * k0;
+  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
+  for (size_t tail_done = 0; tail_done < len; ) {
+    tail_done += 32;
+    y = Rotate(x + y, 42) * k0 + v.second;
+    w.first += Fetch64(s + len - tail_done + 16);  // s + len is the end of the input; chunks are taken backwards from it
+    x = x * k0 + w.first;
+    z += w.second + Fetch64(s + len - tail_done);  // may reach back into bytes the main loop already consumed
+    w.second += v.first;
+    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
+  }
+  // At this point our 56 bytes of state should contain more than
+  // enough information for a strong 128-bit hash.  We use two
+  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
+  x = HashLen16(x, v.first);
+  y = HashLen16(y + z, w.first);
+  return uint128(HashLen16(x + v.second, w.second) + y,
+                 HashLen16(x + w.second, y + v.second));
+}
+
+uint128 CityHash128(const char *s, size_t len) {  // unseeded 128-bit hash: the seed is derived from the input itself
+  if (len >= 16) {  // the first 16 bytes become the seed, the remainder is hashed
+    return CityHash128WithSeed(s + 16,
+                               len - 16,
+                               uint128(Fetch64(s) ^ k3,
+                                       Fetch64(s + 8)));
+  } else if (len >= 8) {  // 8..15 bytes: everything goes into the seed, nothing is left to hash
+    return CityHash128WithSeed(NULL,
+                               0,
+                               uint128(Fetch64(s) ^ (len * k0),
+                                       Fetch64(s + len - 8) ^ k1));
+  } else {  // fewer than 8 bytes: fixed seed (k0, k1)
+    return CityHash128WithSeed(s, len, uint128(k0, k1));
+  }
+}
+
+#ifdef __SSE4_2__
+#include <nmmintrin.h>
+
+// Requires len >= 240.  Writes four 64-bit words to result[0..3].
+static void CityHashCrc256Long(const char *s, size_t len,
+                               uint32 seed, uint64 *result) {
+  uint64 a = Fetch64(s + 56) + k0;
+  uint64 b = Fetch64(s + 96) + k0;
+  uint64 c = result[0] = HashLen16(b, len);  // result[0] and result[1] double as scratch until the final mix
+  uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
+  uint64 e = Fetch64(s + 184) + seed;
+  uint64 f = seed;
+  uint64 g = 0;
+  uint64 h = 0;
+  uint64 i = 0;
+  uint64 j = 0;
+  uint64 t = c + d;
+
+  // 240 bytes of input per iter (six 40-byte CHUNKs).
+  size_t iters = len / 240;
+  len -= iters * 240;  // len is now the remainder, 0..239 bytes
+  do {
+#define CHUNK(multiplier, z)                                    \
+    {                                                           \
+      uint64 old_a = a;                                         \
+      a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s);          \
+      b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8);      \
+      c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16);     \
+      d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24);     \
+      e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32);     \
+      t = old_a;                                                \
+    }                                                           \
+    f = _mm_crc32_u64(f, a);                                    \
+    g = _mm_crc32_u64(g, b);                                    \
+    h = _mm_crc32_u64(h, c);                                    \
+    i = _mm_crc32_u64(i, d);                                    \
+    j = _mm_crc32_u64(j, e);                                    \
+    s += 40
+
+    CHUNK(1, 1); CHUNK(k0, 0);
+    CHUNK(1, 1); CHUNK(k0, 0);
+    CHUNK(1, 1); CHUNK(k0, 0);
+  } while (--iters > 0);  // iters >= 1 on entry because len >= 240
+
+  while (len >= 40) {  // whole 40-byte chunks of the remainder
+    CHUNK(k0, 0);
+    len -= 40;
+  }
+  if (len > 0) {
+    s = s + len - 40;  // final partial chunk: step back so the last 40 bytes of the input are read
+    CHUNK(k0, 0);
+  }
+  j += i << 32;
+  a = HashLen16(a, j);
+  h += g << 32;
+  b += h;
+  c = HashLen16(c, f) + i;
+  d = HashLen16(d, e + result[0]);
+  j += e;
+  i += HashLen16(h, t);
+  e = HashLen16(a, d) + j;
+  f = HashLen16(b, c) + a;
+  g = HashLen16(j, i) + c;
+  result[0] = e + f + g + h;
+  a = ShiftMix((a + g) * k0) * k0 + b;
+  result[1] += a + result[0];  // each later output word chains on the previous one
+  a = ShiftMix(a * k0) * k0 + c;
+  result[2] = a + result[1];
+  a = ShiftMix((a + e) * k0) * k0;
+  result[3] = a + result[2];
+}
+
+// Requires len < 240.
+static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
+  char buf[240];  // zero-padded copy of the input so the 240-byte routine can be reused
+  memcpy(buf, s, len);
+  memset(buf + len, 0, 240 - len);
+  CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);  // ~len as seed keeps the original length in the hash despite the padding
+}
+
+void CityHashCrc256(const char *s, size_t len, uint64 *result) {  // both helpers fill result[0..3]
+  if (LIKELY(len >= 240)) {
+    CityHashCrc256Long(s, len, 0, result);  // long inputs are hashed in place with seed 0
+  } else {
+    CityHashCrc256Short(s, len, result);  // short inputs are padded to 240 bytes first
+  }
+}
+
+uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
+  if (len <= 900) {  // inputs of up to 900 bytes use the non-CRC CityHash128WithSeed
+    return CityHash128WithSeed(s, len, seed);
+  } else {
+    uint64 result[4];
+    CityHashCrc256(s, len, result);  // unseeded 256-bit hash; the seed is folded in below
+    uint64 u = Uint128High64(seed) + result[0];
+    uint64 v = Uint128Low64(seed) + result[1];
+    return uint128(HashLen16(u, v + result[2]),
+                   HashLen16(Rotate(v, 32), u * k0 + result[3]));
+  }
+}
+
+uint128 CityHashCrc128(const char *s, size_t len) {
+  if (len <= 900) {  // inputs of up to 900 bytes use the non-CRC CityHash128
+    return CityHash128(s, len);
+  } else {
+    uint64 result[4];
+    CityHashCrc256(s, len, result);
+    return uint128(result[2], result[3]);  // only the last two of the four 64-bit words are returned
+  }
+}
+
+#endif
diff --git a/City.h b/City.h
index 171f693..02f3457 100644
--- a/City.h
+++ b/City.h
@@ -1,97 +1,106 @@
-// Copyright (c) 2011 Google, Inc.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-//
-// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
-//
-// This file provides a few functions for hashing strings. On x86-64
-// hardware in 2011, CityHash64() is faster than other high-quality
-// hash functions, such as Murmur.  This is largely due to higher
-// instruction-level parallelism.  CityHash64() and CityHash128() also perform
-// well on hash-quality tests.
-//
-// CityHash128() is optimized for relatively long strings and returns
-// a 128-bit hash.  For strings more than about 2000 bytes it can be
-// faster than CityHash64().
-//
-// Functions in the CityHash family are not suitable for cryptography.
-//
-// WARNING: This code has not been tested on big-endian platforms!
-// It is known to work well on little-endian platforms that have a small penalty
-// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
-//
-// By the way, for some hash functions, given strings a and b, the hash
-// of a+b is easily derived from the hashes of a and b.  This property
-// doesn't hold for any hash functions in this file.
-
-#ifndef CITY_HASH_H_
-#define CITY_HASH_H_
-
-#if defined(_MSC_VER) || defined(__CYGWIN__)
-#include "pstdint.h"
-typedef int ssize_t;
-#pragma warning(disable:4267)
-#else
-#include <stdint.h>
-#endif
-
-#include <stdlib.h>  // for size_t.
-#include <utility>
-
-typedef uint8_t uint8;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-typedef std::pair<uint64, uint64> uint128;
-
-inline uint64 Uint128Low64(const uint128& x) { return x.first; }
-inline uint64 Uint128High64(const uint128& x) { return x.second; }
-
-// Hash function for a byte array.
-uint64 CityHash64(const char *buf, size_t len);
-
-// Hash function for a byte array.  For convenience, a 64-bit seed is also
-// hashed into the result.
-uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
-
-// Hash function for a byte array.  For convenience, two seeds are also
-// hashed into the result.
-uint64 CityHash64WithSeeds(const char *buf, size_t len,
-                           uint64 seed0, uint64 seed1);
-
-// Hash function for a byte array.
-uint128 CityHash128(const char *s, size_t len);
-
-// Hash function for a byte array.  For convenience, a 128-bit seed is also
-// hashed into the result.
-uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
-
-// Hash 128 input bits down to 64 bits of output.
-// This is intended to be a reasonably good hash function.
-inline uint64 Hash128to64(const uint128& x) {
-  // Murmur-inspired hashing.
-  const uint64 kMul = 0x9ddfea08eb382d69ULL;
-  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
-  a ^= (a >> 47);
-  uint64 b = (Uint128High64(x) ^ a) * kMul;
-  b ^= (b >> 47);
-  b *= kMul;
-  return b;
-}
-
-#endif  // CITY_HASH_H_
+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides a few functions for hashing strings. On x86-64
+// hardware in 2011, CityHash64() is faster than other high-quality
+// hash functions, such as Murmur.  This is largely due to higher
+// instruction-level parallelism.  CityHash64() and CityHash128() also perform
+// well on hash-quality tests.
+//
+// CityHash128() is optimized for relatively long strings and returns
+// a 128-bit hash.  For strings more than about 2000 bytes it can be
+// faster than CityHash64().
+//
+// Functions in the CityHash family are not suitable for cryptography.
+//
+// WARNING: This code has not been tested on big-endian platforms!
+// It is known to work well on little-endian platforms that have a small penalty
+// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
+//
+// By the way, for some hash functions, given strings a and b, the hash
+// of a+b is easily derived from the hashes of a and b.  This property
+// doesn't hold for any hash functions in this file.
+
+#ifndef CITY_HASH_H_
+#define CITY_HASH_H_
+
+#include <stdlib.h>  // for size_t.
+#include <stdint.h>
+#include <utility>
+
+typedef uint8_t uint8;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+typedef std::pair<uint64, uint64> uint128;
+
+inline uint64 Uint128Low64(const uint128& x) { return x.first; }  // low 64 bits are stored in pair.first
+inline uint64 Uint128High64(const uint128& x) { return x.second; }  // high 64 bits are stored in pair.second
+
+// Hash function for a byte array.
+uint64 CityHash64(const char *buf, size_t len);
+
+// Hash function for a byte array.  For convenience, a 64-bit seed is also
+// hashed into the result.
+uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
+
+// Hash function for a byte array.  For convenience, two seeds are also
+// hashed into the result.
+uint64 CityHash64WithSeeds(const char *buf, size_t len,
+                           uint64 seed0, uint64 seed1);
+
+// Hash function for a byte array.
+uint128 CityHash128(const char *s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
+
+// Hash 128 input bits down to 64 bits of output.
+// This is intended to be a reasonably good hash function.
+inline uint64 Hash128to64(const uint128& x) {
+  // Murmur-inspired hashing.
+  const uint64 kMul = 0x9ddfea08eb382d69ULL;  // multiplier used by all three multiply steps below
+  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;  // combine both halves, then multiply
+  a ^= (a >> 47);  // xor the top bits of a into its low bits
+  uint64 b = (Uint128High64(x) ^ a) * kMul;  // the high half is mixed in a second time
+  b ^= (b >> 47);
+  b *= kMul;
+  return b;
+}
+
+// Conditionally include declarations for versions of City that require SSE4.2
+// instructions to be available.
+#ifdef __SSE4_2__
+
+// Hash function for a byte array.
+uint128 CityHashCrc128(const char *s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
+
+// Hash function for a byte array.  Sets result[0] ... result[3].
+void CityHashCrc256(const char *s, size_t len, uint64 *result);
+
+#endif  // __SSE4_2__
+
+#endif  // CITY_HASH_H_
diff --git a/CityTest.cpp b/CityTest.cpp
new file mode 100644
index 0000000..4190cc8
--- /dev/null
+++ b/CityTest.cpp
@@ -0,0 +1,15 @@
+#include "City.h"
+
+void CityHash64_test ( const void * key, int len, uint32_t seed, void * out )  // test-harness adapter around CityHash64WithSeed
+{
+  *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed);  // stores one uint64 through out; the 32-bit seed is passed as the 64-bit seed
+}
+
+void CityHash128_test ( const void * key, int len, uint32_t seed, void * out )  // test-harness adapter around CityHash128WithSeed
+{
+  uint128 s(0,0);
+
+  s.first = seed;  // the 32-bit seed goes into the low half (pair.first); the high half stays 0
+
+  *(uint128*)out = CityHash128WithSeed((const char*)key,len,s);  // stores a whole uint128 (a std::pair of two uint64) through out
+}
diff --git a/DifferentialTest.cpp b/DifferentialTest.cpp
index b356085..d9067c9 100644
--- a/DifferentialTest.cpp
+++ b/DifferentialTest.cpp
@@ -1,3 +1,3 @@
-#include "DifferentialTest.h"
-
-//----------------------------------------------------------------------------
+#include "DifferentialTest.h"
+
+//----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
index 3136cbb..824d72e 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -1,281 +1,281 @@
-//-----------------------------------------------------------------------------
-// Differential collision & distribution tests - generate a bunch of random keys,
-// see what happens to the hash value when we flip a few bits of the key.
-
-#pragma once
-
-#include "Types.h"
-#include "Stats.h"      // for chooseUpToK
-#include "KeysetTest.h" // for SparseKeygenRecurse
-#include "Random.h"
-
-#include <vector>
-#include <algorithm>
-#include <stdio.h>
-
-//-----------------------------------------------------------------------------
-// Sort through the differentials, ignoring collisions that only occured once 
-// (these could be false positives). If we find collisions of 3 or more, the
-// differential test fails.
-
-template < class keytype >
-bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
-{
-  std::sort(diffs.begin(), diffs.end());
-
-  int count = 1;
-  int ignore = 0;
-
-  bool result = true;
-
-  if(diffs.size())
-  {
-    keytype kp = diffs[0];
-
-    for(int i = 1; i < (int)diffs.size(); i++)
-    {
-      if(diffs[i] == kp)
-      {
-        count++;
-        continue;
-      }
-      else
-      {
-        if(count > 1)
-        {
-          result = false;
-
-          double pct = 100 * (double(count) / double(reps));
-
-          if(dumpCollisions)
-          {
-            printbits((unsigned char*)&kp,sizeof(kp));
-            printf(" - %4.2f%%\n", pct );
-          }
-        }
-        else 
-        {
-          ignore++;
-        }
-
-        kp = diffs[i];
-        count = 1;
-      }
-    }
-
-    if(count > 1)
-    {
-      double pct = 100 * (double(count) / double(reps));
-
-      if(dumpCollisions)
-      {
-        printbits((unsigned char*)&kp,sizeof(kp));
-        printf(" - %4.2f%%\n", pct );
-      }
-    }
-    else 
-    {
-      ignore++;
-    }
-  }
-
-  printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
-
-  if(result == false)
-  {
-    printf(" !!!!! ");
-  }
-
-  printf("\n");
-  printf("\n");
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Check all possible keybits-choose-N differentials for collisions, report
-// ones that occur significantly more often than expected.
-
-// Random collisions can happen with probability 1 in 2^32 - if we do more than
-// 2^32 tests, we'll probably see some spurious random collisions, so don't report
-// them.
-
-template < typename keytype, typename hashtype >
-void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
-{
-  const int bits = sizeof(keytype)*8;
-
-  for(int i = start; i < bits; i++)
-  {
-    flipbit(&k2,sizeof(k2),i);
-    bitsleft--;
-
-    hash(&k2,sizeof(k2),0,&h2);
-
-    if(h1 == h2)
-    {
-      diffs.push_back(k1 ^ k2);
-    }
-
-    if(bitsleft)
-    {
-      DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
-    }
-
-    flipbit(&k2,sizeof(k2),i);
-    bitsleft++;
-  }
-}
-
-//----------
-
-template < typename keytype, typename hashtype >
-bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
-{
-  const int keybits = sizeof(keytype) * 8;
-  const int hashbits = sizeof(hashtype) * 8;
-
-  double diffcount = chooseUpToK(keybits,diffbits);
-  double testcount = (diffcount * double(reps));
-  double expected  = testcount / pow(2.0,double(hashbits));
-
-  Rand r(100);
-
-  std::vector<keytype> diffs;
-
-  keytype k1,k2;
-  hashtype h1,h2;
-
-  printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
-  printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
-
-  for(int i = 0; i < reps; i++)
-  {
-    if(i % (reps/10) == 0) printf(".");
-
-    r.rand_p(&k1,sizeof(keytype));
-    k2 = k1;
-
-    hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
-
-    DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
-  }
-  printf("\n");
-
-  bool result = true;
-
-  result &= ProcessDifferentials(diffs,reps,dumpCollisions);
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Differential distribution test - for each N-bit input differential, generate
-// a large set of differential key pairs, hash them, and test the output 
-// differentials using our distribution test code.
-
-// This is a very hard test to pass - even if the hash values are well-distributed,
-// the differences between hash values may not be. It's also not entirely relevant
-// for testing hash functions, but it's still interesting.
-
-// This test is a _lot_ of work, as it's essentially a full keyset test for
-// each of a potentially huge number of input differentials. To speed things
-// along, we do only a few distribution tests per keyset instead of the full
-// grid.
-
-// #TODO - put diagram drawing back on
-
-template < typename keytype, typename hashtype >
-void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
-{
-  std::vector<keytype>  keys(trials);
-  std::vector<hashtype> A(trials),B(trials);
-
-  for(int i = 0; i < trials; i++)
-  {
-    rand_p(&keys[i],sizeof(keytype));
-
-    hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
-  }
-
-  //----------
-
-  std::vector<keytype> diffs;
-
-  keytype temp(0);
-
-  SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
-
-  //----------
-
-  worst = 0;
-  avg = 0;
-
-  hashtype h2;
-
-  for(size_t j = 0; j < diffs.size(); j++)
-  {
-    keytype & d = diffs[j];
-
-    for(int i = 0; i < trials; i++)
-    {
-      keytype k2 = keys[i] ^ d;
-
-      hash(&k2,sizeof(k2),0,&h2);
-
-      B[i] = A[i] ^ h2;
-    }
-
-    double dworst,davg;
-
-    TestDistributionFast(B,dworst,davg);
-
-    avg += davg;
-    worst = (dworst > worst) ? dworst : worst;
-  }
-
-  avg /= double(diffs.size());
-}
-
-//-----------------------------------------------------------------------------
-// Simpler differential-distribution test - for all 1-bit differentials,
-// generate random key pairs and run full distribution/collision tests on the
-// hash differentials
-
-template < typename keytype, typename hashtype >
-bool DiffDistTest2 ( pfHash hash  )
-{
-  Rand r(857374);
-
-  int keybits = sizeof(keytype) * 8;
-  const int keycount = 256*256*32;
-  keytype k;
-  
-  std::vector<hashtype> hashes(keycount);
-  hashtype h1,h2;
-
-  bool result = true;
-
-  for(int keybit = 0; keybit < keybits; keybit++)
-  {
-    printf("Testing bit %d\n",keybit);
-
-    for(int i = 0; i < keycount; i++)
-    {
-      r.rand_p(&k,sizeof(keytype));
-      
-      hash(&k,sizeof(keytype),0,&h1);
-      flipbit(&k,sizeof(keytype),keybit);
-      hash(&k,sizeof(keytype),0,&h2);
-
-      hashes[i] = h1 ^ h2;
-    }
-
-    result &= TestHashList<hashtype>(hashes,true,true,true);
-    printf("\n");
-  }
-
-  return result;
-}
-
-//----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+// Differential collision & distribution tests - generate a bunch of random keys,
+// see what happens to the hash value when we flip a few bits of the key.
+
+#pragma once
+
+#include "Types.h"
+#include "Stats.h"      // for chooseUpToK
+#include "KeysetTest.h" // for SparseKeygenRecurse
+#include "Random.h"
+
+#include <vector>
+#include <algorithm>
+#include <stdio.h>
+
+//-----------------------------------------------------------------------------
+// Sort through the differentials, ignoring collisions that only occurred once
+// (these could be false positives). Any differential that shows up more than
+// once makes the test fail (returns false); otherwise the result is true.
+// diffs is sorted in place; reps is only the denominator of the printed percentage.
+template < class keytype >
+bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
+{
+  std::sort(diffs.begin(), diffs.end()); // equal differentials become adjacent runs
+
+  int count = 1;  // length of the current run of equal differentials
+  int ignore = 0; // number of differentials that occurred exactly once
+
+  bool result = true;
+
+  if(diffs.size())
+  {
+    keytype kp = diffs[0];
+
+    for(int i = 1; i < (int)diffs.size(); i++)
+    {
+      if(diffs[i] == kp)
+      {
+        count++;
+        continue;
+      }
+      else
+      {
+        if(count > 1)
+        {
+          result = false;
+
+          double pct = 100 * (double(count) / double(reps));
+
+          if(dumpCollisions)
+          {
+            printbits((unsigned char*)&kp,sizeof(kp));
+            printf(" - %4.2f%%\n", pct );
+          }
+        }
+        else 
+        {
+          ignore++;
+        }
+
+        kp = diffs[i];
+        count = 1;
+      }
+    }
+
+    if(count > 1)
+    {
+      result = false; // the final run is judged exactly like the runs closed inside the loop
+      double pct = 100 * (double(count) / double(reps));
+      if(dumpCollisions)
+      {
+        printbits((unsigned char*)&kp,sizeof(kp));
+        printf(" - %4.2f%%\n", pct );
+      }
+    }
+    else 
+    {
+      ignore++;
+    }
+  }
+
+  printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
+
+  if(result == false)
+  {
+    printf(" !!!!! ");
+  }
+
+  printf("\n");
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Check all possible keybits-choose-N differentials for collisions, report
+// ones that occur significantly more often than expected.
+
+// Random collisions can happen with probability 1 in 2^32 - if we do more than
+// 2^32 tests, we'll probably see some spurious random collisions, so don't report
+// them.
+
+template < typename keytype, typename hashtype >
+void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
+{ // Applies flipbit to k2 at every position >= start, recursing while bitsleft allows; each hash of k2 is compared with h1.
+  const int bits = sizeof(keytype)*8;
+
+  for(int i = start; i < bits; i++)
+  {
+    flipbit(&k2,sizeof(k2),i); // applied again at the bottom of the loop body, presumably restoring k2
+    bitsleft--;
+
+    hash(&k2,sizeof(k2),0,&h2); // third argument (0) is the same for every call here
+
+    if(h1 == h2)
+    {
+      diffs.push_back(k1 ^ k2); // record the key difference whose hash equals h1
+    }
+
+    if(bitsleft)
+    {
+      DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs); // continue with positions above i
+    }
+
+    flipbit(&k2,sizeof(k2),i);
+    bitsleft++; // give the budget back before moving to the next position
+  }
+}
+
+//----------
+
+template < typename keytype, typename hashtype >
+bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
+{ // For each of reps random keys, collects colliding differentials via DiffTestRecurse; returns ProcessDifferentials' verdict.
+  const int keybits = sizeof(keytype) * 8;
+  const int hashbits = sizeof(hashtype) * 8;
+
+  double diffcount = chooseUpToK(keybits,diffbits);
+  double testcount = (diffcount * double(reps));
+  double expected  = testcount / pow(2.0,double(hashbits)); // only used in the message printed below
+
+  Rand r(100);
+
+  std::vector<keytype> diffs; // key differences (k1 ^ k2) whose hashes collided
+
+  keytype k1,k2;
+  hashtype h1,h2;
+
+  printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
+  printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
+  const int dotstep = (reps >= 10) ? (reps / 10) : 1; // progress-dot interval; never 0, so the modulo below is always defined
+  for(int i = 0; i < reps; i++)
+  {
+    if(i % dotstep == 0) printf(".");
+
+    r.rand_p(&k1,sizeof(keytype));
+    k2 = k1; // k2 starts equal to k1; DiffTestRecurse modifies it through flipbit
+
+    hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
+
+    DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
+  }
+  printf("\n");
+
+  bool result = true;
+
+  result &= ProcessDifferentials(diffs,reps,dumpCollisions);
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Differential distribution test - for each N-bit input differential, generate
+// a large set of differential key pairs, hash them, and test the output 
+// differentials using our distribution test code.
+
+// This is a very hard test to pass - even if the hash values are well-distributed,
+// the differences between hash values may not be. It's also not entirely relevant
+// for testing hash functions, but it's still interesting.
+
+// This test is a _lot_ of work, as it's essentially a full keyset test for
+// each of a potentially huge number of input differentials. To speed things
+// along, we do only a few distribution tests per keyset instead of the full
+// grid.
+
+// #TODO - put diagram drawing back on
+
+template < typename keytype, typename hashtype >
+void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
+{ // Outputs: worst = largest dworst over all differentials, avg = mean of davg over all differentials.
+  std::vector<keytype>  keys(trials);
+  std::vector<hashtype> A(trials),B(trials);
+
+  for(int i = 0; i < trials; i++)
+  {
+    rand_p(&keys[i],sizeof(keytype));
+
+    hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]); // A[i] holds the hash of the unmodified key
+  }
+
+  //---------- build the list of input differentials
+
+  std::vector<keytype> diffs;
+
+  keytype temp(0);
+
+  SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
+
+  //---------- test the output differentials for each input differential
+
+  worst = 0;
+  avg = 0;
+
+  hashtype h2;
+
+  for(size_t j = 0; j < diffs.size(); j++)
+  {
+    keytype & d = diffs[j];
+
+    for(int i = 0; i < trials; i++)
+    {
+      keytype k2 = keys[i] ^ d;
+
+      hash(&k2,sizeof(k2),0,&h2);
+
+      B[i] = A[i] ^ h2; // output differential for key i under input differential d
+    }
+
+    double dworst,davg;
+
+    TestDistributionFast(B,dworst,davg);
+
+    avg += davg;
+    worst = (dworst > worst) ? dworst : worst;
+  }
+
+  avg /= double(diffs.size()); // NOTE(review): divides by zero if diffs ended up empty - confirm that cannot happen
+}
+
+//-----------------------------------------------------------------------------
+// Simpler differential-distribution test - for all 1-bit differentials,
+// generate random key pairs and run full distribution/collision tests on the
+// hash differentials
+
+template < typename keytype, typename hashtype >
+bool DiffDistTest2 ( pfHash hash  )
+{ // Returns the AND of the TestHashList results over every key bit.
+  Rand r(857374);
+
+  int keybits = sizeof(keytype) * 8;
+  const int keycount = 256*256*32; // number of random keys hashed per key bit
+  keytype k;
+  
+  std::vector<hashtype> hashes(keycount); // reused for every key bit; fully overwritten each time
+  hashtype h1,h2;
+
+  bool result = true;
+
+  for(int keybit = 0; keybit < keybits; keybit++)
+  {
+    printf("Testing bit %d\n",keybit);
+
+    for(int i = 0; i < keycount; i++)
+    {
+      r.rand_p(&k,sizeof(keytype));
+      
+      hash(&k,sizeof(keytype),0,&h1); // hash before flipbit is applied
+      flipbit(&k,sizeof(keytype),keybit);
+      hash(&k,sizeof(keytype),0,&h2); // hash after flipbit is applied
+
+      hashes[i] = h1 ^ h2; // store the output differential
+    }
+
+    result &= TestHashList<hashtype>(hashes,true,true,true);
+    printf("\n");
+  }
+
+  return result;
+}
+
+//----------------------------------------------------------------------------
diff --git a/Hashes.cpp b/Hashes.cpp
index 1930bc5..36a6c96 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -1,155 +1,155 @@
-#include "Hashes.h"
-
-#include "Random.h"
-
-
-#include <stdlib.h>
-//#include <stdint.h>
-#include <assert.h>
-//#include <emmintrin.h>
-//#include <xmmintrin.h>
-
-//----------------------------------------------------------------------------
-// fake / bad hashes
-
-void BadHash ( const void * key, int len, uint32_t seed, void * out )
-{
-  uint32_t h = seed;
-
-  const uint8_t * data = (const uint8_t*)key;
-
-  for(int i = 0; i < len; i++)
-  {
-    h ^= h >> 3;
-    h ^= h << 5;
-    h ^= data[i];
-  }
-
-  *(uint32_t*)out = h;
-}
-
-void sumhash ( const void * key, int len, uint32_t seed, void * out )
-{
-  uint32_t h = seed;
-
-  const uint8_t * data = (const uint8_t*)key;
-
-  for(int i = 0; i < len; i++)
-  {
-    h += data[i];
-  }
-
-  *(uint32_t*)out = h;
-}
-
-void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
-{
-  uint32_t h = seed;
-
-  const uint32_t * data = (const uint32_t*)key;
-
-  for(int i = 0; i < len/4; i++)
-  {
-    h += data[i];
-  }
-
-  *(uint32_t*)out = h;
-}
-
-void DoNothingHash ( const void *, int, uint32_t, void * )
-{
-}
-
-//-----------------------------------------------------------------------------
-// One-byte-at-a-time hash based on Murmur's mix
-
-uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed )
-{
-  const uint8_t * data = (const uint8_t*)key;
-
-  uint32_t h = seed;
-
-  for(int i = 0; i < len; i++)
-  {
-    h ^= data[i];
-    h *= 0x5bd1e995;
-    h ^= h >> 15;
-  }
-
-  return h;
-}
-
-void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out )
-{
-	*(uint32_t*)out = MurmurOAAT(key,len,seed);
-}
-
-//----------------------------------------------------------------------------
-
-void FNV ( const void * key, int len, uint32_t seed, void * out )
-{
-  unsigned int h = seed;
-
-  const uint8_t * data = (const uint8_t*)key;
-
-  h ^= BIG_CONSTANT(2166136261);
-
-  for(int i = 0; i < len; i++)
-  {
-    h ^= data[i];
-    h *= 16777619;
-  }
-
-  *(uint32_t*)out = h;
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t x17 ( const void * key, int len, uint32_t h ) 
-{
-  const uint8_t * data = (const uint8_t*)key;
-    
-  for(int i = 0; i < len; ++i) 
-  {
-        h = 17 * h + (data[i] - ' ');
-    }
-
-    return h ^ (h >> 16);
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t Bernstein ( const void * key, int len, uint32_t h ) 
-{
-  const uint8_t * data = (const uint8_t*)key;
-    
-  for(int i = 0; i < len; ++i) 
-  {
-        h = 33 * h + data[i];
-    }
-
-  return h;
-}
-
-//-----------------------------------------------------------------------------
-// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html
-
-uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {
-  #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }
-  #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }
-
-  const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;
-  uint32_t h = len + seed, k = n + len;
-  uint64_t p;
-
-  while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }
-  if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }
-  if ( len ) { c8mix( key4[0] & ( ( 1 << ( len * 8 ) ) - 1 ) ) }
-  c8fold( h ^ k, n, k, k )
-  return k;
-}
-
-void Crap8_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed);
-}
+#include "Hashes.h"
+
+#include "Random.h"
+
+
+#include <stdlib.h>
+//#include <stdint.h>
+#include <assert.h>
+//#include <emmintrin.h>
+//#include <xmmintrin.h>
+
+//----------------------------------------------------------------------------
+// fake / bad hashes
+
+void BadHash ( const void * key, int len, uint32_t seed, void * out )
+{ // Shift/xor mix over each key byte, starting from seed; stores a uint32_t through out.
+  uint32_t h = seed;
+
+  const uint8_t * data = (const uint8_t*)key;
+
+  for(int i = 0; i < len; i++)
+  {
+    h ^= h >> 3;
+    h ^= h << 5;
+    h ^= data[i]; // the byte is xored in after the two shift steps
+  }
+
+  *(uint32_t*)out = h;
+}
+
+void sumhash ( const void * key, int len, uint32_t seed, void * out )
+{ // Adds every key byte to seed (32-bit wraparound); stores a uint32_t through out.
+  uint32_t h = seed;
+
+  const uint8_t * data = (const uint8_t*)key;
+
+  for(int i = 0; i < len; i++)
+  {
+    h += data[i];
+  }
+
+  *(uint32_t*)out = h;
+}
+
+void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
+{ // Adds the key to seed one 32-bit word at a time; stores a uint32_t through out.
+  uint32_t h = seed;
+
+  const uint32_t * data = (const uint32_t*)key; // key is read through a uint32_t pointer
+
+  for(int i = 0; i < len/4; i++) // len/4 whole words; the trailing len%4 bytes are not read
+  {
+    h += data[i];
+  }
+
+  *(uint32_t*)out = h;
+}
+
+void DoNothingHash ( const void *, int, uint32_t, void * )
+{ // Intentionally empty: neither reads the key nor writes the output buffer.
+}
+
+//-----------------------------------------------------------------------------
+// One-byte-at-a-time hash based on Murmur's mix
+
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed )
+{ // Returns a 32-bit hash built one key byte at a time, starting from seed.
+  const uint8_t * data = (const uint8_t*)key;
+
+  uint32_t h = seed;
+
+  for(int i = 0; i < len; i++)
+  {
+    h ^= data[i];    // absorb one byte
+    h *= 0x5bd1e995; // multiply step
+    h ^= h >> 15;    // xor the upper bits back into the lower ones
+  }
+
+  return h;
+}
+
+void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out )
+{ // Adapter: stores MurmurOAAT's 32-bit return value through out.
+	*(uint32_t*)out = MurmurOAAT(key,len,seed);
+}
+
+//----------------------------------------------------------------------------
+
+void FNV ( const void * key, int len, uint32_t seed, void * out )
+{ // Byte-at-a-time xor-then-multiply hash; stores a uint32_t through out.
+  unsigned int h = seed;
+
+  const uint8_t * data = (const uint8_t*)key;
+
+  h ^= BIG_CONSTANT(2166136261); // 2166136261 is the usual 32-bit FNV offset basis; here it is xored with the seed
+
+  for(int i = 0; i < len; i++)
+  {
+    h ^= data[i];
+    h *= 16777619; // 16777619 is the usual 32-bit FNV prime
+  }
+
+  *(uint32_t*)out = h;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t x17 ( const void * key, int len, uint32_t h ) 
+{ // h is the starting value; returns the final state xored with its own upper 16 bits.
+  const uint8_t * data = (const uint8_t*)key;
+    
+  for(int i = 0; i < len; ++i) 
+  {
+        h = 17 * h + (data[i] - ' '); // each byte is offset by the space character before being added
+    }
+
+    return h ^ (h >> 16);
+}
+
+//-----------------------------------------------------------------------------
+
+void Bernstein ( const void * key, int len, uint32_t seed, void * out ) 
+{ // Multiply-by-33-and-add hash over the key bytes, accumulated in seed; stores a uint32_t through out.
+  const uint8_t * data = (const uint8_t*)key;
+    
+  for(int i = 0; i < len; ++i) 
+  {
+        seed = 33 * seed + data[i]; // the by-value seed parameter doubles as the running state
+    }
+
+  *(uint32_t*)out = seed;
+}
+
+//-----------------------------------------------------------------------------
+// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html
+
+uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {  // returns a 32-bit hash of key[0..len) combined with seed
+  #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }
+  #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }
+  // c8fold: 32x32->64 multiply, low half xored into y and high half into z.  c8mix: multiply h by m, then fold one input word.
+  const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;  // NOTE(review): whole words are still loaded through key4 - assumes key is readable as uint32_t
+  uint32_t h = len + seed, k = n + len;
+  uint64_t p;
+  // The 1-3 trailing bytes are assembled one at a time so nothing past key[len-1] is read; same value as a masked little-endian word load.
+  while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }
+  if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }
+  if ( len ) { const uint8_t *t = (const uint8_t *)key4; uint32_t v = t[0]; if ( len > 1 ) v |= (uint32_t)t[1] << 8; if ( len > 2 ) v |= (uint32_t)t[2] << 16; c8mix( v ) }
+  c8fold( h ^ k, n, k, k )
+  return k;
+}
+
+void Crap8_test ( const void * key, int len, uint32_t seed, void * out )
+{ // Adapter: stores Crap8's 32-bit return value through out; the int len is converted to Crap8's uint32_t length parameter.
+  *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed);
+}
diff --git a/Hashes.h b/Hashes.h
index 2120cd8..6c04ae1 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -1,73 +1,78 @@
-#pragma once
-
-#include "Types.h"
-
-#include "MurmurHash1.h"
-#include "MurmurHash2.h"
-#include "MurmurHash3.h"
-
-//----------
-// These are _not_ hash functions (even though people tend to use crc32 as one...)
-
-void sumhash               ( const void * key, int len, uint32_t seed, void * out );
-void sumhash32             ( const void * key, int len, uint32_t seed, void * out );
-
-void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
-void crc32                 ( const void * key, int len, uint32_t seed, void * out );
-
-void randhash_32           ( const void * key, int len, uint32_t seed, void * out );
-void randhash_64           ( const void * key, int len, uint32_t seed, void * out );
-void randhash_128          ( const void * key, int len, uint32_t seed, void * out );
-
-//----------
-// Cryptographic hashes
-
-void md5_32                ( const void * key, int len, uint32_t seed, void * out );
-void sha1_32a              ( const void * key, int len, uint32_t seed, void * out );
-
-//----------
-// General purpose hashes
-
-void FNV                   ( const void * key, int len, uint32_t seed, void * out );
-void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
-void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
-void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );
-void Crap8_test            ( const void * key, int len, uint32_t seed, void * out );
-void CityHash128_test      ( const void * key, int len, uint32_t seed, void * out );
-void CityHash64_test       ( const void * key, int len, uint32_t seed, void * out );
-
-uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
-
-//----------
-// MurmurHash2
-
-void MurmurHash2_test      ( const void * key, int len, uint32_t seed, void * out );
-void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * out );
-
-//-----------------------------------------------------------------------------
-// Test harnesses for Murmur1/2
-
-inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint32_t*)out = MurmurHash1(key,len,seed);
-}
-
-inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint32_t*)out = MurmurHash2(key,len,seed);
-}
-
-inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint32_t*)out = MurmurHash2A(key,len,seed);
-}
-
-inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint64_t*)out = MurmurHash64A(key,len,seed);
-}
-
-inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint64_t*)out = MurmurHash64B(key,len,seed);
-}
\ No newline at end of file
+#pragma once
+
+#include "Types.h"
+
+#include "MurmurHash1.h"
+#include "MurmurHash2.h"
+#include "MurmurHash3.h"
+
+//----------
+// These are _not_ hash functions (even though people tend to use crc32 as one...)
+
+void sumhash               ( const void * key, int len, uint32_t seed, void * out );
+void sumhash32             ( const void * key, int len, uint32_t seed, void * out );
+
+void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
+void crc32                 ( const void * key, int len, uint32_t seed, void * out );
+
+void randhash_32           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_64           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_128          ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// Cryptographic hashes
+
+void md5_32                ( const void * key, int len, uint32_t seed, void * out );
+void sha1_32a              ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// General purpose hashes
+
+void FNV                   ( const void * key, int len, uint32_t seed, void * out );
+void Bernstein             ( const void * key, int len, uint32_t seed, void * out );
+void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
+void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
+void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );
+void Crap8_test            ( const void * key, int len, uint32_t seed, void * out );
+void CityHash128_test      ( const void * key, int len, uint32_t seed, void * out );
+void CityHash64_test       ( const void * key, int len, uint32_t seed, void * out );
+
+void SpookyHash32_test     ( const void * key, int len, uint32_t seed, void * out );
+void SpookyHash64_test     ( const void * key, int len, uint32_t seed, void * out );
+void SpookyHash128_test    ( const void * key, int len, uint32_t seed, void * out );
+
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
+
+//----------
+// MurmurHash2
+
+void MurmurHash2_test      ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+// Test harnesses for Murmur1/2
+
+inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = MurmurHash1(key,len,seed);  // adapter: stores MurmurHash1's result through 'out' as a uint32_t
+}
+
+inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = MurmurHash2(key,len,seed);  // adapter: stores MurmurHash2's result through 'out' as a uint32_t
+}
+
+inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = MurmurHash2A(key,len,seed);  // adapter: stores MurmurHash2A's result through 'out' as a uint32_t
+}
+
+inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint64_t*)out = MurmurHash64A(key,len,seed);  // adapter: stores MurmurHash64A's result through 'out' as a uint64_t (8 bytes written)
+}
+
+inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint64_t*)out = MurmurHash64B(key,len,seed);  // adapter: stores MurmurHash64B's result through 'out' as a uint64_t (8 bytes written)
+}
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 5561030..b3b8a4c 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -1,327 +1,327 @@
-#include "KeysetTest.h"
-
-#include "Platform.h"
-#include "Random.h"
-
-#include <map>
-#include <set>
-
-//-----------------------------------------------------------------------------
-// This should hopefully be a thorough and uambiguous test of whether a hash
-// is correctly implemented on a given platform
-
-bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
-{
-  const int hashbytes = hashbits / 8;
-
-  uint8_t * key    = new uint8_t[256];
-  uint8_t * hashes = new uint8_t[hashbytes * 256];
-  uint8_t * final  = new uint8_t[hashbytes];
-
-  memset(key,0,256);
-  memset(hashes,0,hashbytes*256);
-  memset(final,0,hashbytes);
-
-  // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
-  // the seed
-
-  for(int i = 0; i < 256; i++)
-  {
-    key[i] = (uint8_t)i;
-
-    hash(key,i,256-i,&hashes[i*hashbytes]);
-  }
-
-  // Then hash the result array
-
-  hash(hashes,hashbytes*256,0,final);
-
-  // The first four bytes of that hash, interpreted as a little-endian integer, is our
-  // verification value
-
-  uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
-
-  delete [] key;
-  delete [] hashes;
-  delete [] final;
-
-  //----------
-
-  if(expected != verification)
-  {
-    if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
-    return false;
-  }
-  else
-  {
-    if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
-    return true;
-  }
-}
-
-//----------------------------------------------------------------------------
-// Basic sanity checks -
-
-// A hash function should not be reading outside the bounds of the key.
-
-// Flipping a bit of a key should, with overwhelmingly high probability,
-// result in a different hash.
-
-// Hashing the same key twice should always produce the same result.
-
-// The memory alignment of the key should not affect the hash result.
-
-bool SanityTest ( pfHash hash, const int hashbits )
-{
-  printf("Running sanity check 1");
-  
-  Rand r(883741);
-
-  bool result = true;
-
-  const int hashbytes = hashbits/8;
-  const int reps = 10;
-  const int keymax = 128;
-  const int pad = 16;
-  const int buflen = keymax + pad*3;
-  
-  uint8_t * buffer1 = new uint8_t[buflen];
-  uint8_t * buffer2 = new uint8_t[buflen];
-
-  uint8_t * hash1 = new uint8_t[hashbytes];
-  uint8_t * hash2 = new uint8_t[hashbytes];
-
-  //----------
-  
-  for(int irep = 0; irep < reps; irep++)
-  {
-    if(irep % (reps/10) == 0) printf(".");
-
-    for(int len = 4; len <= keymax; len++)
-    {
-      for(int offset = pad; offset < pad*2; offset++)
-      {
-        uint8_t * key1 = &buffer1[pad];
-        uint8_t * key2 = &buffer2[pad+offset];
-
-        r.rand_p(buffer1,buflen);
-        r.rand_p(buffer2,buflen);
-
-        memcpy(key2,key1,len);
-
-        hash(key1,len,0,hash1);
-
-        for(int bit = 0; bit < (len * 8); bit++)
-        {
-          // Flip a bit, hash the key -> we should get a different result.
-
-          flipbit(key2,len,bit);
-          hash(key2,len,0,hash2);
-
-          if(memcmp(hash1,hash2,hashbytes) == 0)
-          {
-            result = false;
-          }
-
-          // Flip it back, hash again -> we should get the original result.
-
-          flipbit(key2,len,bit);
-          hash(key2,len,0,hash2);
-
-          if(memcmp(hash1,hash2,hashbytes) != 0)
-          {
-            result = false;
-          }
-        }
-      }
-    }
-  }
-
-  if(result == false)
-  {
-    printf("*********FAIL*********\n");
-  }
-  else
-  {
-    printf("PASS\n");
-  }
-
-  delete [] hash1;
-  delete [] hash2;
-
-  return result;
-}
-
-//----------------------------------------------------------------------------
-// Appending zero bytes to a key should always cause it to produce a different
-// hash value
-
-void AppendedZeroesTest ( pfHash hash, const int hashbits )
-{
-  printf("Running sanity check 2");
-  
-  Rand r(173994);
-
-  const int hashbytes = hashbits/8;
-
-  for(int rep = 0; rep < 100; rep++)
-  {
-    if(rep % 10 == 0) printf(".");
-
-    unsigned char key[256];
-
-    memset(key,0,sizeof(key));
-
-    r.rand_p(key,32);
-
-    uint32_t h1[16];
-    uint32_t h2[16];
-
-    memset(h1,0,hashbytes);
-    memset(h2,0,hashbytes);
-
-    for(int i = 0; i < 32; i++)
-    {
-      hash(key,32+i,0,h1);
-
-      if(memcmp(h1,h2,hashbytes) == 0)
-      {
-        printf("\n*********FAIL*********\n");
-        return;
-      }
-
-      memcpy(h2,h1,hashbytes);
-    }
-  }
-
-  printf("PASS\n");
-}
-
-//-----------------------------------------------------------------------------
-// Generate all keys of up to N bytes containing two non-zero bytes
-
-void TwoBytesKeygen ( int maxlen, KeyCallback & c )
-{
-  //----------
-  // Compute # of keys
-
-  int keycount = 0;
-
-  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
-
-  keycount *= 255*255;
-
-  for(int i = 2; i <= maxlen; i++) keycount += i*255;
-
-  printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
-
-  c.reserve(keycount);
-
-  //----------
-  // Add all keys with one non-zero byte
-
-  uint8_t key[256];
-
-  memset(key,0,256);
-
-  for(int keylen = 2; keylen <= maxlen; keylen++)
-  for(int byteA = 0; byteA < keylen; byteA++)
-  {
-    for(int valA = 1; valA <= 255; valA++)
-    {
-      key[byteA] = (uint8_t)valA;
-
-      c(key,keylen);
-    }
-
-    key[byteA] = 0;
-  }
-
-  //----------
-  // Add all keys with two non-zero bytes
-
-  for(int keylen = 2; keylen <= maxlen; keylen++)
-  for(int byteA = 0; byteA < keylen-1; byteA++)
-  for(int byteB = byteA+1; byteB < keylen; byteB++)
-  {
-    for(int valA = 1; valA <= 255; valA++)
-    {
-      key[byteA] = (uint8_t)valA;
-
-      for(int valB = 1; valB <= 255; valB++)
-      {
-        key[byteB] = (uint8_t)valB;
-        c(key,keylen);
-      }
-
-      key[byteB] = 0;
-    }
-
-    key[byteA] = 0;
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-template< typename hashtype >
-void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
-{
-  typedef CollisionMap<hashtype,ByteVec> cmap_t;
-
-  for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
-  {
-    const hashtype & hash = (*it).first;
-
-    printf("Hash - ");
-    printbytes(&hash,sizeof(hashtype));
-    printf("\n");
-
-    std::vector<ByteVec> & keys = (*it).second;
-
-    for(int i = 0; i < (int)keys.size(); i++)
-    {
-      ByteVec & key = keys[i];
-
-      printf("Key  - ");
-      printbytes(&key[0],(int)key.size());
-      printf("\n");
-    }
-    printf("\n");
-  }
-
-}
-
-// test code
-
-void ReportCollisions ( pfHash hash )
-{
-  printf("Hashing keyset\n");
-
-  std::vector<uint128_t> hashes;
-
-  HashCallback<uint128_t> c(hash,hashes);
-
-  TwoBytesKeygen(20,c);
-
-  printf("%d hashes\n",(int)hashes.size());
-
-  printf("Finding collisions\n");
-
-  HashSet<uint128_t> collisions;
-
-  FindCollisions(hashes,collisions,1000);
-
-  printf("%d collisions\n",(int)collisions.size());
-
-  printf("Mapping collisions\n");
-
-  CollisionMap<uint128_t,ByteVec> cmap;
-
-  CollisionCallback<uint128_t> c2(hash,collisions,cmap);
-
-  TwoBytesKeygen(20,c2);
-
-  printf("Dumping collisions\n");
-
-  DumpCollisionMap(cmap);
-}
+#include "KeysetTest.h"
+
+#include "Platform.h"
+#include "Random.h"
+
+#include <map>
+#include <set>
+
+//-----------------------------------------------------------------------------
+// This should hopefully be a thorough and unambiguous test of whether a hash
+// is correctly implemented on a given platform
+
+bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
+{
+  const int hashbytes = hashbits / 8;
+  // Returns true when the computed verification value equals 'expected'; prints the outcome only when 'verbose' is set.
+  uint8_t * key    = new uint8_t[256];
+  uint8_t * hashes = new uint8_t[hashbytes * 256];
+  uint8_t * final  = new uint8_t[hashbytes];
+
+  memset(key,0,256);
+  memset(hashes,0,hashbytes*256);
+  memset(final,0,hashbytes);
+
+  // Hash the keys {}, {0}, {0,1}, {0,1,2}... (lengths N = 0 through 255), using
+  // 256-N as the seed; the digest for length N is stored in slot N of 'hashes'
+
+  for(int i = 0; i < 256; i++)
+  {
+    key[i] = (uint8_t)i;
+
+    hash(key,i,256-i,&hashes[i*hashbytes]);
+  }
+
+  // Then hash the result array
+
+  hash(hashes,hashbytes*256,0,final);
+
+  // The first four bytes of that hash, interpreted as a little-endian integer, is our
+  // verification value
+
+  uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);  // NOTE(review): reads final[0..3], so this assumes hashbits >= 32 - confirm
+
+  delete [] key;
+  delete [] hashes;
+  delete [] final;
+
+  //----------
+
+  if(expected != verification)
+  {
+    if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
+    return false;
+  }
+  else
+  {
+    if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
+    return true;
+  }
+}
+
+//----------------------------------------------------------------------------
+// Basic sanity checks -
+
+// A hash function should not be reading outside the bounds of the key.
+
+// Flipping a bit of a key should, with overwhelmingly high probability,
+// result in a different hash.
+
+// Hashing the same key twice should always produce the same result.
+
+// The memory alignment of the key should not affect the hash result.
+
+bool SanityTest ( pfHash hash, const int hashbits )
+{
+  printf("Running sanity check 1");
+  // Returns true when every bit-flip / flip-back check below passes; prints PASS or FAIL at the end.
+  Rand r(883741);
+  bool result = true;
+
+  const int hashbytes = hashbits/8;
+  const int reps = 10;
+  const int keymax = 128;
+  const int pad = 16;
+  const int buflen = keymax + pad*3;  // largest key (keymax) at the largest start index used below (pad + pad*2 - 1) still fits
+  
+  uint8_t * buffer1 = new uint8_t[buflen];
+  uint8_t * buffer2 = new uint8_t[buflen];
+  uint8_t * hash1 = new uint8_t[hashbytes];
+  uint8_t * hash2 = new uint8_t[hashbytes];
+
+  //----------
+  // key1 always starts at buffer1[pad]; key2 is a copy placed at a varying offset inside buffer2.
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % (reps/10) == 0) printf(".");
+
+    for(int len = 4; len <= keymax; len++)
+    {
+      for(int offset = pad; offset < pad*2; offset++)
+      {
+        uint8_t * key1 = &buffer1[pad];
+        uint8_t * key2 = &buffer2[pad+offset];
+
+        r.rand_p(buffer1,buflen);
+        r.rand_p(buffer2,buflen);
+
+        memcpy(key2,key1,len);
+
+        hash(key1,len,0,hash1);  // reference digest of the unmodified key
+
+        for(int bit = 0; bit < (len * 8); bit++)
+        {
+          // Flip a bit, hash the key -> we should get a different result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          if(memcmp(hash1,hash2,hashbytes) == 0)
+          {
+            result = false;
+          }
+
+          // Flip it back, hash again -> we should get the original result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          if(memcmp(hash1,hash2,hashbytes) != 0)
+          {
+            result = false;
+          }
+        }
+      }
+    }
+  }
+
+  if(result == false)
+  {
+    printf("*********FAIL*********\n");
+  }
+  else
+  {
+    printf("PASS\n");
+  }
+
+  delete [] buffer1;
+  delete [] buffer2;
+  delete [] hash1;
+  delete [] hash2;
+
+  return result;
+}
+
+//----------------------------------------------------------------------------
+// Appending zero bytes to a key should always cause it to produce a different
+// hash value
+
+void AppendedZeroesTest ( pfHash hash, const int hashbits )
+{
+  printf("Running sanity check 2");
+  // 100 rounds: hash one key at lengths 32..63 and require each digest to differ from the previous length's digest.
+  Rand r(173994);
+
+  const int hashbytes = hashbits/8;
+
+  for(int rep = 0; rep < 100; rep++)
+  {
+    if(rep % 10 == 0) printf(".");
+
+    unsigned char key[256];
+
+    memset(key,0,sizeof(key));
+
+    r.rand_p(key,32);  // presumably randomizes only the first 32 bytes, leaving the rest zero - confirm against Rand::rand_p
+
+    uint32_t h1[16];  // 64 bytes of digest storage - assumes hashbytes <= 64
+    uint32_t h2[16];
+
+    memset(h1,0,hashbytes);
+    memset(h2,0,hashbytes);
+
+    for(int i = 0; i < 32; i++)
+    {
+      hash(key,32+i,0,h1);
+
+      if(memcmp(h1,h2,hashbytes) == 0)  // on the first pass h2 is still all zero, so an all-zero digest also counts as a failure
+      {
+        printf("\n*********FAIL*********\n");
+        return;
+      }
+
+      memcpy(h2,h1,hashbytes);  // remember this digest for comparison with the next, longer key
+    }
+  }
+
+  printf("PASS\n");
+}
+
+//-----------------------------------------------------------------------------
+// Generate all keys of 2 to N bytes containing one or two non-zero bytes
+
+void TwoBytesKeygen ( int maxlen, KeyCallback & c )
+{
+  //----------
+  // Compute # of keys
+  // (the total is printed and passed to c.reserve before any key is generated)
+  int keycount = 0;
+
+  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);  // presumably chooseK(i,2) is the number of byte-position pairs - confirm
+
+  keycount *= 255*255;  // 255 non-zero values for each of the two positions
+
+  for(int i = 2; i <= maxlen; i++) keycount += i*255;  // single-non-zero-byte keys: i positions x 255 values
+
+  printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
+
+  c.reserve(keycount);
+
+  //----------
+  // Add all keys with one non-zero byte
+
+  uint8_t key[256];  // shared scratch key - assumes maxlen <= 256
+
+  memset(key,0,256);
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen; byteA++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      c(key,keylen);
+    }
+
+    key[byteA] = 0;  // restore the all-zero key before moving to the next position
+  }
+
+  //----------
+  // Add all keys with two non-zero bytes
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen-1; byteA++)
+  for(int byteB = byteA+1; byteB < keylen; byteB++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      for(int valB = 1; valB <= 255; valB++)
+      {
+        key[byteB] = (uint8_t)valB;
+        c(key,keylen);
+      }
+
+      key[byteB] = 0;
+    }
+
+    key[byteA] = 0;  // both positions are zero again before the next (byteA, byteB) pair
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Prints every entry of 'cmap': the hash bytes, followed by each key stored under that hash.
+template< typename hashtype >
+void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
+{
+  typedef CollisionMap<hashtype,ByteVec> cmap_t;
+
+  for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
+  {
+    const hashtype & hash = (*it).first;
+
+    printf("Hash - ");
+    printbytes(&hash,sizeof(hashtype));
+    printf("\n");
+
+    std::vector<ByteVec> & keys = (*it).second;
+
+    for(int i = 0; i < (int)keys.size(); i++)
+    {
+      ByteVec & key = keys[i];
+
+      printf("Key  - ");
+      printbytes(&key[0],(int)key.size());  // NOTE(review): &key[0] assumes the stored key is non-empty - confirm
+      printf("\n");
+    }
+    printf("\n");
+  }
+
+}
+
+// test code
+// Runs the 'TwoBytes' keyset (maxlen 20) through 'hash' twice, using uint128_t as the hash type, and prints the collisions found.
+void ReportCollisions ( pfHash hash )
+{
+  printf("Hashing keyset\n");
+
+  std::vector<uint128_t> hashes;
+
+  HashCallback<uint128_t> c(hash,hashes);  // presumably appends the hash of every generated key to 'hashes' - confirm
+
+  TwoBytesKeygen(20,c);
+
+  printf("%d hashes\n",(int)hashes.size());
+
+  printf("Finding collisions\n");
+
+  HashSet<uint128_t> collisions;
+
+  FindCollisions(hashes,collisions,1000);  // NOTE(review): meaning of the third argument (1000) is not visible here - confirm
+
+  printf("%d collisions\n",(int)collisions.size());
+
+  printf("Mapping collisions\n");
+
+  CollisionMap<uint128_t,ByteVec> cmap;
+
+  CollisionCallback<uint128_t> c2(hash,collisions,cmap);  // presumably records in 'cmap' the keys whose hash is in 'collisions' - confirm
+
+  TwoBytesKeygen(20,c2);  // second pass over the same keyset, this time feeding c2
+
+  printf("Dumping collisions\n");
+
+  DumpCollisionMap(cmap);
+}
diff --git a/KeysetTest.h b/KeysetTest.h
index 55d5d5f..dce54d2 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -1,439 +1,439 @@
-//-----------------------------------------------------------------------------
-// Keyset tests generate various sorts of difficult-to-hash keysets and compare
-// the distribution and collision frequency of the hash results against an
-// ideal random distribution
-
-// The sanity checks are also in this cpp/h
-
-#pragma once
-
-#include "Types.h"
-#include "Stats.h"
-#include "Random.h"   // for rand_p
-
-#include <algorithm>  // for std::swap
-#include <assert.h>
-
-//-----------------------------------------------------------------------------
-// Sanity tests
-
-bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
-bool SanityTest         ( pfHash hash, const int hashbits );
-void AppendedZeroesTest ( pfHash hash, const int hashbits );
-
-//-----------------------------------------------------------------------------
-// Keyset 'Combination' - all possible combinations of input blocks
-
-template< typename hashtype >
-void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 
-                  uint32_t * blocks, int blockcount, 
-                pfHash hash, std::vector<hashtype> & hashes )
-{
-  if(len == maxlen) return;
-
-  for(int i = 0; i < blockcount; i++)
-  {
-    key[len] = blocks[i];
-  
-    //if(len == maxlen-1)
-    {
-      hashtype h;
-      hash(key,(len+1) * sizeof(uint32_t),0,&h);
-      hashes.push_back(h);
-    }
-
-    //else
-    {
-      CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
-    }
-  }
-}
-
-template< typename hashtype >
-bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
-{
-  printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
-
-  //----------
-
-  std::vector<hashtype> hashes;
-
-  uint32_t * key = new uint32_t[maxlen];
-
-  CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
-
-  delete [] key;
-
-  printf("%d keys\n",(int)hashes.size());
-
-  //----------
-
-  bool result = true;
-
-  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-  
-  printf("\n");
-
-  return result;
-}
-
-//----------------------------------------------------------------------------
-// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
-// consisting of all possible permutations of those blocks
-
-template< typename hashtype >
-void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
-{
-  if(k == blockcount-1)
-  {
-    hashtype h;
-
-    hash(blocks,blockcount * sizeof(uint32_t),0,&h);
-
-    hashes.push_back(h);
-
-    return;
-  }
-
-  for(int i = k; i < blockcount; i++)
-  {
-    std::swap(blocks[k],blocks[i]);
-
-    PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
-
-    std::swap(blocks[k],blocks[i]);
-  }
-}
-
-template< typename hashtype >
-bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
-{
-  printf("Keyset 'Permutation' - %d blocks - ",blockcount);
-
-  //----------
-
-  std::vector<hashtype> hashes;
-
-  PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
-
-  printf("%d keys\n",(int)hashes.size());
-
-  //----------
-
-  bool result = true;
-
-  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-  
-  printf("\n");
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
-
-template < typename keytype, typename hashtype >
-void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
-{
-  const int nbytes = sizeof(keytype);
-  const int nbits = nbytes * 8;
-
-  hashtype h;
-
-  for(int i = start; i < nbits; i++)
-  {
-    flipbit(&k,nbytes,i);
-
-    if(inclusive || (bitsleft == 1))
-    {
-      hash(&k,sizeof(keytype),0,&h);
-      hashes.push_back(h);
-    }
-
-    if(bitsleft > 1)
-    {
-      SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
-    }
-
-    flipbit(&k,nbytes,i);
-  }
-}
-
-//----------
-
-template < int keybits, typename hashtype >
-bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
-{
-  printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
-
-  typedef Blob<keybits> keytype;
-
-  std::vector<hashtype> hashes;
-
-  keytype k;
-  memset(&k,0,sizeof(k));
-
-  if(inclusive)
-  {
-    hashtype h;
-
-    hash(&k,sizeof(keytype),0,&h);
-
-    hashes.push_back(h);
-  }
-
-  SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
-
-  printf("%d keys\n",(int)hashes.size());
-
-  bool result = true;
-  
-  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-
-  printf("\n");
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
-// all possible keys with bits set in that window
-
-template < typename keytype, typename hashtype >
-bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
-{
-  const int keybits = sizeof(keytype) * 8;
-  const int keycount = 1 << windowbits;
-
-  std::vector<hashtype> hashes;
-  hashes.resize(keycount);
-
-  bool result = true;
-
-  int testcount = keybits;
-
-  printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
-
-  for(int j = 0; j <= testcount; j++)
-  {
-    int minbit = j;
-
-    keytype key;
-
-    for(int i = 0; i < keycount; i++)
-    {
-      key = i;
-      //key = key << minbit;
-
-      lrot(&key,sizeof(keytype),minbit);
-
-      hash(&key,sizeof(keytype),0,&hashes[i]);
-    }
-
-    printf("Window at %3d - ",j);
-
-    result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
-
-    //printf("\n");
-  }
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
-// bytes.
-
-// (This keyset type is designed to make MurmurHash2 fail)
-
-template < typename hashtype >
-bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
-{
-  printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
-
-  Rand r(483723);
-
-  std::vector<hashtype> hashes;
-  hashes.resize(keycount);
-
-  int keyLen = cycleLen * cycleReps;
-
-  uint8_t * cycle = new uint8_t[cycleLen + 16];
-  uint8_t * key = new uint8_t[keyLen];
-
-  //----------
-
-  for(int i = 0; i < keycount; i++)
-  {
-    r.rand_p(cycle,cycleLen);
-
-    *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
-
-    for(int j = 0; j < keyLen; j++)
-    {
-      key[j] = cycle[j % cycleLen];
-    }
-
-    hash(key,keyLen,0,&hashes[i]);
-  }
-
-  //----------
-  
-  bool result = true;
-
-  result &= TestHashList(hashes,true,true,drawDiagram);
-  printf("\n");
-
-  delete [] cycle;
-  delete [] key;
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
-
-void TwoBytesKeygen ( int maxlen, KeyCallback & c );
-
-template < typename hashtype >
-bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
-{
-  std::vector<hashtype> hashes;
-
-  HashCallback<hashtype> c(hash,hashes);
-
-  TwoBytesKeygen(maxlen,c);
-
-  bool result = true;
-
-  result &= TestHashList(hashes,true,true,drawDiagram);
-  printf("\n");
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
-// where "core" consists of all possible combinations of the given character
-// set of length N.
-
-template < typename hashtype >
-bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
-{
-  const int prefixlen = (int)strlen(prefix);
-  const int suffixlen = (int)strlen(suffix);
-  const int corecount = (int)strlen(coreset);
-
-  const int keybytes = prefixlen + corelen + suffixlen;
-  const int keycount = (int)pow(double(corecount),double(corelen));
-
-  printf("Keyset 'Text' - keys of form \"%s[",prefix);
-  for(int i = 0; i < corelen; i++) printf("X");		
-  printf("]%s\" - %d keys\n",suffix,keycount);
-
-  uint8_t * key = new uint8_t[keybytes+1];
-
-  key[keybytes] = 0;
-
-  memcpy(key,prefix,prefixlen);
-  memcpy(key+prefixlen+corelen,suffix,suffixlen);
-
-  //----------
-
-  std::vector<hashtype> hashes;
-  hashes.resize(keycount);
-
-  for(int i = 0; i < keycount; i++)
-  {
-    int t = i;
-
-    for(int j = 0; j < corelen; j++)
-    {
-      key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
-    }
-
-    hash(key,keybytes,0,&hashes[i]);
-  }
-
-  //----------
-
-  bool result = true;
-
-  result &= TestHashList(hashes,true,true,drawDiagram);
-
-  printf("\n");
-
-  delete [] key;
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
-
-// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
-
-template < typename hashtype >
-bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
-{
-  int keycount = 64*1024;
-
-  printf("Keyset 'Zeroes' - %d keys\n",keycount);
-
-  unsigned char * nullblock = new unsigned char[keycount];
-  memset(nullblock,0,keycount);
-
-  //----------
-
-  std::vector<hashtype> hashes;
-
-  hashes.resize(keycount);
-
-  for(int i = 0; i < keycount; i++)
-  {
-    hash(nullblock,i,0,&hashes[i]);
-  }
-
-  bool result = true;
-
-  result &= TestHashList(hashes,true,true,drawDiagram);
-
-  printf("\n");
-
-  delete [] nullblock;
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
-
-template < typename hashtype >
-bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
-{
-  printf("Keyset 'Seed' - %d keys\n",keycount);
-
-  const char * text = "The quick brown fox jumps over the lazy dog";
-  const int len = (int)strlen(text);
-
-  //----------
-
-  std::vector<hashtype> hashes;
-
-  hashes.resize(keycount);
-
-  for(int i = 0; i < keycount; i++)
-  {
-    hash(text,len,i,&hashes[i]);
-  }
-
-  bool result = true;
-
-  result &= TestHashList(hashes,true,true,drawDiagram);
-
-  printf("\n");
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+// Keyset tests generate various sorts of difficult-to-hash keysets and compare
+// the distribution and collision frequency of the hash results against an
+// ideal random distribution
+
+// The sanity checks are also in this cpp/h
+
+#pragma once
+
+#include "Types.h"
+#include "Stats.h"
+#include "Random.h"   // for rand_p
+
+#include <algorithm>  // for std::swap
+#include <assert.h>
+
+//-----------------------------------------------------------------------------
+// Sanity tests
+
+bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
+bool SanityTest         ( pfHash hash, const int hashbits );
+void AppendedZeroesTest ( pfHash hash, const int hashbits );
+
+//-----------------------------------------------------------------------------
+// Keyset 'Combination' - all possible combinations of input blocks
+
+template< typename hashtype >
+void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 
+                  uint32_t * blocks, int blockcount, 
+                pfHash hash, std::vector<hashtype> & hashes )
+{
+  if(len == maxlen) return;
+
+  for(int i = 0; i < blockcount; i++)
+  {
+    key[len] = blocks[i];
+  
+    //if(len == maxlen-1)
+    {
+      hashtype h;
+      hash(key,(len+1) * sizeof(uint32_t),0,&h);
+      hashes.push_back(h);
+    }
+
+    //else
+    {
+      CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
+    }
+  }
+}
+
+template< typename hashtype >
+bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+  printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+
+  uint32_t * key = new uint32_t[maxlen];
+
+  CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
+
+  delete [] key;
+
+  printf("%d keys\n",(int)hashes.size());
+
+  //----------
+
+  bool result = true;
+
+  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+  
+  printf("\n");
+
+  return result;
+}
+
+//----------------------------------------------------------------------------
+// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
+// consisting of all possible permutations of those blocks
+
+template< typename hashtype >
+void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
+{
+  if(k == blockcount-1)
+  {
+    hashtype h;
+
+    hash(blocks,blockcount * sizeof(uint32_t),0,&h);
+
+    hashes.push_back(h);
+
+    return;
+  }
+
+  for(int i = k; i < blockcount; i++)
+  {
+    std::swap(blocks[k],blocks[i]);
+
+    PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
+
+    std::swap(blocks[k],blocks[i]);
+  }
+}
+
+template< typename hashtype >
+bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+  printf("Keyset 'Permutation' - %d blocks - ",blockcount);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+
+  PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
+
+  printf("%d keys\n",(int)hashes.size());
+
+  //----------
+
+  bool result = true;
+
+  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+  
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
+
+template < typename keytype, typename hashtype >
+void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
+{
+  const int nbytes = sizeof(keytype);
+  const int nbits = nbytes * 8;
+
+  hashtype h;
+
+  for(int i = start; i < nbits; i++)
+  {
+    flipbit(&k,nbytes,i);
+
+    if(inclusive || (bitsleft == 1))
+    {
+      hash(&k,sizeof(keytype),0,&h);
+      hashes.push_back(h);
+    }
+
+    if(bitsleft > 1)
+    {
+      SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
+    }
+
+    flipbit(&k,nbytes,i);
+  }
+}
+
+//----------
+
+template < int keybits, typename hashtype >
+bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
+{
+  printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
+
+  typedef Blob<keybits> keytype;
+
+  std::vector<hashtype> hashes;
+
+  keytype k;
+  memset(&k,0,sizeof(k));
+
+  if(inclusive)
+  {
+    hashtype h;
+
+    hash(&k,sizeof(keytype),0,&h);
+
+    hashes.push_back(h);
+  }
+
+  SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
+
+  printf("%d keys\n",(int)hashes.size());
+
+  bool result = true;
+  
+  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
+// all possible keys with bits set in that window
+
+template < typename keytype, typename hashtype >
+bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
+{
+  const int keybits = sizeof(keytype) * 8;
+  const int keycount = 1 << windowbits;
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  bool result = true;
+
+  int testcount = keybits;
+
+  printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
+
+  for(int j = 0; j <= testcount; j++)
+  {
+    int minbit = j;
+
+    keytype key;
+
+    for(int i = 0; i < keycount; i++)
+    {
+      key = i;
+      //key = key << minbit;
+
+      lrot(&key,sizeof(keytype),minbit);
+
+      hash(&key,sizeof(keytype),0,&hashes[i]);
+    }
+
+    printf("Window at %3d - ",j);
+
+    result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
+
+    //printf("\n");
+  }
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
+// bytes.
+
+// (This keyset type is designed to make MurmurHash2 fail)
+
+template < typename hashtype >
+bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
+{
+  printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
+
+  Rand r(483723);
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  int keyLen = cycleLen * cycleReps;
+
+  uint8_t * cycle = new uint8_t[cycleLen + 16];
+  uint8_t * key = new uint8_t[keyLen];
+
+  //----------
+
+  for(int i = 0; i < keycount; i++)
+  {
+    r.rand_p(cycle,cycleLen);
+
+    *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
+
+    for(int j = 0; j < keyLen; j++)
+    {
+      key[j] = cycle[j % cycleLen];
+    }
+
+    hash(key,keyLen,0,&hashes[i]);
+  }
+
+  //----------
+  
+  bool result = true;
+
+  result &= TestHashList(hashes,true,true,drawDiagram);
+  printf("\n");
+
+  delete [] cycle;
+  delete [] key;
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
+
+void TwoBytesKeygen ( int maxlen, KeyCallback & c );
+
+template < typename hashtype >
+bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
+{
+  std::vector<hashtype> hashes;
+
+  HashCallback<hashtype> c(hash,hashes);
+
+  TwoBytesKeygen(maxlen,c);
+
+  bool result = true;
+
+  result &= TestHashList(hashes,true,true,drawDiagram);
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
+// where "core" consists of all possible combinations of the given character
+// set of length N.
+
+template < typename hashtype >
+bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
+{
+  const int prefixlen = (int)strlen(prefix);
+  const int suffixlen = (int)strlen(suffix);
+  const int corecount = (int)strlen(coreset);
+
+  const int keybytes = prefixlen + corelen + suffixlen;
+  const int keycount = (int)pow(double(corecount),double(corelen));
+
+  printf("Keyset 'Text' - keys of form \"%s[",prefix);
+  for(int i = 0; i < corelen; i++) printf("X");		
+  printf("]%s\" - %d keys\n",suffix,keycount);
+
+  uint8_t * key = new uint8_t[keybytes+1];
+
+  key[keybytes] = 0;
+
+  memcpy(key,prefix,prefixlen);
+  memcpy(key+prefixlen+corelen,suffix,suffixlen);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  for(int i = 0; i < keycount; i++)
+  {
+    int t = i;
+
+    for(int j = 0; j < corelen; j++)
+    {
+      key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
+    }
+
+    hash(key,keybytes,0,&hashes[i]);
+  }
+
+  //----------
+
+  bool result = true;
+
+  result &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+
+  delete [] key;
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
+
+// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
+
+template < typename hashtype >
+bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
+{
+  int keycount = 64*1024;
+
+  printf("Keyset 'Zeroes' - %d keys\n",keycount);
+
+  unsigned char * nullblock = new unsigned char[keycount];
+  memset(nullblock,0,keycount);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+
+  hashes.resize(keycount);
+
+  for(int i = 0; i < keycount; i++)
+  {
+    hash(nullblock,i,0,&hashes[i]);
+  }
+
+  bool result = true;
+
+  result &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+
+  delete [] nullblock;
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
+
+template < typename hashtype >
+bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
+{
+  printf("Keyset 'Seed' - %d keys\n",keycount);
+
+  const char * text = "The quick brown fox jumps over the lazy dog";
+  const int len = (int)strlen(text);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+
+  hashes.resize(keycount);
+
+  for(int i = 0; i < keycount; i++)
+  {
+    hash(text,len,i,&hashes[i]);
+  }
+
+  bool result = true;
+
+  result &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp
index b21e9f7..8225566 100644
--- a/MurmurHash1.cpp
+++ b/MurmurHash1.cpp
@@ -1,174 +1,174 @@
-//-----------------------------------------------------------------------------
-// MurmurHash was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - This code makes a few assumptions about how your machine behaves -
-
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-
-// And it has a few limitations -
-
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and big-endian
-//    machines.
-
-#include "MurmurHash1.h"
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
-{
-  const unsigned int m = 0xc6a4a793;
-
-  const int r = 16;
-
-  unsigned int h = seed ^ (len * m);
-
-  //----------
-  
-  const unsigned char * data = (const unsigned char *)key;
-
-  while(len >= 4)
-  {
-    unsigned int k = *(unsigned int *)data;
-
-    h += k;
-    h *= m;
-    h ^= h >> 16;
-
-    data += 4;
-    len -= 4;
-  }
-  
-  //----------
-  
-  switch(len)
-  {
-  case 3:
-    h += data[2] << 16;
-  case 2:
-    h += data[1] << 8;
-  case 1:
-    h += data[0];
-    h *= m;
-    h ^= h >> r;
-  };
- 
-  //----------
-
-  h *= m;
-  h ^= h >> 10;
-  h *= m;
-  h ^= h >> 17;
-
-  return h;
-} 
-
-//-----------------------------------------------------------------------------
-// MurmurHash1Aligned, by Austin Appleby
-
-// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
-// on certain platforms. 
-
-// Performance should be equal to or better than the simple version.
-
-unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
-{
-  const unsigned int m = 0xc6a4a793;
-  const int r = 16;
-
-  const unsigned char * data = (const unsigned char *)key;
-
-  unsigned int h = seed ^ (len * m);
-
-  int align = (uint64_t)data & 3;
-
-  if(align && (len >= 4))
-  {
-    // Pre-load the temp registers
-
-    unsigned int t = 0, d = 0;
-
-    switch(align)
-    {
-      case 1: t |= data[2] << 16;
-      case 2: t |= data[1] << 8;
-      case 3: t |= data[0];
-    }
-
-    t <<= (8 * align);
-
-    data += 4-align;
-    len -= 4-align;
-
-    int sl = 8 * (4-align);
-    int sr = 8 * align;
-
-    // Mix
-
-    while(len >= 4)
-    {
-      d = *(unsigned int *)data;
-      t = (t >> sr) | (d << sl);
-      h += t;
-      h *= m;
-      h ^= h >> r;
-      t = d;
-
-      data += 4;
-      len -= 4;
-    }
-
-    // Handle leftover data in temp registers
-
-    int pack = len < align ? len : align;
-
-    d = 0;
-
-    switch(pack)
-    {
-    case 3: d |= data[2] << 16;
-    case 2: d |= data[1] << 8;
-    case 1: d |= data[0];
-    case 0: h += (t >> sr) | (d << sl);
-        h *= m;
-        h ^= h >> r;
-    }
-
-    data += pack;
-    len -= pack;
-  }
-  else
-  {
-    while(len >= 4)
-    {
-      h += *(unsigned int *)data;
-      h *= m;
-      h ^= h >> r;
-
-      data += 4;
-      len -= 4;
-    }
-  }
-
-  //----------
-  // Handle tail bytes
-
-  switch(len)
-  {
-  case 3: h += data[2] << 16;
-  case 2: h += data[1] << 8;
-  case 1: h += data[0];
-      h *= m;
-      h ^= h >> r;
-  };
-
-  h *= m;
-  h ^= h >> 10;
-  h *= m;
-  h ^= h >> 17;
-
-  return h;
-}
-
+//-----------------------------------------------------------------------------
+// MurmurHash was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+#include "MurmurHash1.h"
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
+{
+  const unsigned int m = 0xc6a4a793;
+
+  const int r = 16;
+
+  unsigned int h = seed ^ (len * m);
+
+  //----------
+  
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    unsigned int k = *(unsigned int *)data;
+
+    h += k;
+    h *= m;
+    h ^= h >> 16;
+
+    data += 4;
+    len -= 4;
+  }
+  
+  //----------
+  
+  switch(len)
+  {
+  case 3:
+    h += data[2] << 16;
+  case 2:
+    h += data[1] << 8;
+  case 1:
+    h += data[0];
+    h *= m;
+    h ^= h >> r;
+  };
+ 
+  //----------
+
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash1Aligned, by Austin Appleby
+
+// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance should be equal to or better than the simple version.
+
+unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
+{
+  const unsigned int m = 0xc6a4a793;
+  const int r = 16;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  unsigned int h = seed ^ (len * m);
+
+  int align = (uint64_t)data & 3;
+
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers
+
+    unsigned int t = 0, d = 0;
+
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4)
+    {
+      d = *(unsigned int *)data;
+      t = (t >> sr) | (d << sl);
+      h += t;
+      h *= m;
+      h ^= h >> r;
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    int pack = len < align ? len : align;
+
+    d = 0;
+
+    switch(pack)
+    {
+    case 3: d |= data[2] << 16;
+    case 2: d |= data[1] << 8;
+    case 1: d |= data[0];
+    case 0: h += (t >> sr) | (d << sl);
+        h *= m;
+        h ^= h >> r;
+    }
+
+    data += pack;
+    len -= pack;
+  }
+  else
+  {
+    while(len >= 4)
+    {
+      h += *(unsigned int *)data;
+      h *= m;
+      h ^= h >> r;
+
+      data += 4;
+      len -= 4;
+    }
+  }
+
+  //----------
+  // Handle tail bytes
+
+  switch(len)
+  {
+  case 3: h += data[2] << 16;
+  case 2: h += data[1] << 8;
+  case 1: h += data[0];
+      h *= m;
+      h ^= h >> r;
+  };
+
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
+
+  return h;
+}
+
diff --git a/MurmurHash1.h b/MurmurHash1.h
index 40ddbc4..93b08c3 100644
--- a/MurmurHash1.h
+++ b/MurmurHash1.h
@@ -1,34 +1,34 @@
-//-----------------------------------------------------------------------------
-// MurmurHash1 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH1_H_
-#define _MURMURHASH1_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH1_H_
+//-----------------------------------------------------------------------------
+// MurmurHash1 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH1_H_
+#define _MURMURHASH1_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH1_H_
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
index dbb2053..cd1e53a 100644
--- a/MurmurHash2.cpp
+++ b/MurmurHash2.cpp
@@ -1,523 +1,523 @@
-//-----------------------------------------------------------------------------
-// MurmurHash2 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - This code makes a few assumptions about how your machine behaves -
-
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-
-// And it has a few limitations -
-
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and big-endian
-//    machines.
-
-#include "MurmurHash2.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
-{
-  // 'm' and 'r' are mixing constants generated offline.
-  // They're not really 'magic', they just happen to work well.
-
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  // Initialize the hash to a 'random' value
-
-  uint32_t h = seed ^ len;
-
-  // Mix 4 bytes at a time into the hash
-
-  const unsigned char * data = (const unsigned char *)key;
-
-  while(len >= 4)
-  {
-    uint32_t k = *(uint32_t*)data;
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h *= m;
-    h ^= k;
-
-    data += 4;
-    len -= 4;
-  }
-
-  // Handle the last few bytes of the input array
-
-  switch(len)
-  {
-  case 3: h ^= data[2] << 16;
-  case 2: h ^= data[1] << 8;
-  case 1: h ^= data[0];
-      h *= m;
-  };
-
-  // Do a few final mixes of the hash to ensure the last few
-  // bytes are well-incorporated.
-
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
-} 
-
-//-----------------------------------------------------------------------------
-// MurmurHash2, 64-bit versions, by Austin Appleby
-
-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 
-// and endian-ness issues if used across multiple platforms.
-
-// 64-bit hash for 64-bit platforms
-
-uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
-{
-  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
-  const int r = 47;
-
-  uint64_t h = seed ^ (len * m);
-
-  const uint64_t * data = (const uint64_t *)key;
-  const uint64_t * end = data + (len/8);
-
-  while(data != end)
-  {
-    uint64_t k = *data++;
-
-    k *= m; 
-    k ^= k >> r; 
-    k *= m; 
-    
-    h ^= k;
-    h *= m; 
-  }
-
-  const unsigned char * data2 = (const unsigned char*)data;
-
-  switch(len & 7)
-  {
-  case 7: h ^= uint64_t(data2[6]) << 48;
-  case 6: h ^= uint64_t(data2[5]) << 40;
-  case 5: h ^= uint64_t(data2[4]) << 32;
-  case 4: h ^= uint64_t(data2[3]) << 24;
-  case 3: h ^= uint64_t(data2[2]) << 16;
-  case 2: h ^= uint64_t(data2[1]) << 8;
-  case 1: h ^= uint64_t(data2[0]);
-          h *= m;
-  };
- 
-  h ^= h >> r;
-  h *= m;
-  h ^= h >> r;
-
-  return h;
-} 
-
-
-// 64-bit hash for 32-bit platforms
-
-uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
-{
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  uint32_t h1 = uint32_t(seed) ^ len;
-  uint32_t h2 = uint32_t(seed >> 32);
-
-  const uint32_t * data = (const uint32_t *)key;
-
-  while(len >= 8)
-  {
-    uint32_t k1 = *data++;
-    k1 *= m; k1 ^= k1 >> r; k1 *= m;
-    h1 *= m; h1 ^= k1;
-    len -= 4;
-
-    uint32_t k2 = *data++;
-    k2 *= m; k2 ^= k2 >> r; k2 *= m;
-    h2 *= m; h2 ^= k2;
-    len -= 4;
-  }
-
-  if(len >= 4)
-  {
-    uint32_t k1 = *data++;
-    k1 *= m; k1 ^= k1 >> r; k1 *= m;
-    h1 *= m; h1 ^= k1;
-    len -= 4;
-  }
-
-  switch(len)
-  {
-  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
-  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
-  case 1: h2 ^= ((unsigned char*)data)[0];
-      h2 *= m;
-  };
-
-  h1 ^= h2 >> 18; h1 *= m;
-  h2 ^= h1 >> 22; h2 *= m;
-  h1 ^= h2 >> 17; h1 *= m;
-  h2 ^= h1 >> 19; h2 *= m;
-
-  uint64_t h = h1;
-
-  h = (h << 32) | h2;
-
-  return h;
-} 
-
-//-----------------------------------------------------------------------------
-// MurmurHash2A, by Austin Appleby
-
-// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 
-// construction. Bulk speed should be identical to Murmur2, small-key speed 
-// will be 10%-20% slower due to the added overhead at the end of the hash.
-
-// This variant fixes a minor issue where null keys were more likely to
-// collide with each other than expected, and also makes the function
-// more amenable to incremental implementations.
-
-#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
-{
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-  uint32_t l = len;
-
-  const unsigned char * data = (const unsigned char *)key;
-
-  uint32_t h = seed;
-
-  while(len >= 4)
-  {
-    uint32_t k = *(uint32_t*)data;
-
-    mmix(h,k);
-
-    data += 4;
-    len -= 4;
-  }
-
-  uint32_t t = 0;
-
-  switch(len)
-  {
-  case 3: t ^= data[2] << 16;
-  case 2: t ^= data[1] << 8;
-  case 1: t ^= data[0];
-  };
-
-  mmix(h,t);
-  mmix(h,l);
-
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
-}
-
-//-----------------------------------------------------------------------------
-// CMurmurHash2A, by Austin Appleby
-
-// This is a sample implementation of MurmurHash2A designed to work 
-// incrementally.
-
-// Usage - 
-
-// CMurmurHash2A hasher
-// hasher.Begin(seed);
-// hasher.Add(data1,size1);
-// hasher.Add(data2,size2);
-// ...
-// hasher.Add(dataN,sizeN);
-// uint32_t hash = hasher.End()
-
-class CMurmurHash2A
-{
-public:
-
-  void Begin ( uint32_t seed = 0 )
-  {
-    m_hash  = seed;
-    m_tail  = 0;
-    m_count = 0;
-    m_size  = 0;
-  }
-
-  void Add ( const unsigned char * data, int len )
-  {
-    m_size += len;
-
-    MixTail(data,len);
-
-    while(len >= 4)
-    {
-      uint32_t k = *(uint32_t*)data;
-
-      mmix(m_hash,k);
-
-      data += 4;
-      len -= 4;
-    }
-
-    MixTail(data,len);
-  }
-
-  uint32_t End ( void )
-  {
-    mmix(m_hash,m_tail);
-    mmix(m_hash,m_size);
-
-    m_hash ^= m_hash >> 13;
-    m_hash *= m;
-    m_hash ^= m_hash >> 15;
-
-    return m_hash;
-  }
-
-private:
-
-  static const uint32_t m = 0x5bd1e995;
-  static const int r = 24;
-
-  void MixTail ( const unsigned char * & data, int & len )
-  {
-    while( len && ((len<4) || m_count) )
-    {
-      m_tail |= (*data++) << (m_count * 8);
-
-      m_count++;
-      len--;
-
-      if(m_count == 4)
-      {
-        mmix(m_hash,m_tail);
-        m_tail = 0;
-        m_count = 0;
-      }
-    }
-  }
-
-  uint32_t m_hash;
-  uint32_t m_tail;
-  uint32_t m_count;
-  uint32_t m_size;
-};
-
-//-----------------------------------------------------------------------------
-// MurmurHashNeutral2, by Austin Appleby
-
-// Same as MurmurHash2, but endian- and alignment-neutral.
-// Half the speed though, alas.
-
-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
-{
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  uint32_t h = seed ^ len;
-
-  const unsigned char * data = (const unsigned char *)key;
-
-  while(len >= 4)
-  {
-    uint32_t k;
-
-    k  = data[0];
-    k |= data[1] << 8;
-    k |= data[2] << 16;
-    k |= data[3] << 24;
-
-    k *= m; 
-    k ^= k >> r; 
-    k *= m;
-
-    h *= m;
-    h ^= k;
-
-    data += 4;
-    len -= 4;
-  }
-  
-  switch(len)
-  {
-  case 3: h ^= data[2] << 16;
-  case 2: h ^= data[1] << 8;
-  case 1: h ^= data[0];
-          h *= m;
-  };
-
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
-} 
-
-//-----------------------------------------------------------------------------
-// MurmurHashAligned2, by Austin Appleby
-
-// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
-// on certain platforms. 
-
-// Performance will be lower than MurmurHash2
-
-#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-
-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
-{
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  const unsigned char * data = (const unsigned char *)key;
-
-  uint32_t h = seed ^ len;
-
-  int align = (uint64_t)data & 3;
-
-  if(align && (len >= 4))
-  {
-    // Pre-load the temp registers
-
-    uint32_t t = 0, d = 0;
-
-    switch(align)
-    {
-      case 1: t |= data[2] << 16;
-      case 2: t |= data[1] << 8;
-      case 3: t |= data[0];
-    }
-
-    t <<= (8 * align);
-
-    data += 4-align;
-    len -= 4-align;
-
-    int sl = 8 * (4-align);
-    int sr = 8 * align;
-
-    // Mix
-
-    while(len >= 4)
-    {
-      d = *(uint32_t *)data;
-      t = (t >> sr) | (d << sl);
-
-      uint32_t k = t;
-
-      MIX(h,k,m);
-
-      t = d;
-
-      data += 4;
-      len -= 4;
-    }
-
-    // Handle leftover data in temp registers
-
-    d = 0;
-
-    if(len >= align)
-    {
-      switch(align)
-      {
-      case 3: d |= data[2] << 16;
-      case 2: d |= data[1] << 8;
-      case 1: d |= data[0];
-      }
-
-      uint32_t k = (t >> sr) | (d << sl);
-      MIX(h,k,m);
-
-      data += align;
-      len -= align;
-
-      //----------
-      // Handle tail bytes
-
-      switch(len)
-      {
-      case 3: h ^= data[2] << 16;
-      case 2: h ^= data[1] << 8;
-      case 1: h ^= data[0];
-          h *= m;
-      };
-    }
-    else
-    {
-      switch(len)
-      {
-      case 3: d |= data[2] << 16;
-      case 2: d |= data[1] << 8;
-      case 1: d |= data[0];
-      case 0: h ^= (t >> sr) | (d << sl);
-          h *= m;
-      }
-    }
-
-    h ^= h >> 13;
-    h *= m;
-    h ^= h >> 15;
-
-    return h;
-  }
-  else
-  {
-    while(len >= 4)
-    {
-      uint32_t k = *(uint32_t *)data;
-
-      MIX(h,k,m);
-
-      data += 4;
-      len -= 4;
-    }
-
-    //----------
-    // Handle tail bytes
-
-    switch(len)
-    {
-    case 3: h ^= data[2] << 16;
-    case 2: h ^= data[1] << 8;
-    case 1: h ^= data[0];
-        h *= m;
-    };
-
-    h ^= h >> 13;
-    h *= m;
-    h ^= h >> 15;
-
-    return h;
-  }
-}
-
-//-----------------------------------------------------------------------------
-
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+#include "MurmurHash2.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash to a 'random' value
+
+  uint32_t h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+      h *= m;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+
+uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
+{
+  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+  const int r = 47;
+
+  uint64_t h = seed ^ (len * m);
+
+  const uint64_t * data = (const uint64_t *)key;
+  const uint64_t * end = data + (len/8);
+
+  while(data != end)
+  {
+    uint64_t k = *data++;
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m; 
+    
+    h ^= k;
+    h *= m; 
+  }
+
+  const unsigned char * data2 = (const unsigned char*)data;
+
+  switch(len & 7)
+  {
+  case 7: h ^= uint64_t(data2[6]) << 48;
+  case 6: h ^= uint64_t(data2[5]) << 40;
+  case 5: h ^= uint64_t(data2[4]) << 32;
+  case 4: h ^= uint64_t(data2[3]) << 24;
+  case 3: h ^= uint64_t(data2[2]) << 16;
+  case 2: h ^= uint64_t(data2[1]) << 8;
+  case 1: h ^= uint64_t(data2[0]);
+          h *= m;
+  };
+ 
+  h ^= h >> r;
+  h *= m;
+  h ^= h >> r;
+
+  return h;
+} 
+
+
+// 64-bit hash for 32-bit platforms
+
+uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h1 = uint32_t(seed) ^ len;
+  uint32_t h2 = uint32_t(seed >> 32);
+
+  const uint32_t * data = (const uint32_t *)key;
+
+  while(len >= 8)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+
+    uint32_t k2 = *data++;
+    k2 *= m; k2 ^= k2 >> r; k2 *= m;
+    h2 *= m; h2 ^= k2;
+    len -= 4;
+  }
+
+  if(len >= 4)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+  }
+
+  switch(len)
+  {
+  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+  case 1: h2 ^= ((unsigned char*)data)[0];
+      h2 *= m;
+  };
+
+  h1 ^= h2 >> 18; h1 *= m;
+  h2 ^= h1 >> 22; h2 *= m;
+  h1 ^= h2 >> 17; h1 *= m;
+  h2 ^= h1 >> 19; h2 *= m;
+
+  uint64_t h = h1;
+
+  h = (h << 32) | h2;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2A, by Austin Appleby
+
+// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 
+// construction. Bulk speed should be identical to Murmur2, small-key speed 
+// will be 10%-20% slower due to the added overhead at the end of the hash.
+
+// This variant fixes a minor issue where null keys were more likely to
+// collide with each other than expected, and also makes the function
+// more amenable to incremental implementations.
+
+#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+  uint32_t l = len;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  uint32_t h = seed;
+
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;
+
+    mmix(h,k);
+
+    data += 4;
+    len -= 4;
+  }
+
+  uint32_t t = 0;
+
+  switch(len)
+  {
+  case 3: t ^= data[2] << 16;
+  case 2: t ^= data[1] << 8;
+  case 1: t ^= data[0];
+  };
+
+  mmix(h,t);
+  mmix(h,l);
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+
+//-----------------------------------------------------------------------------
+// CMurmurHash2A, by Austin Appleby
+
+// This is a sample implementation of MurmurHash2A designed to work 
+// incrementally.
+
+// Usage - 
+
+// CMurmurHash2A hasher
+// hasher.Begin(seed);
+// hasher.Add(data1,size1);
+// hasher.Add(data2,size2);
+// ...
+// hasher.Add(dataN,sizeN);
+// uint32_t hash = hasher.End()
+
+class CMurmurHash2A
+{
+public:
+
+  void Begin ( uint32_t seed = 0 )
+  {
+    m_hash  = seed;
+    m_tail  = 0;
+    m_count = 0;
+    m_size  = 0;
+  }
+
+  void Add ( const unsigned char * data, int len )
+  {
+    m_size += len;
+
+    MixTail(data,len);
+
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t*)data;
+
+      mmix(m_hash,k);
+
+      data += 4;
+      len -= 4;
+    }
+
+    MixTail(data,len);
+  }
+
+  uint32_t End ( void )
+  {
+    mmix(m_hash,m_tail);
+    mmix(m_hash,m_size);
+
+    m_hash ^= m_hash >> 13;
+    m_hash *= m;
+    m_hash ^= m_hash >> 15;
+
+    return m_hash;
+  }
+
+private:
+
+  static const uint32_t m = 0x5bd1e995;
+  static const int r = 24;
+
+  void MixTail ( const unsigned char * & data, int & len )
+  {
+    while( len && ((len<4) || m_count) )
+    {
+      m_tail |= (*data++) << (m_count * 8);
+
+      m_count++;
+      len--;
+
+      if(m_count == 4)
+      {
+        mmix(m_hash,m_tail);
+        m_tail = 0;
+        m_count = 0;
+      }
+    }
+  }
+
+  uint32_t m_hash;
+  uint32_t m_tail;
+  uint32_t m_count;
+  uint32_t m_size;
+};
+
+//-----------------------------------------------------------------------------
+// MurmurHashNeutral2, by Austin Appleby
+
+// Same as MurmurHash2, but endian- and alignment-neutral.
+// Half the speed though, alas.
+
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h = seed ^ len;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k;
+
+    k  = data[0];
+    k |= data[1] << 8;
+    k |= data[2] << 16;
+    k |= data[3] << 24;
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+  
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+          h *= m;
+  };
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance will be lower than MurmurHash2
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  uint32_t h = seed ^ len;
+
+  int align = (uint64_t)data & 3;
+
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers
+
+    uint32_t t = 0, d = 0;
+
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4)
+    {
+      d = *(uint32_t *)data;
+      t = (t >> sr) | (d << sl);
+
+      uint32_t k = t;
+
+      MIX(h,k,m);
+
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    d = 0;
+
+    if(len >= align)
+    {
+      switch(align)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      }
+
+      uint32_t k = (t >> sr) | (d << sl);
+      MIX(h,k,m);
+
+      data += align;
+      len -= align;
+
+      //----------
+      // Handle tail bytes
+
+      switch(len)
+      {
+      case 3: h ^= data[2] << 16;
+      case 2: h ^= data[1] << 8;
+      case 1: h ^= data[0];
+          h *= m;
+      };
+    }
+    else
+    {
+      switch(len)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      case 0: h ^= (t >> sr) | (d << sl);
+          h *= m;
+      }
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+  else
+  {
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t *)data;
+
+      MIX(h,k,m);
+
+      data += 4;
+      len -= 4;
+    }
+
+    //----------
+    // Handle tail bytes
+
+    switch(len)
+    {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0];
+        h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash2.h b/MurmurHash2.h
index 38dbbeb..32993c2 100644
--- a/MurmurHash2.h
+++ b/MurmurHash2.h
@@ -1,39 +1,39 @@
-//-----------------------------------------------------------------------------
-// MurmurHash2 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH2_H_
-#define _MURMURHASH2_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );
-uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );
-uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
-uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH2_H_
-
+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH2_H_
+#define _MURMURHASH2_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );
+uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );
+uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
+uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH2_H_
+
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 0bf7386..09ffb26 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -1,335 +1,335 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
-#include "MurmurHash3.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE	__forceinline
-
-#include <stdlib.h>
-
-#define ROTL32(x,y)	_rotl(x,y)
-#define ROTL64(x,y)	_rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#define	FORCE_INLINE __attribute__((always_inline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
-  return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
-  return (x << r) | (x >> (64 - r));
-}
-
-#define	ROTL32(x,y)	rotl32(x,y)
-#define ROTL64(x,y)	rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
-{
-  return p[i];
-}
-
-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
-{
-  return p[i];
-}
-
-//-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
-
-FORCE_INLINE uint32_t fmix ( uint32_t h )
-{
-  h ^= h >> 16;
-  h *= 0x85ebca6b;
-  h ^= h >> 13;
-  h *= 0xc2b2ae35;
-  h ^= h >> 16;
-
-  return h;
-}
-
-//----------
-
-FORCE_INLINE uint64_t fmix ( uint64_t k )
-{
-  k ^= k >> 33;
-  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
-  k ^= k >> 33;
-  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
-  k ^= k >> 33;
-
-  return k;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len,
-                          uint32_t seed, void * out )
-{
-  const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 4;
-
-  uint32_t h1 = seed;
-
-  uint32_t c1 = 0xcc9e2d51;
-  uint32_t c2 = 0x1b873593;
-
-  //----------
-  // body
-
-  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
-
-  for(int i = -nblocks; i; i++)
-  {
-    uint32_t k1 = getblock(blocks,i);
-
-    k1 *= c1;
-    k1 = ROTL32(k1,15);
-    k1 *= c2;
-    
-    h1 ^= k1;
-    h1 = ROTL32(h1,13); 
-    h1 = h1*5+0xe6546b64;
-  }
-
-  //----------
-  // tail
-
-  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
-
-  uint32_t k1 = 0;
-
-  switch(len & 3)
-  {
-  case 3: k1 ^= tail[2] << 16;
-  case 2: k1 ^= tail[1] << 8;
-  case 1: k1 ^= tail[0];
-          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-  };
-
-  //----------
-  // finalization
-
-  h1 ^= len;
-
-  h1 = fmix(h1);
-
-  *(uint32_t*)out = h1;
-} 
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_128 ( const void * key, const int len,
-                           uint32_t seed, void * out )
-{
-  const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 16;
-
-  uint32_t h1 = seed;
-  uint32_t h2 = seed;
-  uint32_t h3 = seed;
-  uint32_t h4 = seed;
-
-  uint32_t c1 = 0x239b961b; 
-  uint32_t c2 = 0xab0e9789;
-  uint32_t c3 = 0x38b34ae5; 
-  uint32_t c4 = 0xa1e38b93;
-
-  //----------
-  // body
-
-  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
-
-  for(int i = -nblocks; i; i++)
-  {
-    uint32_t k1 = getblock(blocks,i*4+0);
-    uint32_t k2 = getblock(blocks,i*4+1);
-    uint32_t k3 = getblock(blocks,i*4+2);
-    uint32_t k4 = getblock(blocks,i*4+3);
-
-    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-
-    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
-
-    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
-
-    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
-
-    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
-
-    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
-
-    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
-    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
-  }
-
-  //----------
-  // tail
-
-  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
-  uint32_t k1 = 0;
-  uint32_t k2 = 0;
-  uint32_t k3 = 0;
-  uint32_t k4 = 0;
-
-  switch(len & 15)
-  {
-  case 15: k4 ^= tail[14] << 16;
-  case 14: k4 ^= tail[13] << 8;
-  case 13: k4 ^= tail[12] << 0;
-           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
-  case 12: k3 ^= tail[11] << 24;
-  case 11: k3 ^= tail[10] << 16;
-  case 10: k3 ^= tail[ 9] << 8;
-  case  9: k3 ^= tail[ 8] << 0;
-           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
-
-  case  8: k2 ^= tail[ 7] << 24;
-  case  7: k2 ^= tail[ 6] << 16;
-  case  6: k2 ^= tail[ 5] << 8;
-  case  5: k2 ^= tail[ 4] << 0;
-           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
-
-  case  4: k1 ^= tail[ 3] << 24;
-  case  3: k1 ^= tail[ 2] << 16;
-  case  2: k1 ^= tail[ 1] << 8;
-  case  1: k1 ^= tail[ 0] << 0;
-           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-  };
-
-  //----------
-  // finalization
-
-  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
-
-  h1 += h2; h1 += h3; h1 += h4;
-  h2 += h1; h3 += h1; h4 += h1;
-
-  h1 = fmix(h1);
-  h2 = fmix(h2);
-  h3 = fmix(h3);
-  h4 = fmix(h4);
-
-  h1 += h2; h1 += h3; h1 += h4;
-  h2 += h1; h3 += h1; h4 += h1;
-
-  ((uint32_t*)out)[0] = h1;
-  ((uint32_t*)out)[1] = h2;
-  ((uint32_t*)out)[2] = h3;
-  ((uint32_t*)out)[3] = h4;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x64_128 ( const void * key, const int len,
-                           const uint32_t seed, void * out )
-{
-  const uint8_t * data = (const uint8_t*)key;
-  const int nblocks = len / 16;
-
-  uint64_t h1 = seed;
-  uint64_t h2 = seed;
-
-  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
-  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
-  //----------
-  // body
-
-  const uint64_t * blocks = (const uint64_t *)(data);
-
-  for(int i = 0; i < nblocks; i++)
-  {
-    uint64_t k1 = getblock(blocks,i*2+0);
-    uint64_t k2 = getblock(blocks,i*2+1);
-
-    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
-
-    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
-
-    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
-    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
-  }
-
-  //----------
-  // tail
-
-  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
-  uint64_t k1 = 0;
-  uint64_t k2 = 0;
-
-  switch(len & 15)
-  {
-  case 15: k2 ^= uint64_t(tail[14]) << 48;
-  case 14: k2 ^= uint64_t(tail[13]) << 40;
-  case 13: k2 ^= uint64_t(tail[12]) << 32;
-  case 12: k2 ^= uint64_t(tail[11]) << 24;
-  case 11: k2 ^= uint64_t(tail[10]) << 16;
-  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
-  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
-           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
-  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
-  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
-  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
-  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
-  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
-  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
-  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
-  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
-           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
-  };
-
-  //----------
-  // finalization
-
-  h1 ^= len; h2 ^= len;
-
-  h1 += h2;
-  h2 += h1;
-
-  h1 = fmix(h1);
-  h2 = fmix(h2);
-
-  h1 += h2;
-  h2 += h1;
-
-  ((uint64_t*)out)[0] = h1;
-  ((uint64_t*)out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
-
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+  return p[i];
+}
+
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  uint32_t c1 = 0xcc9e2d51;
+  uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    
+    h1 ^= k1;
+    h1 = ROTL32(h1,13); 
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix(h1);
+
+  *(uint32_t*)out = h1;
+} 
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  uint32_t c1 = 0x239b961b; 
+  uint32_t c2 = 0xab0e9789;
+  uint32_t c3 = 0x38b34ae5; 
+  uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i*4+0);
+    uint32_t k2 = getblock(blocks,i*4+1);
+    uint32_t k3 = getblock(blocks,i*4+2);
+    uint32_t k4 = getblock(blocks,i*4+3);
+
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+  h3 = fmix(h3);
+  h4 = fmix(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= uint64_t(tail[14]) << 48;
+  case 14: k2 ^= uint64_t(tail[13]) << 40;
+  case 13: k2 ^= uint64_t(tail[12]) << 32;
+  case 12: k2 ^= uint64_t(tail[11]) << 24;
+  case 11: k2 ^= uint64_t(tail[10]) << 16;
+  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
+  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
+  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
+  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
+  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
+  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
+  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
+  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash3.h b/MurmurHash3.h
index 58e9820..54e9d3f 100644
--- a/MurmurHash3.h
+++ b/MurmurHash3.h
@@ -1,37 +1,37 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else	// defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH3_H_
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
diff --git a/Platform.cpp b/Platform.cpp
index d90dab8..d7f5fb8 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -1,42 +1,42 @@
-#include "Platform.h"
-
-#include <stdio.h>
-
-void testRDTSC ( void )
-{
-  int64_t temp = rdtsc();
-
-  printf("%d",(int)temp);
-}
-
-#if defined(_MSC_VER)
-
-#include <windows.h>
-
-void SetAffinity ( int cpu )
-{
-  SetProcessAffinityMask(GetCurrentProcess(),cpu);
-  SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
-}
-
-#else
-
-#include <sched.h>
-
-void SetAffinity ( int /*cpu*/ )
-{
-#ifndef __CYGWIN__
-  cpu_set_t mask;
-    
-  CPU_ZERO(&mask);
-    
-  CPU_SET(2,&mask);
-    
-  if( sched_setaffinity(0,sizeof(mask),&mask) == -1)
-  {
-    printf("WARNING: Could not set CPU affinity\n");
-  }
-#endif
-}
-
-#endif
+#include "Platform.h"
+
+#include <stdio.h>
+
+void testRDTSC ( void )
+{
+  int64_t temp = rdtsc();
+
+  printf("%d",(int)temp);
+}
+
+#if defined(_MSC_VER)
+
+#include <windows.h>
+
+void SetAffinity ( int cpu )
+{
+  SetProcessAffinityMask(GetCurrentProcess(),cpu);
+  SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
+}
+
+#else
+
+#include <sched.h>
+
+void SetAffinity ( int /*cpu*/ )
+{
+#ifndef __CYGWIN__
+  cpu_set_t mask;
+    
+  CPU_ZERO(&mask);
+    
+  CPU_SET(2,&mask);
+    
+  if( sched_setaffinity(0,sizeof(mask),&mask) == -1)
+  {
+    printf("WARNING: Could not set CPU affinity\n");
+  }
+#endif
+}
+
+#endif
diff --git a/Platform.h b/Platform.h
index 8bb0d58..fcb68e8 100644
--- a/Platform.h
+++ b/Platform.h
@@ -1,85 +1,94 @@
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-#pragma once
-
-void SetAffinity ( int cpu );
-
-//-----------------------------------------------------------------------------
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE	__forceinline
-#define	NEVER_INLINE  __declspec(noinline)
-
-#include <stdlib.h>
-#include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'
-#include <intrin.h> // for __rdtsc
-#include "pstdint.h"
-
-#define ROTL32(x,y)	_rotl(x,y)
-#define ROTL64(x,y)	_rotl64(x,y)
-#define ROTR32(x,y)	_rotr(x,y)
-#define ROTR64(x,y)	_rotr64(x,y)
-
-#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
-#pragma warning(disable : 4100)
-#pragma warning(disable : 4702)
-
-#define BIG_CONSTANT(x) (x)
-
-// RDTSC == Read Time Stamp Counter
-
-#define rdtsc() __rdtsc()
-
-//-----------------------------------------------------------------------------
-// Other compilers
-
-#else	//	defined(_MSC_VER)
-
-#include <stdint.h>
-
-#define	FORCE_INLINE __attribute__((always_inline))
-#define	NEVER_INLINE __attribute__((noinline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
-  return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
-  return (x << r) | (x >> (64 - r));
-}
-
-inline uint32_t rotr32 ( uint32_t x, int8_t r )
-{
-  return (x >> r) | (x << (32 - r));
-}
-
-inline uint64_t rotr64 ( uint64_t x, int8_t r )
-{
-  return (x >> r) | (x << (64 - r));
-}
-
-#define	ROTL32(x,y)	rotl32(x,y)
-#define ROTL64(x,y)	rotl64(x,y)
-#define	ROTR32(x,y)	rotr32(x,y)
-#define ROTR64(x,y)	rotr64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-__inline__ unsigned long long int rdtsc()
-{
-    unsigned long long int x;
-    __asm__ volatile ("rdtsc" : "=A" (x));
-    return x;
-}
-
-#include <strings.h>
-#define _stricmp strcasecmp
-
-#endif	//	!defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+#pragma once
+
+void SetAffinity ( int cpu );
+
+//-----------------------------------------------------------------------------
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+#define	NEVER_INLINE  __declspec(noinline)
+
+#include <stdlib.h>
+#include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'
+#include <intrin.h> // for __rdtsc
+#include "pstdint.h"
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+#define ROTR32(x,y)	_rotr(x,y)
+#define ROTR64(x,y)	_rotr64(x,y)
+
+#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4702)
+
+#define BIG_CONSTANT(x) (x)
+
+// RDTSC == Read Time Stamp Counter
+
+#define rdtsc() __rdtsc()
+
+//-----------------------------------------------------------------------------
+// Other compilers
+
+#else	//	defined(_MSC_VER)
+
+#include <stdint.h>
+
+#define	FORCE_INLINE __attribute__((always_inline))
+#define	NEVER_INLINE __attribute__((noinline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r ) // rotate x left by r bits (r taken modulo 32)
+{
+  return (x << (r & 31)) | (x >> (-r & 31)); // masked counts: r == 0 no longer shifts right by the full width (undefined)
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r ) // rotate x left by r bits (r taken modulo 64)
+{
+  return (x << (r & 63)) | (x >> (-r & 63)); // masked counts: r == 0 no longer shifts right by the full width (undefined)
+}
+
+inline uint32_t rotr32 ( uint32_t x, int8_t r ) // rotate x right by r bits (r taken modulo 32)
+{
+  return (x >> (r & 31)) | (x << (-r & 31)); // masked counts: r == 0 no longer shifts left by the full width (undefined)
+}
+
+inline uint64_t rotr64 ( uint64_t x, int8_t r ) // rotate x right by r bits (r taken modulo 64)
+{
+  return (x >> (r & 63)) | (x << (-r & 63)); // masked counts: r == 0 no longer shifts left by the full width (undefined)
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+#define	ROTR32(x,y)	rotr32(x,y)
+#define ROTR64(x,y)	rotr64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+__inline__ unsigned long long int rdtsc() // read the CPU time stamp counter (x86 / x86-64 only)
+{
+#ifdef __x86_64__
+    unsigned int a, d; // low half comes back in EAX ("=a"), high half in EDX ("=d")
+    __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
+    return (unsigned long long)a | ((unsigned long long)d << 32); // widen to 64 bits first: 'unsigned long' is only 32 bits on LLP64 targets (e.g. MinGW-w64)
+#else
+#ifndef __i386__
+#error Must be x86 either 32-bit or 64-bit.
+#endif
+    unsigned long long int x;
+    __asm__ volatile ("rdtsc" : "=A" (x)); // "=A" binds the EDX:EAX register pair on 32-bit x86
+    return x;
+#endif
+}
+
+#include <strings.h>
+#define _stricmp strcasecmp
+
+#endif	//	!defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
diff --git a/Random.cpp b/Random.cpp
index e98f5ef..87de595 100644
--- a/Random.cpp
+++ b/Random.cpp
@@ -1,8 +1,8 @@
-#include "Random.h"
-
-Rand g_rand1(1);
-Rand g_rand2(2);
-Rand g_rand3(3);
-Rand g_rand4(4);
-
-//-----------------------------------------------------------------------------
+#include "Random.h"
+
+Rand g_rand1(1);
+Rand g_rand2(2);
+Rand g_rand3(3);
+Rand g_rand4(4);
+
+//-----------------------------------------------------------------------------
diff --git a/Random.h b/Random.h
index e5a78fb..7e0df3f 100644
--- a/Random.h
+++ b/Random.h
@@ -1,117 +1,117 @@
-#pragma once
-
-#include "Types.h"
-
-//-----------------------------------------------------------------------------
-// Xorshift RNG based on code by George Marsaglia
-// http://en.wikipedia.org/wiki/Xorshift
-
-struct Rand
-{
-  uint32_t x;
-  uint32_t y;
-  uint32_t z;
-  uint32_t w;
-
-  Rand()
-  {
-    reseed(uint32_t(0));
-  }
-
-  Rand( uint32_t seed )
-  {
-    reseed(seed);
-  }
-
-  void reseed ( uint32_t seed )
-  {
-    x = 0x498b3bc5 ^ seed;
-    y = 0;
-    z = 0;
-    w = 0;
-
-    for(int i = 0; i < 10; i++) mix();
-  }
-
-  void reseed ( uint64_t seed )
-  {
-    x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
-    y = 0x5a05089a ^ (uint32_t)(seed >> 32);
-    z = 0;
-    w = 0;
-
-    for(int i = 0; i < 10; i++) mix();
-  }
-
-  //-----------------------------------------------------------------------------
-
-  void mix ( void )
-  {
-    uint32_t t = x ^ (x << 11);
-    x = y; y = z; z = w;
-    w = w ^ (w >> 19) ^ t ^ (t >> 8); 
-  }
-
-  uint32_t rand_u32 ( void )
-  {
-    mix();
-
-    return x;
-  }
-
-  uint64_t rand_u64 ( void ) 
-  {
-    mix();
-
-    uint64_t a = x;
-    uint64_t b = y;
-
-    return (a << 32) | b;
-  }
-
-  void rand_p ( void * blob, int bytes )
-  {
-    uint32_t * blocks = reinterpret_cast<uint32_t*>(blob);
-
-    while(bytes >= 4)
-    {
-      blocks[0] = rand_u32();
-      blocks++;
-      bytes -= 4;
-    }
-
-    uint8_t * tail = reinterpret_cast<uint8_t*>(blocks);
-
-    for(int i = 0; i < bytes; i++)
-    {
-      tail[i] = (uint8_t)rand_u32();
-    }
-  }
-};
-
-//-----------------------------------------------------------------------------
-
-extern Rand g_rand1;
-
-inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); }
-inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); }
-
-inline void rand_p ( void * blob, int bytes )
-{
-  uint32_t * blocks = (uint32_t*)blob;
-
-  while(bytes >= 4)
-  {
-    *blocks++ = rand_u32();
-    bytes -= 4;
-  }
-
-  uint8_t * tail = (uint8_t*)blocks;
-
-  for(int i = 0; i < bytes; i++)
-  {
-    tail[i] = (uint8_t)rand_u32();
-  }
-}
-
-//-----------------------------------------------------------------------------
+#pragma once
+
+#include "Types.h"
+
+//-----------------------------------------------------------------------------
+// Xorshift RNG based on code by George Marsaglia
+// http://en.wikipedia.org/wiki/Xorshift
+
+struct Rand
+{
+  uint32_t x;
+  uint32_t y;
+  uint32_t z;
+  uint32_t w;
+
+  Rand()
+  {
+    reseed(uint32_t(0));
+  }
+
+  Rand( uint32_t seed )
+  {
+    reseed(seed);
+  }
+
+  void reseed ( uint32_t seed )
+  {
+    x = 0x498b3bc5 ^ seed;
+    y = 0;
+    z = 0;
+    w = 0;
+
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  void reseed ( uint64_t seed )
+  {
+    x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
+    y = 0x5a05089a ^ (uint32_t)(seed >> 32);
+    z = 0;
+    w = 0;
+
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  //-----------------------------------------------------------------------------
+
+  void mix ( void )
+  {
+    uint32_t t = x ^ (x << 11);
+    x = y; y = z; z = w;
+    w = w ^ (w >> 19) ^ t ^ (t >> 8); 
+  }
+
+  uint32_t rand_u32 ( void )
+  {
+    mix();
+
+    return x;
+  }
+
+  uint64_t rand_u64 ( void ) 
+  {
+    mix();
+
+    uint64_t a = x;
+    uint64_t b = y;
+
+    return (a << 32) | b;
+  }
+
+  void rand_p ( void * blob, int bytes )
+  {
+    uint32_t * blocks = reinterpret_cast<uint32_t*>(blob);
+
+    while(bytes >= 4)
+    {
+      blocks[0] = rand_u32();
+      blocks++;
+      bytes -= 4;
+    }
+
+    uint8_t * tail = reinterpret_cast<uint8_t*>(blocks);
+
+    for(int i = 0; i < bytes; i++)
+    {
+      tail[i] = (uint8_t)rand_u32();
+    }
+  }
+};
+
+//-----------------------------------------------------------------------------
+
+extern Rand g_rand1;
+
+inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); }
+inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); }
+
+inline void rand_p ( void * blob, int bytes )
+{
+  uint32_t * blocks = (uint32_t*)blob;
+
+  while(bytes >= 4)
+  {
+    *blocks++ = rand_u32();
+    bytes -= 4;
+  }
+
+  uint8_t * tail = (uint8_t*)blocks;
+
+  for(int i = 0; i < bytes; i++)
+  {
+    tail[i] = (uint8_t)rand_u32();
+  }
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index 2265389..d91f6e4 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -1,242 +1,242 @@
-#include "SpeedTest.h"
-
-#include "Random.h"
-
-#include <stdio.h>   // for printf
-#include <memory.h>  // for memset
-#include <math.h>    // for sqrt
-#include <algorithm> // for sort
-
-//-----------------------------------------------------------------------------
-// We view our timing values as a series of random variables V that has been
-// contaminated with occasional outliers due to cache misses, thread
-// preemption, etcetera. To filter out the outliers, we search for the largest
-// subset of V such that all its values are within three standard deviations
-// of the mean.
-
-double CalcMean ( std::vector<double> & v )
-{
-  double mean = 0;
-  
-  for(int i = 0; i < (int)v.size(); i++)
-  {
-    mean += v[i];
-  }
-  
-  mean /= double(v.size());
-  
-  return mean;
-}
-
-double CalcMean ( std::vector<double> & v, int a, int b )
-{
-  double mean = 0;
-  
-  for(int i = a; i <= b; i++)
-  {
-    mean += v[i];
-  }
-  
-  mean /= (b-a+1);
-  
-  return mean;
-}
-
-double CalcStdv ( std::vector<double> & v, int a, int b )
-{
-  double mean = CalcMean(v,a,b);
-
-  double stdv = 0;
-  
-  for(int i = a; i <= b; i++)
-  {
-    double x = v[i] - mean;
-    
-    stdv += x*x;
-  }
-  
-  stdv = sqrt(stdv / (b-a+1));
-  
-  return stdv;
-}
-
-// Return true if the largest value in v[0,len) is more than three
-// standard deviations from the mean
-
-bool ContainsOutlier ( std::vector<double> & v, size_t len )
-{
-  double mean = 0;
-  
-  for(size_t i = 0; i < len; i++)
-  {
-    mean += v[i];
-  }
-  
-  mean /= double(len);
-  
-  double stdv = 0;
-  
-  for(size_t i = 0; i < len; i++)
-  {
-    double x = v[i] - mean;
-    stdv += x*x;
-  }
-  
-  stdv = sqrt(stdv / double(len));
-
-  double cutoff = mean + stdv*3;
-  
-  return v[len-1] > cutoff;  
-}
-
-// Do a binary search to find the largest subset of v that does not contain
-// outliers.
-
-void FilterOutliers ( std::vector<double> & v )
-{
-  std::sort(v.begin(),v.end());
-  
-  size_t len = 0;
-  
-  for(size_t x = 0x40000000; x; x = x >> 1 )
-  {
-    if((len | x) >= v.size()) continue;
-    
-    if(!ContainsOutlier(v,len | x))
-    {
-      len |= x;
-    }
-  }
-  
-  v.resize(len);
-}
-
-// Iteratively tighten the set to find a subset that does not contain
-// outliers. I'm not positive this works correctly in all cases.
-
-void FilterOutliers2 ( std::vector<double> & v )
-{
-  std::sort(v.begin(),v.end());
-  
-  int a = 0;
-  int b = (int)(v.size() - 1);
-  
-  for(int i = 0; i < 10; i++)
-  {
-    //printf("%d %d\n",a,b);
-  
-    double mean = CalcMean(v,a,b);
-    double stdv = CalcStdv(v,a,b);
-    
-    double cutA = mean - stdv*3;  
-    double cutB = mean + stdv*3;
-    
-    while((a < b) && (v[a] < cutA)) a++;
-    while((b > a) && (v[b] > cutB)) b--;
-  }
-  
-  std::vector<double> v2;
-  
-  v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
-  
-  v.swap(v2);
-}
-
-//-----------------------------------------------------------------------------
-// We really want the rdtsc() calls to bracket the function call as tightly
-// as possible, but that's hard to do portably. We'll try and get as close as
-// possible by marking the function as NEVER_INLINE (to keep the optimizer from
-// moving it) and marking the timing variables as "volatile register".
-
-NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
-{
-  volatile register int64_t begin,end;
-  
-  uint32_t temp[16];
-  
-  begin = rdtsc();
-  
-  hash(key,len,seed,temp);
-  
-  end = rdtsc();
-  
-  return end-begin;
-}
-
-//-----------------------------------------------------------------------------
-
-double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
-{
-  Rand r(seed);
-  
-  uint8_t * buf = new uint8_t[blocksize + 512];
-
-  uint64_t t1 = reinterpret_cast<uint64_t>(buf);
-  
-  t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
-  t1 += align;
-  
-  uint8_t * block = reinterpret_cast<uint8_t*>(t1);
-
-  r.rand_p(block,blocksize);
-
-  //----------
-
-  std::vector<double> times;
-  times.reserve(trials);
-
-  for(int itrial = 0; itrial < trials; itrial++)
-  {
-    r.rand_p(block,blocksize);
-    
-    double t = (double)timehash(hash,block,blocksize,itrial);
-    
-    if(t > 0) times.push_back(t);
-  }
-
-  //----------
-  
-  std::sort(times.begin(),times.end());
-  
-  FilterOutliers(times);
-  
-  delete [] buf;
-  
-  return CalcMean(times);
-}
-
-//-----------------------------------------------------------------------------
-// 256k blocks seem to give the best results.
-
-void BulkSpeedTest ( pfHash hash, uint32_t seed )
-{
-  const int trials = 2999;
-  const int blocksize = 256 * 1024;
-
-  printf("Bulk speed test - %d-byte keys\n",blocksize);
-
-  for(int align = 0; align < 8; align++)
-  {
-    double cycles = SpeedTest(hash,seed,trials,blocksize,align);
-    
-    double bestbpc = double(blocksize)/cycles;
-    
-    double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
-    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )
-{
-  const int trials = 999999;
-
-  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
-  
-  double cycles = SpeedTest(hash,seed,trials,keysize,0);
-  
-  printf("%8.2f cycles/hash\n",cycles);  
-}
-
-//-----------------------------------------------------------------------------
+#include "SpeedTest.h"
+
+#include "Random.h"
+
+#include <stdio.h>   // for printf
+#include <memory.h>  // for memset
+#include <math.h>    // for sqrt
+#include <algorithm> // for sort
+
+//-----------------------------------------------------------------------------
+// We view our timing values as a series of random variables V that has been
+// contaminated with occasional outliers due to cache misses, thread
+// preemption, etcetera. To filter out the outliers, we search for the largest
+// subset of V such that all its values are within three standard deviations
+// of the mean.
+
+double CalcMean ( std::vector<double> & v )
+{
+  double mean = 0;
+  
+  for(int i = 0; i < (int)v.size(); i++)
+  {
+    mean += v[i];
+  }
+  
+  mean /= double(v.size());
+  
+  return mean;
+}
+
+double CalcMean ( std::vector<double> & v, int a, int b )
+{
+  double mean = 0;
+  
+  for(int i = a; i <= b; i++)
+  {
+    mean += v[i];
+  }
+  
+  mean /= (b-a+1);
+  
+  return mean;
+}
+
+double CalcStdv ( std::vector<double> & v, int a, int b )
+{
+  double mean = CalcMean(v,a,b);
+
+  double stdv = 0;
+  
+  for(int i = a; i <= b; i++)
+  {
+    double x = v[i] - mean;
+    
+    stdv += x*x;
+  }
+  
+  stdv = sqrt(stdv / (b-a+1));
+  
+  return stdv;
+}
+
+// Return true if the largest value in v[0,len) is more than three
+// standard deviations from the mean
+
+bool ContainsOutlier ( std::vector<double> & v, size_t len )
+{
+  double mean = 0;
+  
+  for(size_t i = 0; i < len; i++)
+  {
+    mean += v[i];
+  }
+  
+  mean /= double(len);
+  
+  double stdv = 0;
+  
+  for(size_t i = 0; i < len; i++)
+  {
+    double x = v[i] - mean;
+    stdv += x*x;
+  }
+  
+  stdv = sqrt(stdv / double(len));
+
+  double cutoff = mean + stdv*3;
+  
+  return v[len-1] > cutoff;  
+}
+
+// Sort v, then do a bitwise binary search for the longest prefix of the
+// sorted samples that ContainsOutlier() accepts, and truncate v to it.
+
+void FilterOutliers ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+  
+  size_t len = 0; // length of the accepted prefix, built up one bit at a time
+  
+  for(size_t x = 0x40000000; x; x = x >> 1 )
+  {
+    if((len | x) > v.size()) continue; // a prefix of exactly v.size() is valid; '>=' used to always drop the largest sample
+    
+    if(!ContainsOutlier(v,len | x))
+    {
+      len |= x;
+    }
+  }
+  
+  v.resize(len);
+}
+
+// Iteratively tighten the set to find a subset that does not contain
+// outliers. I'm not positive this works correctly in all cases.
+
+void FilterOutliers2 ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+  
+  int a = 0;
+  int b = (int)(v.size() - 1);
+  
+  for(int i = 0; i < 10; i++)
+  {
+    //printf("%d %d\n",a,b);
+  
+    double mean = CalcMean(v,a,b);
+    double stdv = CalcStdv(v,a,b);
+    
+    double cutA = mean - stdv*3;  
+    double cutB = mean + stdv*3;
+    
+    while((a < b) && (v[a] < cutA)) a++;
+    while((b > a) && (v[b] > cutB)) b--;
+  }
+  
+  std::vector<double> v2;
+  
+  v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
+  
+  v.swap(v2);
+}
+
+//-----------------------------------------------------------------------------
+// We really want the rdtsc() calls to bracket the function call as tightly
+// as possible, but that's hard to do portably. We'll try and get as close as
+// possible by marking the function as NEVER_INLINE (to keep the optimizer from
+// moving it) and marking the timing variables as "volatile register".
+
+NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
+{
+  volatile register int64_t begin,end;
+  
+  uint32_t temp[16];
+  
+  begin = rdtsc();
+  
+  hash(key,len,seed,temp);
+  
+  end = rdtsc();
+  
+  return end-begin;
+}
+
+//-----------------------------------------------------------------------------
+
+double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
+{
+  Rand r(seed);
+  
+  uint8_t * buf = new uint8_t[blocksize + 512];
+
+  uint64_t t1 = reinterpret_cast<uint64_t>(buf);
+  
+  t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
+  t1 += align;
+  
+  uint8_t * block = reinterpret_cast<uint8_t*>(t1);
+
+  r.rand_p(block,blocksize);
+
+  //----------
+
+  std::vector<double> times;
+  times.reserve(trials);
+
+  for(int itrial = 0; itrial < trials; itrial++)
+  {
+    r.rand_p(block,blocksize);
+    
+    double t = (double)timehash(hash,block,blocksize,itrial);
+    
+    if(t > 0) times.push_back(t);
+  }
+
+  //----------
+  
+  std::sort(times.begin(),times.end());
+  
+  FilterOutliers(times);
+  
+  delete [] buf;
+  
+  return CalcMean(times);
+}
+
+//-----------------------------------------------------------------------------
+// 256k blocks seem to give the best results.
+
+void BulkSpeedTest ( pfHash hash, uint32_t seed )
+{
+  const int trials = 2999;
+  const int blocksize = 256 * 1024;
+
+  printf("Bulk speed test - %d-byte keys\n",blocksize);
+
+  for(int align = 0; align < 8; align++)
+  {
+    double cycles = SpeedTest(hash,seed,trials,blocksize,align);
+    
+    double bestbpc = double(blocksize)/cycles;
+    
+    double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
+    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles )
+{
+  const int trials = 999999; // number of timed hash calls handed to SpeedTest()
+  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+  
+  double cycles = SpeedTest(hash,seed,trials,keysize,0);
+  outCycles = cycles; // hand the measurement back; the out-parameter was previously never written
+  
+  printf("%8.2f cycles/hash\n",cycles);  
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.h b/SpeedTest.h
index b881a78..7bd2167 100644
--- a/SpeedTest.h
+++ b/SpeedTest.h
@@ -1,8 +1,8 @@
-#pragma once
-
-#include "Types.h"
-
-void BulkSpeedTest ( pfHash hash, uint32_t seed );
-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles );
-
-//-----------------------------------------------------------------------------
+#pragma once
+
+#include "Types.h"
+
+void BulkSpeedTest ( pfHash hash, uint32_t seed );
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles );
+
+//-----------------------------------------------------------------------------
diff --git a/Spooky.cpp b/Spooky.cpp
new file mode 100644
index 0000000..47f5d75
--- /dev/null
+++ b/Spooky.cpp
@@ -0,0 +1,347 @@
+// Spooky Hash
+// A 128-bit noncryptographic hash, for checksums and table lookup
+// By Bob Jenkins.  Public domain.
+//   Oct 31 2010: published framework, disclaimer ShortHash isn't right
+//   Nov 7 2010: disabled ShortHash
+//   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
+
+#include <memory.h>
+#include "Spooky.h"
+
+#define ALLOW_UNALIGNED_READS 1
+
+//
+// short hash ... it could be used on any message, 
+// but it's used by Spooky just for short messages.
+//
+void SpookyHash::Short(
+    const void *message,   // bytes to hash
+    size_t length,         // number of bytes at message
+    uint64 *hash1,         // in: first seed word, out: first 64 bits of the hash
+    uint64 *hash2)         // in: second seed word, out: second 64 bits of the hash
+{
+    uint64 buf[2*sc_numVars]; // aligned scratch copy: two blocks, since Hash128()/Final() send anything below sc_bufSize here (assumes sc_bufSize is two blocks, as Update()'s double Mix of m_data implies); a single block overflowed in the memcpy below
+    union
+    {
+        const uint8 *p8;
+        uint32 *p32;
+        uint64 *p64;
+        size_t i;
+    } u;
+
+    u.p8 = (const uint8 *)message;
+
+    if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
+    {
+        memcpy(buf, message, length); // NOTE(review): still unsafe if Short() is called directly with length > sizeof(buf)
+        u.p64 = buf;
+    }
+
+    size_t remainder = length%32;
+    uint64 a=*hash1;
+    uint64 b=*hash2;
+    uint64 c=sc_const;
+    uint64 d=sc_const;
+
+    if (length > 15)
+    {
+        const uint64 *end = u.p64 + (length/32)*4;
+
+        // handle all complete sets of 32 bytes
+        for (; u.p64 < end; u.p64 += 4)
+        {
+            c += u.p64[0];
+            d += u.p64[1];
+            ShortMix(a,b,c,d);
+            a += u.p64[2];
+            b += u.p64[3];
+        }
+
+        // Handle the case of 16+ remaining bytes.
+        if (remainder >= 16)
+        {
+            c += u.p64[0];
+            d += u.p64[1];
+            ShortMix(a,b,c,d);
+            u.p64 += 2;
+            remainder -= 16;
+        }
+    }
+
+    // Handle the last 0..15 bytes, and its length
+    d = ((uint64)length) << 56; // plain assignment replaces the previous d; left as is because changing it would change every hash value
+    switch (remainder)
+    {
+    case 15:
+        d += ((uint64)u.p8[14]) << 48; // fallthrough
+    case 14:
+        d += ((uint64)u.p8[13]) << 40; // fallthrough
+    case 13:
+        d += ((uint64)u.p8[12]) << 32; // fallthrough
+    case 12:
+        d += u.p32[2];
+        c += u.p64[0];
+        break;
+    case 11:
+        d += ((uint64)u.p8[10]) << 16; // fallthrough
+    case 10:
+        d += ((uint64)u.p8[9]) << 8; // fallthrough
+    case 9:
+        d += (uint64)u.p8[8]; // fallthrough
+    case 8:
+        c += u.p64[0];
+        break;
+    case 7:
+        c += ((uint64)u.p8[6]) << 48; // fallthrough
+    case 6:
+        c += ((uint64)u.p8[5]) << 40; // fallthrough
+    case 5:
+        c += ((uint64)u.p8[4]) << 32; // fallthrough
+    case 4:
+        c += u.p32[0];
+        break;
+    case 3:
+        c += ((uint64)u.p8[2]) << 16; // fallthrough
+    case 2:
+        c += ((uint64)u.p8[1]) << 8; // fallthrough
+    case 1:
+        c += (uint64)u.p8[0];
+        break;
+    case 0:
+        c += sc_const;
+        d += sc_const;
+    }
+    ShortEnd(a,b,c,d);
+    *hash1 = a;
+    *hash2 = b;
+}
+
+
+
+
+// do the whole hash in one call
+void SpookyHash::Hash128(
+    const void *message, 
+    size_t length, 
+    uint64 *hash1, 
+    uint64 *hash2)
+{
+    if (length < sc_bufSize)
+    {
+        Short(message, length, hash1, hash2);
+        return;
+    }
+
+    uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
+    uint64 buf[sc_numVars];
+    uint64 *end;
+    union 
+    { 
+        const uint8 *p8; 
+        uint64 *p64; 
+        size_t i; 
+    } u;
+    size_t remainder;
+    
+    h0=h3=h6=h9  = *hash1;
+    h1=h4=h7=h10 = *hash2;
+    h2=h5=h8=h11 = sc_const;
+    
+    u.p8 = (const uint8 *)message;
+    end = u.p64 + (length/sc_blockSize)*sc_numVars;
+
+    // handle all whole sc_blockSize blocks of bytes
+    if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
+    {
+        while (u.p64 < end)
+        { 
+            Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+    else
+    {
+        while (u.p64 < end)
+        {
+            memcpy(buf, u.p64, sc_blockSize);
+            Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+
+    // handle the last partial block of sc_blockSize bytes
+    remainder = (length - ((const uint8 *)end-(const uint8 *)message));
+    memcpy(buf, end, remainder);
+    memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder);
+    ((uint8 *)buf)[sc_blockSize-1] = remainder;
+    Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    
+    // do some final mixing 
+    End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    *hash1 = h0;
+    *hash2 = h1;
+}
+
+
+
+// init spooky state
+void SpookyHash::Init(uint64 seed1, uint64 seed2)
+{
+    m_length = 0;        // total bytes seen so far
+    m_remainder = 0;     // bytes currently buffered in m_data
+    m_state[0] = seed1;  // only the first two state words are set here;
+    m_state[1] = seed2;  // Update() fills the others once it starts mixing
+}
+
+
+// add a message fragment to the state
+void SpookyHash::Update(const void *message, size_t length)
+{
+    uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
+    size_t newLength = length + m_remainder;
+    uint8  remainder;
+    union 
+    { 
+        const uint8 *p8; 
+        uint64 *p64; 
+        size_t i; 
+    } u;
+    const uint64 *end;
+    
+    // Is this message fragment too short?  If it is, stuff it away.
+    if (newLength < sc_bufSize)
+    {
+        memcpy(&((uint8 *)m_data)[m_remainder], message, length);
+        m_length = length + m_length;
+        m_remainder = (uint8)newLength;
+        return;
+    }
+    
+    // init the variables
+    if (m_length < sc_bufSize)
+    {
+        h0=h3=h6=h9  = m_state[0];
+        h1=h4=h7=h10 = m_state[1];
+        h2=h5=h8=h11 = sc_const;
+    }
+    else
+    {
+        h0 = m_state[0];
+        h1 = m_state[1];
+        h2 = m_state[2];
+        h3 = m_state[3];
+        h4 = m_state[4];
+        h5 = m_state[5];
+        h6 = m_state[6];
+        h7 = m_state[7];
+        h8 = m_state[8];
+        h9 = m_state[9];
+        h10 = m_state[10];
+        h11 = m_state[11];
+    }
+    m_length = length + m_length;
+    
+    // if we've got anything stuffed away, use it now
+    if (m_remainder)
+    {
+        uint8 prefix = sc_bufSize-m_remainder;
+        memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix);
+        u.p64 = m_data;
+        Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        u.p8 = ((const uint8 *)message) + prefix;
+        length -= prefix;
+    }
+    else
+    {
+        u.p8 = (const uint8 *)message;
+    }
+    
+    // handle all whole blocks of sc_blockSize bytes
+    end = u.p64 + (length/sc_blockSize)*sc_numVars;
+    remainder = (uint8)(length-((const uint8 *)end-u.p8));
+    if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
+    {
+        while (u.p64 < end)
+        { 
+            Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+    else
+    {
+        while (u.p64 < end)
+        { 
+            memcpy(m_data, u.p8, sc_blockSize);
+            Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+
+    // stuff away the last few bytes
+    m_remainder = remainder;
+    memcpy(m_data, end, remainder);
+    
+    // stuff away the variables
+    m_state[0] = h0;
+    m_state[1] = h1;
+    m_state[2] = h2;
+    m_state[3] = h3;
+    m_state[4] = h4;
+    m_state[5] = h5;
+    m_state[6] = h6;
+    m_state[7] = h7;
+    m_state[8] = h8;
+    m_state[9] = h9;
+    m_state[10] = h10;
+    m_state[11] = h11;
+}
+
+
+// report the hash for the concatenation of all message fragments so far
+void SpookyHash::Final(uint64 *hash1, uint64 *hash2)
+{
+    // short input: Update() only buffered it, so hash m_data with the Init() seeds
+    if (m_length < sc_bufSize)
+    {
+        *hash1 = m_state[0]; *hash2 = m_state[1]; // Short() takes its seeds through these pointers; they were previously left as whatever the caller passed in
+        Short( m_data, m_length, hash1, hash2);
+        return;
+    }
+    const uint64 *data = (const uint64 *)m_data;
+    uint8 remainder = m_remainder;
+
+    uint64 h0 = m_state[0];
+    uint64 h1 = m_state[1];
+    uint64 h2 = m_state[2];
+    uint64 h3 = m_state[3];
+    uint64 h4 = m_state[4];
+    uint64 h5 = m_state[5];
+    uint64 h6 = m_state[6];
+    uint64 h7 = m_state[7];
+    uint64 h8 = m_state[8];
+    uint64 h9 = m_state[9];
+    uint64 h10 = m_state[10];
+    uint64 h11 = m_state[11];
+
+    if (remainder >= sc_blockSize)
+    {
+        // m_data can contain two blocks; handle any whole first block
+        Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        data += sc_numVars;
+        remainder -= sc_blockSize;
+    }
+
+    // mix in the last partial block, and the length mod sc_blockSize
+    memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); // zero-pads inside m_data itself (const cast away)
+
+    ((uint8 *)data)[sc_blockSize-1] = remainder;
+    Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+
+    // do some final mixing
+    End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+
+    *hash1 = h0;
+    *hash2 = h1;
+}
+
diff --git a/Spooky.h b/Spooky.h
new file mode 100644
index 0000000..cafd52e
--- /dev/null
+++ b/Spooky.h
@@ -0,0 +1,293 @@
+//
+// SpookyHash: a 128-bit noncryptographic hash function
+// By Bob Jenkins, public domain
+//   Oct 31 2010: alpha, framework + SpookyHash::Mix appears right
+//   Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right
+//   Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas
+//   Feb  2 2012: production, same bits as beta
+//   Feb  5 2012: adjusted definitions of uint* to be more portable
+// 
+// Up to 4 bytes/cycle for long messages.  Reasonably fast for short messages.
+// All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.
+//
+// This was developed for and tested on 64-bit x86-compatible processors.
+// It assumes the processor is little-endian.  There is a macro
+// controlling whether unaligned reads are allowed (by default they are).
+// This should be an equally good hash on big-endian machines, but it will
+// compute different results on them than on little-endian machines.
+//
+// Google's CityHash has similar specs to SpookyHash, and CityHash is faster
+// on some platforms.  MD4 and MD5 also have similar specs, but they are orders
+// of magnitude slower.  CRCs are two or more times slower, but unlike 
+// SpookyHash, they have nice math for combining the CRCs of pieces to form 
+// the CRCs of wholes.  There are also cryptographic hashes, but those are even 
+// slower than MD5.
+//
+
+#include <stddef.h>
+
+#ifdef _MSC_VER
+# define INLINE __forceinline
+  typedef  unsigned __int64 uint64;
+  typedef  unsigned __int32 uint32;
+  typedef  unsigned __int16 uint16;
+  typedef  unsigned __int8  uint8;
+#else
+# include <stdint.h>
+# define INLINE inline
+  typedef  uint64_t  uint64;
+  typedef  uint32_t  uint32;
+  typedef  uint16_t  uint16;
+  typedef  uint8_t   uint8;
+#endif
+
+
+class SpookyHash
+{
+public:
+    //
+    // SpookyHash: hash a single message in one call, produce 128-bit output
+    //
+    static void Hash128(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint64 *hash1,        // in/out: in seed 1, out hash value 1
+        uint64 *hash2);       // in/out: in seed 2, out hash value 2
+
+    //
+    // Hash64: hash a single message in one call, return 64-bit output
+    //
+    static uint64 Hash64(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint64 seed)          // seed
+    {
+        uint64 hash1 = seed;
+        Hash128(message, length, &hash1, &seed);
+        return hash1;
+    }
+
+    //
+    // Hash32: hash a single message in one call, produce 32-bit output
+    //
+    static uint32 Hash32(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint32 seed)          // seed
+    {
+        uint64 hash1 = seed, hash2 = seed;
+        Hash128(message, length, &hash1, &hash2);
+        return (uint32)hash1;
+    }
+
+    //
+    // Init: initialize the context of a SpookyHash
+    //
+    void Init(
+        uint64 seed1,       // any 64-bit value will do, including 0
+        uint64 seed2);      // different seeds produce independent hashes
+    
+    //
+    // Update: add a piece of a message to a SpookyHash state
+    //
+    void Update(
+        const void *message,  // message fragment
+        size_t length);       // length of message fragment in bytes
+
+
+    //
+    // Final: compute the hash for the current SpookyHash state
+    //
+    // This does not modify the state; you can keep updating it afterward
+    //
+    // The result is the same as if Hash128() had been called with
+    // all the pieces concatenated into one message.
+    //
+    void Final(
+        uint64 *hash1,    // out only: first 64 bits of hash value.
+        uint64 *hash2);   // out only: second 64 bits of hash value.
+
+    //
+    // left rotate a 64-bit value by k bits (requires 0 < k < 64)
+    //
+    static INLINE uint64 Rot64(uint64 x, int k)
+    {
+        return (x << k) | (x >> (64 - k));
+    }
+
+    //
+    // This is used if the input is 96 bytes long or longer.
+    //
+    // The internal state is fully overwritten every 96 bytes.
+    // Every input bit appears to cause at least 128 bits of entropy
+    // before 96 other bytes are combined, when run forward or backward
+    //   For every input bit,
+    //   Two inputs differing in just that input bit
+    //   Where "differ" means xor or subtraction
+    //   And the base value is random
+    //   When run forward or backwards one Mix
+    // I tried 3 pairs of each; they all differed by at least 212 bits.
+    //
+    static INLINE void Mix(
+        const uint64 *data, 
+        uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3,
+        uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7,
+        uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11)
+    {
+      s0 += data[0];    s2 ^= s10;    s11 ^= s0;    s0 = Rot64(s0,11);    s11 += s1;
+      s1 += data[1];    s3 ^= s11;    s0 ^= s1;    s1 = Rot64(s1,32);    s0 += s2;
+      s2 += data[2];    s4 ^= s0;    s1 ^= s2;    s2 = Rot64(s2,43);    s1 += s3;
+      s3 += data[3];    s5 ^= s1;    s2 ^= s3;    s3 = Rot64(s3,31);    s2 += s4;
+      s4 += data[4];    s6 ^= s2;    s3 ^= s4;    s4 = Rot64(s4,17);    s3 += s5;
+      s5 += data[5];    s7 ^= s3;    s4 ^= s5;    s5 = Rot64(s5,28);    s4 += s6;
+      s6 += data[6];    s8 ^= s4;    s5 ^= s6;    s6 = Rot64(s6,39);    s5 += s7;
+      s7 += data[7];    s9 ^= s5;    s6 ^= s7;    s7 = Rot64(s7,57);    s6 += s8;
+      s8 += data[8];    s10 ^= s6;    s7 ^= s8;    s8 = Rot64(s8,55);    s7 += s9;
+      s9 += data[9];    s11 ^= s7;    s8 ^= s9;    s9 = Rot64(s9,54);    s8 += s10;
+      s10 += data[10];    s0 ^= s8;    s9 ^= s10;    s10 = Rot64(s10,22);    s9 += s11;
+      s11 += data[11];    s1 ^= s9;    s10 ^= s11;    s11 = Rot64(s11,46);    s10 += s0;
+    }
+
+    //
+    // Mix all 12 inputs together so that h0, h1 are a hash of them all.
+    //
+    // For two inputs differing in just the input bits
+    // Where "differ" means xor or subtraction
+    // And the base value is random, or a counting value starting at that bit
+    // The final result will have each bit of h0, h1 flip
+    // For every input bit,
+    // with probability 50 +- .3%
+    // For every pair of input bits,
+    // with probability 50 +- 3%
+    //
+    // This does not rely on the last Mix() call having already mixed some.
+    // Two iterations was almost good enough for a 64-bit result, but a
+    // 128-bit result is reported, so End() does three iterations.
+    //
+    static INLINE void EndPartial(
+        uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
+        uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
+        uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
+    {
+        h11+= h1;    h2 ^= h11;   h1 = Rot64(h1,44);
+	h0 += h2;    h3 ^= h0;    h2 = Rot64(h2,15);
+	h1 += h3;    h4 ^= h1;    h3 = Rot64(h3,34);
+	h2 += h4;    h5 ^= h2;    h4 = Rot64(h4,21);
+	h3 += h5;    h6 ^= h3;    h5 = Rot64(h5,38);
+	h4 += h6;    h7 ^= h4;    h6 = Rot64(h6,33);
+	h5 += h7;    h8 ^= h5;    h7 = Rot64(h7,10);
+	h6 += h8;    h9 ^= h6;    h8 = Rot64(h8,13);
+	h7 += h9;    h10^= h7;    h9 = Rot64(h9,38);
+	h8 += h10;   h11^= h8;    h10= Rot64(h10,53);
+	h9 += h11;   h0 ^= h9;    h11= Rot64(h11,42);
+	h10+= h0;    h1 ^= h10;   h0 = Rot64(h0,54);
+    }
+
+    static INLINE void End(
+        uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
+        uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
+        uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
+    {
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    }
+
+    //
+    // The goal is for each bit of the input to expand into 128 bits of 
+    //   apparent entropy before it is fully overwritten.
+    // n trials both set and cleared at least m bits of h0 h1 h2 h3
+    //   n: 2   m: 29
+    //   n: 3   m: 46
+    //   n: 4   m: 57
+    //   n: 5   m: 107
+    //   n: 6   m: 146
+    //   n: 7   m: 152
+    // when run forwards or backwards
+    // for all 1-bit and 2-bit diffs
+    // with diffs defined by either xor or subtraction
+    // with a base of all zeros plus a counter, or plus another bit, or random
+    //
+    static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
+    {
+        h2 = Rot64(h2,50);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,52);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,30);  h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,41);  h1 += h2;  h3 ^= h1;
+        h2 = Rot64(h2,54);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,48);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,38);  h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,37);  h1 += h2;  h3 ^= h1;
+        h2 = Rot64(h2,62);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,34);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,5);   h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,36);  h1 += h2;  h3 ^= h1;
+    }
+
+    //
+    // Mix all 4 inputs together so that h0, h1 are a hash of them all.
+    //
+    // For two inputs differing in just the input bits
+    // Where "differ" means xor or subtraction
+    // And the base value is random, or a counting value starting at that bit
+    // The final result will have each bit of h0, h1 flip
+    // For every input bit,
+    // with probability 50 +- .3% (it is probably better than that)
+    // For every pair of input bits,
+    // with probability 50 +- .75% (the worst case is approximately that)
+    //
+    static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
+    {
+        h3 ^= h2;  h2 = Rot64(h2,15);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,52);  h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,26);  h1 += h0;
+        h2 ^= h1;  h1 = Rot64(h1,51);  h2 += h1;
+        h3 ^= h2;  h2 = Rot64(h2,28);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,9);   h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,47);  h1 += h0;
+        h2 ^= h1;  h1 = Rot64(h1,54);  h2 += h1;
+        h3 ^= h2;  h2 = Rot64(h2,32);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,25);  h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,63);  h1 += h0;
+    }
+    
+private:
+
+    //
+    // Short is used for messages under 192 bytes in length
+    // Short has a low startup cost, the normal mode is good for long
+    // keys, the cost crossover is at about 192 bytes.  The two modes were
+    // held to the same quality bar.
+    // 
+    static void Short(
+        const void *message,
+        size_t length,
+        uint64 *hash1,
+        uint64 *hash2);
+
+    // number of uint64's in internal state
+    static const size_t sc_numVars = 12;
+
+    // size of the internal state
+    static const size_t sc_blockSize = sc_numVars*8;
+
+    // size of buffer of unhashed data, in bytes
+    static const size_t sc_bufSize = 2*sc_blockSize;
+
+    //
+    // sc_const: a constant which:
+    //  * is not zero
+    //  * is odd
+    //  * is a not-very-regular mix of 1's and 0's
+    //  * does not need any other special mathematical properties
+    //
+    static const uint64 sc_const = 0xdeadbeefdeadbeefLL;
+
+    uint64 m_data[2*sc_numVars];   // unhashed data, for partial messages
+    uint64 m_state[sc_numVars];  // internal state of the hash
+    size_t m_length;             // total length of the input so far
+    uint8  m_remainder;          // length of unhashed data stashed in m_data
+};
+
+
+
diff --git a/SpookyTest.cpp b/SpookyTest.cpp
new file mode 100644
index 0000000..df9021e
--- /dev/null
+++ b/SpookyTest.cpp
@@ -0,0 +1,16 @@
+#include "Spooky.h"
+
+void SpookyHash32_test(const void *key, int len, uint32_t seed, void *out) {
+  *(uint32_t*)out = SpookyHash::Hash32(key, len, seed);
+}
+
+void SpookyHash64_test(const void *key, int len, uint32_t seed, void *out) {
+  *(uint64_t*)out = SpookyHash::Hash64(key, len, seed);
+}
+
+void SpookyHash128_test(const void *key, int len, uint32_t seed, void *out) {
+  uint64_t h1 = seed, h2 = seed;
+  SpookyHash::Hash128(key, len, &h1, &h2);
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
diff --git a/Stats.cpp b/Stats.cpp
index 55e99fc..4452290 100644
--- a/Stats.cpp
+++ b/Stats.cpp
@@ -1,99 +1,99 @@
-#include "Stats.h"
-
-//-----------------------------------------------------------------------------
-
-double chooseK ( int n, int k )
-{
-  if(k > (n - k)) k = n - k;
-
-  double c = 1;
-
-  for(int i = 0; i < k; i++)
-  {
-    double t = double(n-i) / double(i+1);
-
-    c *= t;
-  }
-
-    return c;
-}
-
-double chooseUpToK ( int n, int k )
-{
-  double c = 0;
-
-  for(int i = 1; i <= k; i++)
-  {
-    c += chooseK(n,i);
-  }
-
-  return c;
-}
-
-//-----------------------------------------------------------------------------
-// Distribution "score"
-// TODO - big writeup of what this score means
-
-// Basically, we're computing a constant that says "The test distribution is as
-// uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
-// the bins. This makes for a nice uniform way to rate a distribution that isn't
-// dependent on the number of bins or the number of keys
-
-// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
-// as distribution weaknesses)
-
-double calcScore ( const int * bins, const int bincount, const int keycount )
-{
-  double n = bincount;
-  double k = keycount;
-
-  // compute rms value
-
-  double r = 0;
-
-  for(int i = 0; i < bincount; i++)
-  {
-    double b = bins[i];
-
-    r += b*b;
-  }
-
-  r = sqrt(r / n);
-
-  // compute fill factor
-
-  double f = (k*k - 1) / (n*r*r - k);
-
-  // rescale to (0,1) with 0 = good, 1 = bad
-
-  return 1 - (f / n);
-}
-
-
-//----------------------------------------------------------------------------
-
-void plot ( double n )
-{
-  double n2 = n * 1;
-
-  if(n2 < 0) n2 = 0;
-
-  n2 *= 100;
-
-  if(n2 > 64) n2 = 64;
-
-  int n3 = (int)n2;
-
-  if(n3 == 0)
-    printf(".");
-  else
-  {
-    char x = '0' + char(n3);
-
-    if(x > '9') x = 'X';
-
-    printf("%c",x);
-  }
-}
-
-//-----------------------------------------------------------------------------
+#include "Stats.h"
+
+//-----------------------------------------------------------------------------
+
+double chooseK ( int n, int k )
+{
+  if(k > (n - k)) k = n - k;
+
+  double c = 1;
+
+  for(int i = 0; i < k; i++)
+  {
+    double t = double(n-i) / double(i+1);
+
+    c *= t;
+  }
+
+    return c;
+}
+
+double chooseUpToK ( int n, int k )
+{
+  double c = 0;
+
+  for(int i = 1; i <= k; i++)
+  {
+    c += chooseK(n,i);
+  }
+
+  return c;
+}
+
+//-----------------------------------------------------------------------------
+// Distribution "score"
+// TODO - big writeup of what this score means
+
+// Basically, we're computing a constant that says "The test distribution is as
+// uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
+// the bins." This makes for a nice uniform way to rate a distribution that isn't
+// dependent on the number of bins or the number of keys.
+
+// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
+// as distribution weaknesses)
+
+double calcScore ( const int * bins, const int bincount, const int keycount )
+{
+  double n = bincount;
+  double k = keycount;
+
+  // compute rms value
+
+  double r = 0;
+
+  for(int i = 0; i < bincount; i++)
+  {
+    double b = bins[i];
+
+    r += b*b;
+  }
+
+  r = sqrt(r / n);
+
+  // compute fill factor
+
+  double f = (k*k - 1) / (n*r*r - k);
+
+  // rescale to (0,1) with 0 = good, 1 = bad
+
+  return 1 - (f / n);
+}
+
+
+//----------------------------------------------------------------------------
+
+void plot ( double n )
+{
+  double n2 = n * 1;
+
+  if(n2 < 0) n2 = 0;
+
+  n2 *= 100;
+
+  if(n2 > 64) n2 = 64;
+
+  int n3 = (int)n2;
+
+  if(n3 == 0)
+    printf(".");
+  else
+  {
+    char x = '0' + char(n3);
+
+    if(x > '9') x = 'X';
+
+    printf("%c",x);
+  }
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Stats.h b/Stats.h
index c80393e..3565e80 100644
--- a/Stats.h
+++ b/Stats.h
@@ -1,388 +1,388 @@
-#pragma once
-
-#include "Types.h"
-
-#include <math.h>
-#include <vector>
-#include <map>
-#include <algorithm>   // for std::sort
-#include <string.h>    // for memset
-#include <stdio.h>     // for printf
-
-double calcScore ( const int * bins, const int bincount, const int ballcount );
-
-void plot ( double n );
-
-inline double ExpectedCollisions ( double balls, double bins )
-{
-  return balls - bins + bins * pow(1 - 1/bins,balls);
-}
-
-double chooseK ( int b, int k );
-double chooseUpToK ( int n, int k );
-
-//-----------------------------------------------------------------------------
-
-inline uint32_t f3mix ( uint32_t k )
-{
-  k ^= k >> 16;
-  k *= 0x85ebca6b;
-  k ^= k >> 13;
-  k *= 0xc2b2ae35;
-  k ^= k >> 16;
-
-  return k;
-}
-
-//-----------------------------------------------------------------------------
-// Sort the hash list, count the total number of collisions and return
-// the first N collisions for further processing
-
-template< typename hashtype >
-int FindCollisions ( std::vector<hashtype> & hashes, 
-                     HashSet<hashtype> & collisions,
-                     int maxCollisions )
-{
-  int collcount = 0;
-
-  std::sort(hashes.begin(),hashes.end());
-
-  for(size_t i = 1; i < hashes.size(); i++)
-  {
-    if(hashes[i] == hashes[i-1])
-    {
-      collcount++;
-
-      if((int)collisions.size() < maxCollisions)
-      {
-        collisions.insert(hashes[i]);
-      }
-    }
-  }
-
-  return collcount;
-}
-
-//-----------------------------------------------------------------------------
-
-template < class keytype, typename hashtype >
-int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
-{
-  int collcount = 0;
-
-  typedef std::map<hashtype,keytype> htab;
-  htab tab;
-
-  for(size_t i = 1; i < keys.size(); i++)
-  {
-    keytype & k1 = keys[i];
-
-    hashtype h = hash(&k1,sizeof(keytype),0);
-
-    typename htab::iterator it = tab.find(h);
-
-    if(it != tab.end())
-    {
-      keytype & k2 = (*it).second;
-
-      printf("A: ");
-      printbits(&k1,sizeof(keytype));
-      printf("B: ");
-      printbits(&k2,sizeof(keytype));
-    }
-    else
-    {
-      tab.insert( std::make_pair(h,k1) );
-    }
-  }
-
-  return collcount;
-}
-
-//----------------------------------------------------------------------------
-// Measure the distribution "score" for each possible N-bit span up to 20 bits
-
-template< typename hashtype >
-double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
-{
-  printf("Testing distribution - ");
-
-  if(drawDiagram) printf("\n");
-
-  const int hashbits = sizeof(hashtype) * 8;
-
-  int maxwidth = 20;
-
-  // We need at least 5 keys per bin to reliably test distribution biases
-  // down to 1%, so don't bother to test sparser distributions than that
-
-  while(double(hashes.size()) / double(1 << maxwidth) < 5.0)
-  {
-    maxwidth--;
-  }
-
-  std::vector<int> bins;
-  bins.resize(1 << maxwidth);
-
-  double worst = 0;
-  int worstStart = -1;
-  int worstWidth = -1;
-
-  for(int start = 0; start < hashbits; start++)
-  {
-    int width = maxwidth;
-    int bincount = (1 << width);
-
-    memset(&bins[0],0,sizeof(int)*bincount);
-
-    for(size_t j = 0; j < hashes.size(); j++)
-    {
-      hashtype & hash = hashes[j];
-
-      uint32_t index = window(&hash,sizeof(hash),start,width);
-
-      bins[index]++;
-    }
-
-    // Test the distribution, then fold the bins in half,
-    // repeat until we're down to 256 bins
-
-    if(drawDiagram) printf("[");
-
-    while(bincount >= 256)
-    {
-      double n = calcScore(&bins[0],bincount,(int)hashes.size());
-
-      if(drawDiagram) plot(n);
-
-      if(n > worst)
-      {
-        worst = n;
-        worstStart = start;
-        worstWidth = width;
-      }
-
-      width--;
-      bincount /= 2;
-
-      if(width < 8) break;
-
-      for(int i = 0; i < bincount; i++)
-      {
-        bins[i] += bins[i+bincount];
-      }
-    }
-
-    if(drawDiagram) printf("]\n");
-  }
-
-  double pct = worst * 100.0;
-
-  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
-  if(pct >= 1.0) printf(" !!!!! ");
-  printf("\n");
-
-  return worst;
-}
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype >
-bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )
-{
-  bool result = true;
-
-  {
-    size_t count = hashes.size();
-
-    double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
-
-    printf("Testing collisions   - Expected %8.2f, ",expected);
-
-    double collcount = 0;
-
-    HashSet<hashtype> collisions;
-
-    collcount = FindCollisions(hashes,collisions,1000);
-
-    printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
-
-    if(sizeof(hashtype) == sizeof(uint32_t))
-    {
-    // 2x expected collisions = fail
-
-    // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
-    // of a scale factor, otherwise we fail erroneously if there are a small expected number
-    // of collisions
-
-    if(double(collcount) / double(expected) > 2.0)
-    {
-      printf(" !!!!! ");
-      result = false;
-    }
-    }
-    else
-    {
-      // For all hashes larger than 32 bits, _any_ collisions are a failure.
-      
-      if(collcount > 0)
-      {
-        printf(" !!!!! ");
-        result = false;
-      }
-    }
-
-    printf("\n");
-  }
-
-  //----------
-
-  if(testDist)
-  {
-    TestDistribution(hashes,drawDiagram);
-  }
-
-  return result;
-}
-
-//----------
-
-template < typename hashtype >
-bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )
-{
-  std::vector<hashtype> collisions;
-
-  return TestHashList(hashes,collisions,testDist,drawDiagram);
-}
-
-//-----------------------------------------------------------------------------
-
-template < class keytype, typename hashtype >
-bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
-{
-  int keycount = (int)keys.size();
-
-  std::vector<hashtype> hashes;
-
-  hashes.resize(keycount);
-
-  printf("Hashing");
-
-  for(int i = 0; i < keycount; i++)
-  {
-    if(i % (keycount / 10) == 0) printf(".");
-
-    keytype & k = keys[i];
-
-    hash(&k,sizeof(k),0,&hashes[i]);
-  }
-
-  printf("\n");
-
-  bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
-
-  printf("\n");
-
-  return result;
-}
-
-//-----------------------------------------------------------------------------
-// Bytepair test - generate 16-bit indices from all possible non-overlapping
-// 8-bit sections of the hash value, check distribution on all of them.
-
-// This is a very good test for catching weak intercorrelations between bits - 
-// much harder to pass than the normal distribution test. However, it doesn't
-// really model the normal usage of hash functions in hash table lookup, so
-// I'm not sure it's that useful (and hash functions that fail this test but
-// pass the normal distribution test still work well in practice)
-
-template < typename hashtype >
-double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
-{
-  const int nbytes = sizeof(hashtype);
-  const int hashbits = nbytes * 8;
-  
-  const int nbins = 65536;
-
-  std::vector<int> bins(nbins,0);
-
-  double worst = 0;
-
-  for(int a = 0; a < hashbits; a++)
-  {
-    if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
-
-    if(drawDiagram) printf("[");
-
-    for(int b = 0; b < hashbits; b++)
-    {
-      if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
-
-      bins.clear();
-      bins.resize(nbins,0);
-
-      for(size_t i = 0; i < hashes.size(); i++)
-      {
-        hashtype & hash = hashes[i];
-
-        uint32_t pa = window(&hash,sizeof(hash),a,8);
-        uint32_t pb = window(&hash,sizeof(hash),b,8);
-
-        bins[pa | (pb << 8)]++;
-      }
-
-      double s = calcScore(bins,bins.size(),hashes.size());
-
-      if(drawDiagram) plot(s);
-
-      if(s > worst)
-      {
-        worst = s;
-      }
-    }
-
-    if(drawDiagram) printf("]\n");
-  }
-
-  return worst;
-}
-
-//-----------------------------------------------------------------------------
-// Simplified test - only check 64k distributions, and only on byte boundaries
-
-template < typename hashtype >
-void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
-{
-  const int hashbits = sizeof(hashtype) * 8;
-  const int nbins = 65536;
-  
-  std::vector<int> bins(nbins,0);
-
-  dworst = -1.0e90;
-  davg = 0;
-
-  for(int start = 0; start < hashbits; start += 8)
-  {
-    bins.clear();
-    bins.resize(nbins,0);
-
-    for(size_t j = 0; j < hashes.size(); j++)
-    {
-      hashtype & hash = hashes[j];
-
-      uint32_t index = window(&hash,sizeof(hash),start,16);
-
-      bins[index]++;
-    }
-
-    double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
-    
-    davg += n;
-
-    if(n > dworst) dworst = n;
-  }
-
-  davg /= double(hashbits/8);
-}
-
-//-----------------------------------------------------------------------------
+#pragma once
+
+#include "Types.h"
+
+#include <math.h>
+#include <vector>
+#include <map>
+#include <algorithm>   // for std::sort
+#include <string.h>    // for memset
+#include <stdio.h>     // for printf
+
+double calcScore ( const int * bins, const int bincount, const int ballcount );
+
+void plot ( double n );
+
+inline double ExpectedCollisions ( double balls, double bins )
+{
+  return balls - bins + bins * pow(1 - 1/bins,balls);
+}
+
+double chooseK ( int b, int k );
+double chooseUpToK ( int n, int k );
+
+//-----------------------------------------------------------------------------
+
+inline uint32_t f3mix ( uint32_t k )
+{
+  k ^= k >> 16;
+  k *= 0x85ebca6b;
+  k ^= k >> 13;
+  k *= 0xc2b2ae35;
+  k ^= k >> 16;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+// Sort the hash list, count the total number of collisions and return
+// the first N collisions for further processing
+
+template< typename hashtype >
+int FindCollisions ( std::vector<hashtype> & hashes, 
+                     HashSet<hashtype> & collisions,
+                     int maxCollisions )
+{
+  int collcount = 0;
+
+  std::sort(hashes.begin(),hashes.end());
+
+  for(size_t i = 1; i < hashes.size(); i++)
+  {
+    if(hashes[i] == hashes[i-1])
+    {
+      collcount++;
+
+      if((int)collisions.size() < maxCollisions)
+      {
+        collisions.insert(hashes[i]);
+      }
+    }
+  }
+
+  return collcount;
+}
+
+//-----------------------------------------------------------------------------
+
+template < class keytype, typename hashtype >
+int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
+{
+  // Hash each key with seed 0 and print every key pair that shares a hash.
+  // Returns the number of colliding keys that were printed.
+  int collcount = 0;
+  typedef std::map<hashtype,keytype> htab;
+  htab tab;
+
+  // Start at 0: skipping the first key would hide any collision with it.
+  for(size_t i = 0; i < keys.size(); i++)
+  {
+    keytype & k1 = keys[i];
+    hashtype h;
+    hash(&k1,sizeof(keytype),0,&h);   // same 4-argument form TestKeyList uses
+    typename htab::iterator it = tab.find(h);
+
+    if(it != tab.end())
+    {
+      keytype & k2 = (*it).second;
+      collcount++;
+      printf("A: ");
+      printbits(&k1,sizeof(keytype));
+      printf("B: ");
+      printbits(&k2,sizeof(keytype));
+    }
+    else
+    {
+      tab.insert( std::make_pair(h,k1) );
+    }
+  }
+  return collcount;
+}
+
+//----------------------------------------------------------------------------
+// Measure the distribution "score" for each possible N-bit span up to 20 bits
+
+template< typename hashtype >
+double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+  printf("Testing distribution - ");
+
+  if(drawDiagram) printf("\n");
+
+  const int hashbits = sizeof(hashtype) * 8;
+
+  int maxwidth = 20;
+
+  // We need at least 5 keys per bin to reliably test distribution biases
+  // down to 1%, so don't bother to test sparser distributions than that
+
+  while(double(hashes.size()) / double(1 << maxwidth) < 5.0)
+  {
+    maxwidth--;
+  }
+
+  std::vector<int> bins;
+  bins.resize(1 << maxwidth);
+
+  double worst = 0;
+  int worstStart = -1;
+  int worstWidth = -1;
+
+  for(int start = 0; start < hashbits; start++)
+  {
+    int width = maxwidth;
+    int bincount = (1 << width);
+
+    memset(&bins[0],0,sizeof(int)*bincount);
+
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
+
+      uint32_t index = window(&hash,sizeof(hash),start,width);
+
+      bins[index]++;
+    }
+
+    // Test the distribution, then fold the bins in half,
+    // repeat until we're down to 256 bins
+
+    if(drawDiagram) printf("[");
+
+    while(bincount >= 256)
+    {
+      double n = calcScore(&bins[0],bincount,(int)hashes.size());
+
+      if(drawDiagram) plot(n);
+
+      if(n > worst)
+      {
+        worst = n;
+        worstStart = start;
+        worstWidth = width;
+      }
+
+      width--;
+      bincount /= 2;
+
+      if(width < 8) break;
+
+      for(int i = 0; i < bincount; i++)
+      {
+        bins[i] += bins[i+bincount];
+      }
+    }
+
+    if(drawDiagram) printf("]\n");
+  }
+
+  double pct = worst * 100.0;
+
+  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
+  if(pct >= 1.0) printf(" !!!!! ");
+  printf("\n");
+
+  return worst;
+}
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype >
+bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )
+{
+  bool result = true;
+
+  {
+    size_t count = hashes.size();
+
+    double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
+
+    printf("Testing collisions   - Expected %8.2f, ",expected);
+
+    double collcount = 0;
+
+    HashSet<hashtype> collisions;
+
+    collcount = FindCollisions(hashes,collisions,1000);
+
+    printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
+
+    if(sizeof(hashtype) == sizeof(uint32_t))
+    {
+    // 2x expected collisions = fail
+
+    // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
+    // of a scale factor, otherwise we fail erroneously if there are a small expected number
+    // of collisions
+
+    if(double(collcount) / double(expected) > 2.0)
+    {
+      printf(" !!!!! ");
+      result = false;
+    }
+    }
+    else
+    {
+      // For all hashes larger than 32 bits, _any_ collisions are a failure.
+      
+      if(collcount > 0)
+      {
+        printf(" !!!!! ");
+        result = false;
+      }
+    }
+
+    printf("\n");
+  }
+
+  //----------
+
+  if(testDist)
+  {
+    TestDistribution(hashes,drawDiagram);
+  }
+
+  return result;
+}
+
+//----------
+
+template < typename hashtype >
+bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )
+{
+  std::vector<hashtype> collisions;
+
+  return TestHashList(hashes,collisions,testDist,drawDiagram);
+}
+
+//-----------------------------------------------------------------------------
+
+template < class keytype, typename hashtype >
+bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
+{
+  // Hash every key with seed 0, then hand the hashes to TestHashList.
+  int keycount = (int)keys.size();
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  // Progress-dot interval; clamped to 1 so that fewer than 10 keys cannot
+  // cause a modulo-by-zero in the loop below.
+  int dotstep = (keycount >= 10) ? (keycount / 10) : 1;
+
+  printf("Hashing");
+
+  for(int i = 0; i < keycount; i++)
+  {
+    if(i % dotstep == 0) printf(".");
+
+    keytype & k = keys[i];
+
+    hash(&k,sizeof(k),0,&hashes[i]);
+  }
+
+  printf("\n");
+  bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
+  printf("\n");
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Bytepair test - generate 16-bit indices from all possible non-overlapping
+// 8-bit sections of the hash value, check distribution on all of them.
+
+// This is a very good test for catching weak intercorrelations between bits - 
+// much harder to pass than the normal distribution test. However, it doesn't
+// really model the normal usage of hash functions in hash table lookup, so
+// I'm not sure it's that useful (and hash functions that fail this test but
+// pass the normal distribution test still work well in practice)
+
+template < typename hashtype >
+double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+  // Scores the 65536-bin histogram built from every pair (a,b) of 8-bit
+  // windows of the hash and returns the worst (largest) calcScore value.
+  const int hashbits = sizeof(hashtype) * 8;
+  const int nbins = 65536;
+
+  std::vector<int> bins(nbins,0);
+
+  double worst = 0;
+
+  for(int a = 0; a < hashbits; a++)
+  {
+    if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
+
+    if(drawDiagram) printf("[");
+
+    for(int b = 0; b < hashbits; b++)
+    {
+      if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
+
+      bins.clear();
+      bins.resize(nbins,0);
+
+      for(size_t i = 0; i < hashes.size(); i++)
+      {
+        hashtype & hash = hashes[i];
+
+        uint32_t pa = window(&hash,sizeof(hash),a,8);
+        uint32_t pb = window(&hash,sizeof(hash),b,8);
+
+        bins[pa | (pb << 8)]++;
+      }
+
+      // calcScore takes a raw int array and int counts, not a std::vector
+      double s = calcScore(&bins[0],(int)bins.size(),(int)hashes.size());
+      if(drawDiagram) plot(s);
+
+      if(s > worst)
+      {
+        worst = s;
+      }
+    }
+
+    if(drawDiagram) printf("]\n");
+  }
+
+  return worst;
+}
+
+//-----------------------------------------------------------------------------
+// Simplified test - only check 64k distributions, and only on byte boundaries
+
+template < typename hashtype >
+void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
+{
+  const int hashbits = sizeof(hashtype) * 8;
+  const int nbins = 65536;
+  
+  std::vector<int> bins(nbins,0);
+
+  dworst = -1.0e90;
+  davg = 0;
+
+  for(int start = 0; start < hashbits; start += 8)
+  {
+    bins.clear();
+    bins.resize(nbins,0);
+
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
+
+      uint32_t index = window(&hash,sizeof(hash),start,16);
+
+      bins[index]++;
+    }
+
+    double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
+    
+    davg += n;
+
+    if(n > dworst) dworst = n;
+  }
+
+  davg /= double(hashbits/8);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
index 38d030d..1f6d39a 100644
--- a/SuperFastHash.cpp
+++ b/SuperFastHash.cpp
@@ -1,76 +1,76 @@
-#include "Platform.h"
-#include <stdio.h> // for NULL
-
-/* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 
-   license. See: 
-   http://www.azillionmonkeys.com/qed/weblicense.html for license details.
-
-   http://www.azillionmonkeys.com/qed/hash.html */
-
-/*
-#undef get16bits
-#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
-  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
-#define get16bits(d) (*((const uint16_t *) (d)))
-#endif
-
-#if !defined (get16bits)
-#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
-                       +(uint32_t)(((const uint8_t *)(d))[0]) )
-#endif
-*/
-
-FORCE_INLINE uint16_t get16bits ( const void * p )
-{
-  return *(const uint16_t*)p;
-}
-
-uint32_t SuperFastHash (const signed char * data, int len) {
-uint32_t hash = 0, tmp;
-int rem;
-
-  if (len <= 0 || data == NULL) return 0;
-
-  rem = len & 3;
-  len >>= 2;
-
-  /* Main loop */
-  for (;len > 0; len--) {
-    hash  += get16bits (data);
-    tmp    = (get16bits (data+2) << 11) ^ hash;
-    hash   = (hash << 16) ^ tmp;
-    data  += 2*sizeof (uint16_t);
-    hash  += hash >> 11;
-  }
-
-  /* Handle end cases */
-  switch (rem) {
-    case 3:	hash += get16bits (data);
-        hash ^= hash << 16;
-        hash ^= data[sizeof (uint16_t)] << 18;
-        hash += hash >> 11;
-        break;
-    case 2:	hash += get16bits (data);
-        hash ^= hash << 11;
-        hash += hash >> 17;
-        break;
-    case 1: hash += *data;
-        hash ^= hash << 10;
-        hash += hash >> 1;
-  }
-
-  /* Force "avalanching" of final 127 bits */
-  hash ^= hash << 3;
-  hash += hash >> 5;
-  hash ^= hash << 4;
-  hash += hash >> 17;
-  hash ^= hash << 25;
-  hash += hash >> 6;
-
-  return hash;
-}
-
-void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out )
-{
-  *(uint32_t*)out = SuperFastHash((const signed char*)key,len);
-}
+#include "Platform.h"
+#include <stdio.h> // for NULL
+
+/* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 
+   license. See: 
+   http://www.azillionmonkeys.com/qed/weblicense.html for license details.
+
+   http://www.azillionmonkeys.com/qed/hash.html */
+
+/*
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+*/
+
+FORCE_INLINE uint16_t get16bits ( const void * p ) // reads the two bytes at p as a uint16_t in host byte order
+{
+  return *(const uint16_t*)p; // NOTE(review): plain cast, so p may be misaligned - confirm every target tolerates that
+}
+
+uint32_t SuperFastHash (const signed char * data, int len) {
+uint32_t hash = 0, tmp;
+int rem;
+  /* Hashes len bytes at data to 32 bits; empty, negative-length or NULL input hashes to 0. */
+  if (len <= 0 || data == NULL) return 0;
+
+  rem = len & 3;   /* 0-3 trailing bytes left over after the 4-byte blocks */
+  len >>= 2;       /* from here on, len counts whole 4-byte blocks */
+
+  /* Main loop: consumes two 16-bit reads (4 bytes) per iteration */
+  for (;len > 0; len--) {
+    hash  += get16bits (data);
+    tmp    = (get16bits (data+2) << 11) ^ hash;
+    hash   = (hash << 16) ^ tmp;
+    data  += 2*sizeof (uint16_t);
+    hash  += hash >> 11;
+  }
+
+  /* Handle end cases: mix in the rem leftover bytes; rem == 0 matches no case */
+  switch (rem) {
+    case 3:	hash += get16bits (data);
+        hash ^= hash << 16;
+        hash ^= data[sizeof (uint16_t)] << 18; /* data is signed char: a byte >= 0x80 is negative and promotes to a negative int */
+        hash += hash >> 11;
+        break;
+    case 2:	hash += get16bits (data);
+        hash ^= hash << 11;
+        hash += hash >> 17;
+        break;
+    case 1: hash += *data; /* signed char again, so the added value may be negative */
+        hash ^= hash << 10;
+        hash += hash >> 1;
+  }
+
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
+
+  return hash;
+}
+
+void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out ) // adapter with the same parameter list as the other hash wrappers
+{
+  *(uint32_t*)out = SuperFastHash((const signed char*)key,len); // seed is ignored; the 32-bit hash is written to out
+}
diff --git a/Types.cpp b/Types.cpp
index f4c9b05..6ad5312 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -1,148 +1,148 @@
-#include "Types.h"
-
-#include "Random.h"
-
-#include <stdio.h>
-
-uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
-
-//-----------------------------------------------------------------------------
-
-#if defined(_MSC_VER)
-#pragma optimize( "", off )
-#endif
-
-void blackhole ( uint32_t )
-{
-}
-
-uint32_t whitehole ( void )
-{
-  return 0;
-}
-
-#if defined(_MSC_VER)
-#pragma optimize( "", on ) 
-#endif
-
-uint32_t g_verify = 1;
-
-void MixVCode ( const void * blob, int len )
-{
-	g_verify = MurmurOAAT(blob,len,g_verify);
-}
-
-//-----------------------------------------------------------------------------
-
-bool isprime ( uint32_t x )
-{
-  uint32_t p[] = 
-  {
-    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,
-    103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,
-    199,211,223,227,229,233,239,241,251
-  };
-
-  for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++)
-  { 
-    if((x % p[i]) == 0)
-    {
-      return false;
-    }
-  } 
-
-  for(int i = 257; i < 65536; i += 2) 
-  { 
-    if((x % i) == 0)
-    {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-void GenerateMixingConstants ( void )
-{
-  Rand r(8350147);
-
-  int count = 0;
-
-  int trials = 0;
-  int bitfail = 0;
-  int popfail = 0;
-  int matchfail = 0;
-  int primefail = 0;
-
-  //for(uint32_t x = 1; x; x++)
-  while(count < 100)
-  {
-    //if(x % 100000000 == 0) printf(".");
-
-    trials++;
-    uint32_t b = r.rand_u32();
-    //uint32_t b = x;
-
-    //----------
-    // must have between 14 and 18 set bits
-
-    if(popcount(b) < 16) { b = 0; popfail++; }
-    if(popcount(b) > 16) { b = 0; popfail++; }
-
-    if(b == 0) continue;
-
-    //----------
-    // must have 3-5 bits set per 8-bit window
-
-    for(int i = 0; i < 32; i++)
-    {
-      uint32_t c = ROTL32(b,i) & 0xFF;
-
-      if(popcount(c) < 3) { b = 0; bitfail++; break; }
-      if(popcount(c) > 5) { b = 0; bitfail++; break; }
-    }
-
-    if(b == 0) continue;
-
-    //----------
-    // all 8-bit windows must be different
-
-    uint8_t match[256];
-
-    memset(match,0,256);
-
-    for(int i = 0; i < 32; i++)
-    {
-      uint32_t c = ROTL32(b,i) & 0xFF;
-      
-      if(match[c]) { b = 0; matchfail++; break; }
-
-      match[c] = 1;
-    }
-
-    if(b == 0) continue;
-
-    //----------
-    // must be prime
-
-    if(!isprime(b))
-    {
-      b = 0;
-      primefail++;
-    }
-
-    if(b == 0) continue;
-
-    //----------
-
-    if(b)
-    {
-      printf("0x%08x : 0x%08x\n",b,~b);
-      count++;
-    }
-  }
-
-  printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count);
-}
-
-//-----------------------------------------------------------------------------
+#include "Types.h"
+
+#include "Random.h"
+
+#include <stdio.h>
+
+uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#if defined(_MSC_VER)
+#pragma optimize( "", off )
+#endif
+
+void blackhole ( uint32_t ) // sink: accepts a value and does nothing with it
+{
+}
+
+uint32_t whitehole ( void ) // source: always returns 0
+{
+  return 0;
+}
+
+#if defined(_MSC_VER)
+#pragma optimize( "", on ) 
+#endif
+
+uint32_t g_verify = 1;
+
+void MixVCode ( const void * blob, int len ) // folds blob into the global g_verify
+{
+	g_verify = MurmurOAAT(blob,len,g_verify); // the previous g_verify is passed as the seed argument
+}
+
+//-----------------------------------------------------------------------------
+
+// Primality test by trial division. Returns true if and only if x is prime.
+bool isprime ( uint32_t x )
+{
+  static const uint32_t p[] = 
+  {
+    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,
+    103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,
+    199,211,223,227,229,233,239,241,251
+  };
+
+  if(x < 2) return false; // 0 and 1 are not prime
+
+  for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++)
+  { 
+    // A table prime divides itself, so only a proper divisor disproves primality.
+    if((x % p[i]) == 0) return x == p[i];
+  } 
+
+  // Odd candidates past the table; 65535 >= sqrt(2^32 - 1), so this covers every uint32_t.
+  for(uint32_t i = 257; i < 65536; i += 2) 
+  { 
+    if(i * i > x) break; // no divisor up to sqrt(x), so x is prime (i*i fits in 32 bits for i <= 65535)
+    if((x % i) == 0) return false;
+  }
+
+  return true;
+}
+
+void GenerateMixingConstants ( void )
+{
+  Rand r(8350147); // fixed seed
+  // Draws random 32-bit values until 100 pass every filter below, printing each one that passes.
+  int count = 0;
+
+  int trials = 0;
+  int bitfail = 0;
+  int popfail = 0;
+  int matchfail = 0;
+  int primefail = 0;
+
+  //for(uint32_t x = 1; x; x++)
+  while(count < 100)
+  {
+    //if(x % 100000000 == 0) printf(".");
+
+    trials++;
+    uint32_t b = r.rand_u32();
+    //uint32_t b = x;
+
+    //----------
+    // must have exactly 16 set bits (b = 0 marks a rejected candidate)
+
+    if(popcount(b) < 16) { b = 0; popfail++; }
+    if(popcount(b) > 16) { b = 0; popfail++; }
+
+    if(b == 0) continue;
+
+    //----------
+    // must have 3-5 bits set per 8-bit window
+    // (a window is the low byte of ROTL32(b,i) for i = 0..31)
+    for(int i = 0; i < 32; i++)
+    {
+      uint32_t c = ROTL32(b,i) & 0xFF;
+
+      if(popcount(c) < 3) { b = 0; bitfail++; break; }
+      if(popcount(c) > 5) { b = 0; bitfail++; break; }
+    }
+
+    if(b == 0) continue;
+
+    //----------
+    // all 8-bit windows must be different
+
+    uint8_t match[256];
+
+    memset(match,0,256);
+
+    for(int i = 0; i < 32; i++)
+    {
+      uint32_t c = ROTL32(b,i) & 0xFF;
+      
+      if(match[c]) { b = 0; matchfail++; break; }
+
+      match[c] = 1;
+    }
+
+    if(b == 0) continue;
+
+    //----------
+    // must be prime
+
+    if(!isprime(b))
+    {
+      b = 0;
+      primefail++;
+    }
+
+    if(b == 0) continue;
+
+    //----------
+    // passed every filter: print the constant next to its bitwise complement
+    if(b)
+    {
+      printf("0x%08x : 0x%08x\n",b,~b);
+      count++;
+    }
+  }
+  // totals: candidates tried, rejections per filter, constants accepted
+  printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
index ee7ae9d..91e7206 100644
--- a/Types.h
+++ b/Types.h
@@ -1,374 +1,374 @@
-#pragma once
-
-#include "Platform.h"
-#include "Bitvec.h"
-
-#include <memory.h>
-#include <vector>
-#include <map>
-#include <set>
-
-//-----------------------------------------------------------------------------
-// If the optimizer detects that a value in a speed test is constant or unused,
-// the optimizer may remove references to it or otherwise create code that
-// would not occur in a real-world application. To prevent the optimizer from
-// doing this we declare two trivial functions that either sink or source data,
-// and bar the compiler from optimizing them.
-
-void     blackhole ( uint32_t x );
-uint32_t whitehole ( void );
-
-//-----------------------------------------------------------------------------
-// We want to verify that every test produces the same result on every platform
-// To do this, we hash the results of every test to produce an overall
-// verification value for the whole test suite. If two runs produce the same
-// verification value, then every test in both run produced the same results
-
-extern uint32_t g_verify;
-
-// Mix the given blob of data into the verification code
-
-void MixVCode ( const void * blob, int len );
-
-
-//-----------------------------------------------------------------------------
-
-typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
-
-struct ByteVec : public std::vector<uint8_t>
-{
-  ByteVec ( const void * key, int len )
-  {
-    resize(len);
-    memcpy(&front(),key,len);
-  }
-};
-
-template< typename hashtype, typename keytype >
-struct CollisionMap : public std::map< hashtype, std::vector<keytype> >
-{
-};
-
-template< typename hashtype >
-struct HashSet : public std::set<hashtype>
-{
-};
-
-//-----------------------------------------------------------------------------
-
-template < class T >
-class hashfunc
-{
-public:
-
-  hashfunc ( pfHash h ) : m_hash(h)
-  {
-  }
-
-  inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
-  {
-    m_hash(key,len,seed,out);
-  }
-
-  inline operator pfHash ( void ) const
-  {
-    return m_hash;
-  }
-
-  inline T operator () ( const void * key, const int len, const uint32_t seed ) 
-  {
-    T result;
-
-    m_hash(key,len,seed,(uint32_t*)&result);
-
-    return result;
-  }
-
-  pfHash m_hash;
-};
-
-//-----------------------------------------------------------------------------
-// Key-processing callback objects. Simplifies keyset testing a bit.
-
-struct KeyCallback
-{
-  KeyCallback() : m_count(0)
-  {
-  }
-
-  virtual ~KeyCallback()
-  {
-  }
-
-  virtual void operator() ( const void * key, int len )
-  {
-    m_count++;
-  }
-
-  virtual void reserve ( int keycount )
-  {
-  };
-
-  int m_count;
-};
-
-//----------
-
-template<typename hashtype>
-struct HashCallback : public KeyCallback
-{
-  typedef std::vector<hashtype> hashvec;
-
-  HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
-  {
-    m_hashes.clear();
-  }
-
-  virtual void operator () ( const void * key, int len )
-  {
-    size_t newsize = m_hashes.size() + 1;
-    
-    m_hashes.resize(newsize);
-
-    m_pfHash(key,len,0,&m_hashes.back());
-  }
-
-  virtual void reserve ( int keycount )
-  {
-    m_hashes.reserve(keycount);
-  }
-
-  hashvec & m_hashes;
-  pfHash m_pfHash;
-
-  //----------
-
-private:
-
-  HashCallback & operator = ( const HashCallback & );
-};
-
-//----------
-
-template<typename hashtype>
-struct CollisionCallback : public KeyCallback
-{
-  typedef HashSet<hashtype> hashset;
-  typedef CollisionMap<hashtype,ByteVec> collmap;
-
-  CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap ) 
-  : m_pfHash(hash), 
-    m_collisions(collisions),
-    m_collmap(cmap)
-  {
-  }
-
-  virtual void operator () ( const void * key, int len )
-  {
-    hashtype h;
-
-    m_pfHash(key,len,0,&h);
-    
-    if(m_collisions.count(h))
-    {
-      m_collmap[h].push_back( ByteVec(key,len) );
-    }
-  }
-
-  //----------
-
-  pfHash m_pfHash;
-  hashset & m_collisions;
-  collmap & m_collmap;
-
-private:
-
-  CollisionCallback & operator = ( const CollisionCallback & c );
-};
-
-//-----------------------------------------------------------------------------
-
-template < int _bits >
-class Blob
-{
-public:
-
-  Blob()
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      bytes[i] = 0;
-    }
-  }
-
-  Blob ( int x )
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      bytes[i] = 0;
-    }
-
-    *(int*)bytes = x;
-  }
-
-  Blob ( const Blob & k )
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      bytes[i] = k.bytes[i];
-    }
-  }
-
-  Blob & operator = ( const Blob & k )
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      bytes[i] = k.bytes[i];
-    }
-
-    return *this;
-  }
-
-  Blob ( uint64_t a, uint64_t b )
-  {
-    uint64_t t[2] = {a,b};
-    set(&t,16);
-  }
-
-  void set ( const void * blob, size_t len )
-  {
-    const uint8_t * k = (const uint8_t*)blob;
-
-    len = len > sizeof(bytes) ? sizeof(bytes) : len;
-
-    for(size_t i = 0; i < len; i++)
-    {
-      bytes[i] = k[i];
-    }
-
-    for(size_t i = len; i < sizeof(bytes); i++)
-    {
-      bytes[i] = 0;
-    }
-  }
-
-  uint8_t & operator [] ( int i )
-  {
-    return bytes[i];
-  }
-
-  const uint8_t & operator [] ( int i ) const
-  {
-    return bytes[i];
-  }
-
-  //----------
-  // boolean operations
-  
-  bool operator < ( const Blob & k ) const
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      if(bytes[i] < k.bytes[i]) return true;
-      if(bytes[i] > k.bytes[i]) return false;
-    }
-
-    return false;
-  }
-
-  bool operator == ( const Blob & k ) const
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      if(bytes[i] != k.bytes[i]) return false;
-    }
-
-    return true;
-  }
-
-  bool operator != ( const Blob & k ) const
-  {
-    return !(*this == k);
-  }
-
-  //----------
-  // bitwise operations
-
-  Blob operator ^ ( const Blob & k ) const 
-  {
-    Blob t;
-
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      t.bytes[i] = bytes[i] ^ k.bytes[i];
-    }
-
-    return t;
-  }
-
-  Blob & operator ^= ( const Blob & k )
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      bytes[i] ^= k.bytes[i];
-    }
-
-    return *this;
-  }
-
-  int operator & ( int x )
-  {
-    return (*(int*)bytes) & x;
-  }
-
-  Blob & operator &= ( const Blob & k )
-  {
-    for(size_t i = 0; i < sizeof(bytes); i++)
-    {
-      bytes[i] &= k.bytes[i];
-    }
-  }
-
-  Blob operator << ( int c )
-  {
-    Blob t = *this;
-
-    lshift(&t.bytes[0],sizeof(bytes),c);
-
-    return t;
-  }
-
-  Blob operator >> ( int c )
-  {
-    Blob t = *this;
-
-    rshift(&t.bytes[0],sizeof(bytes),c);
-
-    return t;
-  }
-
-  Blob & operator <<= ( int c )
-  {
-    lshift(&bytes[0],sizeof(bytes),c);
-
-    return *this;
-  }
-
-  Blob & operator >>= ( int c )
-  {
-    rshift(&bytes[0],sizeof(bytes),c);
-
-    return *this;
-  }
-
-  //----------
-  
-private:
-
-  uint8_t bytes[(_bits+7)/8];
-};
-
-typedef Blob<128> uint128_t;
-typedef Blob<256> uint256_t;
-
-//-----------------------------------------------------------------------------
+#pragma once
+
+#include "Platform.h"
+#include "Bitvec.h"
+
+#include <memory.h>
+#include <vector>
+#include <map>
+#include <set>
+
+//-----------------------------------------------------------------------------
+// If the optimizer detects that a value in a speed test is constant or unused,
+// the optimizer may remove references to it or otherwise create code that
+// would not occur in a real-world application. To prevent the optimizer from
+// doing this we declare two trivial functions that either sink or source data,
+// and bar the compiler from optimizing them.
+
+void     blackhole ( uint32_t x );
+uint32_t whitehole ( void );
+
+//-----------------------------------------------------------------------------
+// We want to verify that every test produces the same result on every platform
+// To do this, we hash the results of every test to produce an overall
+// verification value for the whole test suite. If two runs produce the same
+// verification value, then every test in both run produced the same results
+
+extern uint32_t g_verify;
+
+// Mix the given blob of data into the verification code
+
+void MixVCode ( const void * blob, int len );
+
+
+//-----------------------------------------------------------------------------
+
+typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
+
+struct ByteVec : public std::vector<uint8_t> // byte vector that owns a copy of the len bytes at key
+{
+  ByteVec ( const void * key, int len )
+  {
+    resize(len);
+    if(len > 0) memcpy(&front(),key,len); // skip the copy for an empty key: front() on an empty vector is undefined
+  }
+};
+
+template< typename hashtype, typename keytype >
+struct CollisionMap : public std::map< hashtype, std::vector<keytype> > // maps a hash value to a list of keys; adds nothing to std::map
+{
+};
+
+template< typename hashtype >
+struct HashSet : public std::set<hashtype> // ordered set of hash values; adds nothing to std::set
+{
+};
+
+//-----------------------------------------------------------------------------
+
+template < class T >
+class hashfunc // wraps a pfHash pointer; T is the type returned by the three-argument call
+{
+public:
+
+  hashfunc ( pfHash h ) : m_hash(h)
+  {
+  }
+  // Forwards to the wrapped function, which writes its result through out.
+  inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
+  {
+    m_hash(key,len,seed,out);
+  }
+  // Implicit conversion back to the raw function pointer.
+  inline operator pfHash ( void ) const
+  {
+    return m_hash;
+  }
+  // Convenience form: hashes into a local T and returns it by value.
+  inline T operator () ( const void * key, const int len, const uint32_t seed ) 
+  {
+    T result;
+
+    m_hash(key,len,seed,(uint32_t*)&result); // NOTE(review): assumes the wrapped hash writes exactly sizeof(T) bytes - confirm
+
+    return result;
+  }
+
+  pfHash m_hash;
+};
+
+//-----------------------------------------------------------------------------
+// Key-processing callback objects. Simplifies keyset testing a bit.
+
+struct KeyCallback // base callback: counts the keys it is handed
+{
+  KeyCallback() : m_count(0)
+  {
+  }
+
+  virtual ~KeyCallback()
+  {
+  }
+  // Called once per key; this base version only increments m_count.
+  virtual void operator() ( const void * key, int len )
+  {
+    m_count++;
+  }
+  // Capacity hint; a no-op in this base class.
+  virtual void reserve ( int keycount )
+  {
+  };
+
+  int m_count;
+};
+
+//----------
+
+template<typename hashtype>
+struct HashCallback : public KeyCallback // appends the hash of every key to a caller-owned vector
+{
+  typedef std::vector<hashtype> hashvec;
+  // Note: empties the caller's vector on construction.
+  HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
+  {
+    m_hashes.clear();
+  }
+  // Grows the vector by one slot and hashes key (seed 0) directly into it; m_count is not touched.
+  virtual void operator () ( const void * key, int len )
+  {
+    size_t newsize = m_hashes.size() + 1;
+    
+    m_hashes.resize(newsize);
+
+    m_pfHash(key,len,0,&m_hashes.back());
+  }
+
+  virtual void reserve ( int keycount )
+  {
+    m_hashes.reserve(keycount);
+  }
+
+  hashvec & m_hashes;
+  pfHash m_pfHash;
+
+  //----------
+
+private:
+  // Private and given no body in this block, so instances cannot be assigned.
+  HashCallback & operator = ( const HashCallback & );
+};
+
+//----------
+
+template<typename hashtype>
+struct CollisionCallback : public KeyCallback // records the keys whose hash is in a given set
+{
+  typedef HashSet<hashtype> hashset;
+  typedef CollisionMap<hashtype,ByteVec> collmap;
+  // collisions: hash values to watch for (only queried here); cmap: receives the matching keys.
+  CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap ) 
+  : m_pfHash(hash), 
+    m_collisions(collisions),
+    m_collmap(cmap)
+  {
+  }
+  // Hashes key with seed 0; if that hash is in m_collisions, stores a copy of the key under it.
+  virtual void operator () ( const void * key, int len )
+  {
+    hashtype h;
+
+    m_pfHash(key,len,0,&h);
+    
+    if(m_collisions.count(h))
+    {
+      m_collmap[h].push_back( ByteVec(key,len) );
+    }
+  }
+
+  //----------
+
+  pfHash m_pfHash;
+  hashset & m_collisions;
+  collmap & m_collmap;
+
+private:
+  // Private and given no body in this block, so instances cannot be assigned.
+  CollisionCallback & operator = ( const CollisionCallback & c );
+};
+
+//-----------------------------------------------------------------------------
+
+template < int _bits >
+class Blob // fixed-width value of _bits bits, held as (_bits+7)/8 raw bytes
+{
+public:
+  // Default: every byte zero.
+  Blob()
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+  }
+  // Zero-fills, then stores x in the leading sizeof(int) bytes in host byte order.
+  Blob ( int x )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+
+    *(int*)bytes = x; // NOTE(review): needs sizeof(bytes) >= sizeof(int) - confirm no narrower Blob is instantiated
+  }
+
+  Blob ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+  }
+
+  Blob & operator = ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+
+    return *this;
+  }
+  // Builds the value from the 16 bytes of {a,b}; set() truncates or zero-pads to sizeof(bytes).
+  Blob ( uint64_t a, uint64_t b )
+  {
+    uint64_t t[2] = {a,b};
+    set(&t,16);
+  }
+  // Copies at most sizeof(bytes) bytes from blob and zero-fills whatever is left.
+  void set ( const void * blob, size_t len )
+  {
+    const uint8_t * k = (const uint8_t*)blob;
+
+    len = len > sizeof(bytes) ? sizeof(bytes) : len;
+
+    for(size_t i = 0; i < len; i++)
+    {
+      bytes[i] = k[i];
+    }
+
+    for(size_t i = len; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+  }
+
+  uint8_t & operator [] ( int i )
+  {
+    return bytes[i];
+  }
+
+  const uint8_t & operator [] ( int i ) const
+  {
+    return bytes[i];
+  }
+
+  //----------
+  // boolean operations
+  // operator< compares byte by byte starting at index 0; the first differing byte decides.
+  bool operator < ( const Blob & k ) const
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      if(bytes[i] < k.bytes[i]) return true;
+      if(bytes[i] > k.bytes[i]) return false;
+    }
+
+    return false;
+  }
+
+  bool operator == ( const Blob & k ) const
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      if(bytes[i] != k.bytes[i]) return false;
+    }
+
+    return true;
+  }
+
+  bool operator != ( const Blob & k ) const
+  {
+    return !(*this == k);
+  }
+
+  //----------
+  // bitwise operations
+
+  Blob operator ^ ( const Blob & k ) const 
+  {
+    Blob t;
+
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      t.bytes[i] = bytes[i] ^ k.bytes[i];
+    }
+
+    return t;
+  }
+
+  Blob & operator ^= ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] ^= k.bytes[i];
+    }
+
+    return *this;
+  }
+  // ANDs x with the leading sizeof(int) bytes read as an int.
+  int operator & ( int x )
+  {
+    return (*(int*)bytes) & x;
+  }
+
+  Blob & operator &= ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++) bytes[i] &= k.bytes[i];
+
+    // Previously fell off the end without returning, which is undefined behavior.
+    return *this;
+  }
+  // The shifts below delegate to lshift()/rshift() on the byte array (defined outside this class).
+  Blob operator << ( int c )
+  {
+    Blob t = *this;
+
+    lshift(&t.bytes[0],sizeof(bytes),c);
+
+    return t;
+  }
+
+  Blob operator >> ( int c )
+  {
+    Blob t = *this;
+
+    rshift(&t.bytes[0],sizeof(bytes),c);
+
+    return t;
+  }
+
+  Blob & operator <<= ( int c )
+  {
+    lshift(&bytes[0],sizeof(bytes),c);
+
+    return *this;
+  }
+
+  Blob & operator >>= ( int c )
+  {
+    rshift(&bytes[0],sizeof(bytes),c);
+
+    return *this;
+  }
+
+  //----------
+  
+private:
+
+  uint8_t bytes[(_bits+7)/8];
+};
+
+typedef Blob<128> uint128_t;
+typedef Blob<256> uint256_t;
+
+//-----------------------------------------------------------------------------
diff --git a/crc.cpp b/crc.cpp
index 76fcfa0..d4d6b84 100644
--- a/crc.cpp
+++ b/crc.cpp
@@ -1,100 +1,100 @@
-#include "Platform.h"
-
-/*
- * This file is derived from crc32.c from the zlib-1.1.3 distribution
- * by Jean-loup Gailly and Mark Adler.
- */
-
-/* crc32.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-1998 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-
-/* ========================================================================
- * Table of CRC-32's of all single-byte values (made by make_crc_table)
- */
-static const uint32_t crc_table[256] = {
-  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
-  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
-  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
-  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
-  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
-  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
-  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
-  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
-  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
-  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
-  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
-  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
-  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
-  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
-  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
-  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
-  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
-  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
-  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
-  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
-  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
-  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
-  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
-  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
-  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
-  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
-  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
-  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
-  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
-  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
-  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
-  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
-  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
-  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
-  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
-  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
-  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
-  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
-  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
-  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
-  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
-  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
-  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
-  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
-  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
-  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
-  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
-  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
-  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
-  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
-  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
-  0x2d02ef8dL
-};
-
-/* ========================================================================= */
-
-#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
-#define DO2(buf)  DO1(buf); DO1(buf);
-#define DO4(buf)  DO2(buf); DO2(buf);
-#define DO8(buf)  DO4(buf); DO4(buf);
-
-/* ========================================================================= */
-
-void crc32 ( const void * key, int len, uint32_t seed, void * out )
-{
-  uint8_t * buf = (uint8_t*)key;
-  uint32_t crc = seed ^ 0xffffffffL;
-
-  while (len >= 8)
-  {
-    DO8(buf);
-    len -= 8;
-  }
-
-  while(len--)
-  {
-    DO1(buf);
-  } 
-
-  crc ^= 0xffffffffL;
-
-  *(uint32_t*)out = crc;
-}
+#include "Platform.h"
+
+/*
+ * This file is derived from crc32.c from the zlib-1.1.3 distribution
+ * by Jean-loup Gailly and Mark Adler.
+ */
+
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-1998 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+
+/* ========================================================================
+ * Table of CRC-32's of all single-byte values (made by make_crc_table)
+ */
+static const uint32_t crc_table[256] = {
+  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+  0x2d02ef8dL
+};
+
+/* ========================================================================= */
+
+#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
+#define DO2(buf)  DO1(buf); DO1(buf);
+#define DO4(buf)  DO2(buf); DO2(buf);
+#define DO8(buf)  DO4(buf); DO4(buf);
+
+/* ========================================================================= */
+
+void crc32 ( const void * key, int len, uint32_t seed, void * out ) // table-driven CRC of key[0..len); 32-bit result goes to *out
+{
+  const uint8_t * buf = (const uint8_t*)key;
+  uint32_t crc = seed ^ 0xffffffffL; // seed is inverted on the way in, and the result is inverted again below
+  // Bulk of the input, eight bytes per iteration.
+  while (len >= 8)
+  {
+    DO8(buf);
+    len -= 8;
+  }
+  // Remaining 0-7 bytes. "> 0" keeps a negative len from looping on and reading out of bounds.
+  while(len-- > 0)
+  {
+    DO1(buf);
+  } 
+
+  crc ^= 0xffffffffL;
+  // Store the result in the caller's buffer.
+  *(uint32_t*)out = crc;
+}
diff --git a/lookup3.cpp b/lookup3.cpp
index 60087f1..63f00f8 100644
--- a/lookup3.cpp
+++ b/lookup3.cpp
@@ -1,72 +1,72 @@
-// lookup3 by Bob Jekins, code is public domain.
-
-#include "Platform.h"
-
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-#define mix(a,b,c) \
-{ \
-  a -= c;  a ^= rot(c, 4);  c += b; \
-  b -= a;  b ^= rot(a, 6);  a += c; \
-  c -= b;  c ^= rot(b, 8);  b += a; \
-  a -= c;  a ^= rot(c,16);  c += b; \
-  b -= a;  b ^= rot(a,19);  a += c; \
-  c -= b;  c ^= rot(b, 4);  b += a; \
-}
-
-#define final(a,b,c) \
-{ \
-  c ^= b; c -= rot(b,14); \
-  a ^= c; a -= rot(c,11); \
-  b ^= a; b -= rot(a,25); \
-  c ^= b; c -= rot(b,16); \
-  a ^= c; a -= rot(c,4);  \
-  b ^= a; b -= rot(a,14); \
-  c ^= b; c -= rot(b,24); \
-}
-
-uint32_t lookup3 ( const void * key, int length, uint32_t initval )
-{
-  uint32_t a,b,c;                                          /* internal state */
-
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
-
-  const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
-
-  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
-  while (length > 12)
-  {
-    a += k[0];
-    b += k[1];
-    c += k[2];
-    mix(a,b,c);
-    length -= 12;
-    k += 3;
-  }
-
-  switch(length)
-  {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
-    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
-    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
-    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
-    case 5 : b+=k[1]&0xff; a+=k[0]; break;
-    case 4 : a+=k[0]; break;
-    case 3 : a+=k[0]&0xffffff; break;
-    case 2 : a+=k[0]&0xffff; break;
-    case 1 : a+=k[0]&0xff; break;
-    case 0 : { return c; }              /* zero length strings require no mixing */
-  }
-
-  final(a,b,c);
-
-  return c;
-}
-
-void lookup3_test ( const void * key, int len, uint32_t seed, void * out )
-{
-  *(uint32_t*)out = lookup3(key,len,seed);
-}
+// lookup3 by Bob Jenkins, code is public domain.
+
+#include "Platform.h"
+// Rotate x left by k bits; written for a 32-bit unsigned x with 0 < k < 32 (k of 0 or 32 would shift by the full width).
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+// Mixes the three state words a, b, c in place (six subtract / xor-rotate / add steps). Arguments are named several times, so pass plain variables.
+#define mix(a,b,c) \
+{ \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+// Last-stage scramble of a, b, c in place (seven xor / subtract-rotate steps); lookup3 returns c afterwards. Pass plain variables.
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c,4);  \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
+}
+// Hashes `length` bytes at `key` to 32 bits; `length` and `initval` both feed the initial state and the result is the final value of c.
+uint32_t lookup3 ( const void * key, int length, uint32_t initval )
+{
+  uint32_t a,b,c;                                          /* internal state */
+
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+  const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+
+  /*------ all but last block: 12 bytes (three 32-bit words) per pass; key is read as uint32_t with no alignment check */
+  while (length > 12)
+  {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 12;
+    k += 3;
+  }
+  /* last 1..12 bytes: a partial word is fetched whole and masked, so up to 3 bytes beyond `length` are read; masks keep the low-order bytes */
+  switch(length)
+  {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+    case 5 : b+=k[1]&0xff; a+=k[0]; break;
+    case 4 : a+=k[0]; break;
+    case 3 : a+=k[0]&0xffffff; break;
+    case 2 : a+=k[0]&0xffff; break;
+    case 1 : a+=k[0]&0xff; break;
+    case 0 : { return c; }              /* zero length strings require no mixing */
+  }
+
+  final(a,b,c);
+
+  return c;
+}
+// Adapter: runs lookup3(key,len,seed) and stores the 32-bit result through `out`, which must be writable as a uint32_t.
+void lookup3_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = lookup3(key,len,seed);
+}
diff --git a/main.cpp b/main.cpp
index bf25ce0..19c605b 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,590 +1,595 @@
-#include "Platform.h"
-#include "Hashes.h"
-#include "KeysetTest.h"
-#include "SpeedTest.h"
-#include "AvalancheTest.h"
-#include "DifferentialTest.h"
-
-#include <stdio.h>
-#include <time.h>
-
-//-----------------------------------------------------------------------------
-// Configuration. TODO - move these to command-line flags
-
-bool g_testAll = false;
-
-bool g_testSanity      = false;
-bool g_testSpeed       = false;
-bool g_testDiff        = false;
-bool g_testDiffDist    = false;
-bool g_testAvalanche   = false;
-bool g_testBIC         = false;
-bool g_testCyclic      = false;
-bool g_testTwoBytes    = false;
-bool g_testSparse      = false;
-bool g_testPermutation = false;
-bool g_testWindow      = false;
-bool g_testText        = false;
-bool g_testZeroes      = false;
-bool g_testSeed        = false;
-
-//-----------------------------------------------------------------------------
-// This is the list of all hashes that SMHasher can test.
-
-struct HashInfo
-{
-  pfHash hash;
-  int hashbits;
-  uint32_t verification;
-  const char * name;
-  const char * desc;
-};
-
-HashInfo g_hashes[] =
-{
-  { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
-  { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
-  { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
-
-  { crc32,                32, 0x3719DB20, "crc32",       "CRC-32" },
-
-  { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },
-  { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },
-
-  { FNV,                  32, 0xE3CBBE91, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
-  { lookup3_test,         32, 0x3D83917A, "lookup3",     "Bob Jenkins' lookup3" },
-  { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
-  { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
-  { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
-
-  { CityHash64_test,      64, 0x45754A6F, "City64",      "Google CityHash128WithSeed" },
-  { CityHash128_test,    128, 0x94B0EF46, "City128",     "Google CityHash128WithSeed" },
-  
-  // MurmurHash2
-
-  { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
-  { MurmurHash2A_test,    32, 0x7FBD4396, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
-  { MurmurHash64A_test,   64, 0x1F0D3804, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
-  { MurmurHash64B_test,   64, 0xDD537C05, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
-
-  // MurmurHash3
-
-  { MurmurHash3_x86_32,   32, 0xB0F57EE3, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-  { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-  { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
-
-};
-
-HashInfo * findHash ( const char * name )
-{
-  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
-  {
-    if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
-  }
-
-  return NULL;
-}
-
-//-----------------------------------------------------------------------------
-// Self-test on startup - verify that all installed hashes work correctly.
-
-void SelfTest ( void )
-{
-  bool pass = true;
-
-  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
-  {
-    HashInfo * info = & g_hashes[i];
-
-    pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);
-  }
-
-  if(!pass)
-  {
-    printf("Self-test FAILED!\n");
-
-    for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
-    {
-      HashInfo * info = & g_hashes[i];
-
-      printf("%16s - ",info->name);
-      pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
-    }
-
-    exit(1);
-  }
-}
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype >
-void test ( hashfunc<hashtype> hash, HashInfo * info )
-{
-  const int hashbits = sizeof(hashtype) * 8;
-
-  printf("-------------------------------------------------------------------------------\n");
-  printf("--- Testing %s (%s)\n\n",info->name,info->desc);
-
-  //-----------------------------------------------------------------------------
-  // Sanity tests
-
-  if(g_testSanity || g_testAll)
-  {
-    printf("[[[ Sanity Tests ]]]\n\n");
-
-    VerificationTest(hash,hashbits,info->verification,true);
-    SanityTest(hash,hashbits);
-    AppendedZeroesTest(hash,hashbits);
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Speed tests
-
-  if(g_testSpeed || g_testAll)
-  {
-    printf("[[[ Speed Tests ]]]\n\n");
-
-    BulkSpeedTest(info->hash,info->verification);
-    printf("\n");
-
-    for(int i = 1; i < 32; i++)
-    {
-      double cycles;
-
-      TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);
-    }
-
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Differential tests
-
-  if(g_testDiff || g_testAll)
-  {
-    printf("[[[ Differential Tests ]]]\n\n");
-
-    bool result = true;
-    bool dumpCollisions = false;
-
-    result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
-    result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
-    result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Differential-distribution tests
-
-  if(g_testDiffDist /*|| g_testAll*/)
-  {
-    printf("[[[ Differential Distribution Tests ]]]\n\n");
-
-    bool result = true;
-
-    result &= DiffDistTest2<uint64_t,hashtype>(hash);
-
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Avalanche tests
-
-  if(g_testAvalanche || g_testAll)
-  {
-    printf("[[[ Avalanche Tests ]]]\n\n");
-
-    bool result = true;
-
-    result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
-
-    result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
-
-    result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
-
-    result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
-    result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Bit Independence Criteria. Interesting, but doesn't tell us much about
-  // collision or distribution.
-
-  if(g_testBIC)
-  {
-    printf("[[[ Bit Independence Criteria ]]]\n\n");
-
-    bool result = true;
-
-    //result &= BicTest<uint64_t,hashtype>(hash,2000000);
-    BicTest3<Blob<88>,hashtype>(hash,2000000);
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."
-
-  if(g_testCyclic || g_testAll)
-  {
-    printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
-
-    bool result = true;
-    bool drawDiagram = false;
-
-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes
-
-  // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.
-
-  if(g_testTwoBytes || g_testAll)
-  {
-    printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
-
-    bool result = true;
-    bool drawDiagram = false;
-
-    for(int i = 4; i <= 20; i += 4)
-    {
-      result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);
-    }
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'Sparse' - keys with all bits 0 except a few
-
-  if(g_testSparse || g_testAll)
-  {
-    printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
-
-    bool result = true;
-    bool drawDiagram = false;
-
-    result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
-    result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
-    result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
-    result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
-    result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
-    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram);
-    result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
-    result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'Permutation' - all possible combinations of a set of blocks
-
-  if(g_testPermutation || g_testAll)
-  {
-    {
-      // This one breaks lookup3, surprisingly
-
-      printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
-
-      bool result = true;
-      bool drawDiagram = false;
-
-      uint32_t blocks[] =
-      {
-        0x00000000,
-
-        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
-      };
-
-      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
-      if(!result) printf("*********FAIL*********\n");
-      printf("\n");
-    }
-
-    {
-      printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
-
-      bool result = true;
-      bool drawDiagram = false;
-
-      uint32_t blocks[] =
-      {
-        0x00000000,
-
-        0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
-      };
-
-      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
-      if(!result) printf("*********FAIL*********\n");
-      printf("\n");
-    }
-
-    {
-      printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
-
-      bool result = true;
-      bool drawDiagram = false;
-
-      uint32_t blocks[] =
-      {
-        0x00000000,
-
-        0x80000000,
-      };
-
-      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
-      if(!result) printf("*********FAIL*********\n");
-      printf("\n");
-    }
-
-    {
-      printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
-
-      bool result = true;
-      bool drawDiagram = false;
-
-      uint32_t blocks[] =
-      {
-        0x00000000,
-
-        0x00000001,
-      };
-
-      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
-      if(!result) printf("*********FAIL*********\n");
-      printf("\n");
-    }
-
-    {
-      printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
-
-      bool result = true;
-      bool drawDiagram = false;
-
-      uint32_t blocks[] =
-      {
-        0x00000000,
-
-        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
-
-        0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
-      };
-
-      result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
-      if(!result) printf("*********FAIL*********\n");
-      printf("\n");
-    }
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'Window'
-
-  // Skip distribution test for these - they're too easy to distribute well,
-  // and it generates a _lot_ of testing
-
-  if(g_testWindow || g_testAll)
-  {
-    printf("[[[ Keyset 'Window' Tests ]]]\n\n");
-
-    bool result = true;
-    bool testCollision = true;
-    bool testDistribution = false;
-    bool drawDiagram = false;
-
-    result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'Text'
-
-  if(g_testText || g_testAll)
-  {
-    printf("[[[ Keyset 'Text' Tests ]]]\n\n");
-
-    bool result = true;
-    bool drawDiagram = false;
-
-    const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
-
-    result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
-    result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
-    result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'Zeroes'
-
-  if(g_testZeroes || g_testAll)
-  {
-    printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
-
-    bool result = true;
-    bool drawDiagram = false;
-
-    result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-
-  //-----------------------------------------------------------------------------
-  // Keyset 'Seed'
-
-  if(g_testSeed || g_testAll)
-  {
-    printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
-
-    bool result = true;
-    bool drawDiagram = false;
-
-    result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
-
-    if(!result) printf("*********FAIL*********\n");
-    printf("\n");
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t g_inputVCode = 1;
-uint32_t g_outputVCode = 1;
-uint32_t g_resultVCode = 1;
-
-HashInfo * g_hashUnderTest = NULL;
-
-void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
-{
-  g_inputVCode = MurmurOAAT(key,len,g_inputVCode);
-  g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);
-
-  g_hashUnderTest->hash(key,len,seed,out);
-
-  g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
-}
-
-//-----------------------------------------------------------------------------
-
-void testHash ( const char * name )
-{
-  HashInfo * pInfo = findHash(name);
-
-  if(pInfo == NULL)
-  {
-    printf("Invalid hash '%s' specified\n",name);
-    return;
-  }
-  else
-  {
-    g_hashUnderTest = pInfo;
-
-    if(pInfo->hashbits == 32)
-    {
-      test<uint32_t>( VerifyHash, pInfo );
-    }
-    else if(pInfo->hashbits == 64)
-    {
-      test<uint64_t>( pInfo->hash, pInfo );
-    }
-    else if(pInfo->hashbits == 128)
-    {
-      test<uint128_t>( pInfo->hash, pInfo );
-    }
-    else if(pInfo->hashbits == 256)
-    {
-      test<uint256_t>( pInfo->hash, pInfo );
-    }
-    else
-    {
-      printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
-    }
-  }
-}
-//-----------------------------------------------------------------------------
-
-int main ( int argc, char ** argv )
-{
-  const char * hashToTest = "murmur3a";
-
-  if(argc < 2)
-  {
-    printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
-  }
-  else
-  {
-    hashToTest = argv[1];
-  }
-
-  // Code runs on the 3rd CPU by default
-
-  SetAffinity((1 << 2));
-
-  SelfTest();
-
-  int timeBegin = clock();
-
-  g_testAll = true;
-
-  //g_testSanity = true;
-  //g_testSpeed = true;
-  //g_testAvalanche = true;
-  //g_testBIC = true;
-  //g_testCyclic = true;
-  //g_testTwoBytes = true;
-  //g_testDiff = true;
-  //g_testDiffDist = true;
-  //g_testSparse = true;
-  //g_testPermutation = true;
-  //g_testWindow = true;
-  //g_testZeroes = true;
-
-  testHash(hashToTest);
-
-  //----------
-
-  int timeEnd = clock();
-
-  printf("\n");
-  printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);
-  printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
-  printf("-------------------------------------------------------------------------------\n");
-  return 0;
-}
+#include "Platform.h"
+#include "Hashes.h"
+#include "KeysetTest.h"
+#include "SpeedTest.h"
+#include "AvalancheTest.h"
+#include "DifferentialTest.h"
+
+#include <stdio.h>
+#include <time.h>
+
+//-----------------------------------------------------------------------------
+// Configuration. TODO - move these to command-line flags
+
+bool g_testAll = false;   // ORed with most of the per-suite flags checked in test()
+
+bool g_testSanity      = false;
+bool g_testSpeed       = false;
+bool g_testDiff        = false;
+bool g_testDiffDist    = false;   // not enabled by g_testAll: that part of the check is commented out in test()
+bool g_testAvalanche   = false;
+bool g_testBIC         = false;   // not enabled by g_testAll: test() checks this flag alone
+bool g_testCyclic      = false;
+bool g_testTwoBytes    = false;
+bool g_testSparse      = false;
+bool g_testPermutation = false;
+bool g_testWindow      = false;
+bool g_testText        = false;
+bool g_testZeroes      = false;
+bool g_testSeed        = false;
+
+//-----------------------------------------------------------------------------
+// This is the list of all hashes that SMHasher can test.
+
+struct HashInfo
+{
+  pfHash hash;             // hash function under test
+  int hashbits;            // declared output width in bits (entries below use 32, 64 and 128)
+  uint32_t verification;   // expected value handed to VerificationTest
+  const char * name;       // key matched case-insensitively by findHash
+  const char * desc;       // description printed in the test() banner
+};
+
+HashInfo g_hashes[] =   // each row: { function, output bits, verification value, lookup name, description }
+{
+  { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
+
+  { crc32,                32, 0x3719DB20, "crc32",       "CRC-32" },
+
+  { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },
+  { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },
+
+  { FNV,                  32, 0xE3CBBE91, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
+  { Bernstein,            32, 0xBDB4B640, "bernstein",   "Bernstein, 32-bit" },
+  { lookup3_test,         32, 0x3D83917A, "lookup3",     "Bob Jenkins' lookup3" },
+  { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
+  { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
+  { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
+
+  { CityHash64_test,      64, 0x25A20825, "City64",      "Google CityHash64WithSeed" },
+  { CityHash128_test,    128, 0x6531F54E, "City128",     "Google CityHash128WithSeed" },
+
+  { SpookyHash64_test,    32, 0x3F798BBB, "Spooky32",    "Bob Jenkins' SpookyHash, 32-bit result" },   // NOTE(review): 32-bit entry points at SpookyHash64_test - confirm it writes only 4 bytes
+  { SpookyHash64_test,    64, 0xA7F955F1, "Spooky64",    "Bob Jenkins' SpookyHash, 64-bit result" },
+  { SpookyHash128_test,  128, 0x8D263080, "Spooky128",   "Bob Jenkins' SpookyHash, 128-bit result" },
+
+  // MurmurHash2
+
+  { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
+  { MurmurHash2A_test,    32, 0x7FBD4396, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
+  { MurmurHash64A_test,   64, 0x1F0D3804, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
+  { MurmurHash64B_test,   64, 0xDD537C05, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
+
+  // MurmurHash3
+
+  { MurmurHash3_x86_32,   32, 0xB0F57EE3, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+
+};
+// Returns the first g_hashes entry whose name equals `name` ignoring case (_stricmp), or NULL when none matches.
+HashInfo * findHash ( const char * name )
+{
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  {
+    if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
+  }
+
+  return NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Self-test on startup - verify that all installed hashes work correctly.
+
+void SelfTest ( void )
+{
+  bool pass = true;
+  // Pass 1: run VerificationTest on every g_hashes entry; `pass` stays true only if all of them succeed.
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  {
+    HashInfo * info = & g_hashes[i];
+
+    pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);
+  }
+  // On any failure: rerun every entry with the last argument true (presumably verbose - confirm in VerificationTest), then exit(1).
+  if(!pass)
+  {
+    printf("Self-test FAILED!\n");
+
+    for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+    {
+      HashInfo * info = & g_hashes[i];
+
+      printf("%16s - ",info->name);   // name label printed ahead of that entry's VerificationTest call
+      pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
+    }
+
+    exit(1);
+  }
+}
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype >
+void test ( hashfunc<hashtype> hash, HashInfo * info )
+{
+  const int hashbits = sizeof(hashtype) * 8;
+
+  printf("-------------------------------------------------------------------------------\n");
+  printf("--- Testing %s (%s)\n\n",info->name,info->desc);
+
+  //-----------------------------------------------------------------------------
+  // Sanity tests
+
+  if(g_testSanity || g_testAll)
+  {
+    printf("[[[ Sanity Tests ]]]\n\n");
+
+    VerificationTest(hash,hashbits,info->verification,true);
+    SanityTest(hash,hashbits);
+    AppendedZeroesTest(hash,hashbits);
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Speed tests
+
+  if(g_testSpeed || g_testAll)
+  {
+    printf("[[[ Speed Tests ]]]\n\n");
+
+    BulkSpeedTest(info->hash,info->verification);
+    printf("\n");
+
+    for(int i = 1; i < 32; i++)
+    {
+      double cycles;
+
+      TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);
+    }
+
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Differential tests
+
+  if(g_testDiff || g_testAll)
+  {
+    printf("[[[ Differential Tests ]]]\n\n");
+
+    bool result = true;
+    bool dumpCollisions = false;
+
+    result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
+    result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
+    result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Differential-distribution tests
+
+  if(g_testDiffDist /*|| g_testAll*/)
+  {
+    printf("[[[ Differential Distribution Tests ]]]\n\n");
+
+    bool result = true;
+
+    result &= DiffDistTest2<uint64_t,hashtype>(hash);
+
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Avalanche tests
+
+  if(g_testAvalanche || g_testAll)
+  {
+    printf("[[[ Avalanche Tests ]]]\n\n");
+
+    bool result = true;
+
+    result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Bit Independence Criteria. Interesting, but doesn't tell us much about
+  // collision or distribution.
+
+  if(g_testBIC)
+  {
+    printf("[[[ Bit Independence Criteria ]]]\n\n");
+
+    bool result = true;
+
+    //result &= BicTest<uint64_t,hashtype>(hash,2000000);
+    BicTest3<Blob<88>,hashtype>(hash,2000000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."
+
+  if(g_testCyclic || g_testAll)
+  {
+    printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes
+
+  // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.
+
+  if(g_testTwoBytes || g_testAll)
+  {
+    printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    for(int i = 4; i <= 20; i += 4)
+    {
+      result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);
+    }
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Sparse' - keys with all bits 0 except a few
+
+  if(g_testSparse || g_testAll)
+  {
+    printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram);
+    result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
+    result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Permutation' - all possible combinations of a set of blocks
+
+  if(g_testPermutation || g_testAll)
+  {
+    {
+      // This one breaks lookup3, surprisingly
+
+      printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x80000000,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+
+        0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Window'
+
+  // Skip distribution test for these - they're too easy to distribute well,
+  // and it generates a _lot_ of testing
+
+  if(g_testWindow || g_testAll)
+  {
+    printf("[[[ Keyset 'Window' Tests ]]]\n\n");
+
+    bool result = true;
+    bool testCollision = true;
+    bool testDistribution = false;
+    bool drawDiagram = false;
+
+    result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Text'
+
+  if(g_testText || g_testAll)
+  {
+    printf("[[[ Keyset 'Text' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+
+    result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
+    result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
+    result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Zeroes'
+
+  if(g_testZeroes || g_testAll)
+  {
+    printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Seed'
+
+  if(g_testSeed || g_testAll)
+  {
+    printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Running verification codes; all start at 1 and are printed at the end of main().
+uint32_t g_inputVCode = 1;
+uint32_t g_outputVCode = 1;
+uint32_t g_resultVCode = 1;
+
+HashInfo * g_hashUnderTest = NULL; // set by testHash() before VerifyHash can be called
+// Hash-signature wrapper: chains each key, seed and digest through MurmurOAAT into the vcodes.
+void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
+{
+  g_inputVCode = MurmurOAAT(key,len,g_inputVCode);
+  g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);
+  // Forward to the real hash; g_hashUnderTest is dereferenced without a NULL check.
+  g_hashUnderTest->hash(key,len,seed,out);
+  // hashbits/8 bytes of 'out' are fed into the output code.
+  g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
+}
+
+//-----------------------------------------------------------------------------
+// Resolve 'name' via findHash() and run test<>() with the integer type matching its bit width.
+void testHash ( const char * name )
+{
+  HashInfo * pInfo = findHash(name);
+  // Unknown name: report it and run nothing.
+  if(pInfo == NULL)
+  {
+    printf("Invalid hash '%s' specified\n",name);
+    return;
+  }
+  else
+  {
+    g_hashUnderTest = pInfo;
+    // Only the 32-bit case is routed through VerifyHash; wider hashes are called directly.
+    if(pInfo->hashbits == 32)
+    {
+      test<uint32_t>( VerifyHash, pInfo );
+    }
+    else if(pInfo->hashbits == 64)
+    {
+      test<uint64_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 128)
+    {
+      test<uint128_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 256)
+    {
+      test<uint256_t>( pInfo->hash, pInfo );
+    }
+    else
+    {
+      printf("Invalid hash bit width %d for hash '%s'\n",pInfo->hashbits,pInfo->name);
+    }
+  }
+}
+//-----------------------------------------------------------------------------
+// Entry point: argv[1], if present, names the hash to test. Always returns 0.
+int main ( int argc, char ** argv )
+{
+  const char * hashToTest = "murmur3a";
+
+  if(argc < 2)
+  {
+    printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
+  }
+  else
+  {
+    hashToTest = argv[1];
+  }
+
+  // Code runs on the 3rd CPU by default
+
+  SetAffinity((1 << 2));
+
+  SelfTest();
+  // clock_t, not int: a 32-bit int overflows after ~36 min when CLOCKS_PER_SEC is 1000000.
+  clock_t timeBegin = clock();
+
+  g_testAll = true;
+
+  //g_testSanity = true;
+  //g_testSpeed = true;
+  //g_testAvalanche = true;
+  //g_testBIC = true;
+  //g_testCyclic = true;
+  //g_testTwoBytes = true;
+  //g_testDiff = true;
+  //g_testDiffDist = true;
+  //g_testSparse = true;
+  //g_testPermutation = true;
+  //g_testWindow = true;
+  //g_testZeroes = true;
+
+  testHash(hashToTest);
+
+  //----------
+
+  clock_t timeEnd = clock();
+
+  printf("\n");
+  printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);
+  printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
+  printf("-------------------------------------------------------------------------------\n");
+  return 0;
+}
diff --git a/md5.cpp b/md5.cpp
index 43b870a..8e50c79 100644
--- a/md5.cpp
+++ b/md5.cpp
@@ -1,382 +1,382 @@
-#include <memory.h>
-#include "Types.h"
-
-// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm"
-
-/**
- * \brief          MD5 context structure
- */
-typedef struct
-{
-    unsigned long total[2];     /*!< number of bytes processed  */
-    unsigned long state[4];     /*!< intermediate digest state  */
-    unsigned char buffer[64];   /*!< data block being processed */
-
-    unsigned char ipad[64];     /*!< HMAC: inner padding        */
-    unsigned char opad[64];     /*!< HMAC: outer padding        */
-}
-md5_context;
-
-/**
- * \brief          MD5 context setup
- *
- * \param ctx      context to be initialized
- */
-void md5_starts( md5_context *ctx );
-
-/**
- * \brief          MD5 process buffer
- *
- * \param ctx      MD5 context
- * \param input    buffer holding the  data
- * \param ilen     length of the input data
- */
-void md5_update( md5_context *ctx, unsigned char *input, int ilen );
-
-/**
- * \brief          MD5 final digest
- *
- * \param ctx      MD5 context
- * \param output   MD5 checksum result
- */
-void md5_finish( md5_context *ctx, unsigned char output[16] );
-
-/**
- * \brief          Output = MD5( input buffer )
- *
- * \param input    buffer holding the  data
- * \param ilen     length of the input data
- * \param output   MD5 checksum result
- */
-void md5( unsigned char *input, int ilen, unsigned char output[16] );
-
-/**
- * \brief          Output = MD5( file contents )
- *
- * \param path     input file name
- * \param output   MD5 checksum result
- *
- * \return         0 if successful, 1 if fopen failed,
- *                 or 2 if fread failed
- */
-int md5_file( char *path, unsigned char output[16] );
-
-/**
- * \brief          MD5 HMAC context setup
- *
- * \param ctx      HMAC context to be initialized
- * \param key      HMAC secret key
- * \param keylen   length of the HMAC key
- */
-void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
-
-/**
- * \brief          MD5 HMAC process buffer
- *
- * \param ctx      HMAC context
- * \param input    buffer holding the  data
- * \param ilen     length of the input data
- */
-void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
-
-/**
- * \brief          MD5 HMAC final digest
- *
- * \param ctx      HMAC context
- * \param output   MD5 HMAC checksum result
- */
-void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
-
-/**
- * \brief          Output = HMAC-MD5( hmac key, input buffer )
- *
- * \param key      HMAC secret key
- * \param keylen   length of the HMAC key
- * \param input    buffer holding the  data
- * \param ilen     length of the input data
- * \param output   HMAC-MD5 result
- */
-void md5_hmac( unsigned char *key, int keylen,
-               unsigned char *input, int ilen,
-               unsigned char output[16] );
-
-/**
- * \brief          Checkup routine
- *
- * \return         0 if successful, or 1 if the test failed
- */
-int md5_self_test( int verbose );
-
-/*
- * 32-bit integer manipulation macros (little endian)
- */
-#ifndef GET_ULONG_LE
-#define GET_ULONG_LE(n,b,i)                             \
-{                                                       \
-    (n) = ( (unsigned long) (b)[(i)    ]       )        \
-        | ( (unsigned long) (b)[(i) + 1] <<  8 )        \
-        | ( (unsigned long) (b)[(i) + 2] << 16 )        \
-        | ( (unsigned long) (b)[(i) + 3] << 24 );       \
-}
-#endif
-
-#ifndef PUT_ULONG_LE
-#define PUT_ULONG_LE(n,b,i)                             \
-{                                                       \
-    (b)[(i)    ] = (unsigned char) ( (n)       );       \
-    (b)[(i) + 1] = (unsigned char) ( (n) >>  8 );       \
-    (b)[(i) + 2] = (unsigned char) ( (n) >> 16 );       \
-    (b)[(i) + 3] = (unsigned char) ( (n) >> 24 );       \
-}
-#endif
-
-/*
- * MD5 context setup
- */
-void md5_starts( md5_context *ctx )
-{
-    ctx->total[0] = 0;
-    ctx->total[1] = 0;
-
-    ctx->state[0] = 0x67452301;
-    ctx->state[1] = 0xEFCDAB89;
-    ctx->state[2] = 0x98BADCFE;
-    ctx->state[3] = 0x10325476;
-}
-
-static void md5_process( md5_context *ctx, unsigned char data[64] )
-{
-    unsigned long X[16], A, B, C, D;
-
-    GET_ULONG_LE( X[ 0], data,  0 );
-    GET_ULONG_LE( X[ 1], data,  4 );
-    GET_ULONG_LE( X[ 2], data,  8 );
-    GET_ULONG_LE( X[ 3], data, 12 );
-    GET_ULONG_LE( X[ 4], data, 16 );
-    GET_ULONG_LE( X[ 5], data, 20 );
-    GET_ULONG_LE( X[ 6], data, 24 );
-    GET_ULONG_LE( X[ 7], data, 28 );
-    GET_ULONG_LE( X[ 8], data, 32 );
-    GET_ULONG_LE( X[ 9], data, 36 );
-    GET_ULONG_LE( X[10], data, 40 );
-    GET_ULONG_LE( X[11], data, 44 );
-    GET_ULONG_LE( X[12], data, 48 );
-    GET_ULONG_LE( X[13], data, 52 );
-    GET_ULONG_LE( X[14], data, 56 );
-    GET_ULONG_LE( X[15], data, 60 );
-
-#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
-
-#define P(a,b,c,d,k,s,t)                                \
-{                                                       \
-    a += F(b,c,d) + X[k] + t; a = S(a,s) + b;           \
-}
-
-    A = ctx->state[0];
-    B = ctx->state[1];
-    C = ctx->state[2];
-    D = ctx->state[3];
-
-#define F(x,y,z) (z ^ (x & (y ^ z)))
-
-    P( A, B, C, D,  0,  7, 0xD76AA478 );
-    P( D, A, B, C,  1, 12, 0xE8C7B756 );
-    P( C, D, A, B,  2, 17, 0x242070DB );
-    P( B, C, D, A,  3, 22, 0xC1BDCEEE );
-    P( A, B, C, D,  4,  7, 0xF57C0FAF );
-    P( D, A, B, C,  5, 12, 0x4787C62A );
-    P( C, D, A, B,  6, 17, 0xA8304613 );
-    P( B, C, D, A,  7, 22, 0xFD469501 );
-    P( A, B, C, D,  8,  7, 0x698098D8 );
-    P( D, A, B, C,  9, 12, 0x8B44F7AF );
-    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
-    P( B, C, D, A, 11, 22, 0x895CD7BE );
-    P( A, B, C, D, 12,  7, 0x6B901122 );
-    P( D, A, B, C, 13, 12, 0xFD987193 );
-    P( C, D, A, B, 14, 17, 0xA679438E );
-    P( B, C, D, A, 15, 22, 0x49B40821 );
-
-#undef F
-
-#define F(x,y,z) (y ^ (z & (x ^ y)))
-
-    P( A, B, C, D,  1,  5, 0xF61E2562 );
-    P( D, A, B, C,  6,  9, 0xC040B340 );
-    P( C, D, A, B, 11, 14, 0x265E5A51 );
-    P( B, C, D, A,  0, 20, 0xE9B6C7AA );
-    P( A, B, C, D,  5,  5, 0xD62F105D );
-    P( D, A, B, C, 10,  9, 0x02441453 );
-    P( C, D, A, B, 15, 14, 0xD8A1E681 );
-    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );
-    P( A, B, C, D,  9,  5, 0x21E1CDE6 );
-    P( D, A, B, C, 14,  9, 0xC33707D6 );
-    P( C, D, A, B,  3, 14, 0xF4D50D87 );
-    P( B, C, D, A,  8, 20, 0x455A14ED );
-    P( A, B, C, D, 13,  5, 0xA9E3E905 );
-    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );
-    P( C, D, A, B,  7, 14, 0x676F02D9 );
-    P( B, C, D, A, 12, 20, 0x8D2A4C8A );
-
-#undef F
-    
-#define F(x,y,z) (x ^ y ^ z)
-
-    P( A, B, C, D,  5,  4, 0xFFFA3942 );
-    P( D, A, B, C,  8, 11, 0x8771F681 );
-    P( C, D, A, B, 11, 16, 0x6D9D6122 );
-    P( B, C, D, A, 14, 23, 0xFDE5380C );
-    P( A, B, C, D,  1,  4, 0xA4BEEA44 );
-    P( D, A, B, C,  4, 11, 0x4BDECFA9 );
-    P( C, D, A, B,  7, 16, 0xF6BB4B60 );
-    P( B, C, D, A, 10, 23, 0xBEBFBC70 );
-    P( A, B, C, D, 13,  4, 0x289B7EC6 );
-    P( D, A, B, C,  0, 11, 0xEAA127FA );
-    P( C, D, A, B,  3, 16, 0xD4EF3085 );
-    P( B, C, D, A,  6, 23, 0x04881D05 );
-    P( A, B, C, D,  9,  4, 0xD9D4D039 );
-    P( D, A, B, C, 12, 11, 0xE6DB99E5 );
-    P( C, D, A, B, 15, 16, 0x1FA27CF8 );
-    P( B, C, D, A,  2, 23, 0xC4AC5665 );
-
-#undef F
-
-#define F(x,y,z) (y ^ (x | ~z))
-
-    P( A, B, C, D,  0,  6, 0xF4292244 );
-    P( D, A, B, C,  7, 10, 0x432AFF97 );
-    P( C, D, A, B, 14, 15, 0xAB9423A7 );
-    P( B, C, D, A,  5, 21, 0xFC93A039 );
-    P( A, B, C, D, 12,  6, 0x655B59C3 );
-    P( D, A, B, C,  3, 10, 0x8F0CCC92 );
-    P( C, D, A, B, 10, 15, 0xFFEFF47D );
-    P( B, C, D, A,  1, 21, 0x85845DD1 );
-    P( A, B, C, D,  8,  6, 0x6FA87E4F );
-    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
-    P( C, D, A, B,  6, 15, 0xA3014314 );
-    P( B, C, D, A, 13, 21, 0x4E0811A1 );
-    P( A, B, C, D,  4,  6, 0xF7537E82 );
-    P( D, A, B, C, 11, 10, 0xBD3AF235 );
-    P( C, D, A, B,  2, 15, 0x2AD7D2BB );
-    P( B, C, D, A,  9, 21, 0xEB86D391 );
-
-#undef F
-
-    ctx->state[0] += A;
-    ctx->state[1] += B;
-    ctx->state[2] += C;
-    ctx->state[3] += D;
-}
-
-/*
- * MD5 process buffer
- */
-void md5_update( md5_context *ctx, unsigned char *input, int ilen )
-{
-    int fill;
-    unsigned long left;
-
-    if( ilen <= 0 )
-        return;
-
-    left = ctx->total[0] & 0x3F;
-    fill = 64 - left;
-
-    ctx->total[0] += ilen;
-    ctx->total[0] &= 0xFFFFFFFF;
-
-    if( ctx->total[0] < (unsigned long) ilen )
-        ctx->total[1]++;
-
-    if( left && ilen >= fill )
-    {
-        memcpy( (void *) (ctx->buffer + left),
-                (void *) input, fill );
-        md5_process( ctx, ctx->buffer );
-        input += fill;
-        ilen  -= fill;
-        left = 0;
-    }
-
-    while( ilen >= 64 )
-    {
-        md5_process( ctx, input );
-        input += 64;
-        ilen  -= 64;
-    }
-
-    if( ilen > 0 )
-    {
-        memcpy( (void *) (ctx->buffer + left),
-                (void *) input, ilen );
-    }
-}
-
-static const unsigned char md5_padding[64] =
-{
- 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/*
- * MD5 final digest
- */
-void md5_finish( md5_context *ctx, unsigned char output[16] )
-{
-    unsigned long last, padn;
-    unsigned long high, low;
-    unsigned char msglen[8];
-
-    high = ( ctx->total[0] >> 29 )
-         | ( ctx->total[1] <<  3 );
-    low  = ( ctx->total[0] <<  3 );
-
-    PUT_ULONG_LE( low,  msglen, 0 );
-    PUT_ULONG_LE( high, msglen, 4 );
-
-    last = ctx->total[0] & 0x3F;
-    padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );
-
-    md5_update( ctx, (unsigned char *) md5_padding, padn );
-    md5_update( ctx, msglen, 8 );
-
-    PUT_ULONG_LE( ctx->state[0], output,  0 );
-    PUT_ULONG_LE( ctx->state[1], output,  4 );
-    PUT_ULONG_LE( ctx->state[2], output,  8 );
-    PUT_ULONG_LE( ctx->state[3], output, 12 );
-}
-
-/*
- * output = MD5( input buffer )
- */
-void md5( unsigned char *input, int ilen, unsigned char output[16] )
-{
-    md5_context ctx;
-
-    md5_starts( &ctx );
-    md5_update( &ctx, input, ilen );
-    md5_finish( &ctx, output );
-
-    memset( &ctx, 0, sizeof( md5_context ) );
-}
-
-unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )
-{
-  unsigned int hash[4];
-
-  md5((unsigned char *)input,len,(unsigned char *)hash);
-
-  //return hash[0] ^ hash[1] ^ hash[2] ^ hash[3];
-
-  return hash[0];
-}	
-
-void md5_32            ( const void * key, int len, uint32_t /*seed*/, void * out )
-{
-  unsigned int hash[4];
-
-  md5((unsigned char*)key,len,(unsigned char*)hash);
-
-  *(uint32_t*)out = hash[0];
+#include <memory.h>
+#include "Types.h"
+
+// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm"
+
+/**
+ * \brief          MD5 context structure (state carried across starts/update/finish)
+ */
+typedef struct
+{
+    unsigned long total[2];     /*!< bytes processed: [0] kept to 32 bits, [1] counts its wraps */
+    unsigned long state[4];     /*!< intermediate digest state  */
+    unsigned char buffer[64];   /*!< partial block awaiting more input */
+    // NOTE(review): in this file ipad/opad are only touched by the whole-struct memset in md5().
+    unsigned char ipad[64];     /*!< HMAC: inner padding        */
+    unsigned char opad[64];     /*!< HMAC: outer padding        */
+}
+md5_context;
+
+/**
+ * \brief          MD5 context setup
+ *
+ * \param ctx      context to be initialized
+ */
+void md5_starts( md5_context *ctx );
+
+/**
+ * \brief          MD5 process buffer
+ *
+ * \param ctx      MD5 context
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen );
+
+/**
+ * \brief          MD5 final digest
+ *
+ * \param ctx      MD5 context
+ * \param output   MD5 checksum result
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( input buffer )
+ *
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ * \param output   MD5 checksum result
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( file contents )
+ *
+ * \param path     input file name
+ * \param output   MD5 checksum result
+ *
+ * \return         0 if successful, 1 if fopen failed,
+ *                 or 2 if fread failed
+ */
+int md5_file( char *path, unsigned char output[16] );
+
+/**
+ * \brief          MD5 HMAC context setup
+ *
+ * \param ctx      HMAC context to be initialized
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ */
+void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
+
+/**
+ * \brief          MD5 HMAC process buffer
+ *
+ * \param ctx      HMAC context
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ */
+void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
+
+/**
+ * \brief          MD5 HMAC final digest
+ *
+ * \param ctx      HMAC context
+ * \param output   MD5 HMAC checksum result
+ */
+void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
+
+/**
+ * \brief          Output = HMAC-MD5( hmac key, input buffer )
+ *
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ * \param input    buffer holding the  data
+ * \param ilen     length of the input data
+ * \param output   HMAC-MD5 result
+ */
+void md5_hmac( unsigned char *key, int keylen,
+               unsigned char *input, int ilen,
+               unsigned char output[16] );
+
+/**
+ * \brief          Checkup routine
+ *
+ * \return         0 if successful, or 1 if the test failed
+ */
+int md5_self_test( int verbose );
+
+/*
+ * 32-bit little-endian load/store macros; 'b' and 'i' are evaluated four times each
+ */
+#ifndef GET_ULONG_LE
+#define GET_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (n) = ( (unsigned long) (b)[(i)    ]       )        \
+        | ( (unsigned long) (b)[(i) + 1] <<  8 )        \
+        | ( (unsigned long) (b)[(i) + 2] << 16 )        \
+        | ( (unsigned long) (b)[(i) + 3] << 24 );       \
+}
+#endif
+// PUT stores only the low four bytes of n; the unsigned char casts drop anything above bit 31.
+#ifndef PUT_ULONG_LE
+#define PUT_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (b)[(i)    ] = (unsigned char) ( (n)       );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n) >> 24 );       \
+}
+#endif
+
+/*
+ * MD5 context setup: load the four initial state words and zero the byte counters
+ */
+void md5_starts( md5_context *ctx )
+{
+    ctx->state[0] = 0x67452301;
+    ctx->state[1] = 0xEFCDAB89;
+    ctx->state[2] = 0x98BADCFE;
+    ctx->state[3] = 0x10325476;
+
+    ctx->total[1] = 0;
+    ctx->total[0] = 0;
+}
+
+static void md5_process( md5_context *ctx, unsigned char data[64] )
+{
+    unsigned long X[16], A, B, C, D;
+    // Mix one 64-byte block into ctx->state; first decode it as 16 little-endian words.
+    GET_ULONG_LE( X[ 0], data,  0 );
+    GET_ULONG_LE( X[ 1], data,  4 );
+    GET_ULONG_LE( X[ 2], data,  8 );
+    GET_ULONG_LE( X[ 3], data, 12 );
+    GET_ULONG_LE( X[ 4], data, 16 );
+    GET_ULONG_LE( X[ 5], data, 20 );
+    GET_ULONG_LE( X[ 6], data, 24 );
+    GET_ULONG_LE( X[ 7], data, 28 );
+    GET_ULONG_LE( X[ 8], data, 32 );
+    GET_ULONG_LE( X[ 9], data, 36 );
+    GET_ULONG_LE( X[10], data, 40 );
+    GET_ULONG_LE( X[11], data, 44 );
+    GET_ULONG_LE( X[12], data, 48 );
+    GET_ULONG_LE( X[13], data, 52 );
+    GET_ULONG_LE( X[14], data, 56 );
+    GET_ULONG_LE( X[15], data, 60 );
+    // S: rotate left by n within 32 bits; the mask keeps bits above 31 out of the right-shifted half.
+#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
+
+#define P(a,b,c,d,k,s,t)                                \
+{                                                       \
+    a += F(b,c,d) + X[k] + t; a = S(a,s) + b;           \
+}
+    // Work on copies; ctx->state is only updated at the very end.
+    A = ctx->state[0];
+    B = ctx->state[1];
+    C = ctx->state[2];
+    D = ctx->state[3];
+    // Steps 1-16: F selects y where x is set, z elsewhere.
+#define F(x,y,z) (z ^ (x & (y ^ z)))
+
+    P( A, B, C, D,  0,  7, 0xD76AA478 );
+    P( D, A, B, C,  1, 12, 0xE8C7B756 );
+    P( C, D, A, B,  2, 17, 0x242070DB );
+    P( B, C, D, A,  3, 22, 0xC1BDCEEE );
+    P( A, B, C, D,  4,  7, 0xF57C0FAF );
+    P( D, A, B, C,  5, 12, 0x4787C62A );
+    P( C, D, A, B,  6, 17, 0xA8304613 );
+    P( B, C, D, A,  7, 22, 0xFD469501 );
+    P( A, B, C, D,  8,  7, 0x698098D8 );
+    P( D, A, B, C,  9, 12, 0x8B44F7AF );
+    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
+    P( B, C, D, A, 11, 22, 0x895CD7BE );
+    P( A, B, C, D, 12,  7, 0x6B901122 );
+    P( D, A, B, C, 13, 12, 0xFD987193 );
+    P( C, D, A, B, 14, 17, 0xA679438E );
+    P( B, C, D, A, 15, 22, 0x49B40821 );
+
+#undef F
+    // Steps 17-32: F selects x where z is set, y elsewhere.
+#define F(x,y,z) (y ^ (z & (x ^ y)))
+
+    P( A, B, C, D,  1,  5, 0xF61E2562 );
+    P( D, A, B, C,  6,  9, 0xC040B340 );
+    P( C, D, A, B, 11, 14, 0x265E5A51 );
+    P( B, C, D, A,  0, 20, 0xE9B6C7AA );
+    P( A, B, C, D,  5,  5, 0xD62F105D );
+    P( D, A, B, C, 10,  9, 0x02441453 );
+    P( C, D, A, B, 15, 14, 0xD8A1E681 );
+    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );
+    P( A, B, C, D,  9,  5, 0x21E1CDE6 );
+    P( D, A, B, C, 14,  9, 0xC33707D6 );
+    P( C, D, A, B,  3, 14, 0xF4D50D87 );
+    P( B, C, D, A,  8, 20, 0x455A14ED );
+    P( A, B, C, D, 13,  5, 0xA9E3E905 );
+    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );
+    P( C, D, A, B,  7, 14, 0x676F02D9 );
+    P( B, C, D, A, 12, 20, 0x8D2A4C8A );
+
+#undef F
+    // Steps 33-48: F is the bitwise parity of its three inputs.
+#define F(x,y,z) (x ^ y ^ z)
+
+    P( A, B, C, D,  5,  4, 0xFFFA3942 );
+    P( D, A, B, C,  8, 11, 0x8771F681 );
+    P( C, D, A, B, 11, 16, 0x6D9D6122 );
+    P( B, C, D, A, 14, 23, 0xFDE5380C );
+    P( A, B, C, D,  1,  4, 0xA4BEEA44 );
+    P( D, A, B, C,  4, 11, 0x4BDECFA9 );
+    P( C, D, A, B,  7, 16, 0xF6BB4B60 );
+    P( B, C, D, A, 10, 23, 0xBEBFBC70 );
+    P( A, B, C, D, 13,  4, 0x289B7EC6 );
+    P( D, A, B, C,  0, 11, 0xEAA127FA );
+    P( C, D, A, B,  3, 16, 0xD4EF3085 );
+    P( B, C, D, A,  6, 23, 0x04881D05 );
+    P( A, B, C, D,  9,  4, 0xD9D4D039 );
+    P( D, A, B, C, 12, 11, 0xE6DB99E5 );
+    P( C, D, A, B, 15, 16, 0x1FA27CF8 );
+    P( B, C, D, A,  2, 23, 0xC4AC5665 );
+
+#undef F
+    // Steps 49-64.
+#define F(x,y,z) (y ^ (x | ~z))
+
+    P( A, B, C, D,  0,  6, 0xF4292244 );
+    P( D, A, B, C,  7, 10, 0x432AFF97 );
+    P( C, D, A, B, 14, 15, 0xAB9423A7 );
+    P( B, C, D, A,  5, 21, 0xFC93A039 );
+    P( A, B, C, D, 12,  6, 0x655B59C3 );
+    P( D, A, B, C,  3, 10, 0x8F0CCC92 );
+    P( C, D, A, B, 10, 15, 0xFFEFF47D );
+    P( B, C, D, A,  1, 21, 0x85845DD1 );
+    P( A, B, C, D,  8,  6, 0x6FA87E4F );
+    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
+    P( C, D, A, B,  6, 15, 0xA3014314 );
+    P( B, C, D, A, 13, 21, 0x4E0811A1 );
+    P( A, B, C, D,  4,  6, 0xF7537E82 );
+    P( D, A, B, C, 11, 10, 0xBD3AF235 );
+    P( C, D, A, B,  2, 15, 0x2AD7D2BB );
+    P( B, C, D, A,  9, 21, 0xEB86D391 );
+
+#undef F
+    // Add the mixed values back into the state. S and P stay defined after this function.
+    ctx->state[0] += A;
+    ctx->state[1] += B;
+    ctx->state[2] += C;
+    ctx->state[3] += D;
+}
+
+/*
+ * MD5 process buffer: absorb ilen bytes, hashing each 64-byte block as it completes
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen )
+{
+    int space;
+    unsigned long used;
+
+    if( ilen <= 0 )
+        return;
+
+    used  = ctx->total[0] & 0x3F;
+    space = 64 - used;
+
+    ctx->total[0] += ilen;
+    ctx->total[0] &= 0xFFFFFFFF;
+
+    if( ctx->total[0] < (unsigned long) ilen )
+        ctx->total[1]++;
+
+    if( used && ilen >= space )
+    {
+        // top up the partially filled buffer, then hash it
+        memcpy( ctx->buffer + used, input, space );
+        md5_process( ctx, ctx->buffer );
+        input += space;
+        ilen  -= space;
+        used = 0;
+    }
+
+    while( ilen >= 64 )
+    {
+        md5_process( ctx, input );
+        input += 64;
+        ilen  -= 64;
+    }
+
+    if( ilen > 0 )
+    {
+        // leftover bytes wait in the buffer for the next call
+        memcpy( ctx->buffer + used, input, ilen );
+    }
+}
+
+static const unsigned char md5_padding[64] = // 0x80 then 63 zero bytes; md5_finish() feeds a prefix of it
+{
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * MD5 final digest: pad, append the 64-bit bit count, write the state little-endian
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] )
+{
+    unsigned char lengthLE[8];
+    unsigned long bitsLo = ( ctx->total[0] <<  3 );
+    unsigned long bitsHi = ( ctx->total[0] >> 29 )
+                         | ( ctx->total[1] <<  3 );
+
+    // byte count -> bit count, captured before the padding below advances total[]
+    PUT_ULONG_LE( bitsLo, lengthLE, 0 );
+    PUT_ULONG_LE( bitsHi, lengthLE, 4 );
+
+    // pad so that exactly 8 bytes remain free in the last 64-byte block
+    unsigned long tail   = ctx->total[0] & 0x3F;
+    unsigned long padLen = ( tail < 56 ) ? ( 56 - tail ) : ( 120 - tail );
+
+    md5_update( ctx, (unsigned char *) md5_padding, padLen );
+    md5_update( ctx, lengthLE, 8 );
+
+    // four state words, four bytes each
+    for( int w = 0; w < 4; w++ )
+    {
+        PUT_ULONG_LE( ctx->state[w], output, 4 * w );
+    }
+}
+
+/*
+ * output = MD5( input buffer ), one-shot wrapper; the context is zeroed before returning
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] )
+{
+    md5_context scratch;
+
+    md5_starts( &scratch );
+    md5_update( &scratch, input, ilen );
+    md5_finish( &scratch, output );
+
+    memset( &scratch, 0, sizeof scratch );
+}
+
+unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )
+{
+  // The seed is ignored; md5() writes its 16 output bytes over this array.
+  unsigned int digest[4];
+
+  md5((unsigned char *)input,len,(unsigned char *)digest);
+
+  // Only the first word is returned; the other three are discarded.
+  return digest[0];
+}
+
+void md5_32            ( const void * key, int len, uint32_t /*seed*/, void * out )
+{
+  // The seed is ignored; the first word of the MD5 output is stored through 'out'.
+  unsigned int digest[4];
+  md5((unsigned char*)key,len,(unsigned char*)digest);
+
+  uint32_t * dst = (uint32_t*)out;
+  *dst = digest[0];
 }
\ No newline at end of file
diff --git a/pstdint.h b/pstdint.h
index 3320264..43dce62 100644
--- a/pstdint.h
+++ b/pstdint.h
@@ -1,799 +1,799 @@
-/*  A portable stdint.h
- ****************************************************************************
- *  BSD License:
- ****************************************************************************
- *
- *  Copyright (c) 2005-2007 Paul Hsieh
- *  All rights reserved.
- *  
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *  
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. The name of the author may not be used to endorse or promote products
- *     derived from this software without specific prior written permission.
- *  
- *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- ****************************************************************************
- *
- *  Version 0.1.11
- *
- *  The ANSI C standard committee, for the C99 standard, specified the
- *  inclusion of a new standard include file called stdint.h.  This is
- *  a very useful and long desired include file which contains several
- *  very precise definitions for integer scalar types that is
- *  critically important for making portable several classes of
- *  applications including cryptography, hashing, variable length
- *  integer libraries and so on.  But for most developers its likely
- *  useful just for programming sanity.
- *
- *  The problem is that most compiler vendors have decided not to
- *  implement the C99 standard, and the next C++ language standard
- *  (which has a lot more mindshare these days) will be a long time in
- *  coming and its unknown whether or not it will include stdint.h or
- *  how much adoption it will have.  Either way, it will be a long time
- *  before all compilers come with a stdint.h and it also does nothing
- *  for the extremely large number of compilers available today which
- *  do not include this file, or anything comparable to it.
- *
- *  So that's what this file is all about.  Its an attempt to build a
- *  single universal include file that works on as many platforms as
- *  possible to deliver what stdint.h is supposed to.  A few things
- *  that should be noted about this file:
- *
- *    1) It is not guaranteed to be portable and/or present an identical
- *       interface on all platforms.  The extreme variability of the
- *       ANSI C standard makes this an impossibility right from the
- *       very get go. Its really only meant to be useful for the vast
- *       majority of platforms that possess the capability of
- *       implementing usefully and precisely defined, standard sized
- *       integer scalars.  Systems which are not intrinsically 2s
- *       complement may produce invalid constants.
- *
- *    2) There is an unavoidable use of non-reserved symbols.
- *
- *    3) Other standard include files are invoked.
- *
- *    4) This file may come in conflict with future platforms that do
- *       include stdint.h.  The hope is that one or the other can be
- *       used with no real difference.
- *
- *    5) In the current verison, if your platform can't represent
- *       int32_t, int16_t and int8_t, it just dumps out with a compiler
- *       error.
- *
- *    6) 64 bit integers may or may not be defined.  Test for their
- *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
- *       Note that this is different from the C99 specification which
- *       requires the existence of 64 bit support in the compiler.  If
- *       this is not defined for your platform, yet it is capable of
- *       dealing with 64 bits then it is because this file has not yet
- *       been extended to cover all of your system's capabilities.
- *
- *    7) (u)intptr_t may or may not be defined.  Test for its presence
- *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
- *       for your platform, then it is because this file has not yet
- *       been extended to cover all of your system's capabilities, not
- *       because its optional.
- *
- *    8) The following might not been defined even if your platform is
- *       capable of defining it:
- *
- *       WCHAR_MIN
- *       WCHAR_MAX
- *       (u)int64_t
- *       PTRDIFF_MIN
- *       PTRDIFF_MAX
- *       (u)intptr_t
- *
- *    9) The following have not been defined:
- *
- *       WINT_MIN
- *       WINT_MAX
- *
- *   10) The criteria for defining (u)int_least(*)_t isn't clear,
- *       except for systems which don't have a type that precisely
- *       defined 8, 16, or 32 bit types (which this include file does
- *       not support anyways). Default definitions have been given.
- *
- *   11) The criteria for defining (u)int_fast(*)_t isn't something I
- *       would trust to any particular compiler vendor or the ANSI C
- *       committee.  It is well known that "compatible systems" are
- *       commonly created that have very different performance
- *       characteristics from the systems they are compatible with,
- *       especially those whose vendors make both the compiler and the
- *       system.  Default definitions have been given, but its strongly
- *       recommended that users never use these definitions for any
- *       reason (they do *NOT* deliver any serious guarantee of
- *       improved performance -- not in this file, nor any vendor's
- *       stdint.h).
- *
- *   12) The following macros:
- *
- *       PRINTF_INTMAX_MODIFIER
- *       PRINTF_INT64_MODIFIER
- *       PRINTF_INT32_MODIFIER
- *       PRINTF_INT16_MODIFIER
- *       PRINTF_LEAST64_MODIFIER
- *       PRINTF_LEAST32_MODIFIER
- *       PRINTF_LEAST16_MODIFIER
- *       PRINTF_INTPTR_MODIFIER
- *
- *       are strings which have been defined as the modifiers required
- *       for the "d", "u" and "x" printf formats to correctly output
- *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
- *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
- *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
- *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
- *       defined if INT64_MAX is not defined.  These are an extension
- *       beyond what C99 specifies must be in stdint.h.
- *
- *       In addition, the following macros are defined:
- *
- *       PRINTF_INTMAX_HEX_WIDTH
- *       PRINTF_INT64_HEX_WIDTH
- *       PRINTF_INT32_HEX_WIDTH
- *       PRINTF_INT16_HEX_WIDTH
- *       PRINTF_INT8_HEX_WIDTH
- *       PRINTF_INTMAX_DEC_WIDTH
- *       PRINTF_INT64_DEC_WIDTH
- *       PRINTF_INT32_DEC_WIDTH
- *       PRINTF_INT16_DEC_WIDTH
- *       PRINTF_INT8_DEC_WIDTH
- *
- *       Which specifies the maximum number of characters required to
- *       print the number of that type in either hexadecimal or decimal.
- *       These are an extension beyond what C99 specifies must be in
- *       stdint.h.
- *
- *  Compilers tested (all with 0 warnings at their highest respective
- *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
- *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
- *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
- *
- *  This file should be considered a work in progress.  Suggestions for
- *  improvements, especially those which increase coverage are strongly
- *  encouraged.
- *
- *  Acknowledgements
- *
- *  The following people have made significant contributions to the
- *  development and testing of this file:
- *
- *  Chris Howie
- *  John Steele Scott
- *  Dave Thorup
- *
- */
-
-#include <stddef.h>
-#include <limits.h>
-#include <signal.h>
-
-/*
- *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
- *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.
- */
-
-#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED)
-#include <stdint.h>
-#define _PSTDINT_H_INCLUDED
-# ifndef PRINTF_INT64_MODIFIER
-#  define PRINTF_INT64_MODIFIER "ll"
-# endif
-# ifndef PRINTF_INT32_MODIFIER
-#  define PRINTF_INT32_MODIFIER "l"
-# endif
-# ifndef PRINTF_INT16_MODIFIER
-#  define PRINTF_INT16_MODIFIER "h"
-# endif
-# ifndef PRINTF_INTMAX_MODIFIER
-#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
-# endif
-# ifndef PRINTF_INT64_HEX_WIDTH
-#  define PRINTF_INT64_HEX_WIDTH "16"
-# endif
-# ifndef PRINTF_INT32_HEX_WIDTH
-#  define PRINTF_INT32_HEX_WIDTH "8"
-# endif
-# ifndef PRINTF_INT16_HEX_WIDTH
-#  define PRINTF_INT16_HEX_WIDTH "4"
-# endif
-# ifndef PRINTF_INT8_HEX_WIDTH
-#  define PRINTF_INT8_HEX_WIDTH "2"
-# endif
-# ifndef PRINTF_INT64_DEC_WIDTH
-#  define PRINTF_INT64_DEC_WIDTH "20"
-# endif
-# ifndef PRINTF_INT32_DEC_WIDTH
-#  define PRINTF_INT32_DEC_WIDTH "10"
-# endif
-# ifndef PRINTF_INT16_DEC_WIDTH
-#  define PRINTF_INT16_DEC_WIDTH "5"
-# endif
-# ifndef PRINTF_INT8_DEC_WIDTH
-#  define PRINTF_INT8_DEC_WIDTH "3"
-# endif
-# ifndef PRINTF_INTMAX_HEX_WIDTH
-#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
-# endif
-# ifndef PRINTF_INTMAX_DEC_WIDTH
-#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
-# endif
-
-/*
- *  Something really weird is going on with Open Watcom.  Just pull some of
- *  these duplicated definitions from Open Watcom's stdint.h file for now.
- */
-
-# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
-#  if !defined (INT64_C)
-#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
-#  endif
-#  if !defined (UINT64_C)
-#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
-#  endif
-#  if !defined (INT32_C)
-#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
-#  endif
-#  if !defined (UINT32_C)
-#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
-#  endif
-#  if !defined (INT16_C)
-#   define INT16_C(x)   (x)
-#  endif
-#  if !defined (UINT16_C)
-#   define UINT16_C(x)  (x)
-#  endif
-#  if !defined (INT8_C)
-#   define INT8_C(x)   (x)
-#  endif
-#  if !defined (UINT8_C)
-#   define UINT8_C(x)  (x)
-#  endif
-#  if !defined (UINT64_MAX)
-#   define UINT64_MAX  18446744073709551615ULL
-#  endif
-#  if !defined (INT64_MAX)
-#   define INT64_MAX  9223372036854775807LL
-#  endif
-#  if !defined (UINT32_MAX)
-#   define UINT32_MAX  4294967295UL
-#  endif
-#  if !defined (INT32_MAX)
-#   define INT32_MAX  2147483647L
-#  endif
-#  if !defined (INTMAX_MAX)
-#   define INTMAX_MAX INT64_MAX
-#  endif
-#  if !defined (INTMAX_MIN)
-#   define INTMAX_MIN INT64_MIN
-#  endif
-# endif
-#endif
-
-#ifndef _PSTDINT_H_INCLUDED
-#define _PSTDINT_H_INCLUDED
-
-#ifndef SIZE_MAX
-# define SIZE_MAX (~(size_t)0)
-#endif
-
-/*
- *  Deduce the type assignments from limits.h under the assumption that
- *  integer sizes in bits are powers of 2, and follow the ANSI
- *  definitions.
- */
-
-#ifndef UINT8_MAX
-# define UINT8_MAX 0xff
-#endif
-#ifndef uint8_t
-# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
-    typedef unsigned char uint8_t;
-#   define UINT8_C(v) ((uint8_t) v)
-# else
-#   error "Platform not supported"
-# endif
-#endif
-
-#ifndef INT8_MAX
-# define INT8_MAX 0x7f
-#endif
-#ifndef INT8_MIN
-# define INT8_MIN INT8_C(0x80)
-#endif
-#ifndef int8_t
-# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
-    typedef signed char int8_t;
-#   define INT8_C(v) ((int8_t) v)
-# else
-#   error "Platform not supported"
-# endif
-#endif
-
-#ifndef UINT16_MAX
-# define UINT16_MAX 0xffff
-#endif
-#ifndef uint16_t
-#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
-  typedef unsigned int uint16_t;
-# ifndef PRINTF_INT16_MODIFIER
-#  define PRINTF_INT16_MODIFIER ""
-# endif
-# define UINT16_C(v) ((uint16_t) (v))
-#elif (USHRT_MAX == UINT16_MAX)
-  typedef unsigned short uint16_t;
-# define UINT16_C(v) ((uint16_t) (v))
-# ifndef PRINTF_INT16_MODIFIER
-#  define PRINTF_INT16_MODIFIER "h"
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-#ifndef INT16_MAX
-# define INT16_MAX 0x7fff
-#endif
-#ifndef INT16_MIN
-# define INT16_MIN INT16_C(0x8000)
-#endif
-#ifndef int16_t
-#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
-  typedef signed int int16_t;
-# define INT16_C(v) ((int16_t) (v))
-# ifndef PRINTF_INT16_MODIFIER
-#  define PRINTF_INT16_MODIFIER ""
-# endif
-#elif (SHRT_MAX == INT16_MAX)
-  typedef signed short int16_t;
-# define INT16_C(v) ((int16_t) (v))
-# ifndef PRINTF_INT16_MODIFIER
-#  define PRINTF_INT16_MODIFIER "h"
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-#ifndef UINT32_MAX
-# define UINT32_MAX (0xffffffffUL)
-#endif
-#ifndef uint32_t
-#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
-  typedef unsigned long uint32_t;
-# define UINT32_C(v) v ## UL
-# ifndef PRINTF_INT32_MODIFIER
-#  define PRINTF_INT32_MODIFIER "l"
-# endif
-#elif (UINT_MAX == UINT32_MAX)
-  typedef unsigned int uint32_t;
-# ifndef PRINTF_INT32_MODIFIER
-#  define PRINTF_INT32_MODIFIER ""
-# endif
-# define UINT32_C(v) v ## U
-#elif (USHRT_MAX == UINT32_MAX)
-  typedef unsigned short uint32_t;
-# define UINT32_C(v) ((unsigned short) (v))
-# ifndef PRINTF_INT32_MODIFIER
-#  define PRINTF_INT32_MODIFIER ""
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-#ifndef INT32_MAX
-# define INT32_MAX (0x7fffffffL)
-#endif
-#ifndef INT32_MIN
-# define INT32_MIN INT32_C(0x80000000)
-#endif
-#ifndef int32_t
-#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
-  typedef signed long int32_t;
-# define INT32_C(v) v ## L
-# ifndef PRINTF_INT32_MODIFIER
-#  define PRINTF_INT32_MODIFIER "l"
-# endif
-#elif (INT_MAX == INT32_MAX)
-  typedef signed int int32_t;
-# define INT32_C(v) v
-# ifndef PRINTF_INT32_MODIFIER
-#  define PRINTF_INT32_MODIFIER ""
-# endif
-#elif (SHRT_MAX == INT32_MAX)
-  typedef signed short int32_t;
-# define INT32_C(v) ((short) (v))
-# ifndef PRINTF_INT32_MODIFIER
-#  define PRINTF_INT32_MODIFIER ""
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-/*
- *  The macro stdint_int64_defined is temporarily used to record
- *  whether or not 64 integer support is available.  It must be
- *  defined for any 64 integer extensions for new platforms that are
- *  added.
- */
-
-#undef stdint_int64_defined
-#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
-# if (__STDC__ && __STDC_VERSION >= 199901L) || defined (S_SPLINT_S)
-#  define stdint_int64_defined
-   typedef long long int64_t;
-   typedef unsigned long long uint64_t;
-#  define UINT64_C(v) v ## ULL
-#  define  INT64_C(v) v ## LL
-#  ifndef PRINTF_INT64_MODIFIER
-#   define PRINTF_INT64_MODIFIER "ll"
-#  endif
-# endif
-#endif
-
-#if !defined (stdint_int64_defined)
-# if defined(__GNUC__)
-#  define stdint_int64_defined
-   __extension__ typedef long long int64_t;
-   __extension__ typedef unsigned long long uint64_t;
-#  define UINT64_C(v) v ## ULL
-#  define  INT64_C(v) v ## LL
-#  ifndef PRINTF_INT64_MODIFIER
-#   define PRINTF_INT64_MODIFIER "ll"
-#  endif
-# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
-#  define stdint_int64_defined
-   typedef long long int64_t;
-   typedef unsigned long long uint64_t;
-#  define UINT64_C(v) v ## ULL
-#  define  INT64_C(v) v ## LL
-#  ifndef PRINTF_INT64_MODIFIER
-#   define PRINTF_INT64_MODIFIER "ll"
-#  endif
-# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
-#  define stdint_int64_defined
-   typedef __int64 int64_t;
-   typedef unsigned __int64 uint64_t;
-#  define UINT64_C(v) v ## UI64
-#  define  INT64_C(v) v ## I64
-#  ifndef PRINTF_INT64_MODIFIER
-#   define PRINTF_INT64_MODIFIER "I64"
-#  endif
-# endif
-#endif
-
-#if !defined (LONG_LONG_MAX) && defined (INT64_C)
-# define LONG_LONG_MAX INT64_C (9223372036854775807)
-#endif
-#ifndef ULONG_LONG_MAX
-# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
-#endif
-
-#if !defined (INT64_MAX) && defined (INT64_C)
-# define INT64_MAX INT64_C (9223372036854775807)
-#endif
-#if !defined (INT64_MIN) && defined (INT64_C)
-# define INT64_MIN INT64_C (-9223372036854775808)
-#endif
-#if !defined (UINT64_MAX) && defined (INT64_C)
-# define UINT64_MAX UINT64_C (18446744073709551615)
-#endif
-
-/*
- *  Width of hexadecimal for number field.
- */
-
-#ifndef PRINTF_INT64_HEX_WIDTH
-# define PRINTF_INT64_HEX_WIDTH "16"
-#endif
-#ifndef PRINTF_INT32_HEX_WIDTH
-# define PRINTF_INT32_HEX_WIDTH "8"
-#endif
-#ifndef PRINTF_INT16_HEX_WIDTH
-# define PRINTF_INT16_HEX_WIDTH "4"
-#endif
-#ifndef PRINTF_INT8_HEX_WIDTH
-# define PRINTF_INT8_HEX_WIDTH "2"
-#endif
-
-#ifndef PRINTF_INT64_DEC_WIDTH
-# define PRINTF_INT64_DEC_WIDTH "20"
-#endif
-#ifndef PRINTF_INT32_DEC_WIDTH
-# define PRINTF_INT32_DEC_WIDTH "10"
-#endif
-#ifndef PRINTF_INT16_DEC_WIDTH
-# define PRINTF_INT16_DEC_WIDTH "5"
-#endif
-#ifndef PRINTF_INT8_DEC_WIDTH
-# define PRINTF_INT8_DEC_WIDTH "3"
-#endif
-
-/*
- *  Ok, lets not worry about 128 bit integers for now.  Moore's law says
- *  we don't need to worry about that until about 2040 at which point
- *  we'll have bigger things to worry about.
- */
-
-#ifdef stdint_int64_defined
-  typedef int64_t intmax_t;
-  typedef uint64_t uintmax_t;
-# define  INTMAX_MAX   INT64_MAX
-# define  INTMAX_MIN   INT64_MIN
-# define UINTMAX_MAX  UINT64_MAX
-# define UINTMAX_C(v) UINT64_C(v)
-# define  INTMAX_C(v)  INT64_C(v)
-# ifndef PRINTF_INTMAX_MODIFIER
-#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
-# endif
-# ifndef PRINTF_INTMAX_HEX_WIDTH
-#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
-# endif
-# ifndef PRINTF_INTMAX_DEC_WIDTH
-#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
-# endif
-#else
-  typedef int32_t intmax_t;
-  typedef uint32_t uintmax_t;
-# define  INTMAX_MAX   INT32_MAX
-# define UINTMAX_MAX  UINT32_MAX
-# define UINTMAX_C(v) UINT32_C(v)
-# define  INTMAX_C(v)  INT32_C(v)
-# ifndef PRINTF_INTMAX_MODIFIER
-#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
-# endif
-# ifndef PRINTF_INTMAX_HEX_WIDTH
-#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
-# endif
-# ifndef PRINTF_INTMAX_DEC_WIDTH
-#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
-# endif
-#endif
-
-/*
- *  Because this file currently only supports platforms which have
- *  precise powers of 2 as bit sizes for the default integers, the
- *  least definitions are all trivial.  Its possible that a future
- *  version of this file could have different definitions.
- */
-
-#ifndef stdint_least_defined
-  typedef   int8_t   int_least8_t;
-  typedef  uint8_t  uint_least8_t;
-  typedef  int16_t  int_least16_t;
-  typedef uint16_t uint_least16_t;
-  typedef  int32_t  int_least32_t;
-  typedef uint32_t uint_least32_t;
-# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
-# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
-# define  UINT_LEAST8_MAX  UINT8_MAX
-# define   INT_LEAST8_MAX   INT8_MAX
-# define UINT_LEAST16_MAX UINT16_MAX
-# define  INT_LEAST16_MAX  INT16_MAX
-# define UINT_LEAST32_MAX UINT32_MAX
-# define  INT_LEAST32_MAX  INT32_MAX
-# define   INT_LEAST8_MIN   INT8_MIN
-# define  INT_LEAST16_MIN  INT16_MIN
-# define  INT_LEAST32_MIN  INT32_MIN
-# ifdef stdint_int64_defined
-    typedef  int64_t  int_least64_t;
-    typedef uint64_t uint_least64_t;
-#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
-#   define UINT_LEAST64_MAX UINT64_MAX
-#   define  INT_LEAST64_MAX  INT64_MAX
-#   define  INT_LEAST64_MIN  INT64_MIN
-# endif
-#endif
-#undef stdint_least_defined
-
-/*
- *  The ANSI C committee pretending to know or specify anything about
- *  performance is the epitome of misguided arrogance.  The mandate of
- *  this file is to *ONLY* ever support that absolute minimum
- *  definition of the fast integer types, for compatibility purposes.
- *  No extensions, and no attempt to suggest what may or may not be a
- *  faster integer type will ever be made in this file.  Developers are
- *  warned to stay away from these types when using this or any other
- *  stdint.h.
- */
-
-typedef   int_least8_t   int_fast8_t;
-typedef  uint_least8_t  uint_fast8_t;
-typedef  int_least16_t  int_fast16_t;
-typedef uint_least16_t uint_fast16_t;
-typedef  int_least32_t  int_fast32_t;
-typedef uint_least32_t uint_fast32_t;
-#define  UINT_FAST8_MAX  UINT_LEAST8_MAX
-#define   INT_FAST8_MAX   INT_LEAST8_MAX
-#define UINT_FAST16_MAX UINT_LEAST16_MAX
-#define  INT_FAST16_MAX  INT_LEAST16_MAX
-#define UINT_FAST32_MAX UINT_LEAST32_MAX
-#define  INT_FAST32_MAX  INT_LEAST32_MAX
-#define   INT_FAST8_MIN   INT_LEAST8_MIN
-#define  INT_FAST16_MIN  INT_LEAST16_MIN
-#define  INT_FAST32_MIN  INT_LEAST32_MIN
-#ifdef stdint_int64_defined
-  typedef  int_least64_t  int_fast64_t;
-  typedef uint_least64_t uint_fast64_t;
-# define UINT_FAST64_MAX UINT_LEAST64_MAX
-# define  INT_FAST64_MAX  INT_LEAST64_MAX
-# define  INT_FAST64_MIN  INT_LEAST64_MIN
-#endif
-
-#undef stdint_int64_defined
-
-/*
- *  Whatever piecemeal, per compiler thing we can do about the wchar_t
- *  type limits.
- */
-
-#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
-# include <wchar.h>
-# ifndef WCHAR_MIN
-#  define WCHAR_MIN 0
-# endif
-# ifndef WCHAR_MAX
-#  define WCHAR_MAX ((wchar_t)-1)
-# endif
-#endif
-
-/*
- *  Whatever piecemeal, per compiler/platform thing we can do about the
- *  (u)intptr_t types and limits.
- */
-
-#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
-# define STDINT_H_UINTPTR_T_DEFINED
-#endif
-
-#ifndef STDINT_H_UINTPTR_T_DEFINED
-# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
-#  define stdint_intptr_bits 64
-# elif defined (__WATCOMC__) || defined (__TURBOC__)
-#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
-#    define stdint_intptr_bits 16
-#  else
-#    define stdint_intptr_bits 32
-#  endif
-# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
-#  define stdint_intptr_bits 32
-# elif defined (__INTEL_COMPILER)
-/* TODO -- what will Intel do about x86-64? */
-# endif
-
-# ifdef stdint_intptr_bits
-#  define stdint_intptr_glue3_i(a,b,c)  a##b##c
-#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c)
-#  ifndef PRINTF_INTPTR_MODIFIER
-#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
-#  endif
-#  ifndef PTRDIFF_MAX
-#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
-#  endif
-#  ifndef PTRDIFF_MIN
-#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
-#  endif
-#  ifndef UINTPTR_MAX
-#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
-#  endif
-#  ifndef INTPTR_MAX
-#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
-#  endif
-#  ifndef INTPTR_MIN
-#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
-#  endif
-#  ifndef INTPTR_C
-#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
-#  endif
-#  ifndef UINTPTR_C
-#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
-#  endif
-  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
-  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
-# else
-/* TODO -- This following is likely wrong for some platforms, and does
-   nothing for the definition of uintptr_t. */
-  typedef ptrdiff_t intptr_t;
-# endif
-# define STDINT_H_UINTPTR_T_DEFINED
-#endif
-
-/*
- *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
- */
-
-#ifndef SIG_ATOMIC_MAX
-# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
-#endif
-
-#endif
-
-#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
-
-/* 
- *  Please compile with the maximum warning settings to make sure macros are not
- *  defined more than once.
- */
- 
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
- 
-#define glue3_aux(x,y,z) x ## y ## z
-#define glue3(x,y,z) glue3_aux(x,y,z)
-
-#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0);
-#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0);
-
-#define DECL(us,bits) glue3(DECL,us,) (bits)
-
-#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)
- 
-int main () {
-  DECL(I,8)
-  DECL(U,8)
-  DECL(I,16)
-  DECL(U,16)
-  DECL(I,32)
-  DECL(U,32)
-#ifdef INT64_MAX
-  DECL(I,64)
-  DECL(U,64)
-#endif
-  intmax_t imax = INTMAX_C(0);
-  uintmax_t umax = UINTMAX_C(0);
-  char str0[256], str1[256];
-
-  sprintf (str0, "%d %x\n", 0, ~0);
-  
-  sprintf (str1, "%d %x\n",  i8, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
-  sprintf (str1, "%u %x\n",  u8, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
-  sprintf (str1, "%d %x\n",  i16, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
-  sprintf (str1, "%u %x\n",  u16, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);	
-  sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
-  sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
-#ifdef INT64_MAX	
-  sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
-#endif
-  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
-  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
-  if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);	
-  
-  TESTUMAX(8);
-  TESTUMAX(16);
-  TESTUMAX(32);
-#ifdef INT64_MAX
-  TESTUMAX(64);
-#endif
-
-  return EXIT_SUCCESS;
-}
-
-#endif
+/*  A portable stdint.h
+ ****************************************************************************
+ *  BSD License:
+ ****************************************************************************
+ *
+ *  Copyright (c) 2005-2007 Paul Hsieh
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************
+ *
+ *  Version 0.1.11
+ *
+ *  The ANSI C standard committee, for the C99 standard, specified the
+ *  inclusion of a new standard include file called stdint.h.  This is
+ *  a very useful and long desired include file which contains several
+ *  very precise definitions for integer scalar types that is
+ *  critically important for making portable several classes of
+ *  applications including cryptography, hashing, variable length
+ *  integer libraries and so on.  But for most developers its likely
+ *  useful just for programming sanity.
+ *
+ *  The problem is that most compiler vendors have decided not to
+ *  implement the C99 standard, and the next C++ language standard
+ *  (which has a lot more mindshare these days) will be a long time in
+ *  coming and its unknown whether or not it will include stdint.h or
+ *  how much adoption it will have.  Either way, it will be a long time
+ *  before all compilers come with a stdint.h and it also does nothing
+ *  for the extremely large number of compilers available today which
+ *  do not include this file, or anything comparable to it.
+ *
+ *  So that's what this file is all about.  Its an attempt to build a
+ *  single universal include file that works on as many platforms as
+ *  possible to deliver what stdint.h is supposed to.  A few things
+ *  that should be noted about this file:
+ *
+ *    1) It is not guaranteed to be portable and/or present an identical
+ *       interface on all platforms.  The extreme variability of the
+ *       ANSI C standard makes this an impossibility right from the
+ *       very get go. Its really only meant to be useful for the vast
+ *       majority of platforms that possess the capability of
+ *       implementing usefully and precisely defined, standard sized
+ *       integer scalars.  Systems which are not intrinsically 2s
+ *       complement may produce invalid constants.
+ *
+ *    2) There is an unavoidable use of non-reserved symbols.
+ *
+ *    3) Other standard include files are invoked.
+ *
+ *    4) This file may come in conflict with future platforms that do
+ *       include stdint.h.  The hope is that one or the other can be
+ *       used with no real difference.
+ *
+ *    5) In the current version, if your platform can't represent
+ *       int32_t, int16_t and int8_t, it just dumps out with a compiler
+ *       error.
+ *
+ *    6) 64 bit integers may or may not be defined.  Test for their
+ *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
+ *       Note that this is different from the C99 specification which
+ *       requires the existence of 64 bit support in the compiler.  If
+ *       this is not defined for your platform, yet it is capable of
+ *       dealing with 64 bits then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities.
+ *
+ *    7) (u)intptr_t may or may not be defined.  Test for its presence
+ *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
+ *       for your platform, then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities, not
+ *       because its optional.
+ *
+ *    8) The following might not be defined even if your platform is
+ *       capable of defining it:
+ *
+ *       WCHAR_MIN
+ *       WCHAR_MAX
+ *       (u)int64_t
+ *       PTRDIFF_MIN
+ *       PTRDIFF_MAX
+ *       (u)intptr_t
+ *
+ *    9) The following have not been defined:
+ *
+ *       WINT_MIN
+ *       WINT_MAX
+ *
+ *   10) The criteria for defining (u)int_least(*)_t isn't clear,
+ *       except for systems which don't have a type that precisely
+ *       defined 8, 16, or 32 bit types (which this include file does
+ *       not support anyways). Default definitions have been given.
+ *
+ *   11) The criteria for defining (u)int_fast(*)_t isn't something I
+ *       would trust to any particular compiler vendor or the ANSI C
+ *       committee.  It is well known that "compatible systems" are
+ *       commonly created that have very different performance
+ *       characteristics from the systems they are compatible with,
+ *       especially those whose vendors make both the compiler and the
+ *       system.  Default definitions have been given, but its strongly
+ *       recommended that users never use these definitions for any
+ *       reason (they do *NOT* deliver any serious guarantee of
+ *       improved performance -- not in this file, nor any vendor's
+ *       stdint.h).
+ *
+ *   12) The following macros:
+ *
+ *       PRINTF_INTMAX_MODIFIER
+ *       PRINTF_INT64_MODIFIER
+ *       PRINTF_INT32_MODIFIER
+ *       PRINTF_INT16_MODIFIER
+ *       PRINTF_LEAST64_MODIFIER
+ *       PRINTF_LEAST32_MODIFIER
+ *       PRINTF_LEAST16_MODIFIER
+ *       PRINTF_INTPTR_MODIFIER
+ *
+ *       are strings which have been defined as the modifiers required
+ *       for the "d", "u" and "x" printf formats to correctly output
+ *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
+ *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
+ *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
+ *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
+ *       defined if INT64_MAX is not defined.  These are an extension
+ *       beyond what C99 specifies must be in stdint.h.
+ *
+ *       In addition, the following macros are defined:
+ *
+ *       PRINTF_INTMAX_HEX_WIDTH
+ *       PRINTF_INT64_HEX_WIDTH
+ *       PRINTF_INT32_HEX_WIDTH
+ *       PRINTF_INT16_HEX_WIDTH
+ *       PRINTF_INT8_HEX_WIDTH
+ *       PRINTF_INTMAX_DEC_WIDTH
+ *       PRINTF_INT64_DEC_WIDTH
+ *       PRINTF_INT32_DEC_WIDTH
+ *       PRINTF_INT16_DEC_WIDTH
+ *       PRINTF_INT8_DEC_WIDTH
+ *
+ *       These specify the maximum number of characters required to
+ *       print the number of that type in either hexadecimal or decimal.
+ *       These are an extension beyond what C99 specifies must be in
+ *       stdint.h.
+ *
+ *  Compilers tested (all with 0 warnings at their highest respective
+ *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
+ *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
+ *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
+ *
+ *  This file should be considered a work in progress.  Suggestions for
+ *  improvements, especially those which increase coverage are strongly
+ *  encouraged.
+ *
+ *  Acknowledgements
+ *
+ *  The following people have made significant contributions to the
+ *  development and testing of this file:
+ *
+ *  Chris Howie
+ *  John Steele Scott
+ *  Dave Thorup
+ *
+ */
+
+#include <stddef.h>
+#include <limits.h>
+#include <signal.h>
+
+/*
+ *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
+ *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.
+ */
+
+#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED)
+#include <stdint.h>
+#define _PSTDINT_H_INCLUDED
+# ifndef PRINTF_INT64_MODIFIER
+#  define PRINTF_INT64_MODIFIER "ll"
+# endif
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l" /* NOTE(review): presumes the system int32_t is long; mismatches where it is int -- confirm */
+# endif
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+# ifndef PRINTF_INTMAX_MODIFIER
+#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INT64_HEX_WIDTH
+#  define PRINTF_INT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_INT32_HEX_WIDTH
+#  define PRINTF_INT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_INT16_HEX_WIDTH
+#  define PRINTF_INT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_INT8_HEX_WIDTH
+#  define PRINTF_INT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_INT64_DEC_WIDTH
+#  define PRINTF_INT64_DEC_WIDTH "20"
+# endif
+# ifndef PRINTF_INT32_DEC_WIDTH
+#  define PRINTF_INT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_INT16_DEC_WIDTH
+#  define PRINTF_INT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_INT8_DEC_WIDTH
+#  define PRINTF_INT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+
+/*
+ *  Something really weird is going on with Open Watcom.  Just pull some of
+ *  these duplicated definitions from Open Watcom's stdint.h file for now.
+ */
+
+# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
+#  if !defined (INT64_C)
+#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
+#  endif
+#  if !defined (UINT64_C)
+#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
+#  endif
+#  if !defined (INT32_C)
+#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
+#  endif
+#  if !defined (UINT32_C)
+#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
+#  endif
+#  if !defined (INT16_C)
+#   define INT16_C(x)   (x)
+#  endif
+#  if !defined (UINT16_C)
+#   define UINT16_C(x)  (x)
+#  endif
+#  if !defined (INT8_C)
+#   define INT8_C(x)   (x)
+#  endif
+#  if !defined (UINT8_C)
+#   define UINT8_C(x)  (x)
+#  endif
+#  if !defined (UINT64_MAX)
+#   define UINT64_MAX  18446744073709551615ULL
+#  endif
+#  if !defined (INT64_MAX)
+#   define INT64_MAX  9223372036854775807LL
+#  endif
+#  if !defined (UINT32_MAX)
+#   define UINT32_MAX  4294967295UL
+#  endif
+#  if !defined (INT32_MAX)
+#   define INT32_MAX  2147483647L
+#  endif
+#  if !defined (INTMAX_MAX)
+#   define INTMAX_MAX INT64_MAX
+#  endif
+#  if !defined (INTMAX_MIN)
+#   define INTMAX_MIN INT64_MIN
+#  endif
+# endif
+#endif
+
+#ifndef _PSTDINT_H_INCLUDED
+#define _PSTDINT_H_INCLUDED
+
+#ifndef SIZE_MAX
+# define SIZE_MAX (~(size_t)0)
+#endif
+
+/*
+ *  Deduce the type assignments from limits.h under the assumption that
+ *  integer sizes in bits are powers of 2, and follow the ANSI
+ *  definitions.
+ */
+
+#ifndef UINT8_MAX
+# define UINT8_MAX 0xff
+#endif
+#ifndef uint8_t
+# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)   /* unsigned char tops out at 0xff */
+    typedef unsigned char uint8_t;
+#   define UINT8_C(v) ((uint8_t) (v))   /* (v) parenthesized so the cast covers the whole argument */
+# else
+#   error "Platform not supported"
+# endif
+#endif /* uint8_t */
+
+#ifndef INT8_MAX
+# define INT8_MAX 0x7f
+#endif
+#ifndef INT8_MIN
+# define INT8_MIN (-INT8_MAX - 1)   /* plain integer expression: no cast, so it also works inside #if */
+#endif
+#ifndef int8_t
+# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)   /* signed char tops out at 0x7f */
+    typedef signed char int8_t;
+#   define INT8_C(v) ((int8_t) (v))   /* (v) parenthesized so the cast covers the whole argument */
+# else
+#   error "Platform not supported"
+# endif
+#endif /* int8_t */
+
+#ifndef UINT16_MAX
+# define UINT16_MAX 0xffff
+#endif
+#ifndef uint16_t
+#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)   /* unsigned int tops out at 0xffff (or splint run) */
+  typedef unsigned int uint16_t;
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+# define UINT16_C(v) ((uint16_t) (v))
+#elif (USHRT_MAX == UINT16_MAX)   /* otherwise use unsigned short when it tops out at 0xffff */
+  typedef unsigned short uint16_t;
+# define UINT16_C(v) ((uint16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else   /* neither unsigned int nor unsigned short has the required range */
+#error "Platform not supported"
+#endif
+#endif /* uint16_t */
+
+#ifndef INT16_MAX
+# define INT16_MAX 0x7fff
+#endif
+#ifndef INT16_MIN
+# define INT16_MIN (-INT16_MAX - 1)   /* plain integer expression: no cast, so it also works inside #if */
+#endif
+#ifndef int16_t
+#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)   /* int tops out at 0x7fff (or splint run) */
+  typedef signed int int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT16_MAX)   /* otherwise use short when it tops out at 0x7fff */
+  typedef signed short int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif /* int16_t */
+
+#ifndef UINT32_MAX
+# define UINT32_MAX (0xffffffffUL)
+#endif
+#ifndef uint32_t
+#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)   /* unsigned long tops out at 0xffffffff (or splint run) */
+  typedef unsigned long uint32_t;
+# define UINT32_C(v) v ## UL
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (UINT_MAX == UINT32_MAX)   /* otherwise try unsigned int */
+  typedef unsigned int uint32_t;
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+# define UINT32_C(v) v ## U
+#elif (USHRT_MAX == UINT32_MAX)   /* last resort: unsigned short; constants are cast, not suffixed */
+  typedef unsigned short uint32_t;
+# define UINT32_C(v) ((unsigned short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif /* uint32_t */
+
+#ifndef INT32_MAX
+# define INT32_MAX (0x7fffffffL)
+#endif
+#ifndef INT32_MIN
+# define INT32_MIN (-INT32_MAX - 1)   /* 0x80000000 as a literal is unsigned (positive) when int/long is 32 bits */
+#endif
+#ifndef int32_t
+#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)   /* long tops out at 0x7fffffff (or splint run) */
+  typedef signed long int32_t;
+# define INT32_C(v) v ## L
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (INT_MAX == INT32_MAX)   /* otherwise try int */
+  typedef signed int int32_t;
+# define INT32_C(v) v
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT32_MAX)   /* last resort: short; constants are cast, not suffixed */
+  typedef signed short int32_t;
+# define INT32_C(v) ((short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif /* int32_t */
+
+/*
+ *  The macro stdint_int64_defined is temporarily used to record
+ *  whether or not 64 integer support is available.  It must be
+ *  defined for any 64 integer extensions for new platforms that are
+ *  added.
+ */
+
+#undef stdint_int64_defined
+#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
+# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)   /* C99 or later (or splint): use long long */
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# endif
+#endif /* C99 long long detection */
+
+#if !defined (stdint_int64_defined)
+# if defined(__GNUC__)
+#  define stdint_int64_defined
+   __extension__ typedef long long int64_t;
+   __extension__ typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
+#  define stdint_int64_defined
+   typedef __int64 int64_t;
+   typedef unsigned __int64 uint64_t;
+#  define UINT64_C(v) v ## UI64
+#  define  INT64_C(v) v ## I64
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "I64"
+#  endif
+# endif
+#endif
+
+#if !defined (LONG_LONG_MAX) && defined (INT64_C)
+# define LONG_LONG_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (ULONG_LONG_MAX) && defined (UINT64_C)   /* only when a 64-bit constant macro exists */
+# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
+#endif
+
+#if !defined (INT64_MAX) && defined (INT64_C)
+# define INT64_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (INT64_MIN) && defined (INT64_C)
+# define INT64_MIN (-INT64_C (9223372036854775807) - 1)   /* avoids the out-of-range literal 9223372036854775808 */
+#endif
+#if !defined (UINT64_MAX) && defined (UINT64_C)
+# define UINT64_MAX UINT64_C (18446744073709551615)
+#endif
+
+/*
+ *  Width of hexadecimal for number field.
+ */
+
+#ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16"
+#endif
+#ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+#endif
+#ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+#endif
+#ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+#endif
+
+#ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "20"
+#endif
+#ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+#endif
+
+/*
+ *  Ok, lets not worry about 128 bit integers for now.  Moore's law says
+ *  we don't need to worry about that until about 2040 at which point
+ *  we'll have bigger things to worry about.
+ */
+
+#ifdef stdint_int64_defined   /* a 64-bit type is available: make it the widest integer type */
+  typedef int64_t intmax_t;
+  typedef uint64_t uintmax_t;
+# define  INTMAX_MAX   INT64_MAX
+# define  INTMAX_MIN   INT64_MIN
+# define UINTMAX_MAX  UINT64_MAX
+# define UINTMAX_C(v) UINT64_C(v)
+# define  INTMAX_C(v)  INT64_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+#else   /* no 64-bit type: widest is 32 bits. NOTE(review): this branch defines no INTMAX_MIN */
+  typedef int32_t intmax_t;
+  typedef uint32_t uintmax_t;
+# define  INTMAX_MAX   INT32_MAX
+# define UINTMAX_MAX  UINT32_MAX
+# define UINTMAX_C(v) UINT32_C(v)
+# define  INTMAX_C(v)  INT32_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
+# endif
+#endif /* stdint_int64_defined */
+
+/*
+ *  Because this file currently only supports platforms which have
+ *  precise powers of 2 as bit sizes for the default integers, the
+ *  least definitions are all trivial.  Its possible that a future
+ *  version of this file could have different definitions.
+ */
+
+#ifndef stdint_least_defined
+  typedef   int8_t   int_least8_t;
+  typedef  uint8_t  uint_least8_t;
+  typedef  int16_t  int_least16_t;
+  typedef uint16_t uint_least16_t;
+  typedef  int32_t  int_least32_t;
+  typedef uint32_t uint_least32_t;
+# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
+# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
+# define  UINT_LEAST8_MAX  UINT8_MAX
+# define   INT_LEAST8_MAX   INT8_MAX
+# define UINT_LEAST16_MAX UINT16_MAX
+# define  INT_LEAST16_MAX  INT16_MAX
+# define UINT_LEAST32_MAX UINT32_MAX
+# define  INT_LEAST32_MAX  INT32_MAX
+# define   INT_LEAST8_MIN   INT8_MIN
+# define  INT_LEAST16_MIN  INT16_MIN
+# define  INT_LEAST32_MIN  INT32_MIN
+# ifdef stdint_int64_defined
+    typedef  int64_t  int_least64_t;
+    typedef uint64_t uint_least64_t;
+#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
+#   define UINT_LEAST64_MAX UINT64_MAX
+#   define  INT_LEAST64_MAX  INT64_MAX
+#   define  INT_LEAST64_MIN  INT64_MIN
+# endif
+#endif
+#undef stdint_least_defined
+
+/*
+ *  The ANSI C committee pretending to know or specify anything about
+ *  performance is the epitome of misguided arrogance.  The mandate of
+ *  this file is to *ONLY* ever support that absolute minimum
+ *  definition of the fast integer types, for compatibility purposes.
+ *  No extensions, and no attempt to suggest what may or may not be a
+ *  faster integer type will ever be made in this file.  Developers are
+ *  warned to stay away from these types when using this or any other
+ *  stdint.h.
+ */
+
+typedef   int_least8_t   int_fast8_t;
+typedef  uint_least8_t  uint_fast8_t;
+typedef  int_least16_t  int_fast16_t;
+typedef uint_least16_t uint_fast16_t;
+typedef  int_least32_t  int_fast32_t;
+typedef uint_least32_t uint_fast32_t;
+#define  UINT_FAST8_MAX  UINT_LEAST8_MAX
+#define   INT_FAST8_MAX   INT_LEAST8_MAX
+#define UINT_FAST16_MAX UINT_LEAST16_MAX
+#define  INT_FAST16_MAX  INT_LEAST16_MAX
+#define UINT_FAST32_MAX UINT_LEAST32_MAX
+#define  INT_FAST32_MAX  INT_LEAST32_MAX
+#define   INT_FAST8_MIN   INT_LEAST8_MIN
+#define  INT_FAST16_MIN  INT_LEAST16_MIN
+#define  INT_FAST32_MIN  INT_LEAST32_MIN
+#ifdef stdint_int64_defined
+  typedef  int_least64_t  int_fast64_t;
+  typedef uint_least64_t uint_fast64_t;
+# define UINT_FAST64_MAX UINT_LEAST64_MAX
+# define  INT_FAST64_MAX  INT_LEAST64_MAX
+# define  INT_FAST64_MIN  INT_LEAST64_MIN
+#endif
+
+#undef stdint_int64_defined
+
+/*
+ *  Whatever piecemeal, per compiler thing we can do about the wchar_t
+ *  type limits.
+ */
+
+#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
+# include <wchar.h>
+# ifndef WCHAR_MIN
+#  define WCHAR_MIN 0
+# endif
+# ifndef WCHAR_MAX
+#  define WCHAR_MAX ((wchar_t)-1)
+# endif
+#endif
+
+/*
+ *  Whatever piecemeal, per compiler/platform thing we can do about the
+ *  (u)intptr_t types and limits.
+ */
+
+#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+#ifndef STDINT_H_UINTPTR_T_DEFINED
+# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
+#  define stdint_intptr_bits 64
+# elif defined (__WATCOMC__) || defined (__TURBOC__)
+#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
+#    define stdint_intptr_bits 16
+#  else
+#    define stdint_intptr_bits 32
+#  endif
+# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
+#  define stdint_intptr_bits 32
+# elif defined (__INTEL_COMPILER)
+/* TODO -- what will Intel do about x86-64? */
+# endif
+
+# ifdef stdint_intptr_bits
+#  define stdint_intptr_glue3_i(a,b,c)  a##b##c
+#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c)
+#  ifndef PRINTF_INTPTR_MODIFIER
+#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
+#  endif
+#  ifndef PTRDIFF_MAX
+#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef PTRDIFF_MIN
+#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef UINTPTR_MAX
+#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MAX
+#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MIN
+#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef INTPTR_C
+#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
+#  endif
+#  ifndef UINTPTR_C
+#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
+#  endif
+  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
+  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
+# else
+/* TODO -- This following is likely wrong for some platforms, and does
+   nothing for the definition of uintptr_t. */
+  typedef ptrdiff_t intptr_t;
+# endif
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+/*
+ *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
+ *  Built as ((2^(N-2) - 1) << 1) + 1 so no step shifts into the sign bit.
+ */
+#ifndef SIG_ATOMIC_MAX
+# define SIG_ATOMIC_MAX ((((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-2)) - 1) << 1) + 1)
+#endif
+
+#endif
+
+#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
+
+/* 
+ *  Please compile with the maximum warning settings to make sure macros are not
+ *  defined more than once.
+ */
+ 
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+ 
+#define glue3_aux(x,y,z) x ## y ## z
+#define glue3(x,y,z) glue3_aux(x,y,z)   /* extra level so arguments are macro-expanded before pasting */
+
+#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0);
+#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0);
+
+#define DECL(us,bits) glue3(DECL,us,) (bits)   /* dispatches to DECLU or DECLI; operators stay outside ## */
+
+#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits)
+/* Self-test driver: prints a diagnostic for each check that fails, then returns EXIT_SUCCESS. */
+int main () {
+  DECL(I,8)   /* each DECL declares iN or uN, initialised to 0 through the matching *_C macro */
+  DECL(U,8)
+  DECL(I,16)
+  DECL(U,16)
+  DECL(I,32)
+  DECL(U,32)
+#ifdef INT64_MAX
+  DECL(I,64)
+  DECL(U,64)
+#endif
+  intmax_t imax = INTMAX_C(0);
+  uintmax_t umax = UINTMAX_C(0);
+  char str0[256], str1[256];
+
+  sprintf (str0, "%d %x\n", 0, ~0);   /* reference text produced from plain int arguments */
+  /* Format each zero-valued variable followed by ~0 and compare with the reference text. */
+  sprintf (str1, "%d %x\n",  i8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
+  sprintf (str1, "%d %x\n",  i16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
+#ifdef INT64_MAX
+  sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
+#endif
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);
+  /* Compare each UINTn_MAX macro against the corresponding uN variable via TESTUMAX. */
+  TESTUMAX(8);
+  TESTUMAX(16);
+  TESTUMAX(32);
+#ifdef INT64_MAX
+  TESTUMAX(64);
+#endif
+
+  return EXIT_SUCCESS;
+}
+
+#endif
diff --git a/sha1.cpp b/sha1.cpp
index 9578438..0e23c31 100644
--- a/sha1.cpp
+++ b/sha1.cpp
@@ -1,325 +1,325 @@
-/*
-SHA-1 in C
-By Steve Reid <sreid@sea-to-sky.net>
-100% Public Domain
-
------------------
-Modified 7/98
-By James H. Brown <jbrown@burgoyne.com>
-Still 100% Public Domain
-
-Corrected a problem which generated improper hash values on 16 bit machines
-Routine SHA1Update changed from
-  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
-len)
-to
-  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
-long len)
-
-The 'len' parameter was declared an int which works fine on 32 bit machines.
-However, on 16 bit machines an int is too small for the shifts being done
-against
-it.  This caused the hash function to generate incorrect values if len was
-greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().
-
-Since the file IO in main() reads 16K at a time, any file 8K or larger would
-be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million
-"a"s).
-
-I also changed the declaration of variables i & j in SHA1Update to
-unsigned long from unsigned int for the same reason.
-
-These changes should make no difference to any 32 bit implementations since
-an
-int and a long are the same size in those environments.
-
---
-I also corrected a few compiler warnings generated by Borland C.
-1. Added #include <process.h> for exit() prototype
-2. Removed unused variable 'j' in SHA1Final
-3. Changed exit(0) to return(0) at end of main.
-
-ALL changes I made can be located by searching for comments containing 'JHB'
------------------
-Modified 8/98
-By Steve Reid <sreid@sea-to-sky.net>
-Still 100% public domain
-
-1- Removed #include <process.h> and used return() instead of exit()
-2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)
-3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net
-
------------------
-Modified 4/01
-By Saul Kravitz <Saul.Kravitz@celera.com>
-Still 100% PD
-Modified to run on Compaq Alpha hardware.
-
------------------
-Modified 07/2002
-By Ralph Giles <giles@ghostscript.com>
-Still 100% public domain
-modified for use with stdint types, autoconf
-code cleanup, removed attribution comments
-switched SHA1Final() argument order for consistency
-use SHA1_ prefix for public api
-move public api to sha1.h
-*/
-
-/*
-Test Vectors (from FIPS PUB 180-1)
-"abc"
-  A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
-"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
-  84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
-A million repetitions of "a"
-  34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
-*/
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "sha1.h"
-
-#if defined(_MSC_VER)
-#pragma warning(disable : 4267)
-#pragma warning(disable : 4996)
-#pragma warning(disable : 4100)
-#endif
-
-void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
-
-#define rol ROTL32
-
-/* blk0() and blk() perform the initial expand. */
-/* I got the idea of expanding during the round function from SSLeay */
-/* FIXME: can we do this in an endian-proof way? */
-
-#ifdef WORDS_BIGENDIAN
-#define blk0(i) block->l[i]
-#else
-#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF))
-#endif
-#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))
-
-/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
-#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
-#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
-#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
-#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
-#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
-
-
-/* Hash a single 512-bit block. This is the core of the algorithm. */
-void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])
-{
-    uint32_t a, b, c, d, e;
-    typedef union {
-        uint8_t c[64];
-        uint32_t l[16];
-    } CHAR64LONG16;
-    CHAR64LONG16* block;
-
-    block = (CHAR64LONG16*)buffer;
-
-    /* Copy context->state[] to working vars */
-    a = state[0];
-    b = state[1];
-    c = state[2];
-    d = state[3];
-    e = state[4];
-
-    /* 4 rounds of 20 operations each. Loop unrolled. */
-    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
-    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
-    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
-    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
-    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
-    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
-    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
-    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
-    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
-    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
-    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
-    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
-    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
-    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
-    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
-    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
-    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
-    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
-    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
-    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
-
-    /* Add the working vars back into context.state[] */
-    state[0] += a;
-    state[1] += b;
-    state[2] += c;
-    state[3] += d;
-    state[4] += e;
-
-    /* Wipe variables */
-    a = b = c = d = e = 0;
-}
-
-
-/* SHA1Init - Initialize new context */
-void SHA1_Init(SHA1_CTX* context)
-{
-    /* SHA1 initialization constants */
-    context->state[0] = 0x67452301;
-    context->state[1] = 0xEFCDAB89;
-    context->state[2] = 0x98BADCFE;
-    context->state[3] = 0x10325476;
-    context->state[4] = 0xC3D2E1F0;
-    context->count[0] = 0;
-  context->count[1] = 0;
-}
-
-
-/* Run your data through this. */
-void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
-{
-    size_t i, j;
-
-    j = (context->count[0] >> 3) & 63;
-    if ((context->count[0] += len << 3) < (len << 3)) context->count[1]++;
-
-    context->count[1] += (len >> 29);
-
-    if ((j + len) > 63) 
-  {
-        memcpy(&context->buffer[j], data, (i = 64-j));
-        SHA1_Transform(context->state, context->buffer);
-
-        for ( ; i + 63 < len; i += 64) 
-    {
-            SHA1_Transform(context->state, data + i);
-        }
-
-        j = 0;
-    }
-    else i = 0;
-    memcpy(&context->buffer[j], &data[i], len - i);
-}
-
-
-/* Add padding and return the message digest. */
-void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
-{
-    uint32_t i;
-    uint8_t  finalcount[8];
-
-    for (i = 0; i < 8; i++) {
-        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
-         >> ((3-(i & 3)) * 8) ) & 255);  /* Endian independent */
-    }
-    SHA1_Update(context, (uint8_t *)"\200", 1);
-    while ((context->count[0] & 504) != 448) {
-        SHA1_Update(context, (uint8_t *)"\0", 1);
-    }
-    SHA1_Update(context, finalcount, 8);  /* Should cause a SHA1_Transform() */
-    for (i = 0; i < SHA1_DIGEST_SIZE; i++) {
-        digest[i] = (uint8_t)
-         ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
-    }
-
-    /* Wipe variables */
-    i = 0;
-    memset(context->buffer, 0, 64);
-    memset(context->state, 0, 20);
-    memset(context->count, 0, 8);
-    memset(finalcount, 0, 8);	/* SWR */
-}
-
-//-----------------------------------------------------------------------------
-
-void sha1_32a ( const void * key, int len, uint32_t seed, void * out )
-{
-  SHA1_CTX context;
-
-  uint8_t digest[20];
-
-  SHA1_Init(&context);
-  SHA1_Update(&context, (uint8_t*)key, len);
-  SHA1_Final(&context, digest);
-
-  memcpy(out,&digest[0],4);
-}
-
-//-----------------------------------------------------------------------------
-// self test
-
-//#define TEST
-
-#ifdef TEST
-
-static char *test_data[] = {
-    "abc",
-    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
-    "A million repetitions of 'a'"};
-static char *test_results[] = {
-    "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",
-    "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",
-    "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};
-
-
-void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)
-{
-    int i,j;
-    char *c = output;
-
-    for (i = 0; i < SHA1_DIGEST_SIZE/4; i++) {
-        for (j = 0; j < 4; j++) {
-            sprintf(c,"%02X", digest[i*4+j]);
-            c += 2;
-        }
-        sprintf(c, " ");
-        c += 1;
-    }
-    *(c - 1) = '\0';
-}
-
-int main(int argc, char** argv)
-{
-    int k;
-    SHA1_CTX context;
-    uint8_t digest[20];
-    char output[80];
-
-    fprintf(stdout, "verifying SHA-1 implementation... ");
-
-    for (k = 0; k < 2; k++){
-        SHA1_Init(&context);
-        SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k]));
-        SHA1_Final(&context, digest);
-  digest_to_hex(digest, output);
-
-        if (strcmp(output, test_results[k])) {
-            fprintf(stdout, "FAIL\n");
-            fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]);
-            fprintf(stderr,"\t%s returned\n", output);
-            fprintf(stderr,"\t%s is correct\n", test_results[k]);
-            return (1);
-        }
-    }
-    /* million 'a' vector we feed separately */
-    SHA1_Init(&context);
-    for (k = 0; k < 1000000; k++)
-        SHA1_Update(&context, (uint8_t*)"a", 1);
-    SHA1_Final(&context, digest);
-    digest_to_hex(digest, output);
-    if (strcmp(output, test_results[2])) {
-        fprintf(stdout, "FAIL\n");
-        fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]);
-        fprintf(stderr,"\t%s returned\n", output);
-        fprintf(stderr,"\t%s is correct\n", test_results[2]);
-        return (1);
-    }
-
-    /* success */
-    fprintf(stdout, "ok\n");
-    return(0);
-}
-#endif /* TEST */
+/*
+SHA-1 in C
+By Steve Reid <sreid@sea-to-sky.net>
+100% Public Domain
+
+-----------------
+Modified 7/98
+By James H. Brown <jbrown@burgoyne.com>
+Still 100% Public Domain
+
+Corrected a problem which generated improper hash values on 16 bit machines
+Routine SHA1Update changed from
+  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
+len)
+to
+  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
+long len)
+
+The 'len' parameter was declared an int which works fine on 32 bit machines.
+However, on 16 bit machines an int is too small for the shifts being done
+against
+it.  This caused the hash function to generate incorrect values if len was
+greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().
+
+Since the file IO in main() reads 16K at a time, any file 8K or larger would
+be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million
+"a"s).
+
+I also changed the declaration of variables i & j in SHA1Update to
+unsigned long from unsigned int for the same reason.
+
+These changes should make no difference to any 32 bit implementations since
+an
+int and a long are the same size in those environments.
+
+--
+I also corrected a few compiler warnings generated by Borland C.
+1. Added #include <process.h> for exit() prototype
+2. Removed unused variable 'j' in SHA1Final
+3. Changed exit(0) to return(0) at end of main.
+
+ALL changes I made can be located by searching for comments containing 'JHB'
+-----------------
+Modified 8/98
+By Steve Reid <sreid@sea-to-sky.net>
+Still 100% public domain
+
+1- Removed #include <process.h> and used return() instead of exit()
+2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)
+3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net
+
+-----------------
+Modified 4/01
+By Saul Kravitz <Saul.Kravitz@celera.com>
+Still 100% PD
+Modified to run on Compaq Alpha hardware.
+
+-----------------
+Modified 07/2002
+By Ralph Giles <giles@ghostscript.com>
+Still 100% public domain
+modified for use with stdint types, autoconf
+code cleanup, removed attribution comments
+switched SHA1Final() argument order for consistency
+use SHA1_ prefix for public api
+move public api to sha1.h
+*/
+
+/*
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+  A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+  84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+  34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "sha1.h"
+
+#if defined(_MSC_VER)
+#pragma warning(disable : 4267)
+#pragma warning(disable : 4996)
+#pragma warning(disable : 4100)
+#endif
+
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
+/* rol(x,n): presumably a 32-bit rotate-left; ROTL32 is defined outside this file. */
+#define rol ROTL32
+
+/* blk0() and blk() perform the initial expand. */
+/* I got the idea of expanding during the round function from SSLeay */
+/* FIXME: can we do this in an endian-proof way? */
+/* blk0(i) yields message word i, rewriting it byte-swapped unless WORDS_BIGENDIAN is defined. */
+#ifdef WORDS_BIGENDIAN
+#define blk0(i) block->l[i]
+#else
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF))
+#endif
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))
+/* All macros here expect a local pointer named 'block'; R0..R4 already end in ';'. */
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block into state[]. This is the core of the algorithm. */
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])
+{
+    uint32_t a, b, c, d, e;
+    typedef union {
+        uint8_t c[64];
+        uint32_t l[16];
+    } CHAR64LONG16;
+    CHAR64LONG16 workspace;            /* aligned private copy of the block */
+    CHAR64LONG16* block = &workspace;  /* blk0()/blk() overwrite block->l[] in place */
+    memcpy(workspace.c, buffer, 64);   /* buffer is const caller data: never write to it */
+
+    /* Copy context->state[] to working vars */
+    a = state[0];
+    b = state[1];
+    c = state[2];
+    d = state[3];
+    e = state[4];
+
+    /* 4 rounds of 20 operations each. Loop unrolled. */
+    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+
+    /* Add the working vars back into context.state[] */
+    state[0] += a;
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+    state[4] += e;
+
+    /* Wipe variables */
+    a = b = c = d = e = 0;
+}
+
+
+/* SHA1_Init - put a context into the start-of-message state */
+void SHA1_Init(SHA1_CTX* context)
+{
+    /* SHA1 initialization constants */
+    static const uint32_t initial_state[5] = {
+        0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
+    };
+
+    memcpy(context->state, initial_state, sizeof initial_state);
+    context->count[0] = 0;
+    context->count[1] = 0;
+}
+
+
+/* Run your data through this: absorbs len bytes; may be called repeatedly. */
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
+{
+    size_t i, j;
+    /* count[] holds the message length in bits: [0] = low word, [1] = high word. */
+    const uint32_t low_bits = (uint32_t)(len << 3);
+    j = (context->count[0] >> 3) & 63;  /* bytes already waiting in buffer */
+    context->count[0] += low_bits;
+    /* Detect the carry at 32 bits; comparing against a 64-bit (len << 3) miscounts. */
+    if (context->count[0] < low_bits) context->count[1]++;
+    context->count[1] += (uint32_t)(len >> 29);
+
+    if ((j + len) > 63)
+    {
+        memcpy(&context->buffer[j], data, (i = 64-j));  /* top up the partial block */
+        SHA1_Transform(context->state, context->buffer);
+        for ( ; i + 63 < len; i += 64)
+        {
+            SHA1_Transform(context->state, data + i);   /* whole blocks straight from data */
+        }
+        j = 0;
+    }
+    else i = 0;
+    memcpy(&context->buffer[j], &data[i], len - i);     /* keep the tail for next time */
+}
+
+
+/* Add padding and the message length, then write the digest and clear the context. */
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
+{
+    uint32_t n;
+    uint8_t  length_be[8];
+
+    /* Save the bit count big-endian (count[1] first) before padding changes it. */
+    for (n = 0; n < 8; n++) {
+        const uint32_t word = context->count[n < 4 ? 1 : 0];
+        length_be[n] = (uint8_t)(word >> (8 * (3 - (n & 3))));
+    }
+    /* Pad with 0x80 then zero bytes until 56 bytes of the block are used. */
+    SHA1_Update(context, (uint8_t *)"\200", 1);
+    while ((context->count[0] & 504) != 448) {
+        SHA1_Update(context, (uint8_t *)"\0", 1);
+    }
+    SHA1_Update(context, length_be, 8);  /* Should cause a SHA1_Transform() */
+    for (n = 0; n < SHA1_DIGEST_SIZE; n++) {
+        digest[n] = (uint8_t)(context->state[n / 4] >> (8 * (3 - (n % 4))));
+    }
+
+    /* Wipe the context and the local copy of the length. */
+    memset(context->buffer, 0, 64);
+    memset(context->state, 0, 20);
+    memset(context->count, 0, 8);
+    memset(length_be, 0, 8);
+}
+
+//-----------------------------------------------------------------------------
+// Hash len bytes at key with SHA-1 and store the first 4 digest bytes in out.
+void sha1_32a ( const void * key, int len, uint32_t seed, void * out )
+{
+  SHA1_CTX context;
+  // NOTE: seed is never read in this function, so it does not affect the result.
+  uint8_t digest[20];
+
+  SHA1_Init(&context);
+  SHA1_Update(&context, (uint8_t*)key, len);
+  SHA1_Final(&context, digest);
+
+  memcpy(out,&digest[0],4);
+}
+
+//-----------------------------------------------------------------------------
+// self test
+
+//#define TEST
+
+#ifdef TEST
+/* FIPS PUB 180-1 messages and expected digests; test_data[2] is only a label for reports. */
+static const char *test_data[] = {
+    "abc",
+    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+    "A million repetitions of 'a'"};
+static const char *test_results[] = {
+    "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",
+    "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",
+    "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};
+
+/* Render digest as five space-separated groups of eight uppercase hex digits. */
+void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)
+{
+    char *cursor = output;
+    int idx;
+
+    for (idx = 0; idx < SHA1_DIGEST_SIZE; idx++) {
+        sprintf(cursor, "%02X", digest[idx]);
+        cursor += 2;
+        if ((idx & 3) == 3) {
+            sprintf(cursor, " ");   /* group separator after every 4th byte */
+            cursor += 1;
+        }
+    }
+    cursor[-1] = '\0';              /* replace the last separator with the terminator */
+}
+/* Self-test driver: prints "ok" and returns 0, or reports the first mismatching vector and returns 1. */
+int main(int argc, char** argv)
+{
+    int k;
+    SHA1_CTX context;
+    uint8_t digest[20];
+    char output[80];
+
+    fprintf(stdout, "verifying SHA-1 implementation... ");
+    /* Vectors 0 and 1 are hashed with a single SHA1_Update() call each. */
+    for (k = 0; k < 2; k++){
+        SHA1_Init(&context);
+        SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k]));
+        SHA1_Final(&context, digest);
+  digest_to_hex(digest, output);
+
+        if (strcmp(output, test_results[k])) {
+            fprintf(stdout, "FAIL\n");
+            fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]);
+            fprintf(stderr,"\t%s returned\n", output);
+            fprintf(stderr,"\t%s is correct\n", test_results[k]);
+            return (1);
+        }
+    }
+    /* million 'a' vector we feed separately */
+    SHA1_Init(&context);
+    for (k = 0; k < 1000000; k++)
+        SHA1_Update(&context, (uint8_t*)"a", 1);
+    SHA1_Final(&context, digest);
+    digest_to_hex(digest, output);
+    if (strcmp(output, test_results[2])) {
+        fprintf(stdout, "FAIL\n");
+        fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]);
+        fprintf(stderr,"\t%s returned\n", output);
+        fprintf(stderr,"\t%s is correct\n", test_results[2]);
+        return (1);
+    }
+
+    /* success */
+    fprintf(stdout, "ok\n");
+    return(0);
+}
+#endif /* TEST */
diff --git a/sha1.h b/sha1.h
index b81088f..16b10a1 100644
--- a/sha1.h
+++ b/sha1.h
@@ -1,21 +1,21 @@
-/* public api for steve reid's public domain SHA-1 implementation */
-/* this file is in the public domain */
-
-#pragma once
-
-#include "Platform.h"
-
-struct SHA1_CTX
-{
-    uint32_t state[5];
-    uint32_t count[2];
-    uint8_t  buffer[64];
-};
-
-#define SHA1_DIGEST_SIZE 20
-
-void SHA1_Init(SHA1_CTX* context);
-void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);
-void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);
-
+/* public api for steve reid's public domain SHA-1 implementation */
+/* this file is in the public domain */
+
+#pragma once
+
+#include "Platform.h"
+/* Running state of one SHA-1 computation; SHA1_Init() sets state[] and count[]. */
+struct SHA1_CTX
+{
+    uint32_t state[5];    /* chaining value, 5 x 32 bits */
+    uint32_t count[2];    /* message length so far in bits: [0] low word, [1] high word */
+    uint8_t  buffer[64];  /* input bytes waiting to fill a 64-byte block */
+};
+
+#define SHA1_DIGEST_SIZE 20
+/* Usage: SHA1_Init once, SHA1_Update per chunk, then SHA1_Final (which also zeroes the context). */
+void SHA1_Init(SHA1_CTX* context);
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);
+
 void sha1_32a ( const void * key, int len, uint32_t seed, void * out );
\ No newline at end of file
-- 
cgit v1.2.3


From dd462f2c6817d4e09a6ecd99f35e3a04c342afb2 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Thu, 1 Mar 2012 06:06:01 +0000
Subject: Add #include "Platform.h" to fix Windows build remove CXX_FLAGS,
 which make Visual Studio complain Widen sanity test out of paranoia. Fix typo
 in g_hashes reference to SpookyHash Remove Visual Studio-specific .sln &
 .vcproj files

git-svn-id: http://smhasher.googlecode.com/svn/trunk@145 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 CMakeLists.txt  |   1 -
 City.h          |   3 +-
 KeysetTest.cpp  |   2 +-
 SMHasher.sln    |  26 ---
 SMHasher.vcproj | 485 --------------------------------------------------------
 Spooky.h        |   3 +-
 main.cpp        |   2 +-
 7 files changed, 6 insertions(+), 516 deletions(-)
 delete mode 100644 SMHasher.sln
 delete mode 100644 SMHasher.vcproj

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d04afdf..5b64292 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,6 @@ project(SMHasher)
 cmake_minimum_required(VERSION 2.4)
 
 set(CMAKE_BUILD_TYPE Release)
-set(CMAKE_CXX_FLAGS "-g -fno-strict-aliasing -Wall")
 
 add_library(
   SMHasherSupport
diff --git a/City.h b/City.h
index 02f3457..fe2d3e2 100644
--- a/City.h
+++ b/City.h
@@ -43,8 +43,9 @@
 #ifndef CITY_HASH_H_
 #define CITY_HASH_H_
 
+#include "Platform.h"
 #include <stdlib.h>  // for size_t.
-#include <stdint.h>
+//#include <stdint.h>
 #include <utility>
 
 typedef uint8_t uint8;
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index b3b8a4c..148b065 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -81,7 +81,7 @@ bool SanityTest ( pfHash hash, const int hashbits )
 
   const int hashbytes = hashbits/8;
   const int reps = 10;
-  const int keymax = 128;
+  const int keymax = 256;
   const int pad = 16;
   const int buflen = keymax + pad*3;
   
diff --git a/SMHasher.sln b/SMHasher.sln
deleted file mode 100644
index f8c7a41..0000000
--- a/SMHasher.sln
+++ /dev/null
@@ -1,26 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 10.00
-# Visual Studio 2008
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SMHasher", "SMHasher.vcproj", "{AF3C61C4-642A-425B-928D-CEC37C678442}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Win32 = Debug|Win32
-		Debug|x64 = Debug|x64
-		Release|Win32 = Release|Win32
-		Release|x64 = Release|x64
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|Win32.ActiveCfg = Debug|Win32
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|Win32.Build.0 = Debug|Win32
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|x64.ActiveCfg = Debug|x64
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Debug|x64.Build.0 = Debug|x64
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|Win32.ActiveCfg = Release|Win32
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|Win32.Build.0 = Release|Win32
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|x64.ActiveCfg = Release|x64
-		{AF3C61C4-642A-425B-928D-CEC37C678442}.Release|x64.Build.0 = Release|x64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
deleted file mode 100644
index e5a59da..0000000
--- a/SMHasher.vcproj
+++ /dev/null
@@ -1,485 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
-	ProjectType="Visual C++"
-	Version="9.00"
-	Name="SMHasher"
-	ProjectGUID="{AF3C61C4-642A-425B-928D-CEC37C678442}"
-	RootNamespace="SMHasher"
-	Keyword="Win32Proj"
-	TargetFrameworkVersion="196613"
-	>
-	<Platforms>
-		<Platform
-			Name="Win32"
-		/>
-		<Platform
-			Name="x64"
-		/>
-	</Platforms>
-	<ToolFiles>
-	</ToolFiles>
-	<Configurations>
-		<Configuration
-			Name="Debug|Win32"
-			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
-			IntermediateDirectory="$(ConfigurationName)"
-			ConfigurationType="1"
-			CharacterSet="1"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-				Optimization="0"
-				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
-				MinimalRebuild="true"
-				BasicRuntimeChecks="3"
-				RuntimeLibrary="3"
-				UsePrecompiledHeader="0"
-				WarningLevel="4"
-				WarnAsError="true"
-				DebugInformationFormat="4"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLinkerTool"
-				LinkIncremental="2"
-				GenerateDebugInformation="true"
-				SubSystem="1"
-				TargetMachine="1"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCManifestTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCAppVerifierTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-		<Configuration
-			Name="Debug|x64"
-			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
-			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
-			ConfigurationType="1"
-			CharacterSet="1"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-				TargetEnvironment="3"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-				Optimization="0"
-				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
-				MinimalRebuild="true"
-				BasicRuntimeChecks="3"
-				RuntimeLibrary="3"
-				UsePrecompiledHeader="0"
-				WarningLevel="4"
-				WarnAsError="true"
-				DebugInformationFormat="3"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLinkerTool"
-				LinkIncremental="2"
-				GenerateDebugInformation="true"
-				SubSystem="1"
-				TargetMachine="17"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCManifestTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCAppVerifierTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-		<Configuration
-			Name="Release|Win32"
-			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
-			IntermediateDirectory="$(ConfigurationName)"
-			ConfigurationType="1"
-			CharacterSet="1"
-			WholeProgramOptimization="1"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-				Optimization="2"
-				EnableIntrinsicFunctions="true"
-				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
-				RuntimeLibrary="2"
-				EnableFunctionLevelLinking="true"
-				UsePrecompiledHeader="0"
-				WarningLevel="4"
-				WarnAsError="true"
-				DebugInformationFormat="3"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLinkerTool"
-				LinkIncremental="1"
-				GenerateDebugInformation="true"
-				SubSystem="1"
-				OptimizeReferences="2"
-				EnableCOMDATFolding="2"
-				TargetMachine="1"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCManifestTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCAppVerifierTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-		<Configuration
-			Name="Release|x64"
-			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
-			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
-			ConfigurationType="1"
-			CharacterSet="1"
-			WholeProgramOptimization="1"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-				TargetEnvironment="3"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-				Optimization="2"
-				EnableIntrinsicFunctions="true"
-				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
-				RuntimeLibrary="2"
-				EnableFunctionLevelLinking="true"
-				UsePrecompiledHeader="0"
-				WarningLevel="4"
-				WarnAsError="true"
-				DebugInformationFormat="3"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLinkerTool"
-				LinkIncremental="1"
-				GenerateDebugInformation="true"
-				SubSystem="1"
-				OptimizeReferences="2"
-				EnableCOMDATFolding="2"
-				TargetMachine="17"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCManifestTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCAppVerifierTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-	</Configurations>
-	<References>
-	</References>
-	<Files>
-		<Filter
-			Name="Hashes"
-			>
-			<File
-				RelativePath=".\City.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\City.h"
-				>
-			</File>
-			<File
-				RelativePath=".\crc.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Hashes.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Hashes.h"
-				>
-			</File>
-			<File
-				RelativePath=".\lookup3.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\md5.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\MurmurHash1.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\MurmurHash1.h"
-				>
-			</File>
-			<File
-				RelativePath=".\MurmurHash2.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\MurmurHash2.h"
-				>
-			</File>
-			<File
-				RelativePath=".\MurmurHash3.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\MurmurHash3.h"
-				>
-			</File>
-			<File
-				RelativePath=".\sha1.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\sha1.h"
-				>
-			</File>
-			<File
-				RelativePath=".\SuperFastHash.cpp"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Tests"
-			>
-			<File
-				RelativePath=".\AvalancheTest.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\AvalancheTest.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Bitslice.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\DifferentialTest.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\DifferentialTest.h"
-				>
-			</File>
-			<File
-				RelativePath=".\KeysetTest.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\KeysetTest.h"
-				>
-			</File>
-			<File
-				RelativePath=".\SpeedTest.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\SpeedTest.h"
-				>
-			</File>
-		</Filter>
-		<Filter
-			Name="Utils"
-			>
-			<File
-				RelativePath=".\Bitvec.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Bitvec.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Platform.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Platform.h"
-				>
-			</File>
-			<File
-				RelativePath=".\pstdint.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Random.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Random.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Stats.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Stats.h"
-				>
-			</File>
-			<File
-				RelativePath=".\Types.cpp"
-				>
-			</File>
-			<File
-				RelativePath=".\Types.h"
-				>
-			</File>
-		</Filter>
-		<File
-			RelativePath=".\main.cpp"
-			>
-		</File>
-	</Files>
-	<Globals>
-	</Globals>
-</VisualStudioProject>
diff --git a/Spooky.h b/Spooky.h
index cafd52e..047335d 100644
--- a/Spooky.h
+++ b/Spooky.h
@@ -24,6 +24,7 @@
 // slower than MD5.
 //
 
+#include "Platform.h"
 #include <stddef.h>
 
 #ifdef _MSC_VER
@@ -281,7 +282,7 @@ private:
     //  * is a not-very-regular mix of 1's and 0's
     //  * does not need any other special mathematical properties
     //
-    static const uint64 sc_const = 0xdeadbeefdeadbeefLL;
+    static const uint64 sc_const = 0xdeadbeefdeadbeefULL;
 
     uint64 m_data[2*sc_numVars];   // unhashed data, for partial messages
     uint64 m_state[sc_numVars];  // internal state of the hash
diff --git a/main.cpp b/main.cpp
index 19c605b..4facb6d 100644
--- a/main.cpp
+++ b/main.cpp
@@ -61,7 +61,7 @@ HashInfo g_hashes[] =
   { CityHash64_test,      64, 0x25A20825, "City64",      "Google CityHash64WithSeed" },
   { CityHash128_test,    128, 0x6531F54E, "City128",     "Google CityHash128WithSeed" },
 
-  { SpookyHash64_test,    32, 0x3F798BBB, "Spooky32",    "Bob Jenkins' SpookyHash, 32-bit result" },
+  { SpookyHash32_test,    32, 0x3F798BBB, "Spooky32",    "Bob Jenkins' SpookyHash, 32-bit result" },
   { SpookyHash64_test,    64, 0xA7F955F1, "Spooky64",    "Bob Jenkins' SpookyHash, 64-bit result" },
   { SpookyHash128_test,  128, 0x8D263080, "Spooky128",   "Bob Jenkins' SpookyHash, 128-bit result" },
 
-- 
cgit v1.2.3


From 6895dced6534c9a041ceba5cabed231d3b2d8518 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 11 May 2012 04:59:07 +0000
Subject: Merge in PMurHash by Shane Day, a MurmurHash3_x86_32 implementation
 that should work on virtually all platforms regardless of endian-ness or
 alignment issues.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@146 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 CMakeLists.txt |   1 +
 PMurHash.c     | 317 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 PMurHash.h     |  64 ++++++++++++
 Platform.cpp   |   2 +-
 main.cpp       |   2 +
 5 files changed, 385 insertions(+), 1 deletion(-)
 create mode 100644 PMurHash.c
 create mode 100644 PMurHash.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b64292..3aaec87 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,7 @@ add_library(
   Stats.cpp
   SuperFastHash.cpp
   Types.cpp
+  PMurHash.c
 )
 
 add_executable(
diff --git a/PMurHash.c b/PMurHash.c
new file mode 100644
index 0000000..0175012
--- /dev/null
+++ b/PMurHash.c
@@ -0,0 +1,317 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/*-----------------------------------------------------------------------------
+ 
+If you want to understand the MurmurHash algorithm you would be much better
+off reading the original source. Just point your browser at:
+http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+
+
+What does this version provide?
+
+1. Progressive data feeding. Useful when the entire payload to be hashed
+does not fit in memory or when the data is streamed through the application.
+Also useful when hashing a number of strings with a common prefix. A partial
+hash of a prefix string can be generated and reused for each suffix string.
+
+2. Portability. Plain old C so that it should compile on any old compiler.
+Both CPU endian and access-alignment neutral, but avoiding inefficient code
+when possible depending on CPU capabilities.
+
+3. Drop in. I personally like nice self contained public domain code, making it
+easy to pilfer without loads of refactoring to work properly in the existing
+application code & makefile structure and mucking around with licence files.
+Just copy PMurHash.h and PMurHash.c and you're ready to go.
+
+
+How does it work?
+
+We can only process entire 32 bit chunks of input, except for the very end
+that may be shorter. So along with the partial hash we need to give back to
+the caller a carry containing up to 3 bytes that we were unable to process.
+This carry also needs to record the number of bytes the carry holds. I use
+the low 2 bits as a count (0..3) and the carry bytes are shifted into the
+high byte in stream order.
+
+To handle endianness I simply use a macro that reads a uint32_t and define
+that macro to be a direct read on little endian machines, a read and swap
+on big endian machines, or a byte-by-byte read if the endianness is unknown.
+
+-----------------------------------------------------------------------------*/
+
+
+#include "PMurHash.h"
+
+/* I used ugly type names in the header to avoid potential conflicts with
+ * application or system typedefs & defines. Since I'm not including any more
+ * headers below here I can rename these so that the code reads like C99 */
+#undef uint32_t
+#define uint32_t MH_UINT32
+#undef uint8_t
+#define uint8_t  MH_UINT8
+
+/* MSVC warnings we choose to ignore */
+#if defined(_MSC_VER)
+  #pragma warning(disable: 4127) /* conditional expression is constant */
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Endianness, misalignment capabilities and util macros
+ *
+ * The following 3 macros are defined in this section. The other macros defined
+ * are only needed to help derive these 3.
+ *
+ * READ_UINT32(x)   Read a little endian unsigned 32-bit int
+ * UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
+ * ROTL32(x,r)      Rotate x left by r bits
+ */
+
+/* Convention is to define __BYTE_ORDER == to one of these values */
+#if !defined(__BIG_ENDIAN)
+  #define __BIG_ENDIAN 4321
+#endif
+#if !defined(__LITTLE_ENDIAN)
+  #define __LITTLE_ENDIAN 1234
+#endif
+
+/* I386 */
+#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
+  #define __BYTE_ORDER __LITTLE_ENDIAN
+  #define UNALIGNED_SAFE
+#endif
+
+/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
+ * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
+#if !defined(__BYTE_ORDER)
+  #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianness */
+#if !defined(__BYTE_ORDER)
+  #if defined(__ARMEL__) || defined(__MIPSEL__)
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #endif
+  #if defined(__ARMEB__) || defined(__MIPSEB__)
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* Now find best way we can to READ_UINT32 */
+#if __BYTE_ORDER==__LITTLE_ENDIAN
+  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
+  #define READ_UINT32(ptr)   (*((uint32_t*)(ptr)))
+#elif __BYTE_ORDER==__BIG_ENDIAN
+  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
+  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
+    #define READ_UINT32(ptr)   (__builtin_bswap32(*((uint32_t*)(ptr))))
+  #else
+    /* Without a known fast bswap32 we're just as well off doing this */
+    #define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+    #define UNALIGNED_SAFE
+  #endif
+#else
+  /* Unknown endianness so last resort is to read individual bytes */
+  #define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
+
+  /* Since we're not doing word-reads we can skip the messing about with realignment */
+  #define UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL32 */
+#if defined(_MSC_VER)
+  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
+  #define ROTL32(x,r)  _rotl(x,r)
+#else
+  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
+  #define ROTL32(x,r)  (((uint32_t)x << r) | ((uint32_t)x >> (32 - r)))
+#endif
+
+
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+#define C1  (0xcc9e2d51)
+#define C2  (0x1b873593)
+
+/* This is the main processing body of the algorithm. It operates
+ * on each full 32-bits of input. */
+#define DOBLOCK(h1, k1) do{ \
+        k1 *= C1; \
+        k1 = ROTL32(k1,15); \
+        k1 *= C2; \
+        \
+        h1 ^= k1; \
+        h1 = ROTL32(h1,13); \
+        h1 = h1*5+0xe6546b64; \
+    }while(0)
+
+
+/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
+/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
+#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
+    int _i = cnt; \
+    while(_i--) { \
+        c = c>>8 | *ptr++<<24; \
+        n++; len--; \
+        if(n==4) { \
+            DOBLOCK(h1, c); \
+            n = 0; \
+        } \
+    } }while(0)
+
+/*---------------------------------------------------------------------------*/
+
+/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed
+ * if wanted. Both ph1 and pcarry are required arguments. */
+void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
+{
+  uint32_t h1 = *ph1;
+  uint32_t c = *pcarry;
+
+  const uint8_t *ptr = (uint8_t*)key;
+  const uint8_t *end;
+
+  /* Extract carry count from low 2 bits of c value */
+  int n = c & 3;
+
+#if defined(UNALIGNED_SAFE)
+  /* This CPU handles unaligned word access */
+
+  /* Consume any carry bytes */
+  int i = (4-n) & 3;
+  if(i && i <= len) {
+    DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* Process 32-bit chunks */
+  end = ptr + len/4*4;
+  for( ; ptr < end ; ptr+=4) {
+    uint32_t k1 = READ_UINT32(ptr);
+    DOBLOCK(h1, k1);
+  }
+
+#else /*UNALIGNED_SAFE*/
+  /* This CPU does not handle unaligned word access */
+
+  /* Consume enough so that the next data byte is word aligned */
+  int i = -(long)ptr & 3;
+  if(i && i <= len) {
+      DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
+  end = ptr + len/4*4;
+  switch(n) { /* how many bytes in c */
+  case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = READ_UINT32(ptr);
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>24;
+      c = READ_UINT32(ptr);
+      k1 |= c<<8;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>16;
+      c = READ_UINT32(ptr);
+      k1 |= c<<16;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>8;
+      c = READ_UINT32(ptr);
+      k1 |= c<<24;
+      DOBLOCK(h1, k1);
+    }
+  }
+#endif /*UNALIGNED_SAFE*/
+
+  /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
+  len -= len/4*4;
+
+  /* Append any remaining bytes into carry */
+  DOBYTES(len, h1, c, n, ptr, len);
+
+  /* Copy out new running hash and carry */
+  *ph1 = h1;
+  *pcarry = (c & ~0xff) | n;
+} 
+
+/*---------------------------------------------------------------------------*/
+
+/* Finalize a hash. To match the original Murmur3A the total_length must be provided */
+uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length)
+{
+  uint32_t k1;
+  int n = carry & 3;
+  if(n) {
+    k1 = carry >> (4-n)*8;
+    k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1;
+  }
+  h ^= total_length;
+
+  /* fmix */
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Murmur3A compatible all-at-once */
+uint32_t PMurHash32(uint32_t seed, const void *key, int len)
+{
+  uint32_t h1=seed, carry=0;
+  PMurHash32_Process(&h1, &carry, key, len);
+  return PMurHash32_Result(h1, carry, len);
+}
+
+/*---------------------------------------------------------------------------*/
+
+/* Provide an API suitable for smhasher */
+void PMurHash32_test(const void *key, int len, uint32_t seed, void *out)
+{
+  uint32_t h1=seed, carry=0;
+  const uint8_t *ptr = (uint8_t*)key;
+  const uint8_t *end = ptr + len;
+
+#if 0 /* Exercise the progressive processing */
+  while(ptr < end) {
+    //const uint8_t *mid = ptr + rand()%(end-ptr)+1;
+    const uint8_t *mid = ptr + (rand()&0xF);
+    mid = mid<end?mid:end;
+    PMurHash32_Process(&h1, &carry, ptr, mid-ptr);
+    ptr = mid;
+  }
+#else
+  PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr));
+#endif
+  h1 = PMurHash32_Result(h1, carry, len);
+  *(uint32_t*)out = h1;
+}
+
+/*---------------------------------------------------------------------------*/
diff --git a/PMurHash.h b/PMurHash.h
new file mode 100644
index 0000000..28ead00
--- /dev/null
+++ b/PMurHash.h
@@ -0,0 +1,64 @@
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/* ------------------------------------------------------------------------- */
+/* Determine what native type to use for uint32_t */
+
+/* We can't use the name 'uint32_t' here because it will conflict with
+ * any version provided by the system headers or application. */
+
+/* First look for special cases */
+#if defined(_MSC_VER)
+  #define MH_UINT32 unsigned long
+#endif
+
+/* If the compiler says it's C99 then take its word for it */
+#if !defined(MH_UINT32) && ( \
+     defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L )
+  #include <stdint.h>
+  #define MH_UINT32 uint32_t
+#endif
+
+/* Otherwise try testing against max value macros from limits.h */
+#if !defined(MH_UINT32)
+  #include  <limits.h>
+  #if   (USHRT_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned short
+  #elif (UINT_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned int
+  #elif (ULONG_MAX == 0xffffffffUL)
+    #define MH_UINT32 unsigned long
+  #endif
+#endif
+
+#if !defined(MH_UINT32)
+  #error Unable to determine type name for unsigned 32-bit int
+#endif
+
+/* I'm yet to work on a platform where 'unsigned char' is not 8 bits */
+#define MH_UINT8  unsigned char
+
+
+/* ------------------------------------------------------------------------- */
+/* Prototypes */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void PMurHash32_Process(MH_UINT32 *ph1, MH_UINT32 *pcarry, const void *key, int len);
+MH_UINT32 PMurHash32_Result(MH_UINT32 h1, MH_UINT32 carry, MH_UINT32 total_length);
+MH_UINT32 PMurHash32(MH_UINT32 seed, const void *key, int len);
+
+void PMurHash32_test(const void *key, int len, MH_UINT32 seed, void *out);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Platform.cpp b/Platform.cpp
index d7f5fb8..3c97694 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -25,7 +25,7 @@ void SetAffinity ( int cpu )
 
 void SetAffinity ( int /*cpu*/ )
 {
-#ifndef __CYGWIN__
+#if !defined(__CYGWIN__) && !defined(__APPLE__)
   cpu_set_t mask;
     
   CPU_ZERO(&mask);
diff --git a/main.cpp b/main.cpp
index 4facb6d..678ddb2 100644
--- a/main.cpp
+++ b/main.cpp
@@ -4,6 +4,7 @@
 #include "SpeedTest.h"
 #include "AvalancheTest.h"
 #include "DifferentialTest.h"
+#include "PMurHash.h"
 
 #include <stdio.h>
 #include <time.h>
@@ -78,6 +79,7 @@ HashInfo g_hashes[] =
   { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
   { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
+  { PMurHash32_test,      32, 0xB0F57EE3, "PMurHash32",  "Shane Day's portable-ized MurmurHash3 for x86, 32-bit." },
 };
 
 HashInfo * findHash ( const char * name )
-- 
cgit v1.2.3


From e813f9b95be7adad5a2e441f4484278c453e5261 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 11 May 2012 06:19:58 +0000
Subject: Minor fixes & tweaks.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@147 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 KeysetTest.cpp  |  3 +++
 MurmurHash3.cpp | 16 ++++++++--------
 Platform.h      |  8 ++++----
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 148b065..7077277 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -146,6 +146,9 @@ bool SanityTest ( pfHash hash, const int hashbits )
     printf("PASS\n");
   }
 
+  delete [] buffer1;
+  delete [] buffer2;
+
   delete [] hash1;
   delete [] hash2;
 
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 09ffb26..302e974 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -99,8 +99,8 @@ void MurmurHash3_x86_32 ( const void * key, int len,
 
   uint32_t h1 = seed;
 
-  uint32_t c1 = 0xcc9e2d51;
-  uint32_t c2 = 0x1b873593;
+  const uint32_t c1 = 0xcc9e2d51;
+  const uint32_t c2 = 0x1b873593;
 
   //----------
   // body
@@ -158,10 +158,10 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
   uint32_t h3 = seed;
   uint32_t h4 = seed;
 
-  uint32_t c1 = 0x239b961b; 
-  uint32_t c2 = 0xab0e9789;
-  uint32_t c3 = 0x38b34ae5; 
-  uint32_t c4 = 0xa1e38b93;
+  const uint32_t c1 = 0x239b961b; 
+  const uint32_t c2 = 0xab0e9789;
+  const uint32_t c3 = 0x38b34ae5; 
+  const uint32_t c4 = 0xa1e38b93;
 
   //----------
   // body
@@ -261,8 +261,8 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
   uint64_t h1 = seed;
   uint64_t h2 = seed;
 
-  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
-  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
 
   //----------
   // body
diff --git a/Platform.h b/Platform.h
index fcb68e8..8de26be 100644
--- a/Platform.h
+++ b/Platform.h
@@ -76,13 +76,13 @@ __inline__ unsigned long long int rdtsc()
     unsigned int a, d;
     __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
     return (unsigned long)a | ((unsigned long)d << 32);
-#else
-#ifndef __i386__
-#error Must be x86 either 32-bit or 64-bit.
-#endif
+#elif defined(__i386__)
     unsigned long long int x;
     __asm__ volatile ("rdtsc" : "=A" (x));
     return x;
+#else
+#define NO_CYCLE_COUNTER
+    return 0;
 #endif
 }
 
-- 
cgit v1.2.3

-- 
cgit v1.2.3


From 8adb1336422e3ad4d78ba54fb56692f2ed07124c Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 19 Apr 2013 19:57:16 +0000
Subject: City.cpp/h was using the _mm_crc32_u64 intrinsic, which is only
 supported on x64 platforms - see
 https://bugs.gentoo.org/show_bug.cgi?id=459126 for details.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@148 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 City.cpp | 2 +-
 City.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/City.cpp b/City.cpp
index 4d70dd2..d7c33bc 100644
--- a/City.cpp
+++ b/City.cpp
@@ -351,7 +351,7 @@ uint128 CityHash128(const char *s, size_t len) {
   }
 }
 
-#ifdef __SSE4_2__
+#if defined(__SSE4_2__) && defined(__x86_64__)
 #include <nmmintrin.h>
 
 // Requires len >= 240.
diff --git a/City.h b/City.h
index fe2d3e2..c12c1bb 100644
--- a/City.h
+++ b/City.h
@@ -90,7 +90,7 @@ inline uint64 Hash128to64(const uint128& x) {
 
 // Conditionally include declarations for versions of City that require SSE4.2
 // instructions to be available.
-#ifdef __SSE4_2__
+#if defined(__SSE4_2__) && defined(__x86_64__)
 
 // Hash function for a byte array.
 uint128 CityHashCrc128(const char *s, size_t len);
-- 
cgit v1.2.3


From 6f63a4882e6b2cf87e8eec1a3ef8644e0d963283 Mon Sep 17 00:00:00 2001
From: "tanjent@gmail.com"
 <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>
Date: Fri, 10 May 2013 18:34:06 +0000
Subject: Add required 'inline' keyword to FORCE_INLINE macros.

git-svn-id: http://smhasher.googlecode.com/svn/trunk@149 77a7d1d3-4c08-bdc2-d393-d5859734b01a
---
 MurmurHash3.cpp | 2 +-
 Platform.h      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 302e974..5a55b7a 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -29,7 +29,7 @@
 
 #else	// defined(_MSC_VER)
 
-#define	FORCE_INLINE __attribute__((always_inline))
+#define	FORCE_INLINE inline __attribute__((always_inline))
 
 inline uint32_t rotl32 ( uint32_t x, int8_t r )
 {
diff --git a/Platform.h b/Platform.h
index 8de26be..6d0f0df 100644
--- a/Platform.h
+++ b/Platform.h
@@ -40,7 +40,7 @@ void SetAffinity ( int cpu );
 
 #include <stdint.h>
 
-#define	FORCE_INLINE __attribute__((always_inline))
+#define	FORCE_INLINE inline __attribute__((always_inline))
 #define	NEVER_INLINE __attribute__((noinline))
 
 inline uint32_t rotl32 ( uint32_t x, int8_t r )
-- 
cgit v1.2.3