forked from 0ad/0ad

cleanup (requires update-workspaces):

major refactor of wfilesystem - remove gotos and workaround for FAT file
times
replace more round_up of constants with Align<>
remove old unused allocators (STL aligned, matrix, some shared_ptr)
move allocator utils into separate headers.
remove lockfree, wterminal
avoid testing __cplusplus

This was SVN commit r9361.
janwas 2011-04-30 12:34:28 +00:00
parent 188c020ae0
commit 6c915291cc
45 changed files with 530 additions and 2831 deletions

View File

@ -2,6 +2,7 @@
#define INCLUDED_ALIGNMENT
#include "lib/sysdep/compiler.h" // MSC_VERSION
#include "lib/sysdep/arch.h" // ARCH_AMD64
template<typename T>
inline bool IsAligned(T t, uintptr_t multiple)
@ -17,6 +18,9 @@ inline size_t Align(size_t n)
}
static const size_t allocationAlignment = ARCH_AMD64? 16 : 8;
//
// SIMD vector
//
@ -60,46 +64,3 @@ static const size_t largePageSize = 0x200000; // 2 MB
static const uintptr_t maxSectorSize = 0x1000;
#endif // #ifndef INCLUDED_ALIGNMENT
#ifndef INCLUDED_ALIGNMENT
#define INCLUDED_ALIGNMENT
template<typename T>
inline bool IsAligned(T t, uintptr_t multiple)
{
return (uintptr_t(t) % multiple) == 0;
}
//
// SIMD vector
//
static const size_t vectorSize = 16;
#define VERIFY_VECTOR_MULTIPLE(size)\
VERIFY(IsAligned(size, vectorSize))
#define VERIFY_VECTOR_ALIGNED(pointer)\
VERIFY_VECTOR_MULTIPLE(pointer);\
ASSUME_ALIGNED(pointer, vectorSize)
//
// CPU cache
//
static const size_t cacheLineSize = 64; // (L2)
#if MSC_VERSION
#define CACHE_ALIGNED __declspec(align(64)) // align() requires a literal; keep in sync with cacheLineSize
#endif
//
// MMU pages
//
static const size_t pageSize = 0x1000; // 4 KB
static const size_t largePageSize = 0x200000; // 2 MB
#endif // #ifndef INCLUDED_ALIGNMENT
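
The commit message's "replace more round_up of constants with Align<>" item boils down to the following pattern; a minimal sketch, assuming Align<multiple> is the power-of-two rounding template declared in this header and pageSize is the constant shown above:

static size_t RoundUpToPageExample(size_t size)
{
	// before: const size_t sizePa = round_up(size, pageSize);
	// after: the multiple is a compile-time template argument the compiler can fold
	return Align<pageSize>(size);
}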

View File

@ -1,147 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* STL allocator for aligned memory
*/
#ifndef ALIGNED_ALLOCATOR
#define ALIGNED_ALLOCATOR
#include "lib/bits.h" // round_up
#include "lib/sysdep/arch/x86_x64/cache.h"
#include "lib/sysdep/rtl.h" // rtl_AllocateAligned
/**
* stateless STL allocator that aligns elements to the L1 cache line size.
*
* note: the alignment is hard-coded to avoid any allocator state.
* this avoids portability problems, which is important since allocators
* are rather poorly specified.
*
* references:
* http://www.tantalon.com/pete/customallocators.ppt
* http://www.flipcode.com/archives/Aligned_Block_Allocation.shtml
* http://www.josuttis.com/cppcode/allocator.html
*
* derived from code that bears the following copyright notice:
* (C) Copyright Nicolai M. Josuttis 1999.
* Permission to copy, use, modify, sell and distribute this software
* is granted provided this copyright notice appears in all copies.
* This software is provided "as is" without express or implied
* warranty, and with no claim as to its suitability for any purpose.
**/
template<class T>
class AlignedAllocator
{
public:
// type definitions
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
// rebind allocator to type U
template <class U>
struct rebind
{
typedef AlignedAllocator<U> other;
};
pointer address(reference value) const
{
return &value;
}
const_pointer address(const_reference value) const
{
return &value;
}
AlignedAllocator() throw()
{
}
AlignedAllocator(const AlignedAllocator&) throw()
{
}
template <class U>
AlignedAllocator (const AlignedAllocator<U>&) throw()
{
}
~AlignedAllocator() throw()
{
}
size_type max_size() const throw()
{
// maximum number of *elements* that can be allocated
return std::numeric_limits<std::size_t>::max() / sizeof(T);
}
// allocate uninitialized storage
pointer allocate(size_type numElements)
{
const size_type alignment = x86_x64_Caches(L1D)->entrySize;
const size_type elementSize = round_up(sizeof(T), alignment);
const size_type size = numElements * elementSize;
pointer p = (pointer)rtl_AllocateAligned(size, alignment);
return p;
}
// deallocate storage of elements that have been destroyed
void deallocate(pointer p, size_type UNUSED(num))
{
rtl_FreeAligned((void*)p);
}
void construct(pointer p, const T& value)
{
new((void*)p) T(value);
}
void destroy(pointer p)
{
p->~T();
}
};
// indicate that all specializations of this allocator are interchangeable
template <class T1, class T2>
bool operator==(const AlignedAllocator<T1>&, const AlignedAllocator<T2>&) throw()
{
return true;
}
template <class T1, class T2>
bool operator!=(const AlignedAllocator<T1>&, const AlignedAllocator<T2>&) throw()
{
return false;
}
#endif // #ifndef ALIGNED_ALLOCATOR
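
For reference, the removed class was a drop-in STL allocator; a minimal usage sketch (C++03 style, matching the class above):

#include <vector>

// element storage comes from rtl_AllocateAligned, aligned to the L1 cache line size
std::vector<float, AlignedAllocator<float> > samples(1024);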

View File

@ -0,0 +1,71 @@
/* Copyright (c) 2011 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INCLUDED_ALLOCATORS_ALLOCATOR_CHECKER
#define INCLUDED_ALLOCATORS_ALLOCATOR_CHECKER
#include <map>
/**
* allocator test rig.
* call from each allocator operation to sanity-check them.
* should only be used during debug mode due to serious overhead.
**/
class AllocatorChecker
{
public:
void OnAllocate(void* p, size_t size)
{
const Allocs::value_type item = std::make_pair(p, size);
std::pair<Allocs::iterator, bool> ret = allocs.insert(item);
debug_assert(ret.second == true); // wasn't already in map
}
void OnDeallocate(void* p, size_t size)
{
Allocs::iterator it = allocs.find(p);
if(it == allocs.end())
debug_assert(0); // freeing invalid pointer
else
{
// size must match what was passed to OnAllocate
const size_t allocated_size = it->second;
debug_assert(size == allocated_size);
allocs.erase(it);
}
}
/**
* allocator is resetting itself, i.e. wiping out all allocs.
**/
void OnClear()
{
allocs.clear();
}
private:
typedef std::map<void*, size_t> Allocs;
Allocs allocs;
};
#endif // #ifndef INCLUDED_ALLOCATORS_ALLOCATOR_CHECKER
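
A minimal sketch of how the checker is meant to be wired into an allocator; the malloc wrapper here is hypothetical (the codebase itself hooks the checker into shared_ptr allocations under #ifndef NDEBUG):

#include <cstdlib>

#ifndef NDEBUG
static AllocatorChecker s_checker;
#endif

void* DebugAllocate(size_t size)
{
	void* p = malloc(size);
#ifndef NDEBUG
	if(p)
		s_checker.OnAllocate(p, size);
#endif
	return p;
}

void DebugDeallocate(void* p, size_t size)
{
#ifndef NDEBUG
	s_checker.OnDeallocate(p, size);	// asserts if p is unknown or size mismatches
#endif
	free(p);
}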

View File

@ -1,163 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* memory suballocators.
*/
#include "precompiled.h"
#include "lib/allocators/allocators.h"
#include "lib/alignment.h"
#include "lib/sysdep/cpu.h" // cpu_CAS
#include "lib/allocators/mem_util.h"
//-----------------------------------------------------------------------------
// page aligned allocator
//-----------------------------------------------------------------------------
void* page_aligned_alloc(size_t unaligned_size)
{
const size_t size_pa = mem_RoundUpToPage(unaligned_size);
u8* p = 0;
RETURN0_IF_ERR(mem_Reserve(size_pa, &p));
RETURN0_IF_ERR(mem_Commit(p, size_pa, PROT_READ|PROT_WRITE));
return p;
}
void page_aligned_free(void* p, size_t unaligned_size)
{
if(!p)
return;
debug_assert(mem_IsPageMultiple((uintptr_t)p));
const size_t size_pa = mem_RoundUpToPage(unaligned_size);
(void)mem_Release((u8*)p, size_pa);
}
//-----------------------------------------------------------------------------
// matrix allocator
//-----------------------------------------------------------------------------
void** matrix_alloc(size_t cols, size_t rows, size_t el_size)
{
const size_t initial_align = 64;
// note: no provision for padding rows. this is a bit more work and
// if el_size isn't a power-of-2, performance is going to suck anyway.
// otherwise, the initial alignment will take care of it.
const size_t ptr_array_size = cols*sizeof(void*);
const size_t row_size = cols*el_size;
const size_t data_size = rows*row_size;
const size_t total_size = ptr_array_size + initial_align + data_size;
void* p = malloc(total_size);
if(!p)
return 0;
uintptr_t data_addr = (uintptr_t)p + ptr_array_size + initial_align;
data_addr -= data_addr % initial_align;
// alignment check didn't set address to before allocation
debug_assert(data_addr >= (uintptr_t)p+ptr_array_size);
void** ptr_array = (void**)p;
for(size_t i = 0; i < cols; i++)
{
ptr_array[i] = (void*)data_addr;
data_addr += row_size;
}
// didn't overrun total allocation
debug_assert(data_addr <= (uintptr_t)p+total_size);
return ptr_array;
}
void matrix_free(void** matrix)
{
free(matrix);
}
//-----------------------------------------------------------------------------
// allocator optimized for single instances
//-----------------------------------------------------------------------------
void* single_calloc(void* storage, volatile intptr_t* in_use_flag, size_t size)
{
// sanity check
debug_assert(*in_use_flag == 0 || *in_use_flag == 1);
void* p;
// successfully reserved the single instance
if(cpu_CAS(in_use_flag, 0, 1))
p = storage;
// already in use (rare) - allocate from heap
else
p = new u8[size];
memset(p, 0, size);
return p;
}
void single_free(void* storage, volatile intptr_t* in_use_flag, void* p)
{
// sanity check
debug_assert(*in_use_flag == 0 || *in_use_flag == 1);
if(p == storage)
{
if(cpu_CAS(in_use_flag, 1, 0))
{
// ok, flag has been reset to 0
}
else
debug_assert(0); // in_use_flag out of sync (double free?)
}
// was allocated from heap
else
{
// single instance may have been freed by now - cannot assume
// anything about in_use_flag.
delete[] (u8*)p;
}
}
//-----------------------------------------------------------------------------
// static allocator
//-----------------------------------------------------------------------------
void* static_calloc(StaticStorage* ss, size_t size)
{
void* p = (void*)Align<16>((uintptr_t)ss->pos);
ss->pos = (u8*)p+size;
debug_assert(ss->pos <= ss->end);
return p;
}

View File

@ -1,373 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* memory suballocators.
*/
#ifndef INCLUDED_ALLOCATORS
#define INCLUDED_ALLOCATORS
#include <map>
#include "lib/config2.h" // CONFIG2_ALLOCATORS_OVERRUN_PROTECTION
#include "lib/posix/posix_mman.h" // PROT_*
#include "lib/sysdep/cpu.h" // cpu_CAS
//
// page aligned allocator
//
/**
* allocate memory aligned to the system page size.
*
* this is useful for file_cache_alloc, which uses this allocator to
* get sector-aligned (hopefully; see sys_max_sector_size) IO buffers.
*
* note that this allocator is stateless and very little error checking
* can be performed.
*
* the memory is initially writable and you can use mprotect to set other
* access permissions if desired.
*
* @param unaligned_size minimum size [bytes] to allocate.
* @return page-aligned and -padded memory or 0 on error / out of memory.
**/
LIB_API void* page_aligned_alloc(size_t unaligned_size);
/**
* free a previously allocated page-aligned region.
*
* @param p Exact value returned from page_aligned_alloc
* @param unaligned_size Exact value passed to page_aligned_alloc
**/
LIB_API void page_aligned_free(void* p, size_t unaligned_size);
#ifdef __cplusplus
template<typename T>
class PageAlignedDeleter
{
public:
PageAlignedDeleter(size_t size)
: m_size(size)
{
debug_assert(m_size != 0);
}
void operator()(T* p)
{
debug_assert(m_size != 0);
page_aligned_free(p, m_size);
m_size = 0;
}
private:
size_t m_size;
};
template<typename T>
class PageAlignedAllocator
{
public:
shared_ptr<T> operator()(size_t size) const
{
return shared_ptr<T>((T*)page_aligned_alloc(size), PageAlignedDeleter<T>(size));
}
};
#endif
//
// matrix allocator
//
/**
* allocate a 2D matrix accessible as matrix[col][row].
*
* takes care of the dirty work of allocating 2D matrices:
* - aligns data
* - only allocates one memory block, which is more efficient than
* malloc/new for each row.
*
* @param cols, rows: dimension (cols x rows)
* @param el_size size [bytes] of a matrix cell
* @return 0 if out of memory, otherwise matrix that should be cast to
* type** (sizeof(type) == el_size). must be freed via matrix_free.
**/
extern void** matrix_alloc(size_t cols, size_t rows, size_t el_size);
/**
* free the given matrix.
*
* @param matrix allocated by matrix_alloc; no-op if 0.
* callers will likely want to pass variables of a different type
* (e.g. int**); they must be cast to void**.
**/
extern void matrix_free(void** matrix);
//-----------------------------------------------------------------------------
// allocator optimized for single instances
//-----------------------------------------------------------------------------
/**
* Allocate \<size\> bytes of zeroed memory.
*
* intended for applications that frequently alloc/free a single
* fixed-size object. caller provides static storage and an in-use flag;
* we use that memory if available and otherwise fall back to the heap.
* if the application only has one object in use at a time, malloc is
* avoided; this is faster and avoids heap fragmentation.
*
* note: thread-safe despite use of shared static data.
*
* @param storage Caller-allocated memory of at least \<size\> bytes
* (typically a static array of bytes)
* @param in_use_flag Pointer to a flag we set when \<storage\> is in-use.
* @param size [bytes] to allocate
* @return allocated memory (typically = \<storage\>, but falls back to
* malloc if that's in-use), or 0 (with warning) if out of memory.
**/
extern void* single_calloc(void* storage, volatile intptr_t* in_use_flag, size_t size);
/**
* Free a memory block that had been allocated by single_calloc.
*
* @param storage Exact value passed to single_calloc.
* @param in_use_flag Exact value passed to single_calloc.
* @param p Exact value returned by single_calloc.
**/
extern void single_free(void* storage, volatile intptr_t* in_use_flag, void* p);
#ifdef __cplusplus
/**
* C++ wrapper on top of single_calloc that's slightly easier to use.
*
* T must be POD (Plain Old Data) because it is memset to 0!
**/
template<class T> class SingleAllocator
{
// evil but necessary hack: we don't want to instantiate a T directly
// because it may not have a default ctor. an array of uninitialized
// storage is used instead. single_calloc doesn't know about alignment,
// so we fix this by asking for an array of doubles.
double storage[(sizeof(T)+sizeof(double)-1)/sizeof(double)];
volatile uintptr_t is_in_use;
public:
typedef T value_type;
SingleAllocator()
{
is_in_use = 0;
}
T* Allocate()
{
T* t = (T*)single_calloc(&storage, &is_in_use, sizeof(storage));
if(!t)
throw std::bad_alloc();
return t;
}
void Free(T* p)
{
single_free(&storage, &is_in_use, p);
}
};
#endif // #ifdef __cplusplus
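
A minimal usage sketch of SingleAllocator; IoRequest is a hypothetical POD type (any POD works, since the storage is zeroed):

struct IoRequest { size_t offset; size_t size; };	// POD

static SingleAllocator<IoRequest> s_requestAllocator;

void IssueRequest()
{
	IoRequest* req = s_requestAllocator.Allocate();	// zeroed; usually from the static storage
	req->offset = 0;
	req->size = 4096;
	// ... hand off the request ...
	s_requestAllocator.Free(req);
}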
//-----------------------------------------------------------------------------
// static allocator
//-----------------------------------------------------------------------------
// dole out chunks of memory from storage reserved in the BSS.
// freeing isn't necessary.
/**
* opaque; initialized by STATIC_STORAGE and used by static_calloc
**/
struct StaticStorage
{
void* pos;
void* end;
};
// define \<size\> bytes of storage and prepare \<name\> for use with
// static_calloc.
// must be invoked from file or function scope.
#define STATIC_STORAGE(name, size)\
static u8 storage[(size)];\
static StaticStorage name = { storage, storage+(size) }
/*
usage example:
static Object* pObject;
void InitObject()
{
STATIC_STORAGE(ss, 100); // includes padding
void* addr = static_calloc(ss, sizeof(Object));
pObject = new(addr) Object;
}
*/
/**
* dole out memory from static storage reserved in BSS.
*
* this is useful for static objects that are used before _cinit - callers
* define static storage for one or several objects, use this function to
* retrieve an aligned pointer, then construct there via placement new.
*
* @param ss - initialized via STATIC_STORAGE
* @param size [bytes] to allocate
* @return aligned (suitable for any type) pointer
*
* raises a warning if there's not enough room (indicates incorrect usage)
**/
extern void* static_calloc(StaticStorage* ss, size_t size);
// (no need to free static_calloc-ed memory since it's in the BSS)
//-----------------------------------------------------------------------------
// OverrunProtector
//-----------------------------------------------------------------------------
/**
OverrunProtector wraps an arbitrary object in DynArray memory and can detect
inadvertent writes to it. this is useful for tracking down memory overruns.
the basic idea is to require users to request access to the object and
notify us when done; memory access permission is temporarily granted.
(similar in principle to Software Transactional Memory).
since this is quite slow, the protection is disabled unless
CONFIG2_ALLOCATORS_OVERRUN_PROTECTION == 1; this avoids having to remove the
wrapper code in release builds and re-write when looking for overruns.
example usage:
OverrunProtector\<your_class\> your_class_wrapper;
..
your_class* yc = your_class_wrapper.get(); // unlock, make ready for use
if(!yc) // your_class_wrapper's one-time alloc of a your_class-
abort(); // instance had failed - can't continue.
doSomethingWith(yc); // read/write access
your_class_wrapper.lock(); // disallow further access until next .get()
..
**/
template<class T> class OverrunProtector
{
public:
OverrunProtector()
{
void* mem = page_aligned_alloc(sizeof(T));
object = new(mem) T();
lock();
}
~OverrunProtector()
{
unlock();
object->~T(); // call dtor (since we used placement new)
page_aligned_free(object, sizeof(T));
object = 0;
}
T* get()
{
unlock();
return object;
}
void lock()
{
#if CONFIG2_ALLOCATORS_OVERRUN_PROTECTION
mprotect(object, sizeof(T), PROT_NONE);
#endif
}
private:
void unlock()
{
#if CONFIG2_ALLOCATORS_OVERRUN_PROTECTION
mprotect(object, sizeof(T), PROT_READ|PROT_WRITE);
#endif
}
T* object;
};
//-----------------------------------------------------------------------------
// AllocatorChecker
//-----------------------------------------------------------------------------
/**
* allocator test rig.
* call from each allocator operation to sanity-check them.
* should only be used during debug mode due to serious overhead.
**/
class AllocatorChecker
{
public:
void OnAllocate(void* p, size_t size)
{
const Allocs::value_type item = std::make_pair(p, size);
std::pair<Allocs::iterator, bool> ret = allocs.insert(item);
debug_assert(ret.second == true); // wasn't already in map
}
void OnDeallocate(void* p, size_t size)
{
Allocs::iterator it = allocs.find(p);
if(it == allocs.end())
debug_assert(0); // freeing invalid pointer
else
{
// size must match what was passed to OnAllocate
const size_t allocated_size = it->second;
debug_assert(size == allocated_size);
allocs.erase(it);
}
}
/**
* allocator is resetting itself, i.e. wiping out all allocs.
**/
void OnClear()
{
allocs.clear();
}
private:
typedef std::map<void*, size_t> Allocs;
Allocs allocs;
};
#endif // #ifndef INCLUDED_ALLOCATORS

View File

@ -27,8 +27,8 @@
#include "precompiled.h"
#include "lib/allocators/bucket.h"
#include "lib/bits.h"
#include "lib/allocators/mem_util.h"
#include "lib/alignment.h"
#include "lib/allocators/freelist.h"
// power-of-2 isn't required; value is arbitrary.
@ -38,7 +38,7 @@ const size_t bucketSize = 4000;
LibError bucket_create(Bucket* b, size_t el_size)
{
b->freelist = mem_freelist_Sentinel();
b->el_size = mem_RoundUpToAlignment(el_size);
b->el_size = Align<allocationAlignment>(el_size);
// note: allocating here avoids the is-this-the-first-time check
// in bucket_alloc, which speeds things up.
@ -52,7 +52,7 @@ LibError bucket_create(Bucket* b, size_t el_size)
}
*(u8**)b->bucket = 0; // terminate list
b->pos = mem_RoundUpToAlignment(sizeof(u8*));
b->pos = Align<allocationAlignment>(sizeof(u8*));
b->num_buckets = 1;
return INFO::OK;
}
@ -78,7 +78,7 @@ void bucket_destroy(Bucket* b)
void* bucket_alloc(Bucket* b, size_t size)
{
size_t el_size = b->el_size? b->el_size : mem_RoundUpToAlignment(size);
size_t el_size = b->el_size? b->el_size : Align<allocationAlignment>(size);
// must fit in a bucket
debug_assert(el_size <= bucketSize-sizeof(u8*));
@ -98,7 +98,7 @@ void* bucket_alloc(Bucket* b, size_t size)
b->bucket = bucket;
// skip bucket list field and align (note: malloc already
// aligns to at least 8 bytes, so don't take b->bucket into account)
b->pos = mem_RoundUpToAlignment(sizeof(u8*));;
b->pos = Align<allocationAlignment>(sizeof(u8*));;
b->num_buckets++;
}

View File

@ -24,8 +24,8 @@
* bucket allocator
*/
#ifndef INCLUDED_BUCKET
#define INCLUDED_BUCKET
#ifndef INCLUDED_ALLOCATORS_BUCKET
#define INCLUDED_ALLOCATORS_BUCKET
/**
* allocator design goals:
@ -111,4 +111,4 @@ LIB_API void* bucket_fast_alloc(Bucket* b);
**/
LIB_API void bucket_free(Bucket* b, void* el);
#endif // #ifndef INCLUDED_BUCKET
#endif // #ifndef INCLUDED_ALLOCATORS_BUCKET
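
A minimal usage sketch of the bucket API touched above (Element is a hypothetical fixed-size type):

struct Element { u32 id; float value; };

void UseBucket()
{
	Bucket b;
	if(bucket_create(&b, sizeof(Element)) != INFO::OK)
		return;
	Element* el = (Element*)bucket_alloc(&b, sizeof(Element));
	if(el)	// 0 if the request doesn't fit in a bucket
	{
		// ... use el ...
		bucket_free(&b, el);
	}
	bucket_destroy(&b);
}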

View File

@ -27,9 +27,8 @@
#include "precompiled.h"
#include "lib/allocators/dynarray.h"
#include "lib/posix/posix_mman.h" // PROT_* constants for da_set_prot
#include "lib/sysdep/cpu.h"
#include "lib/allocators/mem_util.h"
#include "lib/alignment.h"
#include "lib/allocators/page_aligned.h"
// indicates that this DynArray must not be resized or freed
@ -52,9 +51,7 @@ static LibError validate_da(DynArray* da)
// WARN_RETURN(ERR::_1);
// note: don't check if base is page-aligned -
// might not be true for 'wrapped' mem regions.
// if(!mem_IsPageMultiple((uintptr_t)base))
// WARN_RETURN(ERR::_2);
if(!mem_IsPageMultiple(max_size_pa))
if(!IsAligned(max_size_pa, pageSize))
WARN_RETURN(ERR::_3);
if(cur_size > max_size_pa)
WARN_RETURN(ERR::_4);
@ -71,7 +68,7 @@ static LibError validate_da(DynArray* da)
LibError da_alloc(DynArray* da, size_t max_size)
{
const size_t max_size_pa = mem_RoundUpToPage(max_size);
const size_t max_size_pa = Align<pageSize>(max_size);
u8* p = 0;
if(max_size_pa) // (avoid mmap failure)
@ -117,8 +114,8 @@ LibError da_set_size(DynArray* da, size_t new_size)
WARN_RETURN(ERR::LOGIC);
// determine how much to add/remove
const size_t cur_size_pa = mem_RoundUpToPage(da->cur_size);
const size_t new_size_pa = mem_RoundUpToPage(new_size);
const size_t cur_size_pa = Align<pageSize>(da->cur_size);
const size_t new_size_pa = Align<pageSize>(new_size);
const ssize_t size_delta_pa = (ssize_t)new_size_pa - (ssize_t)cur_size_pa;
// not enough memory to satisfy this expand request: abort.
@ -173,7 +170,7 @@ LibError da_set_prot(DynArray* da, int prot)
LibError da_wrap_fixed(DynArray* da, u8* p, size_t size)
{
da->base = p;
da->max_size_pa = mem_RoundUpToPage(size);
da->max_size_pa = Align<pageSize>(size);
da->cur_size = size;
da->cur_size_pa = da->max_size_pa;
da->prot = PROT_READ|PROT_WRITE|DA_NOT_OUR_MEM;

View File

@ -24,8 +24,10 @@
* dynamic (expandable) array
*/
#ifndef INCLUDED_DYNARRAY
#define INCLUDED_DYNARRAY
#ifndef INCLUDED_ALLOCATORS_DYNARRAY
#define INCLUDED_ALLOCATORS_DYNARRAY
#include "lib/posix/posix_mman.h" // PROT_*
/**
* provides a memory range that can be expanded but doesn't waste
@ -146,4 +148,4 @@ LIB_API LibError da_read(DynArray* da, void* data_dst, size_t size);
**/
LIB_API LibError da_append(DynArray* da, const void* data_src, size_t size);
#endif // #ifndef INCLUDED_DYNARRAY
#endif // #ifndef INCLUDED_ALLOCATORS_DYNARRAY
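
A minimal sketch of the DynArray workflow, using the da_* functions named above:

#include <cstring>

LibError CopyIntoDynArray(DynArray* da, const void* data, size_t dataSize)
{
	RETURN_ERR(da_alloc(da, dataSize));	// reserve enough address space
	RETURN_ERR(da_set_size(da, dataSize));	// commit the pages
	memcpy(da->base, data, dataSize);
	return INFO::OK;	// the caller eventually releases via da_free(da)
}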

View File

@ -1,28 +1,31 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* STL allocator for aligned memory
*/
#include "precompiled.h"
#include "lib/allocators/aligned_allocator.h"
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "precompiled.h"
#include "lib/allocators/freelist.h"
void* mem_freelist_Sentinel()
{
// sentinel storing its own address
static void* storageForPrevPtr = &storageForPrevPtr;
return &storageForPrevPtr;
}

View File

@ -1,86 +1,68 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* memory allocator helper routines.
*/
#ifndef INCLUDED_MEM_UTIL
#define INCLUDED_MEM_UTIL
LIB_API bool mem_IsPageMultiple(uintptr_t x);
LIB_API size_t mem_RoundUpToPage(size_t size);
LIB_API size_t mem_RoundUpToAlignment(size_t size);
// very thin wrapper on top of sys/mman.h that makes the intent more obvious
// (its commit/decommit semantics are difficult to tell apart)
LIB_API LibError mem_Reserve(size_t size, u8** pp);
LIB_API LibError mem_Release(u8* p, size_t size);
LIB_API LibError mem_Commit(u8* p, size_t size, int prot);
LIB_API LibError mem_Decommit(u8* p, size_t size);
LIB_API LibError mem_Protect(u8* p, size_t size, int prot);
// "freelist" is a pointer to the first unused element or a sentinel.
// their memory holds a pointer to the previous element in the freelist
// (or its own address in the case of sentinels to avoid branches)
//
// rationale for the function-based interface: a class encapsulating the
// freelist pointer would force each header to include mem_util.h,
// whereas this approach only requires a void* pointer and calling
// mem_freelist_Init from the implementation.
//
// these functions are inlined because allocation is sometimes time-critical.
// @return the address of a sentinel element, suitable for initializing
// a freelist pointer. subsequent mem_freelist_Detach will return 0.
LIB_API void* mem_freelist_Sentinel();
static inline void mem_freelist_AddToFront(void*& freelist, void* el)
{
#ifndef NDEBUG
debug_assert(freelist != 0);
debug_assert(el != 0);
#endif
memcpy(el, &freelist, sizeof(void*));
freelist = el;
}
// @return 0 if the freelist is empty, else a pointer that had
// previously been passed to mem_freelist_AddToFront.
static inline void* mem_freelist_Detach(void*& freelist)
{
#ifndef NDEBUG
debug_assert(freelist != 0);
#endif
void* prev_el;
memcpy(&prev_el, freelist, sizeof(void*));
void* el = (freelist == prev_el)? 0 : freelist;
freelist = prev_el;
return el;
}
#endif // #ifndef INCLUDED_MEM_UTIL
/* Copyright (c) 2011 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INCLUDED_ALLOCATORS_FREELIST
#define INCLUDED_ALLOCATORS_FREELIST
// "freelist" is a pointer to the first unused element or a sentinel.
// their memory holds a pointer to the previous element in the freelist
// (or its own address in the case of sentinels to avoid branches)
//
// rationale for the function-based interface: a class encapsulating the
// freelist pointer would force each header to include this header,
// whereas this approach only requires a void* pointer and calling
// mem_freelist_Sentinel from the implementation.
//
// these functions are inlined because allocation is sometimes time-critical.
// @return the address of a sentinel element, suitable for initializing
// a freelist pointer. subsequent mem_freelist_Detach on that freelist
// will return 0.
LIB_API void* mem_freelist_Sentinel();
static inline void mem_freelist_AddToFront(void*& freelist, void* el)
{
#ifndef NDEBUG
debug_assert(freelist != 0);
debug_assert(el != 0);
#endif
memcpy(el, &freelist, sizeof(void*));
freelist = el;
}
// @return 0 if the freelist is empty, else a pointer that had
// previously been passed to mem_freelist_AddToFront.
static inline void* mem_freelist_Detach(void*& freelist)
{
#ifndef NDEBUG
debug_assert(freelist != 0);
#endif
void* prev_el;
memcpy(&prev_el, freelist, sizeof(void*));
void* el = (freelist == prev_el)? 0 : freelist;
freelist = prev_el;
return el;
}
#endif // #ifndef INCLUDED_ALLOCATORS_FREELIST
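
A minimal sketch of the freelist interface; the element storage is hypothetical and must be at least sizeof(void*) bytes, since freed elements store the previous pointer in-place:

void FreelistExample()
{
	void* freelist = mem_freelist_Sentinel();
	static u8 element[32];	// stand-in for a freed allocation
	mem_freelist_AddToFront(freelist, element);
	void* p = mem_freelist_Detach(freelist);
	debug_assert(p == element);	// the element we just added
	void* q = mem_freelist_Detach(freelist);
	debug_assert(q == 0);	// freelist is empty again
}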

View File

@ -27,9 +27,8 @@
#include "precompiled.h"
#include "lib/allocators/headerless.h"
#include "lib/allocators/mem_util.h"
#include "lib/allocators/pool.h"
#include "lib/bits.h"
#include "lib/allocators/pool.h"
static const bool performSanityChecks = true;
@ -128,7 +127,7 @@ static bool IsValidSize(size_t size)
if(size < HeaderlessAllocator::minAllocationSize)
return false;
if(size % HeaderlessAllocator::allocationGranularity)
if(size % HeaderlessAllocator::allocationAlignment)
return false;
return true;
@ -395,7 +394,7 @@ private:
// prev/next pointers should reside between the magic and ID fields.
// maintaining separate FreedBlock and Footer classes is also undesirable;
// we prefer to use FreedBlock for both, which increases the minimum
// allocation size to 64 + allocationGranularity, e.g. 128.
// allocation size to 64 + allocationAlignment, e.g. 128.
// that's not a problem because the allocator is designed for
// returning pages or IO buffers (4..256 KB).
cassert(HeaderlessAllocator::minAllocationSize >= 2*sizeof(FreedBlock));
@ -645,13 +644,13 @@ public:
m_stats.OnAllocate(size);
Validate();
debug_assert((uintptr_t)p % allocationGranularity == 0);
debug_assert((uintptr_t)p % allocationAlignment == 0);
return p;
}
void Deallocate(u8* p, size_t size)
{
debug_assert((uintptr_t)p % allocationGranularity == 0);
debug_assert((uintptr_t)p % allocationAlignment == 0);
debug_assert(IsValidSize(size));
debug_assert(pool_contains(&m_pool, p));
debug_assert(pool_contains(&m_pool, p+size-1));

View File

@ -24,8 +24,8 @@
* (header-less) pool-based heap allocator
*/
#ifndef INCLUDED_HEADERLESS
#define INCLUDED_HEADERLESS
#ifndef INCLUDED_ALLOCATORS_HEADERLESS
#define INCLUDED_ALLOCATORS_HEADERLESS
/**
* (header-less) pool-based heap allocator
@ -52,8 +52,8 @@ public:
// allocators must 'naturally' align pointers, i.e. ensure they are
// multiples of the largest native type (currently __m128).
// since there are no headers, we can guarantee alignment by
// requiring sizes to be multiples of allocationGranularity.
static const size_t allocationGranularity = 16;
// requiring sizes to be multiples of allocationAlignment.
static const size_t allocationAlignment = 16;
// allocations must be large enough to hold our boundary tags
// when freed. (see rationale above BoundaryTagManager)
@ -72,7 +72,7 @@ public:
void Reset();
/**
* @param size [bytes] (= minAllocationSize + i*allocationGranularity).
* @param size [bytes] (= minAllocationSize + i*allocationAlignment).
* (this allocator is designed for requests on the order of several KiB)
* @return allocated memory or 0 if the pool is too fragmented or full.
**/
@ -81,7 +81,7 @@ public:
/**
* deallocate memory.
* @param p must be exactly as returned by Allocate (in particular,
* evenly divisible by allocationGranularity)
* evenly divisible by allocationAlignment)
* @param size must be exactly as specified to Allocate.
**/
void Deallocate(void* p, size_t size);
@ -96,4 +96,4 @@ private:
shared_ptr<Impl> impl;
};
#endif // #ifndef INCLUDED_HEADERLESS
#endif // #ifndef INCLUDED_ALLOCATORS_HEADERLESS
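
A minimal usage sketch of the renamed allocationAlignment constant; note the pool-size constructor argument is an assumption, since the constructor isn't shown in this diff:

void UseHeaderlessAllocator()
{
	HeaderlessAllocator allocator(4*1024*1024);	// assumed: pool size in bytes
	const size_t size = Align<HeaderlessAllocator::allocationAlignment>(100000);
	void* p = allocator.Allocate(size);	// 0 if the pool is full or too fragmented
	if(p)
		allocator.Deallocate(p, size);	// size must match the Allocate call
}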

View File

@ -0,0 +1,93 @@
/* Copyright (c) 2011 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INCLUDED_ALLOCATORS_OVERRUN_PROTECTOR
#define INCLUDED_ALLOCATORS_OVERRUN_PROTECTOR
#include "lib/config2.h" // CONFIG2_ALLOCATORS_OVERRUN_PROTECTION
#include "lib/allocators/page_aligned.h"
/**
OverrunProtector wraps an arbitrary object in isolated page(s) and
can detect inadvertent writes to it. this is useful for
tracking down memory overruns.
the basic idea is to require users to request access to the object and
notify us when done; memory access permission is temporarily granted.
(similar in principle to Software Transactional Memory).
since this is quite slow, the protection is disabled unless
CONFIG2_ALLOCATORS_OVERRUN_PROTECTION == 1; this avoids having to remove the
wrapper code in release builds and re-write when looking for overruns.
example usage:
OverrunProtector\<T\> wrapper;
..
T* p = wrapper.get(); // unlock, make ready for use
if(!p) // wrapper's one-time alloc of a T-
abort(); // instance had failed - can't continue.
DoSomethingWith(p); // (read/write access)
wrapper.lock(); // disallow further access until next .get()
..
**/
template<class T> class OverrunProtector
{
NONCOPYABLE(OverrunProtector); // const member
public:
OverrunProtector()
: object(new(page_aligned_alloc(sizeof(T))) T())
{
lock();
}
~OverrunProtector()
{
unlock();
object->~T(); // call dtor (since we used placement new)
page_aligned_free(object, sizeof(T));
}
T* get() const
{
unlock();
return object;
}
void lock() const
{
#if CONFIG2_ALLOCATORS_OVERRUN_PROTECTION
mprotect(object, sizeof(T), PROT_NONE);
#endif
}
private:
void unlock() const
{
#if CONFIG2_ALLOCATORS_OVERRUN_PROTECTION
mprotect(object, sizeof(T), PROT_READ|PROT_WRITE);
#endif
}
T* const object;
};
#endif // #ifndef INCLUDED_ALLOCATORS_OVERRUN_PROTECTOR

View File

@ -20,34 +20,11 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* memory allocator helper routines.
*/
#include "precompiled.h"
#include "lib/allocators/mem_util.h"
#include "lib/allocators/page_aligned.h"
#include "lib/bits.h" // round_up
#include "lib/alignment.h"
#include "lib/posix/posix_mman.h"
#include "lib/sysdep/os_cpu.h" // os_cpu_PageSize
bool mem_IsPageMultiple(uintptr_t x)
{
return (x & (os_cpu_PageSize()-1)) == 0;
}
size_t mem_RoundUpToPage(size_t size)
{
return round_up(size, os_cpu_PageSize());
}
size_t mem_RoundUpToAlignment(size_t size)
{
// all allocators should align to at least this many bytes:
return Align<8>(size);
}
#include "lib/sysdep/cpu.h" // cpu_CAS
//-----------------------------------------------------------------------------
@ -110,11 +87,23 @@ LibError mem_Protect(u8* p, size_t size, int prot)
}
void* mem_freelist_Sentinel()
//-----------------------------------------------------------------------------
void* page_aligned_alloc(size_t size)
{
// sentinel storing its own address
static void* storageForPrevPtr;
void* const storageAddress = &storageForPrevPtr;
memcpy(&storageForPrevPtr, &storageAddress, sizeof(storageForPrevPtr));
return storageAddress;
const size_t alignedSize = Align<pageSize>(size);
u8* p = 0;
RETURN0_IF_ERR(mem_Reserve(alignedSize, &p));
RETURN0_IF_ERR(mem_Commit(p, alignedSize, PROT_READ|PROT_WRITE));
return p;
}
void page_aligned_free(void* p, size_t size)
{
if(!p)
return;
debug_assert(IsAligned(p, pageSize));
const size_t alignedSize = Align<pageSize>(size);
(void)mem_Release((u8*)p, alignedSize);
}

View File

@ -0,0 +1,62 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INCLUDED_ALLOCATORS_PAGE_ALIGNED
#define INCLUDED_ALLOCATORS_PAGE_ALIGNED
#include "lib/posix/posix_mman.h" // PROT_*
// very thin wrapper on top of sys/mman.h that makes the intent more obvious
// (its commit/decommit semantics are difficult to tell apart)
LIB_API LibError mem_Reserve(size_t size, u8** pp);
LIB_API LibError mem_Release(u8* p, size_t size);
LIB_API LibError mem_Commit(u8* p, size_t size, int prot);
LIB_API LibError mem_Decommit(u8* p, size_t size);
LIB_API LibError mem_Protect(u8* p, size_t size, int prot);
/**
* allocate memory aligned to the system page size.
*
* this is useful for file_cache_alloc, which uses this allocator to
* get sector-aligned (hopefully; see sys_max_sector_size) IO buffers.
*
* note that this allocator is stateless and very little error checking
* can be performed.
*
* the memory is initially writable and you can use mprotect to set other
* access permissions if desired.
*
* @param unaligned_size minimum size [bytes] to allocate.
* @return page-aligned and -padded memory or 0 on error / out of memory.
**/
LIB_API void* page_aligned_alloc(size_t unaligned_size);
/**
* free a previously allocated page-aligned region.
*
* @param p Exact value returned from page_aligned_alloc
* @param unaligned_size Exact value passed to page_aligned_alloc
**/
LIB_API void page_aligned_free(void* p, size_t unaligned_size);
#endif // #ifndef INCLUDED_ALLOCATORS_PAGE_ALIGNED
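
A minimal usage sketch of the page-aligned allocator; the IO step in the middle is hypothetical:

void ReadSectorAligned()
{
	const size_t size = 256*1024;
	void* buf = page_aligned_alloc(size);	// page-aligned and padded, or 0 on failure
	if(!buf)
		return;
	// ... issue sector-aligned IO into buf ...
	page_aligned_free(buf, size);	// must pass the same size
}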

View File

@ -27,7 +27,8 @@
#include "precompiled.h"
#include "lib/allocators/pool.h"
#include "lib/allocators/mem_util.h"
#include "lib/alignment.h"
#include "lib/allocators/freelist.h"
#include "lib/timer.h"
@ -39,7 +40,7 @@ LibError pool_create(Pool* p, size_t max_size, size_t el_size)
if(el_size == POOL_VARIABLE_ALLOCS)
p->el_size = 0;
else
p->el_size = mem_RoundUpToAlignment(el_size);
p->el_size = Align<allocationAlignment>(el_size);
p->freelist = mem_freelist_Sentinel();
RETURN_ERR(da_alloc(&p->da, max_size));
return INFO::OK;
@ -73,7 +74,7 @@ void* pool_alloc(Pool* p, size_t size)
TIMER_ACCRUE(tc_pool_alloc);
// if pool allows variable sizes, go with the size parameter,
// otherwise the pool el_size setting.
const size_t el_size = p->el_size? p->el_size : mem_RoundUpToAlignment(size);
const size_t el_size = p->el_size? p->el_size : Align<allocationAlignment>(size);
// note: this can never happen in pools with variable-sized elements
// because they disallow pool_free.

View File

@ -24,8 +24,8 @@
* pool allocator
*/
#ifndef INCLUDED_POOL
#define INCLUDED_POOL
#ifndef INCLUDED_ALLOCATORS_POOL
#define INCLUDED_ALLOCATORS_POOL
#include "lib/allocators/dynarray.h"
@ -321,4 +321,4 @@ bool operator!=(const pool_allocator<T1>&, const pool_allocator<T2>&) throw ()
return false;
}
#endif // #ifndef INCLUDED_POOL
#endif // #ifndef INCLUDED_ALLOCATORS_POOL
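
A minimal sketch of a fixed-size Pool, using the pool_* functions touched above (Entry is a hypothetical element type):

struct Entry { u32 key; u32 value; };

void UsePool()
{
	Pool p;
	if(pool_create(&p, 1*1024*1024, sizeof(Entry)) != INFO::OK)
		return;
	Entry* e = (Entry*)pool_alloc(&p, sizeof(Entry));	// fixed-size pools use the el_size from pool_create
	if(e)
	{
		// ... use e ...
		pool_free(&p, e);
	}
	(void)pool_destroy(&p);
}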

View File

@ -23,7 +23,7 @@
#include "precompiled.h"
#include "lib/allocators/shared_ptr.h"
#include "lib/allocators/allocators.h" // AllocatorChecker
#include "lib/allocators/allocator_checker.h"
#ifndef NDEBUG

View File

@ -20,8 +20,8 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INCLUDED_SHARED_PTR
#define INCLUDED_SHARED_PTR
#ifndef INCLUDED_ALLOCATORS_SHARED_PTR
#define INCLUDED_ALLOCATORS_SHARED_PTR
#include "lib/alignment.h"
#include "lib/sysdep/rtl.h" // rtl_AllocateAligned
@ -49,15 +49,6 @@ struct ArrayDeleter
}
};
struct FreeDeleter
{
template<class T>
void operator()(T* p)
{
free(p);
}
};
// (note: uses CheckedArrayDeleter)
LIB_API shared_ptr<u8> Allocate(size_t size);
@ -81,4 +72,4 @@ static inline LibError AllocateAligned(shared_ptr<T>& p, size_t size, size_t ali
return INFO::OK;
}
#endif // #ifndef INCLUDED_SHARED_PTR
#endif // #ifndef INCLUDED_ALLOCATORS_SHARED_PTR
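
A minimal sketch of the retained AllocateAligned helper, matching its two-argument use in stream.cpp further below:

void UseAlignedBuffer()
{
	shared_ptr<u8> buf;
	if(AllocateAligned(buf, 64*1024) == INFO::OK)
	{
		// buf.get() is suitably aligned; the deleter frees it with the last reference
	}
}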

View File

@ -1,109 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* shared storage for strings
*/
#include "precompiled.h"
#include "lib/allocators/string_pool.h"
#include "lib/rand.h"
StringPool::StringPool(size_t maxSize)
{
pool_create(&m_pool, maxSize, POOL_VARIABLE_ALLOCS);
}
StringPool::~StringPool()
{
m_map.clear();
(void)pool_destroy(&m_pool);
}
const char* StringPool::UniqueCopy(const char* string)
{
// early out: check if it already lies in the pool
if(Contains(string))
return string;
// check if equivalent to an existing string.
//
// rationale: the entire storage could be done via container,
// rather than simply using it as a lookup mapping.
// however, DynHashTbl together with Pool (see above) is more efficient.
const char* existingString = m_map.find(string);
if(existingString)
return existingString;
const size_t length = strlen(string);
char* uniqueCopy = (char*)pool_alloc(&m_pool, length+1);
if(!uniqueCopy)
throw std::bad_alloc();
memcpy((void*)uniqueCopy, string, length);
uniqueCopy[length] = '\0';
m_map.insert(uniqueCopy, uniqueCopy);
return uniqueCopy;
}
bool StringPool::Contains(const char* string) const
{
return pool_contains(&m_pool, (void*)string);
}
const char* StringPool::RandomString() const
{
// there had better be names in m_pool, else this will fail.
debug_assert(m_pool.da.pos != 0);
again:
const size_t start_ofs = (size_t)rand(0, (size_t)m_pool.da.pos);
// scan back to start of string (don't scan ahead; this must
// work even if m_pool only contains one entry).
const char* start = (const char*)m_pool.da.base + start_ofs;
for(size_t i = 0; i < start_ofs; i++)
{
if(*start == '\0')
break;
start--;
}
// skip past the '\0' we found. loop is needed because there may be
// several if we land in padding (due to pool alignment).
size_t chars_left = m_pool.da.pos - start_ofs;
for(; *start == '\0'; start++)
{
// we had landed in padding at the end of the buffer.
if(chars_left-- == 0)
goto again;
}
return start;
}

View File

@ -1,62 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* shared storage for strings
*/
#ifndef INCLUDED_STRING_POOL
#define INCLUDED_STRING_POOL
#include "lib/adts.h" // DynHashTbl
#include "lib/allocators/pool.h"
class StringPool
{
public:
StringPool(size_t maxSize);
~StringPool();
/**
* allocate a copy of the string.
*
* @return a unique pointer for the string (addresses are equal iff
* the string contents match). can return 0, but would raise a
* warning first.
**/
const char* UniqueCopy(const char* string);
bool Contains(const char* string) const;
const char* RandomString() const;
private:
// rationale: we want an O(1) Contains() so that redundant UniqueCopy
// calls are cheap. that requires allocating from one contiguous arena,
// which is also more memory-efficient than the heap (no headers).
Pool m_pool;
typedef DynHashTbl<const char*, const char*> Map;
Map m_map;
};
#endif // #ifndef INCLUDED_STRING_POOL

View File

@ -22,7 +22,6 @@
#include "lib/self_test.h"
#include "lib/allocators/allocators.h"
#include "lib/allocators/dynarray.h"
#include "lib/byte_order.h"
@ -50,19 +49,4 @@ public:
TS_ASSERT_EQUALS((uint32_t)debug_StopSkippingErrors(), (uint32_t)1);
TS_ASSERT_OK(da_free(&da));
}
void test_matrix()
{
// not much we can do here; allocate a matrix, write to it and
// make sure it can be freed.
// (note: can't check memory layout because "matrix" is int** -
// array of pointers. the matrix interface doesn't guarantee
// that data comes in row-major order after the row pointers)
int** m = (int**)matrix_alloc(3, 3, sizeof(int));
m[0][0] = 1;
m[0][1] = 2;
m[1][0] = 3;
m[2][2] = 4;
matrix_free((void**)m);
}
};

View File

@ -120,7 +120,7 @@ public:
if(rand() >= RAND_MAX/2)
{
const size_t maxSize = (size_t)((rand() / (float)RAND_MAX) * poolSize);
const size_t size = std::max((size_t)HeaderlessAllocator::minAllocationSize, round_down(maxSize, HeaderlessAllocator::allocationGranularity));
const size_t size = std::max((size_t)HeaderlessAllocator::minAllocationSize, round_down(maxSize, HeaderlessAllocator::allocationAlignment));
// (the size_t cast on minAllocationSize prevents max taking a reference to the non-defined variable)
void* p = a.Allocate(size);
if(!p)

View File

@ -1,6 +1,5 @@
#ifndef INCLUDED_UNIQUE_RANGE
#define INCLUDED_UNIQUE_RANGE
#ifndef INCLUDED_ALLOCATORS_UNIQUE_RANGE
#define INCLUDED_ALLOCATORS_UNIQUE_RANGE
#define ASSERT debug_assert
@ -183,4 +182,4 @@ static inline void swap(UniqueRange& p1, RVALUE_REF(UniqueRange) p2)
}
#endif // #ifndef INCLUDED_UNIQUE_RANGE
#endif // #ifndef INCLUDED_ALLOCATORS_UNIQUE_RANGE

View File

@ -32,7 +32,7 @@
#include <cstdio>
#include "lib/app_hooks.h"
#include "lib/allocators/allocators.h" // page_aligned_alloc
#include "lib/allocators/page_aligned.h"
#include "lib/fnv_hash.h"
#include "lib/sysdep/cpu.h" // cpu_CAS
#include "lib/sysdep/sysdep.h"

View File

@ -23,7 +23,7 @@
#include "precompiled.h"
#include "lib/file/archive/stream.h"
#include "lib/allocators/allocators.h" // page_aligned_alloc
#include "lib/allocators/page_aligned.h"
#include "lib/allocators/shared_ptr.h"
#include "lib/file/archive/codec.h"
//#include "lib/timer.h"
@ -64,7 +64,7 @@ void OutputBufferManager::AllocateBuffer(size_t size)
// no buffer or the previous one wasn't big enough: reallocate
if(!m_mem || m_capacity < size)
{
m_mem.reset((u8*)page_aligned_alloc(size), PageAlignedDeleter<u8>(size));
AllocateAligned(m_mem, size);
m_capacity = size;
}

View File

@ -32,10 +32,11 @@
#include "lib/file/common/file_stats.h"
#include "lib/cache_adt.h" // Cache
#include "lib/bits.h" // round_up
#include "lib/allocators/allocators.h"
#include "lib/allocators/allocator_checker.h"
#include "lib/allocators/shared_ptr.h"
#include "lib/allocators/headerless.h"
#include "lib/sysdep/os_cpu.h" // os_cpu_PageSize
#include "lib/posix/posix_mman.h" // mprotect
//-----------------------------------------------------------------------------

View File

@ -1,720 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* lock-free synchronized data structures.
*/
#include "precompiled.h"
#if 0 // JW: disabled, not used
#include "lib/lockfree.h"
#include <set>
#include <algorithm>
#include "lib/posix/posix_pthread.h"
#include "lib/bits.h"
#include "lib/sysdep/cpu.h"
#include "lib/sysdep/sysdep.h"
#include "lib/timer.h"
#include "lib/module_init.h"
/*
liberties taken:
- R(H) will remain constant
(since TLS rlist is fixed-size, and we don't care about O(1)
amortization proofs)
lacking from pseudocode:
- mark HPRec as active when allocated
questions:
- does hp0 ("private, static") need to be in TLS? or is per-"find()" ok?
- memory barriers where?
todo:
-make sure retired node array doesn't overflow. add padding (i.e. "Scan" if half-full?)
-see why SMR had algo extension of HelpScan
-simple iteration is safe?
*/
// total number of hazard pointers needed by each thread.
// determined by the algorithms using SMR; the LF list requires 2.
static const size_t NUM_HPS = 2;
// number of slots for the per-thread node freelist.
// this is a reasonable size and pads struct TLS to 64 bytes.
static const size_t MAX_RETIRED = 11;
// used to allocate a flat array of all hazard pointers.
// changed via cpu_AtomicAdd by tls_alloc / tls_retire when a thread first calls us / exits.
static intptr_t active_threads;
// basically module refcount; we can't shut down before it's 0.
// changed via cpu_AtomicAdd by each data structure's init/free.
static intptr_t active_data_structures;
// Nodes are internal to this module. having callers pass them in would
// be more convenient but risky, since they might change <next> and <key>,
// or not allocate via malloc (necessary since Nodes are garbage-collected
// and allowing user-specified destructors would be more work).
//
// to still allow storing arbitrary user data without requiring an
// additional memory alloc per node, we append <user_size> bytes to the
// end of the Node structure; this is what is returned by find.
struct Node
{
Node* next;
uintptr_t key;
// <additional_bytes> are allocated here at the caller's discretion.
};
static inline Node* node_alloc(size_t additional_bytes)
{
return (Node*)calloc(1, sizeof(Node) + additional_bytes);
}
static inline void node_free(Node* n)
{
free(n);
}
static inline void* node_user_data(Node* n)
{
return (u8*)n + sizeof(Node);
}
//////////////////////////////////////////////////////////////////////////////
//
// thread-local storage for SMR
//
//////////////////////////////////////////////////////////////////////////////
static pthread_key_t tls_key;
struct TLS
{
TLS* next;
void* hp[NUM_HPS];
uintptr_t active; // used as bool, but set by cpu_CAS
Node* retired_nodes[MAX_RETIRED];
size_t num_retired_nodes;
};
static TLS* tls_list = 0;
// mark a participating thread's slot as unused; clear its hazard pointers.
// called during smr_shutdown and when a thread exits
// (by pthread dtor, which is registered in tls_init).
static void tls_retire(void* tls_)
{
TLS* tls = (TLS*)tls_;
// our hazard pointers are no longer in use
for(size_t i = 0; i < NUM_HPS; i++)
tls->hp[i] = 0;
// successfully marked as unused (must only decrement once)
if(cpu_CAS(&tls->active, 1, 0))
{
cpu_AtomicAdd(&active_threads, -1);
debug_assert(active_threads >= 0);
}
}
static void tls_init()
{
WARN_ERR(pthread_key_create(&tls_key, tls_retire));
}
// free all TLS info. called by smr_shutdown.
static void tls_shutdown()
{
WARN_ERR(pthread_key_delete(tls_key));
memset(&tls_key, 0, sizeof(tls_key));
while(tls_list)
{
TLS* tls = tls_list;
tls_list = tls->next;
free(tls);
}
}
// return a new TLS struct ready for use; either a previously
// retired slot, or if none are available, a newly allocated one.
// if out of memory, return (TLS*)-1; see fail path.
// called from tls_get after tls_init.
static TLS* tls_alloc()
{
TLS* tls;
// try to reuse a retired TLS slot
for(tls = tls_list; tls; tls = tls->next)
// .. succeeded in reactivating one.
if(cpu_CAS(&tls->active, 0, 1))
goto have_tls;
// no unused slots available - allocate another
{
tls = (TLS*)calloc(1, sizeof(TLS));
// .. not enough memory. poison the thread's TLS value to
// prevent a later tls_get from succeeding, because that
// would potentially break the user's LF data structure.
if(!tls)
{
tls = (TLS*)-1;
WARN_ERR(pthread_setspecific(tls_key, tls));
return tls;
}
tls->active = 1;
// insert at front of list (wait free since # threads is finite).
TLS* old_tls_list;
do
{
old_tls_list = tls_list;
tls->next = old_tls_list;
}
while(!cpu_CAS(&tls_list, old_tls_list, tls));
}
have_tls:
cpu_AtomicAdd(&active_threads, 1);
WARN_ERR(pthread_setspecific(tls_key, tls));
return tls;
}
// return this thread's struct TLS, or (TLS*)-1 if tls_alloc failed.
// called from each lfl_* function, so don't waste any time.
static TLS* tls_get()
{
// already allocated or tls_alloc failed.
TLS* tls = (TLS*)pthread_getspecific(tls_key);
if(tls)
return tls;
// first call: return a newly allocated slot.
return tls_alloc();
}
//////////////////////////////////////////////////////////////////////////////
//
// "Safe Memory Reclamation for Lock-Free Objects" via hazard pointers
//
//////////////////////////////////////////////////////////////////////////////
// is one of the hazard pointers in <hps> pointing at <node>?
static bool is_node_referenced(Node* node, void** hps, size_t num_hps)
{
for(size_t i = 0; i < num_hps; i++)
if(hps[i] == node)
return true;
return false;
}
// "Scan"
// run through all retired nodes in this thread's freelist; any of them
// not currently referenced are released (their memory freed).
static void smr_release_unreferenced_nodes(TLS* tls)
{
// nothing to do, and taking address of array[-1] isn't portable.
if(tls->num_retired_nodes == 0)
return;
// required for head/tail below; guaranteed by callers.
debug_assert(0 < tls->num_retired_nodes && tls->num_retired_nodes <= MAX_RETIRED);
//
// build array of all active (non-NULL) hazard pointers (more efficient
// than walking through tls_list on every is_node_referenced call)
//
retry:
const size_t max_hps = (active_threads+3) * NUM_HPS;
// allow for creating a few additional threads during the loop
void** hps = (void**)alloca(max_hps * sizeof(void*));
size_t num_hps = 0;
// for each participating thread:
for(TLS* t = tls_list; t; t = t->next)
// for each of its non-NULL hazard pointers:
for(size_t i = 0; i < NUM_HPS; i++)
{
void* hp = t->hp[i];
if(!hp)
continue;
// many threads were created after choosing max_hps =>
// start over. this won't realistically happen, though.
if(num_hps >= max_hps)
{
debug_assert(0); // max_hps overrun - why?
goto retry;
}
hps[num_hps++] = hp;
}
//
// free all discarded nodes that are no longer referenced
// (i.e. no element in hps[] points to them). no need to lock or
// clone the retired_nodes list since it's in TLS.
//
Node** head = tls->retired_nodes;
Node** tail = head + tls->num_retired_nodes-1;
while(head <= tail)
{
Node* node = *head;
// still in use - just skip to the next
if(is_node_referenced(node, hps, num_hps))
head++;
else
{
node_free(node);
// to avoid holes in the freelist, replace with last entry.
// this is easier than building a new list.
*head = *tail; // if last element, no-op
tail--;
tls->num_retired_nodes--;
}
}
}
// note: we don't implement "HelpScan" - it is sufficient for the
// freelists in retired but never-reused TLS slots to be emptied at exit,
// since huge spikes of active threads are unrealistic.
static void smr_retire_node(Node* node)
{
TLS* tls = tls_get();
debug_assert(tls != (void*)-1);
// if this triggers, tls_alloc called from lfl_init failed due to
// lack of memory and the caller didn't check its return value.
debug_assert(tls->num_retired_nodes < MAX_RETIRED);
tls->retired_nodes[tls->num_retired_nodes++] = node;
if(tls->num_retired_nodes >= MAX_RETIRED/2)
smr_release_unreferenced_nodes(tls);
}
// although not strictly necessary (the OS will free resources at exit),
// we free all nodes and TLS to avoid spurious leak reports.
static void smr_shutdown()
{
// there better not be any data structures still in use, else we're
// going to pull the rug out from under them.
debug_assert(active_data_structures == 0);
for(TLS* t = tls_list; t; t = t->next)
{
// wipe out hazard pointers so that everything can be freed.
tls_retire(t);
smr_release_unreferenced_nodes(t);
}
tls_shutdown();
}
//////////////////////////////////////////////////////////////////////////////
//
// lock-free singly linked list
//
//////////////////////////////////////////////////////////////////////////////
// output of lfl_lookup
struct ListPos
{
Node** pprev;
Node* cur;
Node* next;
};
// we 'mark' the next pointer of a retired node to prevent linking
// to it in concurrent inserts. since all pointers returned by malloc are
// at least 2-byte aligned, we can use the least significant bit.
static inline bool is_marked_as_deleted(Node* p)
{
const uintptr_t u = (uintptr_t)p;
return (u & Bit<uintptr_t>(0)) != 0;
}
static inline Node* with_mark(Node* p)
{
debug_assert(!is_marked_as_deleted(p)); // paranoia
return (Node*)(uintptr_t(p) | Bit<uintptr_t>(0)); // set the mark bit (p+1 would advance by sizeof(Node))
}
static inline Node* without_mark(Node* p)
{
debug_assert(is_marked_as_deleted(p)); // paranoia
return (Node*)(uintptr_t(p) & ~Bit<uintptr_t>(0)); // clear the mark bit
}
// make ready a previously unused(!) list object. if a negative error
// code (currently only ERR::NO_MEM) is returned, the list can't be used.
LibError lfl_init(LFList* list)
{
// make sure a TLS slot has been allocated for this thread.
// if not (out of memory), the list object must not be used -
// other calls don't have a "tls=0" failure path.
// (it doesn't make sense to allow some calls to fail until more
// memory is available, since that might leave the list in an
// invalid state or leak memory)
TLS* tls = tls_get();
if(!tls)
{
list->head = (void*)-1; // 'poison' prevents further use
return ERR::NO_MEM;
}
list->head = 0;
cpu_AtomicAdd(&active_data_structures, 1);
return INFO::OK;
}
// call when list is no longer needed; should no longer hold any references.
void lfl_free(LFList* list)
{
// TODO: is this iteration safe?
Node* cur = (Node*)list->head;
while(cur)
{
Node* next = cur->next;
// must latch before smr_retire_node, since that may
// actually free the memory.
smr_retire_node(cur);
cur = next;
}
cpu_AtomicAdd(&active_data_structures, -1);
debug_assert(active_data_structures >= 0);
}
// "Find"
// look for a given key in the list; return true iff found.
// pos points to the last inspected node and its successor and predecessor.
static bool list_lookup(LFList* list, uintptr_t key, ListPos* pos)
{
TLS* tls = tls_get();
debug_assert(tls != (void*)-1);
// if this triggers, tls_alloc called from lfl_init failed due to
// lack of memory and the caller didn't check its return value.
void** hp0 = &tls->hp[0]; // protects cur
void** hp1 = &tls->hp[1]; // protects *pprev
retry:
pos->pprev = (Node**)&list->head;
// linearization point of erase and find if list is empty.
// already protected by virtue of being the root node.
pos->cur = *pos->pprev;
// until end of list:
while(pos->cur)
{
*hp0 = pos->cur;
// pprev changed (<==> *pprev or cur was removed) => start over.
// lock-free, since other threads thereby make progress.
if(*pos->pprev != pos->cur)
goto retry;
pos->next = pos->cur->next;
// linearization point of the following if list is not empty:
// unsuccessful insert or erase; find.
// this node has been removed from the list; retire it before
// continuing (we don't want to add references to it).
if(is_marked_as_deleted(pos->next))
{
Node* next = without_mark(pos->next);
if(!cpu_CAS(pos->pprev, pos->cur, next))
goto retry;
smr_retire_node(pos->cur);
pos->cur = next;
}
else
{
// (see above goto)
if(*pos->pprev != pos->cur)
goto retry;
// the nodes are sorted in ascending key order, so we've either
// found <key>, or it's not in the list.
const uintptr_t cur_key = pos->cur->key;
if(cur_key >= key)
return (cur_key == key);
pos->pprev = &pos->cur->next;
pos->cur = pos->next;
// protect pprev in the subsequent iteration; it now points into cur
// (at offset offsetof(Node, next)).
// note that we don't need to validate *pprev, since *hp0 is
// already protecting cur.
std::swap(hp0, hp1);
}
}
// hit end of list => not found.
return false;
}
// return pointer to "user data" attached to <key>,
// or 0 if not found in the list.
void* lfl_find(LFList* list, uintptr_t key)
{
ListPos* pos = (ListPos*)alloca(sizeof(ListPos));
if(!list_lookup(list, key, pos))
return 0;
return node_user_data(pos->cur);
}
// insert into list in order of increasing key. ensures items are unique
// by first checking if already in the list. returns 0 if out of memory,
// otherwise a pointer to "user data" attached to <key>. the optional
// <was_inserted> return variable indicates whether <key> was added.
void* lfl_insert(LFList* list, uintptr_t key, size_t additional_bytes, int* was_inserted)
{
TLS* tls = tls_get();
debug_assert(tls != (void*)-1);
// if this triggers, tls_alloc called from lfl_init failed due to
// lack of memory and the caller didn't check its return value.
ListPos* pos = (ListPos*)alloca(sizeof(ListPos));
Node* node = 0;
if(was_inserted)
*was_inserted = 0;
retry:
// already in list - return it and leave <was_inserted> 'false'
if(list_lookup(list, key, pos))
{
// free in case we allocated below, but cpu_CAS failed;
// no-op if node == 0, i.e. it wasn't allocated.
node_free(node);
node = pos->cur;
goto have_node;
}
// else: not yet in list, so allocate a new Node if we haven't already.
// doing that after list_lookup avoids needless alloc/free.
if(!node)
{
node = node_alloc(additional_bytes);
// .. out of memory
if(!node)
return 0;
}
node->key = key;
node->next = pos->cur;
// atomic insert immediately before pos->cur. failure implies
// at least one of the following happened after list_lookup; we try again.
// - *pprev was removed (i.e. it's 'marked')
// - cur was retired (i.e. no longer reachable from *phead)
// - a new node was inserted immediately before cur
if(!cpu_CAS(pos->pprev, pos->cur, node))
goto retry;
// else: successfully inserted; linearization point
if(was_inserted)
*was_inserted = 1;
have_node:
return node_user_data(node);
}
// remove from list; return -1 if not found, or 0 on success.
LibError lfl_erase(LFList* list, uintptr_t key)
{
TLS* tls = tls_get();
debug_assert(tls != (void*)-1);
// if this triggers, tls_alloc called from lfl_init failed due to
// lack of memory and the caller didn't check its return value.
ListPos* pos = (ListPos*)alloca(sizeof(ListPos));
retry:
// not found in list - abort.
if(!list_lookup(list, key, pos))
return ERR::FAIL;
// mark as removed (avoids subsequent linking to it). failure implies
// at least one of the following happened after list_lookup; we try again.
// - next was removed
// - cur was retired (i.e. no longer reachable from *phead)
// - a new node was inserted immediately after cur
if(!cpu_CAS(&pos->cur->next, pos->next, with_mark(pos->next)))
goto retry;
// remove from list; if successful, this is the
// linearization point and *pprev isn't marked.
if(cpu_CAS(pos->pprev, pos->cur, pos->next))
smr_retire_node(pos->cur);
// failed: another thread removed cur after it was marked above.
// call list_lookup to ensure # non-released nodes < # threads.
else
list_lookup(list, key, pos);
return INFO::OK;
}
//////////////////////////////////////////////////////////////////////////////
//
// lock-free hash table
//
//////////////////////////////////////////////////////////////////////////////
// note: implemented via lfl, so we don't need to update
// active_data_structures.
static void validate(LFHash* hash)
{
debug_assert(hash->tbl);
debug_assert(is_pow2(hash->mask+1));
}
// return hash "chain" (i.e. linked list) that is assigned to <key>.
static LFList* chain(LFHash* hash, uintptr_t key)
{
validate(hash);
return &hash->tbl[key & hash->mask];
}
// make ready a previously unused(!) hash object. table size will be
// <num_entries>; this cannot currently be expanded. if a negative error
// code (currently only ERR::NO_MEM) is returned, the hash can't be used.
LibError lfh_init(LFHash* hash, size_t num_entries)
{
hash->tbl = 0;
hash->mask = ~0u;
if(!is_pow2((long)num_entries))
{
debug_assert(0); // lfh_init: size must be power of 2
return ERR::INVALID_PARAM;
}
hash->tbl = (LFList*)malloc(sizeof(LFList) * num_entries);
if(!hash->tbl)
return ERR::NO_MEM;
hash->mask = (size_t)num_entries-1;
for(int i = 0; i < (int)num_entries; i++)
{
int err = lfl_init(&hash->tbl[i]);
if(err < 0)
{
// failed - free all and bail
for(int j = 0; j < i; j++)
lfl_free(&hash->tbl[j]);
return ERR::NO_MEM;
}
}
return INFO::OK;
}
// call when hash is no longer needed; should no longer hold any references.
void lfh_free(LFHash* hash)
{
validate(hash);
// free all chains
for(size_t i = 0; i < hash->mask+1; i++)
lfl_free(&hash->tbl[i]);
free(hash->tbl);
hash->tbl = 0;
hash->mask = 0;
}
// return pointer to "user data" attached to <key>,
// or 0 if not found in the hash.
void* lfh_find(LFHash* hash, uintptr_t key)
{
return lfl_find(chain(hash,key), key);
}
// insert into hash if not already present. returns 0 if out of memory,
// otherwise a pointer to "user data" attached to <key>. the optional
// <was_inserted> return variable indicates whether <key> was added.
void* lfh_insert(LFHash* hash, uintptr_t key, size_t additional_bytes, int* was_inserted)
{
return lfl_insert(chain(hash,key), key, additional_bytes, was_inserted);
}
// remove from hash; return -1 if not found, or 0 on success.
LibError lfh_erase(LFHash* hash, uintptr_t key)
{
return lfl_erase(chain(hash,key), key);
}
//-----------------------------------------------------------------------------
void lockfree_Init()
{
tls_init();
}
void lockfree_Shutdown()
{
smr_shutdown();
}
#endif

View File

@ -1,241 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* lock-free synchronized data structures.
*/
#ifndef INCLUDED_LOCKFREE
#define INCLUDED_LOCKFREE
#include "lib/sysdep/cpu.h" // cpu_CAS
/*
[KEEP IN SYNC WITH WIKI]
overview
--------
this module provides several implicitly thread-safe data structures.
rather than allowing only one thread to access them at a time, their
operations are carefully implemented such that they take effect in
one atomic step. data consistency problems are thus avoided.
this novel approach to synchronization has several advantages:
- deadlocks are impossible;
- overhead due to OS kernel entry is avoided;
- graceful scaling to multiple processors is ensured.
mechanism
---------
the basic primitive that makes this possible is "compare and swap",
a CPU instruction that performs both steps atomically. it compares a
machine word against the expected value; if equal, the new value is
written and an indication returned. otherwise, another thread must have
been writing to the same location; the operation is typically retried.
this instruction is available on all modern architectures; in some cases,
emulation in terms of an alternate primitive (LL/SC) is necessary.
memory management
-----------------
one major remaining problem is how to free no longer needed nodes in the
data structure. in general, we want to reclaim their memory for arbitrary use;
this isn't safe as long as other threads are still accessing them.
the RCU algorithm recognizes that all CPUs having entered a quiescent
state means that no threads are still referencing data.
lacking such kernel support, we use a similar mechanism - "hazard pointers"
are set before accessing data; only if none are pointing to a node can it
be freed. until then, they are stored in a per-thread 'waiting list'.
this approach has several advantages over previous algorithms
(typically involving reference count): the CAS primitive need only
operate on single machine words, and space/time overhead is much reduced.
usage notes
-----------
useful "payload" in the data structures is allocated when inserting each
item: additional_bytes are appended. rationale: see struct Node definition.
since lock-free algorithms are subtle and easy to get wrong, an extensive
self-test is included.
terminology
-----------
"atomic" means indivisible; in this case, other CPUs cannot
interfere with such an operation.
"race conditions" are potential data consistency
problems resulting from lack of thread synchronization.
"deadlock" is a state where several threads are waiting on
one another and no progress is possible.
"thread-safety" is understood to mean the
preceding two problems do not occur.
"scalability" is a measure of how efficient synchronization is;
overhead should not increase significantly with more processors.
"linearization point" denotes the time at which an external
observer believes a lock-free operation to have taken effect.
*/
extern void lockfree_Init();
extern void lockfree_Shutdown();
//
// lock-free singly linked list
//
struct LFList
{
void* head;
};
// make ready a previously unused(!) list object. if a negative error
// code (currently only ERR::NO_MEM) is returned, the list can't be used.
extern LibError lfl_init(LFList* list);
// call when list is no longer needed; should no longer hold any references.
extern void lfl_free(LFList* list);
// return pointer to "user data" attached to <key>,
// or 0 if not found in the list.
extern void* lfl_find(LFList* list, uintptr_t key);
// insert into list in order of increasing key. ensures items are unique
// by first checking if already in the list. returns 0 if out of memory,
// otherwise a pointer to "user data" attached to <key>. the optional
// <was_inserted> return variable indicates whether <key> was added.
extern void* lfl_insert(LFList* list, uintptr_t key, size_t additional_bytes, int* was_inserted);
// remove from list; return -1 if not found, or 0 on success.
extern LibError lfl_erase(LFList* list, uintptr_t key);
//
// lock-free hash table (chained, fixed size)
//
struct LFHash
{
LFList* tbl;
size_t mask;
};
// make ready a previously unused(!) hash object. table size will be
// <num_entries>; this cannot currently be expanded. if a negative error
// code (currently only ERR::NO_MEM) is returned, the hash can't be used.
extern LibError lfh_init(LFHash* hash, size_t num_entries);
// call when hash is no longer needed; should no longer hold any references.
extern void lfh_free(LFHash* hash);
// return pointer to "user data" attached to <key>,
// or 0 if not found in the hash.
extern void* lfh_find(LFHash* hash, uintptr_t key);
// insert into hash if not already present. returns 0 if out of memory,
// otherwise a pointer to "user data" attached to <key>. the optional
// <was_inserted> return variable indicates whether <key> was added.
extern void* lfh_insert(LFHash* hash, uintptr_t key, size_t additional_bytes, int* was_inserted);
// remove from hash; return -1 if not found, or 0 on success.
extern LibError lfh_erase(LFHash* hash, uintptr_t key);
/**
* thread-safe (lock-free) reference counter with an extra 'exclusive' state.
**/
class LF_ReferenceCounter
{
public:
LF_ReferenceCounter()
: m_status(0)
{
}
/**
* @return true if successful or false if exclusive access has already
* been granted or reference count is non-zero.
**/
bool AcquireExclusiveAccess()
{
return cpu_CAS(&m_status, 0, S_EXCLUSIVE);
}
/**
* re-enables adding references.
**/
void RelinquishExclusiveAccess()
{
const bool ok = cpu_CAS(&m_status, S_EXCLUSIVE, 0);
debug_assert(ok);
}
/**
* increase the reference count (bounds-checked).
*
* @return true if successful or false if the item is currently locked.
**/
bool AddReference()
{
const uintptr_t oldRefCnt = ReferenceCount();
debug_assert(oldRefCnt < (uintptr_t)S_REFCNT);
// (returns false if S_EXCLUSIVE is set)
return cpu_CAS(&m_status, oldRefCnt, oldRefCnt+1);
}
/**
* decrease the reference count (bounds-checked).
**/
void Release()
{
const uintptr_t oldRefCnt = ReferenceCount();
debug_assert(oldRefCnt != 0);
// (fails if S_EXCLUSIVE is set)
const bool ok = cpu_CAS(&m_status, oldRefCnt, oldRefCnt-1);
debug_assert(ok);
}
uintptr_t ReferenceCount() const
{
return m_status & S_REFCNT;
}
private:
static const intptr_t S_REFCNT = (~0u) >> 1; // 0x7F..F
static const intptr_t S_EXCLUSIVE = S_REFCNT+1u; // 0x80..0
volatile intptr_t m_status;
};
#endif // #ifndef INCLUDED_LOCKFREE

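Note: for reference, a small usage sketch of the removed list API, based only on the declarations above (not code from this commit; error handling abbreviated):

static void ExampleLFListUsage()
{
	LFList list;
	if(lfl_init(&list) < 0)
		return;	// out of memory - the list must not be used

	// insert a node keyed 42 with room for one int of user data
	int was_inserted = 0;
	int* value = (int*)lfl_insert(&list, 42, sizeof(int), &was_inserted);
	if(value && was_inserted)
		*value = 123;

	// lfl_find returns 0 if the key is absent
	int* found = (int*)lfl_find(&list, 42);
	if(found)
		debug_assert(*found == 123);

	(void)lfl_erase(&list, 42);	// INFO::OK on success
	lfl_free(&list);
}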
View File

@ -35,7 +35,7 @@
#include <new> // std::bad_alloc
#include "lib/fnv_hash.h"
#include "lib/allocators/allocators.h" // OverrunProtector
#include "lib/allocators/overrun_protector.h"
#include "lib/module_init.h"

View File

@ -27,12 +27,8 @@
#ifndef INCLUDED_IA32_ASM
#define INCLUDED_IA32_ASM
#ifdef __cplusplus
extern "C" {
#endif
struct x86_x64_CpuidRegs;
extern void CALL_CONV ia32_asm_cpuid(x86_x64_CpuidRegs* regs);
EXTERN_C void CALL_CONV ia32_asm_cpuid(x86_x64_CpuidRegs* regs);
/// control87
// FPU control word
@ -58,7 +54,7 @@ const u32 IA32_EM_INEXACT = 0x20;
* with the bit values in new_val.
* @return 0 to indicate success.
**/
extern u32 CALL_CONV ia32_asm_control87(u32 new_val, u32 mask);
EXTERN_C u32 CALL_CONV ia32_asm_control87(u32 new_val, u32 mask);
/// POSIX fpclassify
#define IA32_FP_NAN 0x0100
@ -66,17 +62,13 @@ extern u32 CALL_CONV ia32_asm_control87(u32 new_val, u32 mask);
#define IA32_FP_INFINITE (IA32_FP_NAN | IA32_FP_NORMAL)
#define IA32_FP_ZERO 0x4000
#define IA32_FP_SUBNORMAL (IA32_FP_NORMAL | IA32_FP_ZERO)
extern size_t CALL_CONV ia32_asm_fpclassifyd(double d);
extern size_t CALL_CONV ia32_asm_fpclassifyf(float f);
EXTERN_C size_t CALL_CONV ia32_asm_fpclassifyd(double d);
EXTERN_C size_t CALL_CONV ia32_asm_fpclassifyf(float f);
/**
* write the current execution state (e.g. all register values) into
* (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency).
**/
extern void CALL_CONV ia32_asm_GetCurrentContext(void* pcontext);
#ifdef __cplusplus
}
#endif
EXTERN_C void CALL_CONV ia32_asm_GetCurrentContext(void* pcontext);
#endif // #ifndef INCLUDED_IA32_ASM

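Note: EXTERN_C replaces the removed #ifdef __cplusplus / extern "C" blocks; it is presumably a convenience macro along these lines (a sketch - the project's actual definition may differ):

#ifdef __cplusplus
# define EXTERN_C extern "C"
#else
# define EXTERN_C extern
#endif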
View File

@ -26,7 +26,7 @@
#include "lib/bits.h" // PopulationCount
#include "lib/timer.h"
#include "lib/module_init.h"
#include "lib/allocators/allocators.h" // page_aligned_alloc
#include "lib/allocators/page_aligned.h"
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/acpi.h"
#include "lib/sysdep/os/win/win.h"

View File

@ -23,190 +23,81 @@
#include "precompiled.h"
#include "lib/sysdep/filesystem.h"
#include "lib/allocators/allocators.h" // single_calloc
#include "lib/sysdep/os/win/wposix/wposix_internal.h"
#include "lib/sysdep/os/win/wposix/waio.h"
#include "lib/sysdep/cpu.h" // cpu_CAS
#include "lib/sysdep/os/win/wutil.h" // LibError_from_GLE
#include "lib/sysdep/os/win/wposix/waio.h" // waio_reopen
#include "lib/sysdep/os/win/wposix/wtime_internal.h" // wtime_utc_filetime_to_time_t
#include "lib/sysdep/os/win/wposix/crt_posix.h" // _rmdir, _access
#include "lib/sysdep/os/win/wposix/crt_posix.h" // _close, _lseeki64 etc.
//
// determine file system type on the current drive -
// needed to work around incorrect FAT time translation.
//
static enum Filesystem
//-----------------------------------------------------------------------------
// WDIR suballocator
//-----------------------------------------------------------------------------
// most applications only need a single WDIR at a time. we avoid expensive
// heap allocations by reusing a single static instance. if it is already
// in use, we allocate further instances dynamically.
// NB: this is thread-safe due to CAS.
struct WDIR // POD
{
FS_INVALID, // detect_filesystem() not yet called
FS_FAT, // FAT12, FAT16, or FAT32
FS_NTFS, // (most common)
FS_UNKNOWN // newer FS we don't know about
}
filesystem;
HANDLE hFind;
WIN32_FIND_DATAW findData; // indeterminate if hFind == INVALID_HANDLE_VALUE
// rationale: the previous method of checking every path was way too slow
// (taking ~800ms total during init). instead, we only determine the FS once.
// this is quite a bit easier than intercepting chdir() calls and/or
// caching FS type per drive letter, but not foolproof.
//
// if some data files are on a different volume that is set up as FAT,
// the workaround below won't be triggered (=> timestamps may be off by
// 1 hour when DST is in effect). oh well, that is not supported.
//
// the common case (everything is on a single NTFS volume) is more important
// and must run without penalty.
// wreaddir will return the address of this member.
// (must be stored in WDIR to allow multiple independent
// wopendir/wreaddir sequences).
struct wdirent ent;
// used by wreaddir to skip the first FindNextFileW. (a counter is
// easy to test/update and also provides useful information.)
size_t numCalls;
};
// called from the first filetime_to_time_t() call, not win.cpp init;
// this means we can rely on the current directory having been set to
// the app's directory (and the corresponding volume - see above).
static void detect_filesystem()
static WDIR wdir_storage;
static volatile intptr_t wdir_in_use;
static inline WDIR* wdir_alloc()
{
const DWORD length = GetCurrentDirectoryW(0, 0);
debug_assert(length != 0);
std::wstring rootPath(length, '\0');
const DWORD charsWritten = GetCurrentDirectoryW(length, &rootPath[0]);
debug_assert(charsWritten == length-1);
wchar_t drive[_MAX_DRIVE];
debug_assert(_wsplitpath_s(&rootPath[0], drive, ARRAY_SIZE(drive), 0,0, 0,0, 0,0) == 0);
wchar_t filesystemName[MAX_PATH+1] = {0}; // mandated by GetVolumeInformationW
BOOL ret = GetVolumeInformationW(OsString(OsPath(drive)/"").c_str(), 0,0,0,0,0, filesystemName, ARRAY_SIZE(filesystemName));
debug_assert(ret != 0);
filesystem = FS_UNKNOWN;
if(!wcsncmp(filesystemName, L"FAT", 3)) // e.g. FAT32
filesystem = FS_FAT;
else if(!wcscmp(filesystemName, L"NTFS"))
filesystem = FS_NTFS;
if(cpu_CAS(&wdir_in_use, 0, 1)) // gained ownership
return &wdir_storage;
// already in use (rare) - allocate from heap
return new WDIR;
}
// convert local FILETIME (includes timezone bias and possibly DST bias)
// to seconds-since-1970 UTC.
//
// note: splitting into month, year etc. is inefficient,
// but much easier than determining whether ft lies in DST,
// and ourselves adding the appropriate bias.
//
// called for FAT file times; see wposix filetime_to_time_t.
time_t time_t_from_local_filetime(FILETIME* ft)
static inline void wdir_free(WDIR* d)
{
SYSTEMTIME st;
FileTimeToSystemTime(ft, &st);
struct tm t;
t.tm_sec = st.wSecond;
t.tm_min = st.wMinute;
t.tm_hour = st.wHour;
t.tm_mday = st.wDay;
t.tm_mon = st.wMonth-1;
t.tm_year = st.wYear-1900;
t.tm_isdst = -1;
// let the CRT determine whether this local time
// falls under DST by the US rules.
return mktime(&t);
}
// convert Windows FILETIME to POSIX time_t (seconds-since-1970 UTC);
// used by stat and readdir_stat_np for st_mtime.
//
// works around a documented Windows bug in converting FAT file times
// (correct results are desired since VFS mount logic considers
// files 'equal' if their mtime and size are the same).
static time_t filetime_to_time_t(FILETIME* ft)
{
ONCE(detect_filesystem());
// the FAT file system stores local file times, while
// NTFS records UTC. Windows does convert automatically,
// but uses the current DST settings. (boo!)
// we go back to local time, and convert properly.
if(filesystem == FS_FAT)
if(d == &wdir_storage)
{
FILETIME local_ft;
FileTimeToLocalFileTime(ft, &local_ft);
return time_t_from_local_filetime(&local_ft);
const bool ok = cpu_CAS(&wdir_in_use, 1, 0); // relinquish ownership
debug_assert(ok); // ensure it wasn't double-freed
}
return wtime_utc_filetime_to_time_t(ft);
else // allocated from heap
delete d;
}
//-----------------------------------------------------------------------------
// dirent.h
//-----------------------------------------------------------------------------
// note: we avoid opening directories or returning entries that have
// hidden or system attributes set. this is to prevent returning something
// like "\System Volume Information", which raises an error upon opening.
// 0-initialized by wdir_alloc for safety; this is required for
// num_entries_scanned.
struct WDIR
static bool IsValidDirectory(const OsPath& path)
{
HANDLE hFind;
// the wdirent returned by readdir.
// note: having only one global instance is not possible because
// multiple independent wopendir/wreaddir sequences must be supported.
struct wdirent ent;
WIN32_FIND_DATAW fd;
// since wopendir calls FindFirstFileW, we need a means of telling the
// first call to wreaddir that we already have a file.
// that's the case iff this is == 0; we use a counter rather than a
// flag because that allows keeping statistics.
int num_entries_scanned;
};
// suballocator - satisfies most requests with a reusable static instance,
// thus speeding up allocation and avoiding heap fragmentation.
// thread-safe.
static WDIR global_wdir;
static intptr_t global_wdir_is_in_use;
// zero-initializes the WDIR (code below relies on this)
static inline WDIR* wdir_alloc()
{
return (WDIR*)single_calloc(&global_wdir, &global_wdir_is_in_use, sizeof(WDIR));
}
static inline void wdir_free(WDIR* d)
{
single_free(&global_wdir, &global_wdir_is_in_use, d);
}
static const DWORD hs = FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM;
// make sure path exists and is a normal (according to attributes) directory.
static bool is_normal_dir(const OsPath& path)
{
const DWORD fa = GetFileAttributesW(OsString(path).c_str());
const DWORD fileAttributes = GetFileAttributesW(OsString(path).c_str());
// path not found
if(fa == INVALID_FILE_ATTRIBUTES)
if(fileAttributes == INVALID_FILE_ATTRIBUTES)
return false;
// not a directory
if((fa & FILE_ATTRIBUTE_DIRECTORY) == 0)
if((fileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0)
return false;
// hidden or system attribute(s) set
// this check is now disabled because wsnd's add_oal_dlls_in_dir
// needs to open the Windows system directory, which sometimes has
// these attributes set.
//if((fa & hs) != 0)
// return false;
// NB: no longer reject hidden or system attributes since
// wsnd's add_oal_dlls_in_dir opens the Windows system directory,
// which sometimes has these attributes set.
return true;
}
@ -214,99 +105,84 @@ static bool is_normal_dir(const OsPath& path)
WDIR* wopendir(const OsPath& path)
{
if(!is_normal_dir(path))
WinScopedPreserveLastError s;
if(!IsValidDirectory(path))
{
errno = ENOENT;
return 0;
}
WDIR* d = wdir_alloc();
if(!d)
{
errno = ENOMEM;
return 0;
}
d->numCalls = 0;
// NB: c:\path only returns information about that directory; trailing
// slashes aren't allowed. append * to retrieve directory entries.
// NB: "c:\\path" only returns information about that directory;
// trailing slashes aren't allowed. append "\\*" to retrieve its entries.
OsPath searchPath = path/"*";
// note: we could store search_path and defer FindFirstFileW until
// wreaddir. this way is a bit more complex but required for
// correctness (we must return a valid DIR iff <path> is valid).
d->hFind = FindFirstFileW(OsString(searchPath).c_str(), &d->fd);
if(d->hFind == INVALID_HANDLE_VALUE)
{
// not an error - the directory is just empty.
if(GetLastError() == ERROR_NO_MORE_FILES)
return d;
// (we don't defer FindFirstFileW until wreaddir because callers
// expect us to return 0 if directory reading will/did fail.)
d->hFind = FindFirstFileW(OsString(searchPath).c_str(), &d->findData);
if(d->hFind != INVALID_HANDLE_VALUE)
return d; // success
if(GetLastError() == ERROR_NO_MORE_FILES)
return d; // success, but directory is empty
// translate Win32 error to errno.
LibError err = LibError_from_win32(FALSE);
LibError_set_errno(err);
LibError_set_errno(LibError_from_GLE());
// release the WDIR allocated above.
// unfortunately there's no way around this; we need to allocate
// d before FindFirstFile because it uses d->fd. copying from a
// temporary isn't nice either (this free doesn't happen often)
wdir_free(d);
return 0;
}
// release the WDIR allocated above (this is preferable to
// always copying the large WDIR or findData from a temporary)
wdir_free(d);
return d;
return 0;
}
struct wdirent* wreaddir(WDIR* d)
{
// avoid polluting the last error.
DWORD prev_err = GetLastError();
// directory is empty and d->findData is indeterminate
if(d->hFind == INVALID_HANDLE_VALUE)
return 0;
// first call - skip FindNextFileW (see wopendir).
if(d->num_entries_scanned == 0)
{
// this directory is empty.
if(d->hFind == INVALID_HANDLE_VALUE)
return 0;
goto already_have_file;
}
WinScopedPreserveLastError s;
// until end of directory or a valid entry was found:
for(;;)
{
if(!FindNextFileW(d->hFind, &d->fd))
goto fail;
already_have_file:
if(d->numCalls++ != 0) // (skip first call to FindNextFileW - see wopendir)
{
if(!FindNextFileW(d->hFind, &d->findData))
{
if(GetLastError() != ERROR_NO_MORE_FILES) // an actual error occurred
(void)LibError_from_GLE(); // raise warning
return 0; // end of directory or error
}
}
d->num_entries_scanned++;
// not a hidden or system entry -> it's valid.
if((d->fd.dwFileAttributes & hs) == 0)
break;
// only accept non-hidden and non-system entries - otherwise,
// callers might encounter errors when attempting to open them.
if((d->findData.dwFileAttributes & (FILE_ATTRIBUTE_HIDDEN|FILE_ATTRIBUTE_SYSTEM)) == 0)
{
d->ent.d_name = d->findData.cFileName; // (NB: d_name is a pointer)
return &d->ent;
}
}
// this entry has passed all checks; return information about it.
// (note: d_name is a pointer; see struct dirent definition)
d->ent.d_name = d->fd.cFileName;
return &d->ent;
fail:
// FindNextFileW failed; determine why and bail.
// .. legit, end of dir reached. don't pollute last error code.
if(GetLastError() == ERROR_NO_MORE_FILES)
SetLastError(prev_err);
else
WARN_ERR(LibError_from_GLE());
return 0;
}
int wreaddir_stat_np(WDIR* d, struct stat* s)
{
// NTFS stores UTC but FAT stores local times, which are incorrectly
// translated to UTC based on the _current_ DST settings. we no longer
// bother checking the filesystem, since that's either unreliable or
// expensive. timestamps may therefore be off after a DST transition,
// which means our cached files would be regenerated.
FILETIME* filetime = &d->findData.ftLastWriteTime;
memset(s, 0, sizeof(*s));
s->st_size = (off_t)u64_from_u32(d->fd.nFileSizeHigh, d->fd.nFileSizeLow);
s->st_mode = (unsigned short)((d->fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)? S_IFDIR : S_IFREG);
s->st_mtime = filetime_to_time_t(&d->fd.ftLastWriteTime);
s->st_size = (off_t)u64_from_u32(d->findData.nFileSizeHigh, d->findData.nFileSizeLow);
s->st_mode = (unsigned short)((d->findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)? S_IFDIR : S_IFREG);
s->st_mtime = wtime_utc_filetime_to_time_t(filetime);
return 0;
}
@ -395,6 +271,8 @@ off_t lseek(int fd, off_t ofs, int whence)
int wtruncate(const OsPath& pathname, off_t length)
{
// (re-open the file to avoid the FILE_FLAG_NO_BUFFERING
// sector-alignment restriction)
HANDLE hFile = CreateFileW(OsString(pathname).c_str(), GENERIC_WRITE, 0, 0, OPEN_EXISTING, 0, 0);
debug_assert(hFile != INVALID_HANDLE_VALUE);
LARGE_INTEGER ofs; ofs.QuadPart = length;

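Note: the WDIR suballocator above (one static instance claimed via cpu_CAS, heap fallback when it is already in use) is a general pattern; an illustrative standalone sketch using std::atomic in place of the project's cpu_CAS (names are assumptions):

#include <atomic>

template<class T>
class SingleStaticAllocator
{
public:
	T* Allocate()
	{
		bool expected = false;
		// try to claim the static instance; fall back to the heap if another caller holds it
		if(m_inUse.compare_exchange_strong(expected, true))
			return &m_storage;
		return new T();
	}

	void Deallocate(T* p)
	{
		if(p == &m_storage)
			m_inUse.store(false);	// relinquish ownership of the static instance
		else
			delete p;	// was allocated from the heap
	}

private:
	T m_storage;
	std::atomic<bool> m_inUse{false};
};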
View File

@ -20,15 +20,7 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef __cplusplus
extern "C" {
#endif
extern __declspec(dllimport) int __stdcall WSAStartup(unsigned short, void*);
extern __declspec(dllimport) int __stdcall WSACleanup();
extern __declspec(dllimport) int __stdcall WSAAsyncSelect(int s, HANDLE hWnd, unsigned int wMsg, long lEvent);
extern __declspec(dllimport) int __stdcall WSAGetLastError();
#ifdef __cplusplus
}
#endif
EXTERN_C __declspec(dllimport) int __stdcall WSAStartup(unsigned short, void*);
EXTERN_C __declspec(dllimport) int __stdcall WSACleanup();
EXTERN_C __declspec(dllimport) int __stdcall WSAAsyncSelect(int s, HANDLE hWnd, unsigned int wMsg, long lEvent);
EXTERN_C __declspec(dllimport) int __stdcall WSAGetLastError();

View File

@ -1,111 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "precompiled.h"
#include "lib/sysdep/os/win/wposix/wterminal.h"
#include "lib/sysdep/os/win/wposix/wposix_internal.h"
#include "lib/sysdep/os/win/wposix/crt_posix.h" // _get_osfhandle
int ioctl(int fd, int op, int* data)
{
const HANDLE h = HANDLE_from_intptr(_get_osfhandle(fd));
switch(op)
{
case TIOCMGET:
/* TIOCM_* mapped directly to MS_*_ON */
GetCommModemStatus(h, (DWORD*)data);
break;
case TIOCMBIS:
/* only RTS supported */
if(*data & TIOCM_RTS)
EscapeCommFunction(h, SETRTS);
else
EscapeCommFunction(h, CLRRTS);
break;
case TIOCMIWAIT:
static DWORD mask;
DWORD new_mask = 0;
if(*data & TIOCM_CD)
new_mask |= EV_RLSD;
if(*data & TIOCM_CTS)
new_mask |= EV_CTS;
if(new_mask != mask)
SetCommMask(h, mask = new_mask);
WaitCommEvent(h, &mask, 0);
break;
}
return 0;
}
static HANDLE std_h[2] = { (HANDLE)((char*)0 + 3), (HANDLE)((char*)0 + 7) };
void _get_console()
{
AllocConsole();
}
void _hide_console()
{
FreeConsole();
}
int tcgetattr(int fd, struct termios* termios_p)
{
if(fd >= 2)
return -1;
HANDLE h = std_h[fd];
DWORD mode;
GetConsoleMode(h, &mode);
termios_p->c_lflag = mode & (ENABLE_ECHO_INPUT|ENABLE_LINE_INPUT);
return 0;
}
int tcsetattr(int fd, int /* optional_actions */, const struct termios* termios_p)
{
if(fd >= 2)
return -1;
HANDLE h = std_h[fd];
SetConsoleMode(h, (DWORD)termios_p->c_lflag);
FlushConsoleInputBuffer(h);
return 0;
}
int poll(struct pollfd /* fds */[], int /* nfds */, int /* timeout */)
{
return -1;
}

View File

@ -1,88 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INCLUDED_WTERMINAL
#define INCLUDED_WTERMINAL
//
// serial port IOCTL
//
// use with TIOCMBIS
#define TIOCM_RTS 1
// use with TIOCMGET or TIOCMIWAIT
#define TIOCM_CD 0x80 // MS_RLSD_ON
#define TIOCM_CTS 0x10 // MS_CTS_ON
enum
{
TIOCMBIS, // set control line
TIOCMGET, // get line state
TIOCMIWAIT // wait for status change
};
extern int ioctl(int fd, int op, int* data);
#ifndef _WINSOCKAPI_
#define FIONREAD 0
#endif
extern void _get_console();
extern void _hide_console();
//
// <poll.h>
//
struct pollfd
{
int fd;
short int events, revents;
};
#define POLLIN 1
extern int poll(struct pollfd[], int, int);
//
// <termios.h>
//
#define TCSANOW 0
struct termios
{
long c_lflag;
};
#define ICANON 2 // do not change - correspond to ENABLE_LINE_INPUT / ENABLE_ECHO_INPUT
#define ECHO 4
extern int tcgetattr(int fd, struct termios* termios_p);
extern int tcsetattr(int fd, int optional_actions, const struct termios* termios_p);
#endif // #ifndef INCLUDED_WTERMINAL

View File

@ -1,281 +0,0 @@
/* Copyright (c) 2010 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "lib/self_test.h"
#include "lib/lockfree.h"
#include "lib/sysdep/cpu.h" // atomic_add
#include "lib/timer.h"
#include "lib/rand.h"
// make sure the data structures work at all; doesn't test thread-safety.
class TestLockfreeBasic : public CxxTest::TestSuite
{
public:
// note: the lockfree module is no longer part of the build, but cxxtestgen
// still sees this class and its methods despite them being commented out
// (#if 0 doesn't help, either). we therefore need to disable their bodies.
#if 0
void setUp()
{
lockfree_Init();
}
void tearDown()
{
lockfree_Shutdown();
}
#endif
void test_basic_single_threaded()
{
#if 0
void* user_data;
const size_t ENTRIES = 50;
// should be more than max # retired nodes to test release..() code
uintptr_t key = 0x1000;
size_t sig = 10;
LFList list;
TS_ASSERT_OK(lfl_init(&list));
LFHash hash;
TS_ASSERT_OK(lfh_init(&hash, 8));
// add some entries; store "signatures" (ascending int values)
for(size_t i = 0; i < ENTRIES; i++)
{
int was_inserted;
user_data = lfl_insert(&list, key+i, sizeof(int), &was_inserted);
TS_ASSERT(user_data != 0 && was_inserted);
*(size_t*)user_data = sig+i;
user_data = lfh_insert(&hash, key+i, sizeof(int), &was_inserted);
TS_ASSERT(user_data != 0 && was_inserted);
*(size_t*)user_data = sig+i;
}
// make sure all "signatures" are present in list
for(size_t i = 0; i < ENTRIES; i++)
{
user_data = lfl_find(&list, key+i);
TS_ASSERT(user_data != 0);
TS_ASSERT_EQUALS(*(size_t*)user_data, sig+i);
user_data = lfh_find(&hash, key+i);
TS_ASSERT(user_data != 0);
TS_ASSERT_EQUALS(*(size_t*)user_data, sig+i);
}
lfl_free(&list);
lfh_free(&hash);
#endif
}
};
// known to fail on P4 due to mem reordering and lack of membars.
class TestMultithread : public CxxTest::TestSuite
{
#if 0
void setUp()
{
lockfree_Init();
}
void tearDown()
{
lockfree_Shutdown();
}
// poor man's synchronization "barrier"
bool is_complete;
intptr_t num_active_threads;
LFList list;
LFHash hash;
typedef std::set<uintptr_t> KeySet;
typedef KeySet::const_iterator KeySetIt;
KeySet keys;
pthread_mutex_t mutex; // protects <keys>
struct ThreadFuncParam
{
TestMultithread* this_;
uintptr_t thread_number;
ThreadFuncParam(TestMultithread* this__, uintptr_t thread_number_)
: this_(this__), thread_number(thread_number_) {}
};
static void* thread_func(void* arg)
{
debug_SetThreadName("LF_test");
ThreadFuncParam* param = (ThreadFuncParam*)arg;
TestMultithread* this_ = param->this_;
const uintptr_t thread_number = param->thread_number;
cpu_AtomicAdd(&this_->num_active_threads, 1);
// chosen randomly every iteration (int_value % 4)
enum TestAction
{
TA_FIND = 0,
TA_INSERT = 1,
TA_ERASE = 2,
TA_SLEEP = 3
};
static const wchar_t* const action_strings[] =
{
L"find", L"insert", L"erase", L"sleep"
};
while(!this_->is_complete)
{
void* user_data;
const size_t action = rand(0, 4);
const uintptr_t key = (uintptr_t)rand(0, 100);
const size_t sleep_duration_ms = rand(0, 100);
debug_printf(L"thread %d: %ls\n", thread_number, action_strings[action]);
//
pthread_mutex_lock(&this_->mutex);
const bool was_in_set = this_->keys.find(key) != this_->keys.end();
if(action == TA_INSERT)
this_->keys.insert(key);
else if(action == TA_ERASE)
this_->keys.erase(key);
pthread_mutex_unlock(&this_->mutex);
switch(action)
{
case TA_FIND:
{
user_data = lfl_find(&this_->list, key);
TS_ASSERT(was_in_set == (user_data != 0));
if(user_data)
TS_ASSERT_EQUALS(*(uintptr_t*)user_data, ~key);
user_data = lfh_find(&this_->hash, key);
// typical failure site if lockfree data structure has bugs.
TS_ASSERT(was_in_set == (user_data != 0));
if(user_data)
TS_ASSERT_EQUALS(*(uintptr_t*)user_data, ~key);
}
break;
case TA_INSERT:
{
int was_inserted;
user_data = lfl_insert(&this_->list, key, sizeof(uintptr_t), &was_inserted);
TS_ASSERT(user_data != 0); // only triggers if out of memory
*(uintptr_t*)user_data = ~key; // checked above
TS_ASSERT(was_in_set == !was_inserted);
user_data = lfh_insert(&this_->hash, key, sizeof(uintptr_t), &was_inserted);
TS_ASSERT(user_data != 0); // only triggers if out of memory
*(uintptr_t*)user_data = ~key; // checked above
TS_ASSERT(was_in_set == !was_inserted);
}
break;
case TA_ERASE:
{
int err;
err = lfl_erase(&this_->list, key);
TS_ASSERT(was_in_set == (err == INFO::OK));
err = lfh_erase(&this_->hash, key);
TS_ASSERT(was_in_set == (err == INFO::OK));
}
break;
case TA_SLEEP:
usleep(useconds_t(sleep_duration_ms*1000));
break;
default:
TS_FAIL(L"invalid TA_* action");
break;
} // switch
} // while !is_complete
cpu_AtomicAdd(&this_->num_active_threads, -1);
TS_ASSERT(this_->num_active_threads >= 0);
delete param;
return 0;
}
public:
TestMultithread()
: is_complete(false), num_active_threads(0),
list(), hash()
{
pthread_mutex_init(&mutex, NULL);
}
void disabled_due_to_failure_on_p4_test_multithread()
{
// this test is randomized; we need deterministic results.
srand(1);
static const double TEST_LENGTH = 30.; // [seconds]
const double end_time = timer_Time() + TEST_LENGTH;
is_complete = false;
TS_ASSERT_OK(lfl_init(&list));
TS_ASSERT_OK(lfh_init(&hash, 128));
TS_ASSERT_OK(pthread_mutex_init(&mutex, 0));
// spin off test threads (many, to force preemption)
const size_t NUM_THREADS = 16;
for(uintptr_t i = 0; i < NUM_THREADS; i++)
{
ThreadFuncParam* param = new ThreadFuncParam(this, i);
pthread_t thread; // unused, but GCC raises warning if 0 is passed
pthread_create(&thread, 0, thread_func, param);
}
// wait until time interval elapsed (if we get that far, all is well).
while(timer_Time() < end_time)
usleep(10*1000);
// signal and wait for all threads to complete (poor man's barrier -
// those aren't currently implemented in wpthread).
is_complete = true;
while(num_active_threads > 0)
usleep(5*1000);
lfl_free(&list);
lfh_free(&hash);
TS_ASSERT_OK(pthread_mutex_destroy(&mutex));
}
#endif
};

View File

@ -147,15 +147,15 @@ void tex_codec_register_all()
// bottom-up; the row array is inverted if necessary to match global
// orientation. (this is more efficient than "transforming" later)
//
// used by PNG and JPG codecs; caller must delete[] rows when done.
// used by PNG and JPG codecs.
//
// note: we don't allocate the data param ourselves because this function is
// needed for encoding, too (where data is already present).
shared_ptr<RowPtr> tex_codec_alloc_rows(const u8* data, size_t h, size_t pitch, size_t src_flags, size_t dst_orientation)
std::vector<RowPtr> tex_codec_alloc_rows(const u8* data, size_t h, size_t pitch, size_t src_flags, size_t dst_orientation)
{
const bool flip = !tex_orientations_match(src_flags, dst_orientation);
shared_ptr<RowPtr> rows(new RowPtr[h], ArrayDeleter());
std::vector<RowPtr> rows(h);
// determine start position and direction
RowPtr pos = flip? data+pitch*(h-1) : data;
@ -164,7 +164,7 @@ shared_ptr<RowPtr> tex_codec_alloc_rows(const u8* data, size_t h, size_t pitch,
for(size_t i = 0; i < h; i++)
{
rows.get()[i] = pos;
rows[i] = pos;
pos += add;
}

View File

@ -225,11 +225,9 @@ extern LibError tex_codec_transform(Tex* t, size_t transforms);
* can be one of TEX_BOTTOM_UP, TEX_TOP_DOWN, or 0 for the
* "global orientation".
* depending on src and dst, the row array is flipped if necessary.
* @param rows (out) array of row pointers; caller must free() it when done.
* @return LibError
**/
typedef const u8* RowPtr;
extern shared_ptr<RowPtr> tex_codec_alloc_rows(const u8* data, size_t h, size_t pitch, size_t src_flags, size_t dst_orientation);
extern std::vector<RowPtr> tex_codec_alloc_rows(const u8* data, size_t h, size_t pitch, size_t src_flags, size_t dst_orientation);
/**
* apply transforms and then copy header and image into output buffer.

View File

@ -236,8 +236,8 @@ static void s3tc_decompress_level(size_t UNUSED(level), size_t level_w, size_t l
// note: 1x1 images are legitimate (e.g. in mipmaps). they report their
// width as such for glTexImage, but the S3TC data is padded to
// 4x4 pixel block boundaries.
const size_t blocks_w = round_up(level_w, size_t(4)) / 4u;
const size_t blocks_h = round_up(level_h, size_t(4)) / 4u;
const size_t blocks_w = DivideRoundUp(level_w, size_t(4));
const size_t blocks_h = DivideRoundUp(level_h, size_t(4));
const u8* s3tc_data = level_data;
debug_assert(level_data_size % s3tc_block_size == 0);
@ -503,8 +503,8 @@ static LibError decode_sd(const DDS_HEADER* sd, size_t& w, size_t& h, size_t& bp
size_t stored_h, stored_w;
if(flags & TEX_DXT)
{
stored_h = round_up(h, size_t(4));
stored_w = round_up(w, size_t(4));
stored_h = Align<4>(h);
stored_w = Align<4>(w);
}
else
{
@ -517,7 +517,7 @@ static LibError decode_sd(const DDS_HEADER* sd, size_t& w, size_t& h, size_t& bp
const size_t sd_pitch_or_size = (size_t)read_le32(&sd->dwPitchOrLinearSize);
if(sd_flags & DDSD_PITCH)
{
if(sd_pitch_or_size != round_up(pitch, size_t(4)))
if(sd_pitch_or_size != Align<4>(pitch))
DEBUG_WARN_ERR(ERR::CORRUPTED);
}
if(sd_flags & DDSD_LINEARSIZE)

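Note: DivideRoundUp(x, d) above computes the integer ceiling of x/d, matching the previous round_up(x, 4)/4 expressions for the 4-pixel S3TC block counts; a sketch of the presumed helper:

#include <cstddef>	// size_t

// integer ceiling division; assumes divisor != 0 and that the sum does not overflow
static inline size_t DivideRoundUp(size_t dividend, size_t divisor)
{
	return (dividend + divisor - 1) / divisor;
}

// e.g. a 5-pixel-wide mip level spans DivideRoundUp(5, 4) == 2 blocks per row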
View File

@ -484,10 +484,10 @@ static LibError jpg_decode_impl(DynArray* da, jpeg_decompress_struct* cinfo, Tex
AllocateAligned(data, img_size, pageSize);
// read rows
shared_ptr<RowPtr> rows = tex_codec_alloc_rows(data.get(), h, pitch, TEX_TOP_DOWN, 0);
std::vector<RowPtr> rows = tex_codec_alloc_rows(data.get(), h, pitch, TEX_TOP_DOWN, 0);
// could use cinfo->output_scanline to keep track of progress,
// but we need to count lines_left anyway (paranoia).
JSAMPARRAY row = (JSAMPARRAY)rows.get();
JSAMPARRAY row = (JSAMPARRAY)&rows[0];
JDIMENSION lines_left = h;
while(lines_left != 0)
{
@ -542,11 +542,11 @@ static LibError jpg_encode_impl(Tex* t, jpeg_compress_struct* cinfo, DynArray* d
const size_t pitch = t->w * t->bpp / 8;
u8* data = tex_get_data(t);
shared_ptr<RowPtr> rows = tex_codec_alloc_rows(data, t->h, pitch, t->flags, TEX_TOP_DOWN);
std::vector<RowPtr> rows = tex_codec_alloc_rows(data, t->h, pitch, t->flags, TEX_TOP_DOWN);
// could use cinfo->output_scanline to keep track of progress,
// but we need to count lines_left anyway (paranoia).
JSAMPARRAY row = (JSAMPARRAY)rows.get();
JSAMPARRAY row = (JSAMPARRAY)&rows[0];
JDIMENSION lines_left = (JDIMENSION)t->h;
while(lines_left != 0)
{

View File

@ -115,8 +115,8 @@ static LibError png_decode_impl(DynArray* da, png_structp png_ptr, png_infop inf
shared_ptr<u8> data;
AllocateAligned(data, img_size, pageSize);
shared_ptr<RowPtr> rows = tex_codec_alloc_rows(data.get(), h, pitch, TEX_TOP_DOWN, 0);
png_read_image(png_ptr, (png_bytepp)rows.get());
std::vector<RowPtr> rows = tex_codec_alloc_rows(data.get(), h, pitch, TEX_TOP_DOWN, 0);
png_read_image(png_ptr, (png_bytepp)&rows[0]);
png_read_end(png_ptr, info_ptr);
// success; make sure all data was consumed.
@ -164,12 +164,12 @@ static LibError png_encode_impl(Tex* t, png_structp png_ptr, png_infop info_ptr,
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
u8* data = tex_get_data(t);
shared_ptr<RowPtr> rows = tex_codec_alloc_rows(data, h, pitch, t->flags, TEX_TOP_DOWN);
std::vector<RowPtr> rows = tex_codec_alloc_rows(data, h, pitch, t->flags, TEX_TOP_DOWN);
// PNG is native RGB.
const int png_transforms = (t->flags & TEX_BGR)? PNG_TRANSFORM_BGR : PNG_TRANSFORM_IDENTITY;
png_set_rows(png_ptr, info_ptr, (png_bytepp)rows.get());
png_set_rows(png_ptr, info_ptr, (png_bytepp)&rows[0]);
png_write_png(png_ptr, info_ptr, png_transforms, 0);
return INFO::OK;

View File

@ -19,7 +19,6 @@
#include "lib/app_hooks.h"
#include "lib/input.h"
#include "lib/lockfree.h"
#include "lib/ogl.h"
#include "lib/timer.h"
#include "lib/external_libraries/sdl.h"

View File

@ -17,7 +17,7 @@
#include "precompiled.h"
#include "lib/bits.h"
#include "lib/alignment.h"
#include "lib/ogl.h"
#include "maths/Vector3D.h"
#include "maths/Vector4D.h"
@ -158,7 +158,7 @@ static size_t RoundStride(size_t stride)
if (stride <= 16)
return 16;
return round_up(stride, (size_t)32);
return Align<32>(stride);
}
// Re-layout by assigning offsets on a first-come first-serve basis,
@ -203,7 +203,7 @@ void VertexArray::Layout()
m_Stride += attrSize;
if (m_Target == GL_ARRAY_BUFFER)
m_Stride = round_up(m_Stride, (size_t)4);
m_Stride = Align<4>(m_Stride);
//debug_printf(L"%i: offset: %u\n", idx, attr->offset);
}