forked from 0ad/0ad

fixes+improvements from work:

- add AlignedAllocator - an STL allocator that returns cache-line-aligned
objects (required to avoid RFOs, i.e. cache-line ownership transfers, when
threads write to various independent items in a container)
- bits: bit_mask can now be used for any width from 0 up to the number of
bits in T (works around the full-word-shifts-are-undefined issue)
- precompiled.h: remove scoped_ptr, add function-related stuff from TR1
- numa:
  . add numa_IsMemoryInterleaved
  . numa_Allocate is now able to allocate large pages as well (reduces
TLB misses)
- os_cpu: interface change to support 32-bit apps running on WoW64
systems with > 4 GB of memory
- topology: use new x86_x64_EnumerateCaches API; fix detection of cache
ID
- x86_x64: provide the means of enumerating all caches returned by CPUID
and detect L1 cache size

This was SVN commit r6004.
janwas 2008-06-01 08:25:12 +00:00
parent 0118bfd634
commit 5d80d2ee5d
14 changed files with 411 additions and 128 deletions

View File

@@ -0,0 +1,12 @@
/**
* =========================================================================
* File : aligned_allocator.cpp
* Project : 0 A.D.
* Description : STL allocator for aligned memory
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "aligned_allocator.h"

View File

@@ -0,0 +1,130 @@
/**
* =========================================================================
* File : aligned_allocator.h
* Project : 0 A.D.
* Description : STL allocator for aligned memory
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef ALIGNED_ALLOCATOR
#define ALIGNED_ALLOCATOR
#include "lib/bits.h" // round_up
#include "lib/sysdep/x86_x64/x86_x64.h" // x86_x64_L1CacheLineSize
/**
* stateless STL allocator that aligns elements to the L1 cache line size.
*
* note: the alignment is hard-coded to avoid any allocator state.
* this avoids portability problems, which is important since allocators
* are rather poorly specified.
*
* references:
* http://www.tantalon.com/pete/customallocators.ppt
* http://www.flipcode.com/archives/Aligned_Block_Allocation.shtml
* http://www.josuttis.com/cppcode/allocator.html
*
* derived from code that bears the following copyright notice:
* (C) Copyright Nicolai M. Josuttis 1999.
* Permission to copy, use, modify, sell and distribute this software
* is granted provided this copyright notice appears in all copies.
* This software is provided "as is" without express or implied
* warranty, and with no claim as to its suitability for any purpose.
**/
template<class T>
class AlignedAllocator
{
public:
// type definitions
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
// rebind allocator to type U
template <class U>
struct rebind
{
typedef AlignedAllocator<U> other;
};
pointer address(reference value) const
{
return &value;
}
const_pointer address(const_reference value) const
{
return &value;
}
AlignedAllocator() throw()
{
}
AlignedAllocator(const AlignedAllocator&) throw()
{
}
template <class U>
AlignedAllocator (const AlignedAllocator<U>&) throw()
{
}
~AlignedAllocator() throw()
{
}
size_type max_size() const throw()
{
// maximum number of *elements* that can be allocated
return std::numeric_limits<std::size_t>::max() / sizeof(T);
}
// allocate uninitialized storage
pointer allocate(size_type numElements, const void* hint = 0)
{
const size_type alignment = x86_x64_L1CacheLineSize();
const size_type elementSize = round_up(sizeof(T), alignment);
const size_type size = numElements * elementSize;
pointer p = (pointer)_aligned_malloc(size, alignment);
return p;
}
// deallocate storage of elements that have been destroyed
void deallocate(pointer p, size_type num)
{
_aligned_free((void*)p);
}
void construct(pointer p, const T& value)
{
new((void*)p) T(value);
}
void destroy(pointer p)
{
p->~T();
}
};
// indicate that all specializations of this allocator are interchangeable
template <class T1, class T2>
bool operator==(const AlignedAllocator<T1>&, const AlignedAllocator<T2>&) throw()
{
return true;
}
template <class T1, class T2>
bool operator!=(const AlignedAllocator<T1>&, const AlignedAllocator<T2>&) throw()
{
return false;
}
#endif // #ifndef ALIGNED_ALLOCATOR
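
Usage sketch (hypothetical, not part of this commit): the allocator can back a
standard container so that the storage starts on a cache-line boundary; to keep
adjacent elements on separate lines, the element itself must also be padded
(64-byte lines are assumed below).

// hypothetical usage sketch; element type, padding and count are made up
#include <vector>
#include "lib/sysdep/os_cpu.h"	// os_cpu_NumProcessors
#include "aligned_allocator.h"

struct PerThreadCounter
{
	size_t hits;
	char padding[64 - sizeof(size_t)];	// keep neighbors on different cache lines
};

typedef std::vector<PerThreadCounter, AlignedAllocator<PerThreadCounter> > CounterVector;
CounterVector counters(os_cpu_NumProcessors());	// one aligned slot per logical CPU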

View File

@@ -44,9 +44,14 @@ bool IsBitSet(T value, size_t index)
* @param num_bits number of bits in mask
**/
template<typename T>
T bit_mask(size_t num_bits)
T bit_mask(size_t numBits)
{
return (T)(T(1) << num_bits)-1;
if(numBits == 0) // prevent shift count == bitsInT, which would be undefined.
return 0;
// note: the perhaps more intuitive (1 << numBits)-1 cannot
// handle numBits == bitsInT, but this implementation does.
const T bitsInT = sizeof(T)*CHAR_BIT;
return ~T(0) >> T(bitsInT-numBits);
}
@@ -64,7 +69,7 @@ template<typename T>
inline T bits(T num, size_t lo_idx, size_t hi_idx)
{
const size_t count = (hi_idx - lo_idx)+1; // # bits to return
T result = num >> lo_idx;
T result = num >> T(lo_idx);
result &= bit_mask<T>(count);
return result;
}
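
A minimal sanity check (hypothetical, not part of this commit) for the edge
cases the new implementation handles:

#include <cassert>
#include "lib/bits.h"

static void TestBitMask()
{
	assert(bit_mask<u32>(0) == 0);              // handled explicitly; avoids shifting by the full word width
	assert(bit_mask<u32>(5) == 0x1F);
	assert(bit_mask<u32>(32) == 0xFFFFFFFFu);   // full-word mask; undefined with the old (1 << numBits)-1
	assert(bits<u32>(0xABCD, 4, 7) == 0xC);     // extract bit field 4..7
}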

View File

@@ -63,10 +63,15 @@
# define BOOST_ALL_DYN_LINK
#endif
#include <boost/utility.hpp> // noncopyable
#include <boost/shared_array.hpp>
// the following boost libraries have been included in TR1 and are
// thus deemed usable:
#include <boost/shared_ptr.hpp>
#include <boost/scoped_ptr.hpp>
using boost::shared_ptr; // has been added to TR1
using boost::shared_ptr;
#include <boost/mem_fn.hpp>
using boost::mem_fn;
#include <boost/function.hpp>
using boost::function;
#include <boost/bind.hpp>
#include "lib/external_libraries/boost_filesystem.h"
// (this must come after boost and common lib headers)
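
Illustration (hypothetical types and path, not part of this commit) of the
TR1-style facilities now available everywhere via the precompiled header:

struct TextureLoader
{
	void Load(const char* path) { /* ... */ }
};

static void Example()
{
	shared_ptr<TextureLoader> loader(new TextureLoader);
	// bind a member function to an instance; the shared_ptr keeps it alive
	function<void (const char*)> load = boost::bind(&TextureLoader::Load, loader, _1);
	load("art/textures/example.dds");	// hypothetical path
}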

View File

@@ -16,6 +16,6 @@
* avoids several common pitfalls; see discussion at
* http://www.azillionmonkeys.com/qed/random.html
**/
extern size_t rand(size_t min_inclusive, size_t max_exclusive);
LIB_API size_t rand(size_t min_inclusive, size_t max_exclusive);
#endif // #ifndef INCLUDED_RAND

View File

@@ -70,7 +70,10 @@ size_t os_cpu_MemorySize()
static size_t memorySize;
if(!memorySize)
memorySize = sysconf(_SC_PHYS_PAGES) * os_cpu_PageSize();
{
const uint64_t memorySizeBytes = (uint64_t)sysconf(_SC_PHYS_PAGES) * os_cpu_PageSize();
memorySize = size_t(memorySizeBytes / MiB);
}
return memorySize;
}
@@ -78,7 +81,8 @@ size_t os_cpu_MemorySize()
size_t os_cpu_MemoryAvailable()
{
const size_t memoryAvailable = sysconf(_SC_AVPHYS_PAGES) * os_cpu_PageSize();
const uint64_t memoryAvailableBytes = (uint64_t)sysconf(_SC_AVPHYS_PAGES) * os_cpu_PageSize();
const size_t memoryAvailable = size_t(memoryAvailableBytes / MiB);
return memoryAvailable;
}
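
Worked example (hypothetical machine, not part of this commit) of why the
intermediate math is done in uint64_t and the result expressed in MB:

// 8 GiB of RAM seen from a 32-bit binary:
//   pages = 2097152, pageSize = 4096
//   bytes = 2097152 * 4096 = 8589934592 -> overflows a 32-bit size_t (max 4294967295)
//   MB    = 8589934592 / 1048576 = 8192 -> always representable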

View File

@@ -36,16 +36,19 @@ LIB_API size_t numa_AvailableMemory(size_t node);
**/
LIB_API double numa_Factor();
/**
* @return an indication of whether memory pages are node-interleaved.
*
* note: this requires ACPI access, which may not be available on
* least-permission accounts. the default is to return false so as
* not to cause callers to panic and trigger performance warnings.
**/
LIB_API bool numa_IsMemoryInterleaved();
//-----------------------------------------------------------------------------
// allocator
/**
* simple allocator that "does the right thing" on NUMA systems - page frames
* will be taken from the node that first accesses them.
**/
LIB_API void* numa_Allocate(size_t size);
enum LargePageDisposition
{
LPD_DEFAULT,
@@ -54,15 +57,25 @@ enum LargePageDisposition
};
/**
* allocate memory from a specific node.
* simple allocator that "does the right thing" on NUMA systems.
*
* @param node node number (zero-based)
* @param largePageDisposition - allows forcibly enabling/disabling the use
* of large pages; the default decision involves a heuristic.
* @param pageSize if non-zero, receives the size [bytes] of a single page
* out of those used to map the memory.
*
* note: page frames will be taken from the node that first accesses them.
**/
LIB_API void* numa_AllocateOnNode(size_t size, size_t node, LargePageDisposition largePageDisposition = LPD_DEFAULT, size_t* pageSize = 0);
LIB_API void* numa_Allocate(size_t size, LargePageDisposition largePageDisposition = LPD_DEFAULT, size_t* ppageSize = 0);
/**
* allocate memory from a specific node.
*
* @param node node number (zero-based)
* @param largePageDisposition - see numa_Allocate
* @param pageSize - see numa_Allocate
**/
LIB_API void* numa_AllocateOnNode(size_t node, size_t size, LargePageDisposition largePageDisposition = LPD_DEFAULT, size_t* pageSize = 0);
/**
* release memory that had been handed out by one of the above allocators.
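
A hypothetical call site for the revised interface (size and diagnostics are
made up; the matching release function is declared below but not shown in this
hunk):

size_t pageSize;
void* mem = numa_Allocate(256*MiB, LPD_DEFAULT, &pageSize);	// large pages if the heuristic permits
debug_printf("NUMA allocation backed by %d KiB pages\n", (int)(pageSize/1024));
// page frames are taken from whichever node first touches each page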

View File

@@ -76,12 +76,12 @@ LIB_API size_t os_cpu_PageSize();
LIB_API size_t os_cpu_LargePageSize();
/**
* @return the size [bytes] of physical memory.
* @return the size [MB] of physical memory.
**/
LIB_API size_t os_cpu_MemorySize();
/**
* @return the size [bytes] of currently available memory.
* @return the size [MB] of currently available memory.
**/
LIB_API size_t os_cpu_MemoryAvailable();
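
Callers now receive MB rather than byte counts; a hypothetical consumer of the
changed interface (not part of this commit):

const size_t requiredMiB = 512;	// assumed requirement
if(os_cpu_MemoryAvailable() < requiredMiB)
	debug_printf("warning: %d MB available, %d MB required\n", (int)os_cpu_MemoryAvailable(), (int)requiredMiB);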

View File

@@ -69,6 +69,7 @@ size_t os_cpu_MemorySize()
// Argh, the API doesn't seem to be const-correct
/*const*/ int mib[2] = { CTL_HW, HW_PHYSMEM };
sysctl(mib, 2, &memorySize, &len, 0, 0);
memorySize /= MiB;
}
return memorySize;
@@ -82,6 +83,7 @@ size_t os_cpu_MemoryAvailable()
// Argh, the API doesn't seem to be const-correct
/*const*/ int mib[2] = { CTL_HW, HW_USERMEM };
sysctl(mib, 2, &memoryAvailable, &len, 0, 0);
memoryAvailable /= MiB;
return memoryAvailable;
}

View File

@@ -142,31 +142,33 @@ static void GetMemoryStatus(MEMORYSTATUSEX& mse)
size_t os_cpu_MemorySize()
{
static size_t memorySize;
static size_t memorySizeMiB;
if(memorySize == 0)
if(memorySizeMiB == 0)
{
MEMORYSTATUSEX mse;
GetMemoryStatus(mse);
memorySize = (size_t)mse.ullTotalPhys;
DWORDLONG memorySize = mse.ullTotalPhys;
// Richter, "Programming Applications for Windows": the reported
// value doesn't include non-paged pool reserved during boot;
// it's not considered available to the kernel. (the amount is
// 528 KiB on a 512 MiB WinXP/Win2k machine). we'll round up
// to the nearest megabyte to fix this.
memorySize = round_up(memorySize, 1*MiB);
memorySize = round_up(memorySize, DWORDLONG(1*MiB));
memorySizeMiB = size_t(memorySize / MiB);
}
return memorySize;
return memorySizeMiB;
}
size_t os_cpu_MemoryAvailable()
{
MEMORYSTATUSEX mse;
GetMemoryStatus(mse);
const size_t memoryAvailable = (size_t)mse.ullAvailPhys;
return memoryAvailable;
const size_t memoryAvailableMiB = size_t(mse.ullAvailPhys / MiB);
return memoryAvailableMiB;
}
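
Worked example (numbers from Richter's observation cited above, not part of
this commit) of the round_up fix-up:

//   a 512 MiB machine reports 512 MiB - 528 KiB = 536330240 bytes of physical memory;
//   round_up(536330240, 1*MiB) = 536870912 bytes = exactly 512 MiB,
//   so os_cpu_MemorySize() returns 512.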

View File

@@ -4,6 +4,7 @@
#include "lib/bits.h" // round_up, PopulationCount
#include "lib/timer.h"
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/acpi.h"
#include "win.h"
#include "wutil.h"
#include "wcpu.h"
@@ -141,7 +142,8 @@ size_t numa_AvailableMemory(size_t node)
ULONGLONG availableBytes;
const BOOL ok = pGetNumaAvailableMemoryNode((UCHAR)node, &availableBytes);
debug_assert(ok);
return (size_t)availableBytes;
const size_t availableMiB = size_t(availableBytes / MiB);
return availableMiB;
}
// NUMA not supported - return available system memory
else
@@ -194,22 +196,34 @@ double numa_Factor()
}
bool numa_IsMemoryInterleaved()
{
WinScopedLock lock(WNUMA_CS);
static int isInterleaved = -1;
if(isInterleaved == -1)
{
if(acpi_Init())
{
// the BIOS only generates an SRAT (System Resource Affinity Table)
// if node interleaving is disabled.
isInterleaved = acpi_GetTable("SRAT") == 0;
acpi_Shutdown();
}
else
isInterleaved = 0; // can't tell
}
return isInterleaved != 0;
}
//-----------------------------------------------------------------------------
// allocator
//-----------------------------------------------------------------------------
void* numa_Allocate(size_t size)
{
void* const mem = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
if(!mem)
throw std::bad_alloc();
return mem;
}
static bool largePageAllocationTookTooLong = false;
static bool ShouldUseLargePages(LargePageDisposition disposition, size_t allocationSize, size_t node)
static bool ShouldUseLargePages(LargePageDisposition disposition, size_t allocationSize)
{
// can't, OS does not support large pages
if(os_cpu_LargePageSize() == 0)
@@ -236,7 +250,7 @@ static bool ShouldUseLargePages(LargePageDisposition disposition, size_t allocat
// we want there to be plenty of memory available, otherwise the
// page frames are going to be terribly fragmented and even a
// single allocation would take SECONDS.
if(numa_AvailableMemory(node) < 2*GiB)
if(os_cpu_MemoryAvailable() < 2000) // 2 GB
return false;
}
@@ -244,6 +258,44 @@ static bool ShouldUseLargePages(LargePageDisposition disposition, size_t allocat
}
void* numa_Allocate(size_t size, LargePageDisposition largePageDisposition, size_t* ppageSize)
{
void* mem = 0;
// try allocating with large pages (reduces TLB misses)
if(ShouldUseLargePages(largePageDisposition, size))
{
const size_t largePageSize = os_cpu_LargePageSize();
const size_t paddedSize = round_up(size, largePageSize); // required by MEM_LARGE_PAGES
// note: this call can take SECONDS, which is why several checks are
// undertaken before we even try. these aren't authoritative, so we
// at least prevent future attempts if it takes too long.
const double startTime = timer_Time();
mem = VirtualAlloc(0, paddedSize, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE);
if(ppageSize)
*ppageSize = largePageSize;
const double elapsedTime = timer_Time() - startTime;
debug_printf("TIMER| NUMA large page allocation: %g\n", elapsedTime);
if(elapsedTime > 1.0)
largePageAllocationTookTooLong = true;
}
// try (again) with regular pages
if(!mem)
{
mem = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
if(ppageSize)
*ppageSize = os_cpu_PageSize();
}
// all attempts failed - we're apparently out of memory.
if(!mem)
throw std::bad_alloc();
return mem;
}
static bool VerifyPages(void* mem, size_t size, size_t pageSize, size_t node)
{
typedef BOOL (WINAPI *PQueryWorkingSetEx)(HANDLE hProcess, PVOID buffer, DWORD bufferSize);
@@ -294,61 +346,35 @@ static bool VerifyPages(void* mem, size_t size, size_t pageSize, size_t node)
}
void* numa_AllocateOnNode(size_t size, size_t node, LargePageDisposition largePageDisposition, size_t* ppageSize)
void* numa_AllocateOnNode(size_t node, size_t size, LargePageDisposition largePageDisposition, size_t* ppageSize)
{
debug_assert(node < numa_NumNodes());
// see if there will be enough memory (non-authoritative, for debug purposes only)
{
const size_t availableBytes = numa_AvailableMemory(node);
if(availableBytes < size)
debug_printf("NUMA: warning: node reports insufficient memory (%d vs %d)\n", availableBytes, size);
const size_t sizeMiB = size/MiB;
const size_t availableMiB = numa_AvailableMemory(node);
if(availableMiB < sizeMiB)
debug_printf("NUMA: warning: node reports insufficient memory (%d vs %d MB)\n", availableMiB, sizeMiB);
}
void* mem = 0;
size_t pageSize = 0;
// try allocating with large pages (reduces TLB misses)
if(ShouldUseLargePages(largePageDisposition, size, node))
{
const size_t largePageSize = os_cpu_LargePageSize();
const size_t paddedSize = round_up(size, largePageSize); // required by MEM_LARGE_PAGES
// note: this call can take SECONDS, which is why several checks are
// undertaken before we even try. these aren't authoritative, so we
// at least prevent future attempts if it takes too long.
const double startTime = timer_Time();
mem = VirtualAlloc(0, paddedSize, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE);
pageSize = largePageSize;
const double elapsedTime = timer_Time() - startTime;
debug_printf("TIMER| NUMA large page allocation: %g\n", elapsedTime);
if(elapsedTime > 1.0)
largePageAllocationTookTooLong = true;
}
// try (again) with regular pages
if(!mem)
{
mem = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
pageSize = os_cpu_PageSize();
}
// all attempts failed - we're apparently out of memory.
if(!mem)
throw std::bad_alloc();
size_t pageSize; // (used below even if ppageSize is zero)
void* const mem = numa_Allocate(size, largePageDisposition, &pageSize);
if(ppageSize)
*ppageSize = pageSize;
// we can't use VirtualAllocExNuma - it's only available in Vista and Server 2008.
// workaround: fault in all pages now to ensure they are allocated from the
// current node, then verify page attributes.
// (note: VirtualAlloc's MEM_COMMIT only maps virtual pages and does not
// actually allocate page frames. Windows uses a first-touch heuristic -
// the page will be taken from the node whose processor caused the fault.)
// actually allocate page frames. Windows XP uses a first-touch heuristic -
// the page will be taken from the node whose processor caused the fault.
// Windows Vista allocates on the "preferred" node, so affinity should be
// set such that this thread is running on <node>.)
memset(mem, 0, size);
VerifyPages(mem, size, pageSize, node);
if(ppageSize)
*ppageSize = pageSize;
return mem;
}
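
A hypothetical per-node call site (node index and size are made up, not part of
this commit); per the comment above, the calling thread is assumed to already
be running on <node> so that first-touch (XP) or the preferred-node policy
(Vista) places the page frames correctly:

if(!numa_IsMemoryInterleaved())	// per-node placement is moot if the BIOS interleaves memory
{
	const size_t node = 0;
	size_t pageSize;
	void* mem = numa_AllocateOnNode(node, 64*MiB, LPD_DEFAULT, &pageSize);
	// mem has already been faulted in and verified to reside on <node>
}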

View File

@@ -12,7 +12,7 @@
#include "topology.h"
#include "lib/bits.h"
#include "lib/sysdep/cpu.h"
#include "lib/sysdep/cpu.h" // ERR::CPU_FEATURE_MISSING
#include "lib/sysdep/os_cpu.h"
#include "x86_x64.h"
@@ -99,36 +99,20 @@ static size_t LogicalPerCache()
if(!logicalPerCache)
{
logicalPerCache = 1; // caches aren't shared unless we find a descriptor
logicalPerCache = 1; // (default in case DetectL2Sharing fails)
// note: Intel Appnote 485 says the order in which caches are returned is
// undefined, so we need to loop through all of them.
for(u32 count = 0; ; count++)
struct DetectL2Sharing
{
// get next cache descriptor
x86_x64_CpuidRegs regs;
regs.eax = 4;
regs.ecx = count;
x86_x64_cpuid(&regs);
const u32 type = bits(regs.eax, 0, 4);
if(type == 0) // no more remaining
break;
struct IsL2DataCache
static void Callback(const x86_x64_CacheParameters* cache)
{
bool operator()(u32 type, u32 level) const
{
if(type != 1 && type != 3) // neither data nor unified
return false;
if(level != 2)
return false;
return true;
}
};
const u32 level = bits(regs.eax, 5, 7);
if(IsL2DataCache()(type, level))
logicalPerCache = bits(regs.eax, 14, 25)+1;
}
if(cache->type != X86_X64_CACHE_TYPE_DATA && cache->type != X86_X64_CACHE_TYPE_UNIFIED)
return;
if(cache->level != 2)
return;
logicalPerCache = cache->sharedBy;
}
};
x86_x64_EnumerateCaches(DetectL2Sharing::Callback);
}
return logicalPerCache;
@@ -177,25 +161,18 @@ static const u8* ApicIds()
/**
* count the number of unique values assumed by a certain field (i.e. part
* of the APIC ID).
* @param numBits width of the field; must be set to ceil_log2 of the
* maximum value that can be assumed by the field.
* @return number of unique values (one if numBits is zero - this is
* convenient and kind of justified by counting the empty symbol)
* count the number of unique APIC IDs after application of a mask.
*
* this is used to implement NumUniqueValuesInField and also required
* for counting the number of caches.
**/
static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t numBits)
static size_t NumUniqueMaskedValues(const u8* apicIds, u8 mask)
{
if(numBits == 0)
return 1; // see above
const u8 mask = bit_mask<u8>(numBits);
typedef std::set<u8> IdSet;
IdSet ids;
std::set<u8> ids;
for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
{
const u8 apicId = apicIds[processor];
const u8 field = u8(apicId >> offset) & mask;
const u8 field = apicId & mask;
ids.insert(field);
}
@@ -203,13 +180,31 @@ static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t nu
}
/**
* count the number of values assumed by a certain field within APIC IDs.
*
* @param offset index of the lowest bit that is part of the field.
* @param numValues number of values that can be assumed by the field.
* if equal to one, the field is zero-width.
* @return number of unique values (for convenience of the topology code,
* this is always at least one)
**/
static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t numValues)
{
if(numValues == 1)
return 1; // see above
const size_t numBits = ceil_log2(numValues);
const u8 mask = u8((bit_mask<u8>(numBits) << offset) & 0xFF);
return NumUniqueMaskedValues(apicIds, mask);
}
static size_t NumPackages(const u8* apicIds)
{
if(apicIds)
{
const size_t offset = ceil_log2(CoresPerPackage()) + ceil_log2(LogicalPerCore());
const size_t numBits = 8;
return NumUniqueValuesInField(apicIds, offset, numBits);
return NumUniqueValuesInField(apicIds, offset, 256);
}
else
{
@@ -241,8 +236,7 @@ static size_t CoresPerPackage(const u8* apicIds)
if(apicIds)
{
const size_t offset = ceil_log2(LogicalPerCore());
const size_t numBits = ceil_log2(CoresPerPackage());
return NumUniqueValuesInField(apicIds, offset, numBits);
return NumUniqueValuesInField(apicIds, offset, CoresPerPackage());
}
else
{
@@ -257,8 +251,7 @@ static size_t LogicalPerCore(const u8* apicIds)
if(apicIds)
{
const size_t offset = 0;
const size_t numBits = ceil_log2(LogicalPerCore());
return NumUniqueValuesInField(apicIds, offset, numBits);
return NumUniqueValuesInField(apicIds, offset, LogicalPerCore());
}
else
{
@@ -320,9 +313,9 @@ static size_t NumCaches(const u8* apicIds)
{
if(apicIds)
{
const size_t offset = 0;
const size_t numBits = ceil_log2(LogicalPerCache());
return NumUniqueValuesInField(apicIds, offset, numBits);
const u8 mask = u8((0xFF << numBits) & 0xFF);
return NumUniqueMaskedValues(apicIds, mask);
}
else
{
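
Worked example (hypothetical system, not part of this commit) of the field/mask
scheme used above. Assume 2 packages, 2 cores per package and 2 logical units
per core:

// APIC ID layout: [ package | core | logical ] = [ bits 2..7 | bit 1 | bit 0 ]
//   logical field: offset 0, numValues = LogicalPerCore()  = 2 -> mask 0x01
//   core field:    offset 1, numValues = CoresPerPackage() = 2 -> mask 0x02
//   package field: offset 2, numValues = 256                   -> mask 0xFC (0xFF << 2)
// e.g. APIC ID 0x07 = package 1, core 1, logical 1; 0x02 = package 0, core 1, logical 0.
// NumUniqueMaskedValues counts the distinct (apicId & mask) values across all processors.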

View File

@@ -223,6 +223,63 @@ size_t x86_x64_Generation()
}
//-----------------------------------------------------------------------------
// cache
void x86_x64_EnumerateCaches(x86_x64_CacheCallback callback)
{
for(u32 count = 0; ; count++)
{
x86_x64_CpuidRegs regs;
regs.eax = 4;
regs.ecx = count;
x86_x64_cpuid(&regs);
x86_x64_CacheParameters cache;
cache.type = (x86_x64_CacheType)bits(regs.eax, 0, 4);
if(cache.type == X86_X64_CACHE_TYPE_NULL) // no more remaining
break;
cache.level = (size_t)bits(regs.eax, 5, 7);
cache.associativity = (size_t)bits(regs.ebx, 22, 31)+1;
cache.lineSize = (size_t)bits(regs.ebx, 0, 11)+1; // (yes, this also uses +1 encoding)
cache.sharedBy = (size_t)bits(regs.eax, 14, 25)+1;
{
const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1;
const size_t sets = (size_t)bits(regs.ecx, 0, 31)+1;
cache.size = cache.associativity * partitions * cache.lineSize * sets;
}
callback(&cache);
}
}
size_t x86_x64_L1CacheLineSize()
{
static size_t l1CacheLineSize;
if(!l1CacheLineSize)
{
l1CacheLineSize = 64; // (default in case DetectL1CacheLineSize fails)
struct DetectL1CacheLineSize
{
static void Callback(const x86_x64_CacheParameters* cache)
{
if(cache->type != X86_X64_CACHE_TYPE_DATA && cache->type != X86_X64_CACHE_TYPE_UNIFIED)
return;
if(cache->level != 1)
return;
l1CacheLineSize = cache->lineSize;
}
};
x86_x64_EnumerateCaches(DetectL1CacheLineSize::Callback);
}
return l1CacheLineSize;
}
//-----------------------------------------------------------------------------
// identifier string

View File

@@ -96,6 +96,40 @@ enum x86_x64_Cap
LIB_API bool x86_x64_cap(x86_x64_Cap cap);
//-----------------------------------------------------------------------------
// cache
enum x86_x64_CacheType
{
X86_X64_CACHE_TYPE_NULL, // never passed to the callback
X86_X64_CACHE_TYPE_DATA,
X86_X64_CACHE_TYPE_INSTRUCTION,
X86_X64_CACHE_TYPE_UNIFIED
// note: further values are "reserved"
};
struct x86_x64_CacheParameters
{
x86_x64_CacheType type;
size_t level;
size_t associativity;
size_t lineSize;
size_t sharedBy;
size_t size;
};
typedef void (CALL_CONV *x86_x64_CacheCallback)(const x86_x64_CacheParameters*);
/**
* call back for each cache reported by CPUID.
*
* note: ordering is undefined (see Intel AP-485)
**/
LIB_API void x86_x64_EnumerateCaches(x86_x64_CacheCallback callback);
LIB_API size_t x86_x64_L1CacheLineSize();
//-----------------------------------------------------------------------------
// stateless
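
A hypothetical diagnostic built on the new enumeration API (not part of this
commit; the output format is illustrative only):

static void CALL_CONV PrintCache(const x86_x64_CacheParameters* cache)
{
	if(cache->type == X86_X64_CACHE_TYPE_INSTRUCTION)
		return;	// only report data/unified caches
	debug_printf("L%d cache: %d KiB, %d-way, %d-byte lines, shared by %d logical units\n",
		(int)cache->level, (int)(cache->size/1024), (int)cache->associativity,
		(int)cache->lineSize, (int)cache->sharedBy);
}

static void ReportCacheTopology()
{
	x86_x64_EnumerateCaches(PrintCache);
	debug_assert(x86_x64_L1CacheLineSize() >= 32);	// sanity check; falls back to 64 if CPUID leaf 4 is unavailable
}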