forked from 0ad/0ad
sync with work:
x86_x64: update MSR definitions to include model 2F (westmere ex); update bus clock computation for sandy bridge; remove unused cpu_Serialize; use namespace, add model names
mahaf: cope with stricter security for services/drivers
ApicId -> GetApicId, move to apic module
aken: add batch file to install driver (optional)

This was SVN commit r10815.
This commit is contained in:
parent
9911f561d3
commit
3537ae31d5
@ -83,7 +83,7 @@ void ColorActivateFastImpl()
|
||||
{
|
||||
}
|
||||
#if ARCH_X86_X64
|
||||
else if (x86_x64_cap(X86_X64_CAP_SSE))
|
||||
else if (x86_x64::Cap(x86_x64::CAP_SSE))
|
||||
{
|
||||
ConvertRGBColorTo4ub = sse_ConvertRGBColorTo4ub;
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ public:
|
||||
// allocate uninitialized storage
|
||||
pointer allocate(size_type numElements)
|
||||
{
|
||||
const size_type alignment = x86_x64_Caches(L1D)->entrySize;
|
||||
const size_type alignment = x86_x64::Caches(L1D)->entrySize;
|
||||
const size_type elementSize = round_up(sizeof(T), alignment);
|
||||
const size_type size = numElements * elementSize;
|
||||
pointer p = (pointer)rtl_AllocateAligned(size, alignment);
|
||||
|
@ -23,10 +23,25 @@
|
||||
#include "precompiled.h"
|
||||
#include "lib/sysdep/arch/x86_x64/apic.h"
|
||||
|
||||
#include "lib/bits.h"
|
||||
#include "lib/module_init.h"
|
||||
#include "lib/sysdep/cpu.h" // ERR::CPU_FEATURE_MISSING
|
||||
#include "lib/sysdep/os_cpu.h"
|
||||
#include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_ApicId
|
||||
#include "lib/sysdep/arch/x86_x64/x86_x64.h"
|
||||
|
||||
|
||||
ApicId GetApicId()
|
||||
{
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
regs.eax = 1;
|
||||
// note: CPUID function 1 is always supported, but only processors with
|
||||
// an xAPIC (e.g. P4/Athlon XP) will return a nonzero ID.
|
||||
bool ok = x86_x64::cpuid(&regs);
|
||||
ASSERT(ok); UNUSED2(ok);
|
||||
const u8 apicId = (u8)bits(regs.ebx, 24, 31);
|
||||
return apicId;
|
||||
}
|
||||
|
||||
|
||||
static size_t numIds;
|
||||
static ApicId processorApicIds[os_cpu_MaxProcessors];
|
||||
@ -39,7 +54,7 @@ static Status GetAndValidateApicIds()
|
||||
{
|
||||
static void Callback(size_t processor, uintptr_t UNUSED(data))
|
||||
{
|
||||
processorApicIds[processor] = x86_x64_ApicId();
|
||||
processorApicIds[processor] = GetApicId();
|
||||
}
|
||||
};
|
||||
// (can fail due to restrictions on our process affinity or lack of
|
||||
|
@ -25,6 +25,16 @@
|
||||
|
||||
typedef u8 ApicId; // not necessarily contiguous values
|
||||
|
||||
/**
|
||||
* @return APIC ID of the currently executing processor or zero if the
|
||||
* platform does not have an xAPIC (i.e. 7th generation x86 or below).
|
||||
*
|
||||
* rationale: the alternative of accessing the APIC mmio registers is not
|
||||
* feasible - mahaf_MapPhysicalMemory only works reliably on WinXP. we also
|
||||
* don't want to interfere with the OS's constant use of the APIC registers.
|
||||
**/
|
||||
LIB_API u8 GetApicId();
|
||||
|
||||
// if this returns false, apicId = contiguousId = processor.
|
||||
// otherwise, there are unspecified but bijective mappings between
|
||||
// apicId<->contiguousId and apicId<->processor.
|
||||
|
@ -29,25 +29,27 @@
|
||||
#include "lib/sysdep/os_cpu.h"
|
||||
#include "lib/sysdep/arch/x86_x64/x86_x64.h"
|
||||
|
||||
namespace x86_x64 {
|
||||
|
||||
static const size_t maxTLBs = 2*2*4; // (level0, level1) x (D,I) x (4K, 2M, 4M, 1G)
|
||||
static size_t numTLBs = 0;
|
||||
|
||||
static const size_t numCaches = x86_x64_Cache::maxLevels * 2 + maxTLBs;
|
||||
static x86_x64_Cache caches[numCaches];
|
||||
static const size_t numCaches = x86_x64::Cache::maxLevels * 2 + maxTLBs;
|
||||
static Cache caches[numCaches];
|
||||
|
||||
|
||||
static void AddCache(const x86_x64_Cache& cache)
|
||||
static void AddCache(const x86_x64::Cache& cache)
|
||||
{
|
||||
ENSURE(cache.Validate());
|
||||
|
||||
if(cache.type == x86_x64_Cache::kData || cache.type == x86_x64_Cache::kUnified)
|
||||
if(cache.type == x86_x64::Cache::kData || cache.type == x86_x64::Cache::kUnified)
|
||||
caches[L1D + cache.level-1] = cache;
|
||||
if(cache.type == x86_x64_Cache::kInstruction || cache.type == x86_x64_Cache::kUnified)
|
||||
if(cache.type == x86_x64::Cache::kInstruction || cache.type == x86_x64::Cache::kUnified)
|
||||
caches[L1I + cache.level-1] = cache;
|
||||
}
|
||||
|
||||
|
||||
static void AddTLB(const x86_x64_Cache& tlb)
|
||||
static void AddTLB(const x86_x64::Cache& tlb)
|
||||
{
|
||||
ENSURE(tlb.Validate());
|
||||
ENSURE(tlb.level == 1 || tlb.level == 2); // see maxTLBs
|
||||
@ -65,9 +67,9 @@ static void AddTLB(const x86_x64_Cache& tlb)
|
||||
namespace AMD
|
||||
{
|
||||
|
||||
static x86_x64_Cache L1Cache(u32 reg, x86_x64_Cache::Type type)
|
||||
static x86_x64::Cache L1Cache(u32 reg, x86_x64::Cache::Type type)
|
||||
{
|
||||
x86_x64_Cache cache;
|
||||
x86_x64::Cache cache;
|
||||
cache.Initialize(1, type);
|
||||
|
||||
const size_t lineSize = bits(reg, 0, 7);
|
||||
@ -87,12 +89,12 @@ static x86_x64_Cache L1Cache(u32 reg, x86_x64_Cache::Type type)
|
||||
static const size_t associativityTable[16] =
|
||||
{
|
||||
0, 1, 2, 0, 4, 0, 8, 0,
|
||||
16, 0, 32, 48, 64, 96, 128, x86_x64_Cache::fullyAssociative
|
||||
16, 0, 32, 48, 64, 96, 128, x86_x64::Cache::fullyAssociative
|
||||
};
|
||||
|
||||
static x86_x64_Cache L2Cache(u32 reg, x86_x64_Cache::Type type)
|
||||
static x86_x64::Cache L2Cache(u32 reg, x86_x64::Cache::Type type)
|
||||
{
|
||||
x86_x64_Cache cache;
|
||||
x86_x64::Cache cache;
|
||||
cache.Initialize(2, type);
|
||||
|
||||
const size_t lineSize = bits(reg, 0, 7);
|
||||
@ -109,9 +111,9 @@ static x86_x64_Cache L2Cache(u32 reg, x86_x64_Cache::Type type)
|
||||
}
|
||||
|
||||
// (same as L2 except for the size)
|
||||
static x86_x64_Cache L3Cache(u32 reg, x86_x64_Cache::Type type)
|
||||
static x86_x64::Cache L3Cache(u32 reg, x86_x64::Cache::Type type)
|
||||
{
|
||||
x86_x64_Cache cache;
|
||||
x86_x64::Cache cache;
|
||||
cache.Initialize(3, type);
|
||||
|
||||
const size_t lineSize = bits(reg, 0, 7);
|
||||
@ -128,9 +130,9 @@ static x86_x64_Cache L3Cache(u32 reg, x86_x64_Cache::Type type)
|
||||
return cache;
|
||||
}
|
||||
|
||||
static x86_x64_Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Cache::Type type)
|
||||
static x86_x64::Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
|
||||
{
|
||||
x86_x64_Cache cache;
|
||||
x86_x64::Cache cache;
|
||||
cache.Initialize(1, type);
|
||||
|
||||
const size_t numEntries = bits(reg, bitOffset+0, bitOffset+ 7);
|
||||
@ -145,9 +147,9 @@ static x86_x64_Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Ca
|
||||
return cache;
|
||||
}
|
||||
|
||||
static x86_x64_Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Cache::Type type)
|
||||
static x86_x64::Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
|
||||
{
|
||||
x86_x64_Cache cache;
|
||||
x86_x64::Cache cache;
|
||||
cache.Initialize(2, type);
|
||||
|
||||
const size_t numEntries = bits(reg, bitOffset+ 0, bitOffset+11);
|
||||
@ -164,11 +166,11 @@ static x86_x64_Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Ca
|
||||
|
||||
static void AddTLB2Pair(u32 reg, size_t pageSize)
|
||||
{
|
||||
x86_x64_Cache::Type type = x86_x64_Cache::kUnified;
|
||||
x86_x64::Cache::Type type = x86_x64::Cache::kUnified;
|
||||
if(bits(reg, 16, 31) != 0) // not unified
|
||||
{
|
||||
AddTLB(TLB2(reg, 16, pageSize, x86_x64_Cache::kData));
|
||||
type = x86_x64_Cache::kInstruction;
|
||||
AddTLB(TLB2(reg, 16, pageSize, x86_x64::Cache::kData));
|
||||
type = x86_x64::Cache::kInstruction;
|
||||
}
|
||||
AddTLB(TLB2(reg, 0, pageSize, type));
|
||||
}
|
||||
@ -177,25 +179,25 @@ static void AddTLB2Pair(u32 reg, size_t pageSize)
|
||||
// "reserved". cache characteristics are returned via ext. functions.
|
||||
static void DetectCacheAndTLB()
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
|
||||
regs.eax = 0x80000005;
|
||||
if(x86_x64_cpuid(&regs))
|
||||
if(x86_x64::cpuid(&regs))
|
||||
{
|
||||
AddCache(L1Cache(regs.ecx, x86_x64_Cache::kData));
|
||||
AddCache(L1Cache(regs.edx, x86_x64_Cache::kInstruction));
|
||||
AddCache(L1Cache(regs.ecx, x86_x64::Cache::kData));
|
||||
AddCache(L1Cache(regs.edx, x86_x64::Cache::kInstruction));
|
||||
|
||||
AddTLB(TLB1(regs.eax, 0, 2*MiB, x86_x64_Cache::kInstruction));
|
||||
AddTLB(TLB1(regs.eax, 16, 2*MiB, x86_x64_Cache::kData));
|
||||
AddTLB(TLB1(regs.ebx, 0, 4*KiB, x86_x64_Cache::kInstruction));
|
||||
AddTLB(TLB1(regs.ebx, 16, 4*KiB, x86_x64_Cache::kData));
|
||||
AddTLB(TLB1(regs.eax, 0, 2*MiB, x86_x64::Cache::kInstruction));
|
||||
AddTLB(TLB1(regs.eax, 16, 2*MiB, x86_x64::Cache::kData));
|
||||
AddTLB(TLB1(regs.ebx, 0, 4*KiB, x86_x64::Cache::kInstruction));
|
||||
AddTLB(TLB1(regs.ebx, 16, 4*KiB, x86_x64::Cache::kData));
|
||||
}
|
||||
|
||||
regs.eax = 0x80000006;
|
||||
if(x86_x64_cpuid(&regs))
|
||||
if(x86_x64::cpuid(&regs))
|
||||
{
|
||||
AddCache(L2Cache(regs.ecx, x86_x64_Cache::kUnified));
|
||||
AddCache(L3Cache(regs.edx, x86_x64_Cache::kUnified));
|
||||
AddCache(L2Cache(regs.ecx, x86_x64::Cache::kUnified));
|
||||
AddCache(L3Cache(regs.edx, x86_x64::Cache::kUnified));
|
||||
|
||||
AddTLB2Pair(regs.eax, 2*MiB);
|
||||
AddTLB2Pair(regs.ebx, 4*KiB);
|
||||
@ -215,21 +217,21 @@ static bool DetectCache()
|
||||
// note: level order is unspecified (see Intel AP-485)
|
||||
for(u32 count = 0; ; count++)
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
regs.eax = 4;
|
||||
regs.ecx = count;
|
||||
if(!x86_x64_cpuid(&regs))
|
||||
if(!x86_x64::cpuid(&regs))
|
||||
return false;
|
||||
|
||||
const x86_x64_Cache::Type type = (x86_x64_Cache::Type)bits(regs.eax, 0, 4);
|
||||
if(type == x86_x64_Cache::kNull) // no more remaining
|
||||
const x86_x64::Cache::Type type = (x86_x64::Cache::Type)bits(regs.eax, 0, 4);
|
||||
if(type == x86_x64::Cache::kNull) // no more remaining
|
||||
break;
|
||||
|
||||
const size_t level = (size_t)bits(regs.eax, 5, 7);
|
||||
const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1;
|
||||
const size_t sets = (size_t)bits(regs.ecx, 0, 31)+1;
|
||||
|
||||
x86_x64_Cache cache;
|
||||
x86_x64::Cache cache;
|
||||
cache.Initialize(level, type);
|
||||
cache.entrySize = (size_t)bits(regs.ebx, 0, 11)+1; // (yes, this also uses +1 encoding)
|
||||
cache.associativity = (size_t)bits(regs.ebx, 22, 31)+1;
|
||||
@ -274,9 +276,9 @@ static Descriptors GetDescriptors()
|
||||
const uintptr_t firstProcessor = allProcessors & -intptr_t(allProcessors);
|
||||
const uintptr_t prevAffinityMask = os_cpu_SetThreadAffinityMask(firstProcessor);
|
||||
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
regs.eax = 2;
|
||||
if(!x86_x64_cpuid(&regs))
|
||||
if(!x86_x64::cpuid(&regs))
|
||||
return Descriptors();
|
||||
|
||||
Descriptors descriptors;
|
||||
@ -290,7 +292,7 @@ static Descriptors GetDescriptors()
|
||||
if(--iterations == 0)
|
||||
break;
|
||||
regs.eax = 2;
|
||||
const bool ok = x86_x64_cpuid(&regs);
|
||||
const bool ok = x86_x64::cpuid(&regs);
|
||||
ENSURE(ok);
|
||||
}
|
||||
|
||||
@ -321,19 +323,19 @@ enum Flags
|
||||
// (there are > 100 descriptors, so we squeeze all fields into 8 bytes.)
|
||||
struct Characteristics // POD
|
||||
{
|
||||
x86_x64_Cache::Type Type() const
|
||||
x86_x64::Cache::Type Type() const
|
||||
{
|
||||
switch(flags & U)
|
||||
{
|
||||
case D:
|
||||
return x86_x64_Cache::kData;
|
||||
return x86_x64::Cache::kData;
|
||||
case I:
|
||||
return x86_x64_Cache::kInstruction;
|
||||
return x86_x64::Cache::kInstruction;
|
||||
case U:
|
||||
return x86_x64_Cache::kUnified;
|
||||
return x86_x64::Cache::kUnified;
|
||||
default:
|
||||
DEBUG_WARN_ERR(ERR::LOGIC);
|
||||
return x86_x64_Cache::kNull;
|
||||
return x86_x64::Cache::kNull;
|
||||
}
|
||||
}
|
||||
|
||||
@ -365,7 +367,7 @@ struct Characteristics // POD
|
||||
u32 flags; // level, type, largeSize
|
||||
};
|
||||
|
||||
static const u8 F = x86_x64_Cache::fullyAssociative;
|
||||
static const u8 F = x86_x64::Cache::fullyAssociative;
|
||||
|
||||
#define CACHE(descriptor, flags, totalSize, assoc, entrySize) { descriptor, assoc, -entrySize, flags | ((totalSize)/(entrySize)) }
|
||||
#define TLB(descriptor, flags, entrySize, assoc, numEntries) { descriptor, assoc, numEntries, flags | (entrySize) }
|
||||
@ -586,7 +588,7 @@ static void DetectCacheAndTLB(size_t& descriptorFlags)
|
||||
if((descriptorFlags & SKIP_CACHE_DESCRIPTORS) && !characteristics->IsTLB())
|
||||
continue;
|
||||
|
||||
x86_x64_Cache cache;
|
||||
x86_x64::Cache cache;
|
||||
cache.Initialize(characteristics->Level(), characteristics->Type());
|
||||
cache.numEntries = characteristics->NumEntries();
|
||||
cache.entrySize = characteristics->EntrySize();
|
||||
@ -605,13 +607,13 @@ static void DetectCacheAndTLB(size_t& descriptorFlags)
|
||||
static Status DetectCacheAndTLB()
|
||||
{
|
||||
// ensure all cache entries are initialized (DetectCache* might not set them all)
|
||||
for(size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; idxLevel++)
|
||||
for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++)
|
||||
{
|
||||
caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64_Cache::kData);
|
||||
caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64_Cache::kInstruction);
|
||||
caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kData);
|
||||
caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kInstruction);
|
||||
}
|
||||
|
||||
if(x86_x64_Vendor() == X86_X64_VENDOR_AMD)
|
||||
if(x86_x64::Vendor() == x86_x64::VENDOR_AMD)
|
||||
AMD::DetectCacheAndTLB();
|
||||
else
|
||||
{
|
||||
@ -622,13 +624,13 @@ static Status DetectCacheAndTLB()
|
||||
}
|
||||
|
||||
// sanity checks
|
||||
for(size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; idxLevel++)
|
||||
for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++)
|
||||
{
|
||||
ENSURE(caches[L1D+idxLevel].type == x86_x64_Cache::kData || caches[L1D+idxLevel].type == x86_x64_Cache::kUnified);
|
||||
ENSURE(caches[L1D+idxLevel].type == x86_x64::Cache::kData || caches[L1D+idxLevel].type == x86_x64::Cache::kUnified);
|
||||
ENSURE(caches[L1D+idxLevel].level == idxLevel+1);
|
||||
ENSURE(caches[L1D+idxLevel].Validate() == true);
|
||||
|
||||
ENSURE(caches[L1I+idxLevel].type == x86_x64_Cache::kInstruction || caches[L1I+idxLevel].type == x86_x64_Cache::kUnified);
|
||||
ENSURE(caches[L1I+idxLevel].type == x86_x64::Cache::kInstruction || caches[L1I+idxLevel].type == x86_x64::Cache::kUnified);
|
||||
ENSURE(caches[L1I+idxLevel].level == idxLevel+1);
|
||||
ENSURE(caches[L1I+idxLevel].Validate() == true);
|
||||
}
|
||||
@ -638,7 +640,7 @@ static Status DetectCacheAndTLB()
|
||||
return INFO::OK;
|
||||
}
|
||||
|
||||
const x86_x64_Cache* x86_x64_Caches(size_t idxCache)
|
||||
const x86_x64::Cache* x86_x64::Caches(size_t idxCache)
|
||||
{
|
||||
static ModuleInitState initState;
|
||||
ModuleInit(&initState, DetectCacheAndTLB);
|
||||
@ -648,3 +650,5 @@ const x86_x64_Cache* x86_x64_Caches(size_t idxCache)
|
||||
|
||||
return &caches[idxCache];
|
||||
}
|
||||
|
||||
} // namespace x86_x64
|
||||
|
@ -23,7 +23,9 @@
|
||||
#ifndef INCLUDED_X86_X64_CACHE
|
||||
#define INCLUDED_X86_X64_CACHE
|
||||
|
||||
struct x86_x64_Cache // POD (may be used before static constructors)
|
||||
namespace x86_x64 {
|
||||
|
||||
struct Cache // POD (may be used before static constructors)
|
||||
{
|
||||
enum Type
|
||||
{
|
||||
@ -133,6 +135,8 @@ enum IdxCache
|
||||
* @return 0 if idxCache >= TLB+numTLBs, otherwise a valid pointer to
|
||||
* a Cache whose numEntries is 0 if disabled / not present.
|
||||
**/
|
||||
LIB_API const x86_x64_Cache* x86_x64_Caches(size_t idxCache);
|
||||
LIB_API const Cache* Caches(size_t idxCache);
|
||||
|
||||
} // namespace x86_x64
|
||||
|
||||
#endif // #ifndef INCLUDED_X86_X64_CACHE
|
||||
|
@ -31,7 +31,7 @@ namespace MSR {
|
||||
|
||||
bool IsAccessible()
|
||||
{
|
||||
if(!x86_x64_Cap(X86_X64_CAP_MSR))
|
||||
if(!x86_x64::Cap(x86_x64::CAP_MSR))
|
||||
return false;
|
||||
|
||||
// only read/writable from ring 0, so we need the driver.
|
||||
@ -49,13 +49,13 @@ bool HasEnergyPerfBias()
|
||||
// this, lest we provoke a GPF.
|
||||
return false;
|
||||
#else
|
||||
if(x86_x64_Vendor() != X86_X64_VENDOR_INTEL)
|
||||
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
|
||||
return false;
|
||||
|
||||
if(x86_x64_Family() < 6)
|
||||
if(x86_x64::Family() < 6)
|
||||
return false;
|
||||
|
||||
if(x86_x64_Model() < 0xE)
|
||||
if(x86_x64::Model() < 0xE)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -65,32 +65,33 @@ bool HasEnergyPerfBias()
|
||||
|
||||
bool HasPlatformInfo()
|
||||
{
|
||||
if(x86_x64_Vendor() != X86_X64_VENDOR_INTEL)
|
||||
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
|
||||
return false;
|
||||
|
||||
if(x86_x64_Family() != 6)
|
||||
if(x86_x64::Family() != 6)
|
||||
return false;
|
||||
|
||||
switch(x86_x64_Model())
|
||||
switch(x86_x64::Model())
|
||||
{
|
||||
// Xeon 5500 / i7 (section B.4 in 253669-037US)
|
||||
case 0x1A: // Bloomfield, Gainstown
|
||||
case 0x1E: // Clarksfield, Lynnfield, Jasper Forest
|
||||
case 0x1F:
|
||||
// section 34.4 in 253665-041US
|
||||
case x86_x64::MODEL_NEHALEM_EP:
|
||||
case x86_x64::MODEL_NEHALEM_EP_2:
|
||||
case x86_x64::MODEL_NEHALEM_EX:
|
||||
case x86_x64::MODEL_I7_I5:
|
||||
return true;
|
||||
|
||||
// Xeon 7500 (section B.4.2)
|
||||
case 0x2E:
|
||||
// section 34.5
|
||||
case x86_x64::MODEL_CLARKDALE:
|
||||
case x86_x64::MODEL_WESTMERE_EP:
|
||||
return true;
|
||||
|
||||
// Xeon 5600 / Westmere (section B.5)
|
||||
case 0x25: // Clarkdale, Arrandale
|
||||
case 0x2C: // Gulftown
|
||||
// section 34.6
|
||||
case x86_x64::MODEL_WESTMERE_EX:
|
||||
return true;
|
||||
|
||||
// Xeon 2xxx / Sandy Bridge (section B.6)
|
||||
case 0x2A:
|
||||
case 0x2D:
|
||||
// section 34.7
|
||||
case x86_x64::MODEL_SANDY_BRIDGE:
|
||||
case x86_x64::MODEL_SANDY_BRIDGE_2:
|
||||
return true;
|
||||
|
||||
default:
|
||||
@ -101,13 +102,13 @@ bool HasPlatformInfo()
|
||||
|
||||
bool HasUncore()
|
||||
{
|
||||
if(x86_x64_Vendor() != X86_X64_VENDOR_INTEL)
|
||||
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
|
||||
return false;
|
||||
|
||||
if(x86_x64_Family() != 6)
|
||||
if(x86_x64::Family() != 6)
|
||||
return false;
|
||||
|
||||
switch(x86_x64_Model())
|
||||
switch(x86_x64::Model())
|
||||
{
|
||||
// Xeon 5500 / i7 (section B.4.1 in 253669-037US)
|
||||
case 0x1A: // Bloomfield, Gainstown
|
||||
|
@ -29,8 +29,8 @@ class TestTopology : public CxxTest::TestSuite
|
||||
public:
|
||||
void test_run()
|
||||
{
|
||||
TS_ASSERT_LESS_THAN_EQUALS(1u, cpu_topology_NumPackages());
|
||||
TS_ASSERT_LESS_THAN_EQUALS(1u, cpu_topology_CoresPerPackage());
|
||||
TS_ASSERT_LESS_THAN_EQUALS(1u, cpu_topology_LogicalPerCore());
|
||||
TS_ASSERT_LESS_THAN_EQUALS(1u, topology::NumPackages());
|
||||
TS_ASSERT_LESS_THAN_EQUALS(1u, topology::CoresPerPackage());
|
||||
TS_ASSERT_LESS_THAN_EQUALS(1u, topology::LogicalPerCore());
|
||||
}
|
||||
};
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "lib/sysdep/arch/x86_x64/cache.h"
|
||||
#include "lib/sysdep/arch/x86_x64/apic.h"
|
||||
|
||||
namespace topology {
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------------
|
||||
// detect *maximum* number of cores/packages/caches.
|
||||
@ -49,19 +50,19 @@ static size_t MaxCoresPerPackage()
|
||||
// assume single-core unless one of the following applies:
|
||||
size_t maxCoresPerPackage = 1;
|
||||
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
switch(x86_x64_Vendor())
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
switch(x86_x64::Vendor())
|
||||
{
|
||||
case X86_X64_VENDOR_INTEL:
|
||||
case x86_x64::VENDOR_INTEL:
|
||||
regs.eax = 4;
|
||||
regs.ecx = 0;
|
||||
if(x86_x64_cpuid(&regs))
|
||||
if(x86_x64::cpuid(&regs))
|
||||
maxCoresPerPackage = bits(regs.eax, 26, 31)+1;
|
||||
break;
|
||||
|
||||
case X86_X64_VENDOR_AMD:
|
||||
case x86_x64::VENDOR_AMD:
|
||||
regs.eax = 0x80000008;
|
||||
if(x86_x64_cpuid(&regs))
|
||||
if(x86_x64::cpuid(&regs))
|
||||
maxCoresPerPackage = bits(regs.ecx, 0, 7)+1;
|
||||
break;
|
||||
|
||||
@ -80,13 +81,13 @@ static size_t MaxLogicalPerCore()
|
||||
bool operator()() const
|
||||
{
|
||||
// definitely not
|
||||
if(!x86_x64_cap(X86_X64_CAP_HT))
|
||||
if(!x86_x64::Cap(x86_x64::CAP_HT))
|
||||
return false;
|
||||
|
||||
// multi-core AMD systems falsely set the HT bit for reasons of
|
||||
// compatibility. we'll just ignore it, because clearing it might
|
||||
// confuse other callers.
|
||||
if(x86_x64_Vendor() == X86_X64_VENDOR_AMD && x86_x64_cap(X86_X64_CAP_AMD_CMP_LEGACY))
|
||||
if(x86_x64::Vendor() == x86_x64::VENDOR_AMD && x86_x64::Cap(x86_x64::CAP_AMD_CMP_LEGACY))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -94,9 +95,9 @@ static size_t MaxLogicalPerCore()
|
||||
};
|
||||
if(IsHyperthreadingCapable()())
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
regs.eax = 1;
|
||||
if(!x86_x64_cpuid(&regs))
|
||||
if(!x86_x64::cpuid(&regs))
|
||||
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
|
||||
const size_t logicalPerPackage = bits(regs.ebx, 16, 23);
|
||||
const size_t maxCoresPerPackage = MaxCoresPerPackage();
|
||||
@ -112,7 +113,7 @@ static size_t MaxLogicalPerCore()
|
||||
|
||||
static size_t MaxLogicalPerCache()
|
||||
{
|
||||
return x86_x64_Caches(L2D)->sharedBy;
|
||||
return x86_x64::Caches(x86_x64::L2D)->sharedBy;
|
||||
}
|
||||
|
||||
|
||||
@ -240,44 +241,44 @@ static Status InitCpuTopology()
|
||||
}
|
||||
|
||||
|
||||
size_t cpu_topology_NumPackages()
|
||||
size_t NumPackages()
|
||||
{
|
||||
ModuleInit(&cpuInitState, InitCpuTopology);
|
||||
return cpuTopology.numPackages;
|
||||
}
|
||||
|
||||
size_t cpu_topology_CoresPerPackage()
|
||||
size_t CoresPerPackage()
|
||||
{
|
||||
ModuleInit(&cpuInitState, InitCpuTopology);
|
||||
return cpuTopology.coresPerPackage;
|
||||
}
|
||||
|
||||
size_t cpu_topology_LogicalPerCore()
|
||||
size_t LogicalPerCore()
|
||||
{
|
||||
ModuleInit(&cpuInitState, InitCpuTopology);
|
||||
return cpuTopology.logicalPerCore;
|
||||
}
|
||||
|
||||
size_t cpu_topology_LogicalFromApicId(size_t apicId)
|
||||
size_t LogicalFromApicId(ApicId apicId)
|
||||
{
|
||||
const size_t contiguousId = ContiguousIdFromApicId(apicId);
|
||||
return contiguousId % cpuTopology.logicalPerCore;
|
||||
}
|
||||
|
||||
size_t cpu_topology_CoreFromApicId(size_t apicId)
|
||||
size_t CoreFromApicId(ApicId apicId)
|
||||
{
|
||||
const size_t contiguousId = ContiguousIdFromApicId(apicId);
|
||||
return (contiguousId / cpuTopology.logicalPerCore) % cpuTopology.coresPerPackage;
|
||||
}
|
||||
|
||||
size_t cpu_topology_PackageFromApicId(size_t apicId)
|
||||
size_t PackageFromApicId(ApicId apicId)
|
||||
{
|
||||
const size_t contiguousId = ContiguousIdFromApicId(apicId);
|
||||
return contiguousId / (cpuTopology.logicalPerCore * cpuTopology.coresPerPackage);
|
||||
}
|
||||
|
||||
|
||||
size_t cpu_topology_ApicId(size_t idxLogical, size_t idxCore, size_t idxPackage)
|
||||
ApicId ApicIdFromIndices(size_t idxLogical, size_t idxCore, size_t idxPackage)
|
||||
{
|
||||
ModuleInit(&cpuInitState, InitCpuTopology);
|
||||
|
||||
@ -450,22 +451,24 @@ static Status InitCacheTopology()
|
||||
return INFO::OK;
|
||||
}
|
||||
|
||||
size_t cache_topology_NumCaches()
|
||||
size_t NumCaches()
|
||||
{
|
||||
ModuleInit(&cacheInitState, InitCacheTopology);
|
||||
return cacheTopology.numCaches;
|
||||
}
|
||||
|
||||
size_t cache_topology_CacheFromProcessor(size_t processor)
|
||||
size_t CacheFromProcessor(size_t processor)
|
||||
{
|
||||
ModuleInit(&cacheInitState, InitCacheTopology);
|
||||
ENSURE(processor < os_cpu_NumProcessors());
|
||||
return cacheTopology.processorsCache[processor];
|
||||
}
|
||||
|
||||
uintptr_t cache_topology_ProcessorMaskFromCache(size_t cache)
|
||||
uintptr_t ProcessorMaskFromCache(size_t cache)
|
||||
{
|
||||
ModuleInit(&cacheInitState, InitCacheTopology);
|
||||
ENSURE(cache < cacheTopology.numCaches);
|
||||
return cacheTopology.cachesProcessorMask[cache];
|
||||
}
|
||||
|
||||
} // namespace topology
|
||||
|
@ -25,8 +25,12 @@
|
||||
* thread-safe, no explicit initialization is required.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_TOPOLOGY
|
||||
#define INCLUDED_TOPOLOGY
|
||||
#ifndef INCLUDED_X86_X64_TOPOLOGY
|
||||
#define INCLUDED_X86_X64_TOPOLOGY
|
||||
|
||||
#include "lib/sysdep/arch/x86_x64/apic.h" // ApicId
|
||||
|
||||
namespace topology {
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// cpu
|
||||
@ -41,25 +45,40 @@
|
||||
/**
|
||||
* @return number of *enabled* CPU packages / sockets.
|
||||
**/
|
||||
LIB_API size_t cpu_topology_NumPackages();
|
||||
LIB_API size_t NumPackages();
|
||||
|
||||
/**
|
||||
* @return number of *enabled* CPU cores per package.
|
||||
* (2 on dual-core systems)
|
||||
**/
|
||||
LIB_API size_t cpu_topology_CoresPerPackage();
|
||||
LIB_API size_t CoresPerPackage();
|
||||
|
||||
/**
|
||||
* @return number of *enabled* logical processors (aka Hyperthreads)
|
||||
* per core. (2 on P4 EE)
|
||||
**/
|
||||
LIB_API size_t cpu_topology_LogicalPerCore();
|
||||
LIB_API size_t LogicalPerCore();
|
||||
|
||||
LIB_API size_t cpu_topology_PackageFromApicId(size_t apicId);
|
||||
LIB_API size_t cpu_topology_CoreFromApicId(size_t apicId);
|
||||
LIB_API size_t cpu_topology_LogicalFromApicId(size_t apicId);
|
||||
/**
|
||||
* @return index of processor package/socket in [0, NumPackages())
|
||||
**/
|
||||
LIB_API size_t PackageFromApicId(ApicId apicId);
|
||||
|
||||
LIB_API size_t cpu_topology_ApicId(size_t idxPackage, size_t idxCore, size_t idxLogical);
|
||||
/**
|
||||
* @return index of processor core in [0, CoresPerPackage())
|
||||
**/
|
||||
LIB_API size_t CoreFromApicId(ApicId apicId);
|
||||
|
||||
/**
|
||||
* @return index of logical processor in [0, LogicalPerCore())
|
||||
**/
|
||||
LIB_API size_t LogicalFromApicId(ApicId apicId);
|
||||
|
||||
/**
|
||||
* @param idxPackage, idxCore, idxLogical return values of *FromApicId
|
||||
* @return APIC ID (see note at AreApicIdsReliable)
|
||||
**/
|
||||
LIB_API ApicId ApicIdFromIndices(size_t idxPackage, size_t idxCore, size_t idxLogical);
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
@ -75,16 +94,18 @@ LIB_API size_t cpu_topology_ApicId(size_t idxPackage, size_t idxCore, size_t idx
|
||||
/**
|
||||
* @return number of distinct L2 caches.
|
||||
**/
|
||||
LIB_API size_t cache_topology_NumCaches();
|
||||
LIB_API size_t NumCaches();
|
||||
|
||||
/**
|
||||
* @return L2 cache number (zero-based) to which the given processor belongs.
|
||||
**/
|
||||
LIB_API size_t cache_topology_CacheFromProcessor(size_t processor);
|
||||
LIB_API size_t CacheFromProcessor(size_t processor);
|
||||
|
||||
/**
|
||||
* @return bit-mask of all processors sharing the given cache.
|
||||
**/
|
||||
LIB_API uintptr_t cache_topology_ProcessorMaskFromCache(size_t cache);
|
||||
LIB_API uintptr_t ProcessorMaskFromCache(size_t cache);
|
||||
|
||||
#endif // #ifndef INCLUDED_TOPOLOGY
|
||||
} // namespace topology
|
||||
|
||||
#endif // #ifndef INCLUDED_X86_X64_TOPOLOGY
|
||||
|
@ -44,6 +44,8 @@
|
||||
# include <intrin.h> // __rdtsc
|
||||
#endif
|
||||
|
||||
namespace x86_x64 {
|
||||
|
||||
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729
|
||||
// VC10+ and VC9 SP1: __cpuidex is already available
|
||||
#elif GCC_VERSION
|
||||
@ -65,11 +67,10 @@
|
||||
// call a public function (that re-enters ModuleInit), so each
|
||||
// function gets its own initState.
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// CPUID
|
||||
|
||||
static void cpuid(x86_x64_CpuidRegs* regs)
|
||||
static void Invoke_cpuid(CpuidRegs* regs)
|
||||
{
|
||||
cassert(sizeof(regs->eax) == sizeof(int));
|
||||
cassert(sizeof(*regs) == 4*sizeof(int));
|
||||
@ -81,20 +82,20 @@ static u32 cpuid_maxExtendedFunction;
|
||||
|
||||
static Status InitCpuid()
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
CpuidRegs regs = { 0 };
|
||||
|
||||
regs.eax = 0;
|
||||
cpuid(&regs);
|
||||
Invoke_cpuid(&regs);
|
||||
cpuid_maxFunction = regs.eax;
|
||||
|
||||
regs.eax = 0x80000000;
|
||||
cpuid(&regs);
|
||||
Invoke_cpuid(&regs);
|
||||
cpuid_maxExtendedFunction = regs.eax;
|
||||
|
||||
return INFO::OK;
|
||||
}
|
||||
|
||||
bool x86_x64_cpuid(x86_x64_CpuidRegs* regs)
|
||||
bool cpuid(CpuidRegs* regs)
|
||||
{
|
||||
static ModuleInitState initState;
|
||||
ModuleInit(&initState, InitCpuid);
|
||||
@ -105,7 +106,7 @@ bool x86_x64_cpuid(x86_x64_CpuidRegs* regs)
|
||||
if(function < 0x80000000 && function > cpuid_maxFunction)
|
||||
return false;
|
||||
|
||||
cpuid(regs);
|
||||
Invoke_cpuid(regs);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -114,22 +115,22 @@ bool x86_x64_cpuid(x86_x64_CpuidRegs* regs)
|
||||
// capability bits
|
||||
|
||||
// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
|
||||
// keep in sync with enum x86_x64_Cap!
|
||||
// keep in sync with enum Cap!
|
||||
static u32 caps[4];
|
||||
|
||||
static ModuleInitState capsInitState;
|
||||
|
||||
static Status InitCaps()
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
CpuidRegs regs = { 0 };
|
||||
regs.eax = 1;
|
||||
if(x86_x64_cpuid(&regs))
|
||||
if(cpuid(&regs))
|
||||
{
|
||||
caps[0] = regs.ecx;
|
||||
caps[1] = regs.edx;
|
||||
}
|
||||
regs.eax = 0x80000001;
|
||||
if(x86_x64_cpuid(&regs))
|
||||
if(cpuid(&regs))
|
||||
{
|
||||
caps[2] = regs.ecx;
|
||||
caps[3] = regs.edx;
|
||||
@ -138,7 +139,7 @@ static Status InitCaps()
|
||||
return INFO::OK;
|
||||
}
|
||||
|
||||
bool x86_x64_cap(x86_x64_Cap cap)
|
||||
bool Cap(Caps cap)
|
||||
{
|
||||
ModuleInit(&capsInitState, InitCaps);
|
||||
|
||||
@ -152,7 +153,7 @@ bool x86_x64_cap(x86_x64_Cap cap)
|
||||
return IsBitSet(caps[index], bit);
|
||||
}
|
||||
|
||||
void x86_x64_caps(u32* d0, u32* d1, u32* d2, u32* d3)
|
||||
void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3)
|
||||
{
|
||||
ModuleInit(&capsInitState, InitCaps);
|
||||
|
||||
@ -166,13 +167,13 @@ void x86_x64_caps(u32* d0, u32* d1, u32* d2, u32* d3)
|
||||
//-----------------------------------------------------------------------------
|
||||
// vendor
|
||||
|
||||
static x86_x64_Vendors vendor;
|
||||
static Vendors vendor;
|
||||
|
||||
static Status InitVendor()
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
CpuidRegs regs = { 0 };
|
||||
regs.eax = 0;
|
||||
if(!x86_x64_cpuid(&regs))
|
||||
if(!cpuid(&regs))
|
||||
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
|
||||
|
||||
// copy regs to string
|
||||
@ -184,19 +185,19 @@ static Status InitVendor()
|
||||
vendorString[12] = '\0'; // 0-terminate
|
||||
|
||||
if(!strcmp(vendorString, "AuthenticAMD"))
|
||||
vendor = X86_X64_VENDOR_AMD;
|
||||
vendor = x86_x64::VENDOR_AMD;
|
||||
else if(!strcmp(vendorString, "GenuineIntel"))
|
||||
vendor = X86_X64_VENDOR_INTEL;
|
||||
vendor = x86_x64::VENDOR_INTEL;
|
||||
else
|
||||
{
|
||||
DEBUG_WARN_ERR(ERR::CPU_UNKNOWN_VENDOR);
|
||||
vendor = X86_X64_VENDOR_UNKNOWN;
|
||||
vendor = x86_x64::VENDOR_UNKNOWN;
|
||||
}
|
||||
|
||||
return INFO::OK;
|
||||
}
|
||||
|
||||
x86_x64_Vendors x86_x64_Vendor()
|
||||
Vendors Vendor()
|
||||
{
|
||||
static ModuleInitState initState;
|
||||
ModuleInit(&initState, InitVendor);
|
||||
@ -213,9 +214,9 @@ static ModuleInitState signatureInitState;
|
||||
|
||||
static Status InitSignature()
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
CpuidRegs regs = { 0 };
|
||||
regs.eax = 1;
|
||||
if(!x86_x64_cpuid(&regs))
|
||||
if(!cpuid(&regs))
|
||||
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
|
||||
model = bits(regs.eax, 4, 7);
|
||||
family = bits(regs.eax, 8, 11);
|
||||
@ -223,18 +224,18 @@ static Status InitSignature()
|
||||
const size_t extendedFamily = bits(regs.eax, 20, 27);
|
||||
if(family == 0xF)
|
||||
family += extendedFamily;
|
||||
if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6))
|
||||
if(family == 0xF || (Vendor() == x86_x64::VENDOR_INTEL && family == 6))
|
||||
model += extendedModel << 4;
|
||||
return INFO::OK;
|
||||
}
|
||||
|
||||
size_t x86_x64_Model()
|
||||
size_t Model()
|
||||
{
|
||||
ModuleInit(&signatureInitState, InitSignature);
|
||||
return model;
|
||||
}
|
||||
|
||||
size_t x86_x64_Family()
|
||||
size_t Family()
|
||||
{
|
||||
ModuleInit(&signatureInitState, InitSignature);
|
||||
return family;
|
||||
@ -285,9 +286,9 @@ static Status InitIdentifierString()
|
||||
bool gotBrandString = true;
|
||||
for(u32 function = 0x80000002; function <= 0x80000004; function++)
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
CpuidRegs regs = { 0 };
|
||||
regs.eax = function;
|
||||
gotBrandString &= x86_x64_cpuid(®s);
|
||||
gotBrandString &= cpuid(®s);
|
||||
memcpy(pos, ®s, 16);
|
||||
pos += 16;
|
||||
}
|
||||
@ -300,11 +301,11 @@ static Status InitIdentifierString()
|
||||
// doesn't recognize.
|
||||
if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
|
||||
{
|
||||
const size_t family = x86_x64_Family();
|
||||
const size_t model = x86_x64_Model();
|
||||
switch(x86_x64_Vendor())
|
||||
const size_t family = Family();
|
||||
const size_t model = Model();
|
||||
switch(Vendor())
|
||||
{
|
||||
case X86_X64_VENDOR_AMD:
|
||||
case x86_x64::VENDOR_AMD:
|
||||
// everything else is either too old, or should have a brand string.
|
||||
if(family == 6)
|
||||
{
|
||||
@ -314,7 +315,7 @@ static Status InitIdentifierString()
|
||||
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon");
|
||||
else
|
||||
{
|
||||
if(x86_x64_cap(X86_X64_CAP_AMD_MP))
|
||||
if(Cap(x86_x64::CAP_AMD_MP))
|
||||
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon MP");
|
||||
else
|
||||
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon XP");
|
||||
@ -322,7 +323,7 @@ static Status InitIdentifierString()
|
||||
}
|
||||
break;
|
||||
|
||||
case X86_X64_VENDOR_INTEL:
|
||||
case x86_x64::VENDOR_INTEL:
|
||||
// everything else is either too old, or should have a brand string.
|
||||
if(family == 6)
|
||||
{
|
||||
@ -357,7 +358,7 @@ static Status InitIdentifierString()
|
||||
return INFO::OK;
|
||||
}
|
||||
|
||||
const char* cpu_IdentifierString()
|
||||
static const char* IdentifierString()
|
||||
{
|
||||
static ModuleInitState initState;
|
||||
ModuleInit(&initState, InitIdentifierString);
|
||||
@ -368,25 +369,8 @@ const char* cpu_IdentifierString()
|
||||
//-----------------------------------------------------------------------------
|
||||
// miscellaneous stateless functions
|
||||
|
||||
// these routines do not call ModuleInit (because some of them are
|
||||
// time-critical, e.g. cpu_Serialize) and should also avoid the
|
||||
// other x86_x64* functions and their global state.
|
||||
// in particular, use cpuid instead of x86_x64_cpuid.
|
||||
|
||||
u8 x86_x64_ApicId()
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
regs.eax = 1;
|
||||
// note: CPUID function 1 is always supported, but only processors with
|
||||
// an xAPIC (e.g. P4/Athlon XP) will return a nonzero ID.
|
||||
cpuid(®s);
|
||||
const u8 apicId = (u8)bits(regs.ebx, 24, 31);
|
||||
return apicId;
|
||||
}
|
||||
|
||||
|
||||
#if !MSC_VERSION // replaced by macro
|
||||
u64 x86_x64_rdtsc()
|
||||
#if !MSC_VERSION // ensure not already defined in header
|
||||
u64 rdtsc()
|
||||
{
|
||||
#if GCC_VERSION
|
||||
// GCC supports "portable" assembly for both x86 and x64
|
||||
@ -398,7 +382,7 @@ u64 x86_x64_rdtsc()
|
||||
#endif
|
||||
|
||||
|
||||
void x86_x64_DebugBreak()
|
||||
void DebugBreak()
|
||||
{
|
||||
#if MSC_VERSION
|
||||
__debugbreak();
|
||||
@ -411,14 +395,6 @@ void x86_x64_DebugBreak()
|
||||
}
|
||||
|
||||
|
||||
void cpu_Serialize()
|
||||
{
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
regs.eax = 1;
|
||||
cpuid(®s); // CPUID serializes execution.
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// CPU frequency
|
||||
|
||||
@ -450,13 +426,13 @@ private:
|
||||
|
||||
// note: this function uses timer.cpp!timer_Time, which is implemented via
|
||||
// whrt.cpp on Windows.
|
||||
double x86_x64_ClockFrequency()
|
||||
double ClockFrequency()
|
||||
{
|
||||
// if the TSC isn't available, there's really no good way to count the
|
||||
// actual CPU clocks per known time interval, so bail.
|
||||
// note: loop iterations ("bogomips") are not a reliable measure due
|
||||
// to differing IPC and compiler optimizations.
|
||||
if(!x86_x64_cap(X86_X64_CAP_TSC))
|
||||
if(!Cap(x86_x64::CAP_TSC))
|
||||
return -1.0; // impossible value
|
||||
|
||||
// increase priority to reduce interference while measuring.
|
||||
@ -465,7 +441,7 @@ double x86_x64_ClockFrequency()
|
||||
|
||||
// note: no need to "warm up" cpuid - it will already have been
|
||||
// called several times by the time this code is reached.
|
||||
// (background: it's used in x86_x64_rdtsc() to serialize instruction flow;
|
||||
// (background: it's used in rdtsc() to serialize instruction flow;
|
||||
// the first call is documented to be slower on Intel CPUs)
|
||||
|
||||
size_t numSamples = 16;
|
||||
@ -488,27 +464,27 @@ double x86_x64_ClockFrequency()
|
||||
do
|
||||
{
|
||||
// note: timer_Time effectively has a long delay (up to 5 us)
|
||||
// before returning the time. we call it before x86_x64_rdtsc to
|
||||
// before returning the time. we call it before rdtsc to
|
||||
// minimize the delay between actually sampling time / TSC,
|
||||
// thus decreasing the chance for interference.
|
||||
// (if unavoidable background activity, e.g. interrupts,
|
||||
// delays the second reading, inaccuracy is introduced).
|
||||
t1 = timer_Time();
|
||||
c1 = x86_x64_rdtsc();
|
||||
c1 = rdtsc();
|
||||
}
|
||||
while(t1 == t0);
|
||||
// .. wait until start of next tick and at least 1 ms elapsed.
|
||||
do
|
||||
{
|
||||
const double t2 = timer_Time();
|
||||
const u64 c2 = x86_x64_rdtsc();
|
||||
const u64 c2 = rdtsc();
|
||||
dc = (i64)(c2 - c1);
|
||||
dt = t2 - t1;
|
||||
}
|
||||
while(dt < 1e-3);
|
||||
|
||||
// .. freq = (delta_clocks) / (delta_seconds);
|
||||
// x86_x64_rdtsc/timer overhead is negligible.
|
||||
// rdtsc/timer overhead is negligible.
|
||||
const double freq = dc / dt;
|
||||
samples[i] = freq;
|
||||
}
|
||||
@ -526,3 +502,11 @@ double x86_x64_ClockFrequency()
|
||||
const double clockFrequency = sum / (hi-lo);
|
||||
return clockFrequency;
|
||||
}
|
||||
|
||||
} // namespace x86_x64
|
||||
|
||||
|
||||
const char* cpu_IdentifierString()
|
||||
{
|
||||
return x86_x64::IdentifierString();
|
||||
}
|
||||
|
@ -37,11 +37,13 @@
|
||||
#include <intrin.h> // __rdtsc
|
||||
#endif
|
||||
|
||||
namespace x86_x64 {
|
||||
|
||||
/**
|
||||
* registers used/returned by x86_x64_cpuid
|
||||
* registers used/returned by cpuid
|
||||
**/
|
||||
#pragma pack(push, 1) // (allows casting to int*)
|
||||
struct x86_x64_CpuidRegs
|
||||
struct CpuidRegs
|
||||
{
|
||||
u32 eax;
|
||||
u32 ebx;
|
||||
@ -60,90 +62,93 @@ struct x86_x64_CpuidRegs
|
||||
* and allows graceful expansion to functions that require further inputs.
|
||||
* @return true on success or false if the sub-function isn't supported.
|
||||
**/
|
||||
LIB_API bool x86_x64_cpuid(x86_x64_CpuidRegs* regs);
|
||||
LIB_API bool cpuid(CpuidRegs* regs);
|
||||
|
||||
/**
|
||||
* CPU vendor.
|
||||
* (this is exposed because some CPUID functions are vendor-specific.)
|
||||
* (an enum is easier to compare than the original string values.)
|
||||
**/
|
||||
enum x86_x64_Vendors
|
||||
enum Vendors
|
||||
{
|
||||
X86_X64_VENDOR_UNKNOWN,
|
||||
X86_X64_VENDOR_INTEL,
|
||||
X86_X64_VENDOR_AMD
|
||||
VENDOR_UNKNOWN,
|
||||
VENDOR_INTEL,
|
||||
VENDOR_AMD
|
||||
};
|
||||
|
||||
LIB_API x86_x64_Vendors x86_x64_Vendor();
|
||||
LIB_API Vendors Vendor();
|
||||
|
||||
|
||||
LIB_API size_t x86_x64_Model();
|
||||
/**
 * named CPU model numbers, intended for comparison against Model().
 * (the values exceed 4 bits, i.e. they already include the extended-model
 * bits that Model() folds into its result.)
 * NOTE(review): these appear to be Intel family 6 parts - confirm before
 * comparing against models reported by other vendors/families.
 **/
enum Models
{
	MODEL_NEHALEM_EP     = 0x1A,	// Bloomfield (X35xx), Gainestown (X55xx)
	MODEL_NEHALEM_EP_2   = 0x1E,	// Clarksfield, Lynnfield (X34xx), Jasper Forest (C35xx, C55xx)
	MODEL_I7_I5          = 0x1F,	// similar to 1E; mentioned in 253665-041US, no codename known
	MODEL_CLARKDALE      = 0x25,	// Arrandale, Clarkdale (L34xx)
	MODEL_WESTMERE_EP    = 0x2C,	// Gulftown (X36xx, X56xx)
	MODEL_NEHALEM_EX     = 0x2E,	// Beckton (X75xx)
	MODEL_WESTMERE_EX    = 0x2F,	// Gulftown uarch, Beckton package (E7-48xx)
	MODEL_SANDY_BRIDGE   = 0x2A,	// (E3-12xx, E5-26xx)
	MODEL_SANDY_BRIDGE_2 = 0x2D	// (E5-26xx, E5-46xx)
	// (no trailing comma: matches the other enums in this header and
	// remains valid for pre-C++11 compilers)
};
|
||||
|
||||
LIB_API size_t x86_x64_Family();
|
||||
LIB_API size_t Model();
|
||||
|
||||
LIB_API size_t Family();
|
||||
|
||||
|
||||
/**
|
||||
* @return the colloquial processor generation
|
||||
* (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron)
|
||||
**/
|
||||
LIB_API size_t x86_x64_Generation();
|
||||
LIB_API size_t Generation();
|
||||
|
||||
|
||||
/**
|
||||
* bit indices of CPU capability flags (128 bits).
|
||||
* values are defined by IA-32 CPUID feature flags - do not change!
|
||||
**/
|
||||
enum x86_x64_Cap
|
||||
enum Caps
|
||||
{
|
||||
// standard (ecx) - currently only defined by Intel
|
||||
X86_X64_CAP_SSE3 = 0+0, // Streaming SIMD Extensions 3
|
||||
X86_X64_CAP_EST = 0+7, // Enhanced Speedstep Technology
|
||||
X86_X64_CAP_SSSE3 = 0+9, // Supplemental Streaming SIMD Extensions 3
|
||||
X86_X64_CAP_SSE41 = 0+19, // Streaming SIMD Extensions 4.1
|
||||
X86_X64_CAP_SSE42 = 0+20, // Streaming SIMD Extensions 4.2
|
||||
CAP_SSE3 = 0+0, // Streaming SIMD Extensions 3
|
||||
CAP_EST = 0+7, // Enhanced Speedstep Technology
|
||||
CAP_SSSE3 = 0+9, // Supplemental Streaming SIMD Extensions 3
|
||||
CAP_SSE41 = 0+19, // Streaming SIMD Extensions 4.1
|
||||
CAP_SSE42 = 0+20, // Streaming SIMD Extensions 4.2
|
||||
|
||||
// standard (edx)
|
||||
X86_X64_CAP_FPU = 32+0, // Floating Point Unit
|
||||
X86_X64_CAP_TSC = 32+4, // TimeStamp Counter
|
||||
X86_X64_CAP_MSR = 32+5, // Model Specific Registers
|
||||
X86_X64_CAP_CMOV = 32+15, // Conditional MOVe
|
||||
X86_X64_CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock
|
||||
X86_X64_CAP_MMX = 32+23, // MultiMedia eXtensions
|
||||
X86_X64_CAP_SSE = 32+25, // Streaming SIMD Extensions
|
||||
X86_X64_CAP_SSE2 = 32+26, // Streaming SIMD Extensions 2
|
||||
X86_X64_CAP_HT = 32+28, // HyperThreading
|
||||
CAP_FPU = 32+0, // Floating Point Unit
|
||||
CAP_TSC = 32+4, // TimeStamp Counter
|
||||
CAP_MSR = 32+5, // Model Specific Registers
|
||||
CAP_CMOV = 32+15, // Conditional MOVe
|
||||
CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock
|
||||
CAP_MMX = 32+23, // MultiMedia eXtensions
|
||||
CAP_SSE = 32+25, // Streaming SIMD Extensions
|
||||
CAP_SSE2 = 32+26, // Streaming SIMD Extensions 2
|
||||
CAP_HT = 32+28, // HyperThreading
|
||||
|
||||
// extended (ecx)
|
||||
X86_X64_CAP_AMD_CMP_LEGACY = 64+1, // N-core and X86_X64_CAP_HT is falsely set
|
||||
CAP_AMD_CMP_LEGACY = 64+1, // N-core and CAP_HT is falsely set
|
||||
|
||||
// extended (edx)
|
||||
X86_X64_CAP_AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64
|
||||
X86_X64_CAP_AMD_MMX_EXT = 96+22,
|
||||
X86_X64_CAP_AMD_3DNOW_PRO = 96+30,
|
||||
X86_X64_CAP_AMD_3DNOW = 96+31
|
||||
CAP_AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64
|
||||
CAP_AMD_MMX_EXT = 96+22,
|
||||
CAP_AMD_3DNOW_PRO = 96+30,
|
||||
CAP_AMD_3DNOW = 96+31
|
||||
};
|
||||
|
||||
/**
|
||||
* @return whether the CPU supports the indicated x86_x64_Cap / feature flag.
|
||||
* @return whether the CPU supports the indicated Cap / feature flag.
|
||||
**/
|
||||
LIB_API bool x86_x64_cap(x86_x64_Cap cap);
|
||||
LIB_API bool Cap(Caps cap);
|
||||
|
||||
LIB_API void x86_x64_caps(u32* d0, u32* d1, u32* d2, u32* d3);
|
||||
LIB_API void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3);
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// stateless
|
||||
|
||||
/**
|
||||
* @return APIC ID of the currently executing processor or zero if the
|
||||
* platform does not have an xAPIC (i.e. 7th generation x86 or below).
|
||||
*
|
||||
* rationale: the alternative of accessing the APIC mmio registers is not
|
||||
* feasible - mahaf_MapPhysicalMemory only works reliably on WinXP. we also
|
||||
* don't want to interfere with the OS's constant use of the APIC registers.
|
||||
**/
|
||||
LIB_API u8 x86_x64_ApicId();
|
||||
|
||||
/**
|
||||
* @return the current value of the TimeStampCounter (a counter of
|
||||
* CPU cycles since power-on, which is useful for high-resolution timing
|
||||
@ -154,22 +159,24 @@ LIB_API u8 x86_x64_ApicId();
|
||||
* - x64 RDTSC writes to edx:eax and clears the upper halves of rdx and rax.
|
||||
**/
|
||||
#if MSC_VERSION
|
||||
#define x86_x64_rdtsc __rdtsc
|
||||
static inline u64 rdtsc() { return __rdtsc(); }
|
||||
#else
|
||||
LIB_API u64 x86_x64_rdtsc();
|
||||
LIB_API u64 rdtsc();
|
||||
#endif
|
||||
|
||||
/**
|
||||
* trigger a breakpoint inside this function when it is called.
|
||||
**/
|
||||
LIB_API void x86_x64_DebugBreak();
|
||||
LIB_API void DebugBreak();
|
||||
|
||||
/**
|
||||
* measure the CPU clock frequency via x86_x64_rdtsc and timer_Time.
|
||||
* measure the CPU clock frequency via rdtsc and timer_Time.
|
||||
* (it follows that this must not be called from WHRT init.)
|
||||
* this takes several milliseconds (i.e. much longer than
|
||||
* os_cpu_ClockFrequency) but delivers accurate measurements.
|
||||
**/
|
||||
LIB_API double x86_x64_ClockFrequency();
|
||||
LIB_API double ClockFrequency();
|
||||
|
||||
} // namespace x86_x64
|
||||
|
||||
#endif // #ifndef INCLUDED_X86_X64
|
||||
|
@ -100,7 +100,7 @@
|
||||
|
||||
|
||||
// Streaming SIMD Extensions (not supported by all GCC)
|
||||
// this only ascertains compiler support; use x86_x64_cap to
|
||||
// this only ascertains compiler support; use x86_x64::Cap to
|
||||
// check whether the instructions are supported by the CPU.
|
||||
#ifndef HAVE_SSE
|
||||
# if GCC_VERSION && defined(__SSE__)
|
||||
|
@ -85,11 +85,6 @@ inline bool cpu_CAS(volatile T* location, T expected, T new_value)
|
||||
|
||||
LIB_API void cpu_Test();
|
||||
|
||||
/**
|
||||
* enforce strict instruction ordering in the CPU pipeline.
|
||||
**/
|
||||
LIB_API void cpu_Serialize();
|
||||
|
||||
/**
|
||||
* pause in spin-wait loops, as a performance optimisation.
|
||||
**/
|
||||
|
87
source/lib/sysdep/os/win/aken/aken_install.bat
Normal file
87
source/lib/sysdep/os/win/aken/aken_install.bat
Normal file
@ -0,0 +1,87 @@
|
||||
@ECHO OFF
REM Installs, removes or (un)prepares the Aken kernel-mode driver service.
REM Usage: aken_install.bat enabletest ^| disabletest ^| install [dir] ^| remove
REM (run without arguments to print the full instructions)

REM Probe for administrator rights: cacls on the SYSTEM hive fails for
REM non-elevated callers, in which case we re-launch ourselves elevated.
"%systemroot%\system32\cacls.exe" "%systemroot%\system32\config\system" >nul 2>&1
IF ERRORLEVEL 1 GOTO relaunch

REM detect whether OS is 32/64 bit.
REM PROCESSOR_ARCHITECTURE describes the current process; when a 32-bit
REM cmd.exe runs on a 64-bit OS, PROCESSOR_ARCHITEW6432 holds the native
REM architecture instead. Checking both yields the OS bitness regardless of
REM which cmd.exe is executing this script.
SET aken_bits=32
IF /I "%PROCESSOR_ARCHITECTURE%" == "AMD64" SET aken_bits=64
IF /I "%PROCESSOR_ARCHITEW6432%" == "AMD64" SET aken_bits=64

IF "%1" == "enabletest" GOTO enabletest
IF "%1" == "disabletest" GOTO disabletest
IF "%1" == "install" GOTO install
IF "%1" == "remove" GOTO remove
GOTO usage

:enabletest
bcdedit.exe /set TESTSIGNING ON
GOTO end

:disabletest
bcdedit.exe /set TESTSIGNING OFF
GOTO end

:install
REM Default to the directory of this batch file. %~dp0 expands to drive and
REM path (with trailing backslash), so the lookup also works when the
REM current drive differs from the drive this script resides on.
IF (%2) == () (
	SET aken_path="%~dp0aken%aken_bits%.sys"
) ELSE (
	SET aken_path=%2\aken%aken_bits%.sys
)
echo %aken_path%
IF NOT EXIST %aken_path% GOTO notfound
sc create Aken DisplayName= Aken type= kernel start= auto binpath= %aken_path%
REM error= normal is default
IF ERRORLEVEL 1 GOTO failed
sc start Aken
IF ERRORLEVEL 1 GOTO failed
ECHO Success!
GOTO end

:remove
REM The result of "sc stop" is deliberately not checked; only the deletion
REM decides whether removal succeeded.
sc stop Aken
sc delete Aken
IF ERRORLEVEL 1 GOTO failed
ECHO Success! (The previous line should read: [SC] DeleteService SUCCESS)
GOTO end

:usage
ECHO To install the driver, please first enable test mode:
ECHO %0 enabletest
ECHO (This is necessary because Vista/Win7 x64 require signing with
ECHO a Microsoft "cross certificate". The Fraunhofer code signing certificate
ECHO is not enough, even though its chain of trust is impeccable.
ECHO Going the WHQL route, perhaps as an "unclassified" driver, might work.
ECHO see http://www.freeotfe.org/docs/Main/impact_of_kernel_driver_signing.htm )
ECHO Then reboot (!) and install the driver:
ECHO %0 install ["path_to_directory_containing_aken*.sys"]
ECHO (If no path is given, we will use the directory of this batch file)
ECHO To remove the driver and disable test mode, execute the following:
ECHO %0 remove
ECHO %0 disabletest
PAUSE
GOTO end

:relaunch
REM Write a two-line VBScript that re-runs this batch file (short 8.3 path,
REM same arguments) through ShellExecute with the "runas" verb, show it to
REM the user, run it and clean up afterwards.
SET aken_vbs="%temp%\aken_run.vbs"
ECHO Set UAC = CreateObject^("Shell.Application"^) > %aken_vbs%
ECHO UAC.ShellExecute "cmd.exe", "/k %~s0 %1 %2", "", "runas", 1 >> %aken_vbs%
ECHO "To re-run this batch file as admin, we have created %aken_vbs% with the following contents:"
type %aken_vbs%
PAUSE
cscript //Nologo %aken_vbs%
DEL %aken_vbs%
GOTO end

:notfound
ECHO Driver not found at specified path (%aken_path%)
GOTO end

:failed
ECHO Something went wrong -- see previous line
GOTO end

:end
|
@ -202,14 +202,13 @@ static SC_HANDLE OpenServiceControlManager(DWORD access)
|
||||
SC_HANDLE hSCM = OpenSCManagerW(machineName, databaseName, access);
|
||||
if(!hSCM)
|
||||
{
|
||||
// administrator privileges are required for SC_MANAGER_CREATE_SERVICE.
|
||||
// note: installing the service and having it start automatically would
|
||||
// allow Least-Permission accounts to use it (after relaxing the
|
||||
// service's DACL).
|
||||
|
||||
// ensure no other problems arose
|
||||
ENSURE(GetLastError() == ERROR_ACCESS_DENIED);
|
||||
|
||||
// administrator privileges are required for SC_MANAGER_CREATE_SERVICE.
|
||||
// this is a problem on Vista / Win7, so users will have to use the
|
||||
// separate aken_install.bat
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -258,7 +257,7 @@ static void StartDriver(const OsPath& driverPathname)
|
||||
return;
|
||||
}
|
||||
|
||||
SC_HANDLE hService = OpenServiceW(hSCM, AKEN_NAME, GENERIC_READ);
|
||||
SC_HANDLE hService = OpenServiceW(hSCM, AKEN_NAME, SERVICE_START);
|
||||
|
||||
// during development, we want to ensure the newest build is used, so
|
||||
// unload and re-create the service if it's running/installed.
|
||||
@ -281,22 +280,25 @@ static void StartDriver(const OsPath& driverPathname)
|
||||
LPCWSTR startName = 0; // LocalSystem
|
||||
// NB: Windows 7 seems to insist upon backslashes (i.e. external_file_string)
|
||||
hService = CreateServiceW(hSCM, AKEN_NAME, AKEN_NAME,
|
||||
SERVICE_START, SERVICE_KERNEL_DRIVER, SERVICE_DEMAND_START, SERVICE_ERROR_NORMAL,
|
||||
SERVICE_START, SERVICE_KERNEL_DRIVER, SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
|
||||
OsString(driverPathname).c_str(), 0, 0, 0, startName, 0);
|
||||
ENSURE(hService != 0);
|
||||
}
|
||||
|
||||
// start service
|
||||
{
|
||||
DWORD numArgs = 0;
|
||||
BOOL ok = StartService(hService, numArgs, 0);
|
||||
if(!ok)
|
||||
{
|
||||
if(GetLastError() != ERROR_SERVICE_ALREADY_RUNNING)
|
||||
switch(GetLastError())
|
||||
{
|
||||
// starting failed. don't raise a warning because this
|
||||
// always happens on least-permission user accounts.
|
||||
//DEBUG_WARN_ERR(ERR::LOGIC);
|
||||
case ERROR_SERVICE_ALREADY_RUNNING:
|
||||
break; // ok, no action needed
|
||||
case ERROR_ACCESS_DENIED:
|
||||
break; // Win7, can't start service; must use aken_install.bat
|
||||
default: // unexpected problem
|
||||
DEBUG_WARN_ERR(ERR::LOGIC);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -34,17 +34,17 @@ public:
|
||||
void test_rdtsc()
|
||||
{
|
||||
// must increase monotonically
|
||||
const u64 c1 = x86_x64_rdtsc();
|
||||
const u64 c2 = x86_x64_rdtsc();
|
||||
const u64 c3 = x86_x64_rdtsc();
|
||||
const u64 c1 = x86_x64::rdtsc();
|
||||
const u64 c2 = x86_x64::rdtsc();
|
||||
const u64 c3 = x86_x64::rdtsc();
|
||||
TS_ASSERT(c1 < c2 && c2 < c3);
|
||||
}
|
||||
|
||||
void test_ia32_cap()
|
||||
{
|
||||
// make sure the really common/basic caps end up reported as true
|
||||
TS_ASSERT(x86_x64_cap(X86_X64_CAP_FPU));
|
||||
TS_ASSERT(x86_x64_cap(X86_X64_CAP_TSC));
|
||||
TS_ASSERT(x86_x64_cap(X86_X64_CAP_MMX));
|
||||
TS_ASSERT(x86_x64::Cap(x86_x64::CAP_FPU));
|
||||
TS_ASSERT(x86_x64::Cap(x86_x64::CAP_TSC));
|
||||
TS_ASSERT(x86_x64::Cap(x86_x64::CAP_MMX));
|
||||
}
|
||||
};
|
||||
|
@ -36,7 +36,7 @@
|
||||
#include "lib/sysdep/os/win/wutil.h"
|
||||
|
||||
#if ARCH_X86_X64
|
||||
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
|
||||
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
|
||||
# include "lib/sysdep/arch/x86_x64/topology.h"
|
||||
# include "lib/sysdep/arch/x86_x64/msr.h"
|
||||
#endif
|
||||
@ -46,9 +46,9 @@
|
||||
|
||||
static bool IsUniprocessor()
|
||||
{
|
||||
if(cpu_topology_NumPackages() != 1)
|
||||
if(topology::NumPackages() != 1)
|
||||
return false;
|
||||
if(cpu_topology_CoresPerPackage() != 1)
|
||||
if(topology::CoresPerPackage() != 1)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -57,11 +57,11 @@ static bool IsUniprocessor()
|
||||
static bool IsInvariantTSC()
|
||||
{
|
||||
#if ARCH_X86_X64
|
||||
// (we no longer need to check x86_x64_Vendor - Intel and AMD
|
||||
// (we no longer need to check x86_x64::Vendor - Intel and AMD
|
||||
// agreed on the definition of this feature check)
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
regs.eax = 0x80000007;
|
||||
if(x86_x64_cpuid(®s))
|
||||
if(x86_x64::cpuid(®s))
|
||||
{
|
||||
// TSC is invariant across P-state, C-state, turbo, and
|
||||
// stop grant transitions (e.g. STPCLK)
|
||||
@ -77,17 +77,17 @@ static bool IsInvariantTSC()
|
||||
static bool IsThrottlingPossible()
|
||||
{
|
||||
#if ARCH_X86_X64
|
||||
x86_x64_CpuidRegs regs = { 0 };
|
||||
switch(x86_x64_Vendor())
|
||||
x86_x64::CpuidRegs regs = { 0 };
|
||||
switch(x86_x64::Vendor())
|
||||
{
|
||||
case X86_X64_VENDOR_INTEL:
|
||||
if(x86_x64_cap(X86_X64_CAP_TM_SCC) || x86_x64_cap(X86_X64_CAP_EST))
|
||||
case x86_x64::VENDOR_INTEL:
|
||||
if(x86_x64::Cap(x86_x64::CAP_TM_SCC) || x86_x64::Cap(x86_x64::CAP_EST))
|
||||
return true;
|
||||
break;
|
||||
|
||||
case X86_X64_VENDOR_AMD:
|
||||
case x86_x64::VENDOR_AMD:
|
||||
regs.eax = 0x80000007;
|
||||
if(x86_x64_cpuid(®s))
|
||||
if(x86_x64::cpuid(®s))
|
||||
{
|
||||
enum AmdPowerNowFlags
|
||||
{
|
||||
@ -109,6 +109,18 @@ static bool IsThrottlingPossible()
|
||||
}
|
||||
|
||||
|
||||
static bool IsSandyBridge()
|
||||
{
|
||||
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
|
||||
return false;
|
||||
if(x86_x64::Model() == x86_x64::MODEL_SANDY_BRIDGE)
|
||||
return true;
|
||||
if(x86_x64::Model() == x86_x64::MODEL_SANDY_BRIDGE_2)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
class CounterTSC : public ICounter
|
||||
@ -122,7 +134,7 @@ public:
|
||||
Status Activate()
|
||||
{
|
||||
#if ARCH_X86_X64
|
||||
if(!x86_x64_cap(X86_X64_CAP_TSC))
|
||||
if(!x86_x64::Cap(x86_x64::CAP_TSC))
|
||||
return ERR::NOT_SUPPORTED; // NOWARN (CPU doesn't support RDTSC)
|
||||
#endif
|
||||
|
||||
@ -177,7 +189,7 @@ public:
|
||||
|
||||
#if ARCH_X86_X64
|
||||
// recent CPU:
|
||||
//if(x86_x64_Generation() >= 7)
|
||||
//if(x86_x64::Generation() >= 7)
|
||||
{
|
||||
// note: 8th generation CPUs support C1-clock ramping, which causes
|
||||
// drift on multi-core systems, but those were excluded above.
|
||||
@ -204,7 +216,7 @@ public:
|
||||
|
||||
u64 Counter() const
|
||||
{
|
||||
return x86_x64_rdtsc();
|
||||
return x86_x64::rdtsc();
|
||||
}
|
||||
|
||||
size_t CounterBits() const
|
||||
@ -214,7 +226,7 @@ public:
|
||||
|
||||
double NominalFrequency() const
|
||||
{
|
||||
// WARNING: do not call x86_x64_ClockFrequency because it uses the
|
||||
// WARNING: do not call x86_x64::ClockFrequency because it uses the
|
||||
// HRT, which we're currently in the process of initializing.
|
||||
// instead query CPU clock frequency via OS.
|
||||
//
|
||||
@ -224,9 +236,10 @@ public:
|
||||
#if ARCH_X86_X64
|
||||
if(MSR::IsAccessible() && MSR::HasPlatformInfo())
|
||||
{
|
||||
const i64 busFrequency = IsSandyBridge()? 100000000 : 133333333;
|
||||
const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
|
||||
const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
|
||||
return maxNonTurboRatio * 133.33e6f;
|
||||
return double(maxNonTurboRatio) * busFrequency;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
@ -37,7 +37,7 @@
|
||||
#include "lib/module_init.h"
|
||||
#include "lib/sysdep/cpu.h" // cpu_AtomicAdd
|
||||
#include "lib/sysdep/numa.h"
|
||||
#include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_ApicId
|
||||
#include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::ApicId
|
||||
#include "lib/sysdep/arch/x86_x64/apic.h" // ProcessorFromApicId
|
||||
#include "lib/sysdep/os/win/wversion.h"
|
||||
#include "lib/sysdep/os/win/winit.h"
|
||||
@ -53,7 +53,7 @@ static WUTIL_FUNC(pVirtualAllocExNuma, LPVOID, (HANDLE, LPVOID, SIZE_T, DWORD, D
|
||||
|
||||
static DWORD WINAPI EmulateGetCurrentProcessorNumber(VOID)
|
||||
{
|
||||
const u8 apicId = x86_x64_ApicId();
|
||||
const ApicId apicId = GetApicId();
|
||||
const DWORD processor = (DWORD)ProcessorFromApicId(apicId);
|
||||
ASSERT(processor < os_cpu_MaxProcessors);
|
||||
return processor;
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include "lib/sysdep/cpu.h" // cpu_AtomicAdd
|
||||
#if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
|
||||
# include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
|
||||
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
|
||||
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
|
||||
#endif
|
||||
|
||||
|
||||
@ -142,7 +142,7 @@ private:
|
||||
|
||||
// since TIMER_ACCRUE et al. are called so often, we try to keep
|
||||
// overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
|
||||
// returned by x86_x64_rdtsc) instead of absolute time has two benefits:
|
||||
// returned by x86_x64::rdtsc) instead of absolute time has two benefits:
|
||||
// - no need to convert from raw->time on every call
|
||||
// (instead, it's only done once when displaying the totals)
|
||||
// - possibly less overhead to querying the time itself
|
||||
@ -170,7 +170,7 @@ public:
|
||||
|
||||
void SetFromTimer()
|
||||
{
|
||||
m_cycles = x86_x64_rdtsc();
|
||||
m_cycles = x86_x64::rdtsc();
|
||||
}
|
||||
|
||||
void AddDifference(TimerUnit t0, TimerUnit t1)
|
||||
|
@ -45,14 +45,14 @@
|
||||
static void ReportGLLimits(ScriptInterface& scriptInterface, CScriptValRooted settings);
|
||||
|
||||
#if ARCH_X86_X64
|
||||
CScriptVal ConvertCaches(ScriptInterface& scriptInterface, IdxCache idxCache)
|
||||
CScriptVal ConvertCaches(ScriptInterface& scriptInterface, x86_x64::IdxCache idxCache)
|
||||
{
|
||||
CScriptVal ret;
|
||||
scriptInterface.Eval("[]", ret);
|
||||
for (size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; ++idxLevel)
|
||||
for (size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; ++idxLevel)
|
||||
{
|
||||
const x86_x64_Cache* pcache = x86_x64_Caches(idxCache+idxLevel);
|
||||
if (pcache->type == x86_x64_Cache::kNull || pcache->numEntries == 0)
|
||||
const x86_x64::Cache* pcache = x86_x64::Caches(idxCache+idxLevel);
|
||||
if (pcache->type == x86_x64::Cache::kNull || pcache->numEntries == 0)
|
||||
continue;
|
||||
CScriptVal cache;
|
||||
scriptInterface.Eval("({})", cache);
|
||||
@ -73,7 +73,7 @@ CScriptVal ConvertTLBs(ScriptInterface& scriptInterface)
|
||||
scriptInterface.Eval("[]", ret);
|
||||
for(size_t i = 0; ; i++)
|
||||
{
|
||||
const x86_x64_Cache* ptlb = x86_x64_Caches(TLB+i);
|
||||
const x86_x64::Cache* ptlb = x86_x64::Caches(x86_x64::TLB+i);
|
||||
if (!ptlb)
|
||||
break;
|
||||
CScriptVal tlb;
|
||||
@ -229,10 +229,10 @@ void RunHardwareDetection()
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_largepagesize", (u32)os_cpu_LargePageSize());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_numprocs", (u32)os_cpu_NumProcessors());
|
||||
#if ARCH_X86_X64
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_numpackages", (u32)cpu_topology_NumPackages());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_coresperpackage", (u32)cpu_topology_CoresPerPackage());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_logicalpercore", (u32)cpu_topology_LogicalPerCore());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_numcaches", (u32)cache_topology_NumCaches());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_numpackages", (u32)topology::NumPackages());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_coresperpackage", (u32)topology::CoresPerPackage());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_logicalpercore", (u32)topology::LogicalPerCore());
|
||||
scriptInterface.SetProperty(settings.get(), "cpu_numcaches", (u32)topology::NumCaches());
|
||||
#endif
|
||||
|
||||
scriptInterface.SetProperty(settings.get(), "numa_numnodes", (u32)numa_NumNodes());
|
||||
@ -244,21 +244,21 @@ void RunHardwareDetection()
|
||||
scriptInterface.SetProperty(settings.get(), "ram_free", (u32)os_cpu_MemoryAvailable());
|
||||
|
||||
#if ARCH_X86_X64
|
||||
scriptInterface.SetProperty(settings.get(), "x86_frequency", x86_x64_ClockFrequency());
|
||||
scriptInterface.SetProperty(settings.get(), "x86_frequency", x86_x64::ClockFrequency());
|
||||
|
||||
scriptInterface.SetProperty(settings.get(), "x86_vendor", (u32)x86_x64_Vendor());
|
||||
scriptInterface.SetProperty(settings.get(), "x86_model", (u32)x86_x64_Model());
|
||||
scriptInterface.SetProperty(settings.get(), "x86_family", (u32)x86_x64_Family());
|
||||
scriptInterface.SetProperty(settings.get(), "x86_vendor", (u32)x86_x64::Vendor());
|
||||
scriptInterface.SetProperty(settings.get(), "x86_model", (u32)x86_x64::Model());
|
||||
scriptInterface.SetProperty(settings.get(), "x86_family", (u32)x86_x64::Family());
|
||||
|
||||
u32 caps0, caps1, caps2, caps3;
|
||||
x86_x64_caps(&caps0, &caps1, &caps2, &caps3);
|
||||
x86_x64::GetCapBits(&caps0, &caps1, &caps2, &caps3);
|
||||
scriptInterface.SetProperty(settings.get(), "x86_caps[0]", caps0);
|
||||
scriptInterface.SetProperty(settings.get(), "x86_caps[1]", caps1);
|
||||
scriptInterface.SetProperty(settings.get(), "x86_caps[2]", caps2);
|
||||
scriptInterface.SetProperty(settings.get(), "x86_caps[3]", caps3);
|
||||
|
||||
scriptInterface.SetProperty(settings.get(), "x86_icaches", ConvertCaches(scriptInterface, L1I));
|
||||
scriptInterface.SetProperty(settings.get(), "x86_dcaches", ConvertCaches(scriptInterface, L1D));
|
||||
scriptInterface.SetProperty(settings.get(), "x86_icaches", ConvertCaches(scriptInterface, x86_x64::L1I));
|
||||
scriptInterface.SetProperty(settings.get(), "x86_dcaches", ConvertCaches(scriptInterface, x86_x64::L1D));
|
||||
scriptInterface.SetProperty(settings.get(), "x86_tlbs", ConvertTLBs(scriptInterface));
|
||||
#endif
|
||||
|
||||
|
@ -99,10 +99,10 @@ void WriteSystemInfo()
|
||||
fprintf(f, "OS : %s %s (%s)\n", un.sysname, un.release, un.version);
|
||||
|
||||
// CPU
|
||||
fprintf(f, "CPU : %s, %s (%dx%dx%d)", un.machine, cpu_IdentifierString(), (int)cpu_topology_NumPackages(), (int)cpu_topology_CoresPerPackage(), (int)cpu_topology_LogicalPerCore());
|
||||
fprintf(f, "CPU : %s, %s (%dx%dx%d)", un.machine, cpu_IdentifierString(), (int)topology::NumPackages(), (int)topology::CoresPerPackage(), (int)topology::LogicalPerCore());
|
||||
double cpuClock = os_cpu_ClockFrequency(); // query OS (may fail)
|
||||
if(cpuClock <= 0.0)
|
||||
cpuClock = x86_x64_ClockFrequency(); // measure (takes a few ms)
|
||||
cpuClock = x86_x64::ClockFrequency(); // measure (takes a few ms)
|
||||
if(cpuClock > 0.0)
|
||||
{
|
||||
if(cpuClock < 1e9)
|
||||
|
@ -53,7 +53,7 @@ static bool g_EnableSSE = false;
|
||||
void ModelRenderer::Init()
|
||||
{
|
||||
#if ARCH_X86_X64
|
||||
if (x86_x64_cap(X86_X64_CAP_SSE))
|
||||
if (x86_x64::Cap(x86_x64::CAP_SSE))
|
||||
g_EnableSSE = true;
|
||||
#endif
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user