sync with work:

x86_x64: update MSR definitions to include model 2F (Westmere EX);
update bus clock computation for Sandy Bridge; remove unused
cpu_Serialize; use namespace, add model names
mahaf: cope with stricter security for services/drivers
ApicId -> GetApicId, move to apic module
aken: add batch file to install driver (optional)

This was SVN commit r10815.
janwas 2011-12-27 14:12:31 +00:00
parent 9911f561d3
commit 3537ae31d5
23 changed files with 448 additions and 302 deletions

View File

@ -83,7 +83,7 @@ void ColorActivateFastImpl()
{
}
#if ARCH_X86_X64
else if (x86_x64_cap(X86_X64_CAP_SSE))
else if (x86_x64::Cap(x86_x64::CAP_SSE))
{
ConvertRGBColorTo4ub = sse_ConvertRGBColorTo4ub;
}

View File

@ -107,7 +107,7 @@ public:
// allocate uninitialized storage
pointer allocate(size_type numElements)
{
const size_type alignment = x86_x64_Caches(L1D)->entrySize;
const size_type alignment = x86_x64::Caches(L1D)->entrySize;
const size_type elementSize = round_up(sizeof(T), alignment);
const size_type size = numElements * elementSize;
pointer p = (pointer)rtl_AllocateAligned(size, alignment);

View File

@ -23,10 +23,25 @@
#include "precompiled.h"
#include "lib/sysdep/arch/x86_x64/apic.h"
#include "lib/bits.h"
#include "lib/module_init.h"
#include "lib/sysdep/cpu.h" // ERR::CPU_FEATURE_MISSING
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_ApicId
#include "lib/sysdep/arch/x86_x64/x86_x64.h"
ApicId GetApicId()
{
x86_x64::CpuidRegs regs = { 0 };
regs.eax = 1;
// note: CPUID function 1 is always supported, but only processors with
// an xAPIC (e.g. P4/Athlon XP) will return a nonzero ID.
bool ok = x86_x64::cpuid(&regs);
ASSERT(ok); UNUSED2(ok);
const u8 apicId = (u8)bits(regs.ebx, 24, 31);
return apicId;
}
static size_t numIds;
static ApicId processorApicIds[os_cpu_MaxProcessors];
@ -39,7 +54,7 @@ static Status GetAndValidateApicIds()
{
static void Callback(size_t processor, uintptr_t UNUSED(data))
{
processorApicIds[processor] = x86_x64_ApicId();
processorApicIds[processor] = GetApicId();
}
};
// (can fail due to restrictions on our process affinity or lack of

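For reference, a minimal standalone sketch of the same leaf-1 extraction without the project's cpuid wrapper; __get_cpuid is the GCC/Clang intrinsic from <cpuid.h>, an assumption not used by this commit:

#include <cpuid.h>   // __get_cpuid (GCC/Clang only)
#include <stdint.h>

static uint8_t InitialApicId()
{
	unsigned eax, ebx, ecx, edx;
	if(!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;	// leaf 1 unavailable (practically impossible on x86)
	return (uint8_t)(ebx >> 24);	// EBX bits 24..31 = initial APIC ID
}
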
View File

@ -25,6 +25,16 @@
typedef u8 ApicId; // not necessarily contiguous values
/**
* @return APIC ID of the currently executing processor or zero if the
* platform does not have an xAPIC (i.e. 7th generation x86 or below).
*
* rationale: the alternative of accessing the APIC mmio registers is not
* feasible - mahaf_MapPhysicalMemory only works reliably on WinXP. we also
* don't want to interfere with the OS's constant use of the APIC registers.
**/
LIB_API u8 GetApicId();
// if this returns false, apicId = contiguousId = processor.
// otherwise, there are unspecified but bijective mappings between
// apicId<->contiguousId and apicId<->processor.

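The bijective mapping mentioned above can be pictured as a binary search in a sorted table of per-processor APIC IDs. A hypothetical sketch, with function and table names invented for illustration (os_cpu_MaxProcessors is from lib/sysdep/os_cpu.h):

#include <algorithm>

static ApicId sortedApicIds[os_cpu_MaxProcessors];	// ascending, one per processor
static size_t numIds;

static size_t ContiguousIdFromApicIdSketch(ApicId apicId)
{
	const ApicId* end = sortedApicIds + numIds;
	const ApicId* pos = std::lower_bound(sortedApicIds, end, apicId);
	ENSURE(pos != end && *pos == apicId);
	return (size_t)(pos - sortedApicIds);	// contiguous in [0, numIds)
}
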
View File

@ -29,25 +29,27 @@
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/arch/x86_x64/x86_x64.h"
namespace x86_x64 {
static const size_t maxTLBs = 2*2*4; // (level0, level1) x (D,I) x (4K, 2M, 4M, 1G)
static size_t numTLBs = 0;
static const size_t numCaches = x86_x64_Cache::maxLevels * 2 + maxTLBs;
static x86_x64_Cache caches[numCaches];
static const size_t numCaches = x86_x64::Cache::maxLevels * 2 + maxTLBs;
static Cache caches[numCaches];
static void AddCache(const x86_x64_Cache& cache)
static void AddCache(const x86_x64::Cache& cache)
{
ENSURE(cache.Validate());
if(cache.type == x86_x64_Cache::kData || cache.type == x86_x64_Cache::kUnified)
if(cache.type == x86_x64::Cache::kData || cache.type == x86_x64::Cache::kUnified)
caches[L1D + cache.level-1] = cache;
if(cache.type == x86_x64_Cache::kInstruction || cache.type == x86_x64_Cache::kUnified)
if(cache.type == x86_x64::Cache::kInstruction || cache.type == x86_x64::Cache::kUnified)
caches[L1I + cache.level-1] = cache;
}
static void AddTLB(const x86_x64_Cache& tlb)
static void AddTLB(const x86_x64::Cache& tlb)
{
ENSURE(tlb.Validate());
ENSURE(tlb.level == 1 || tlb.level == 2); // see maxTLBs
@ -65,9 +67,9 @@ static void AddTLB(const x86_x64_Cache& tlb)
namespace AMD
{
static x86_x64_Cache L1Cache(u32 reg, x86_x64_Cache::Type type)
static x86_x64::Cache L1Cache(u32 reg, x86_x64::Cache::Type type)
{
x86_x64_Cache cache;
x86_x64::Cache cache;
cache.Initialize(1, type);
const size_t lineSize = bits(reg, 0, 7);
@ -87,12 +89,12 @@ static x86_x64_Cache L1Cache(u32 reg, x86_x64_Cache::Type type)
static const size_t associativityTable[16] =
{
0, 1, 2, 0, 4, 0, 8, 0,
16, 0, 32, 48, 64, 96, 128, x86_x64_Cache::fullyAssociative
16, 0, 32, 48, 64, 96, 128, x86_x64::Cache::fullyAssociative
};
static x86_x64_Cache L2Cache(u32 reg, x86_x64_Cache::Type type)
static x86_x64::Cache L2Cache(u32 reg, x86_x64::Cache::Type type)
{
x86_x64_Cache cache;
x86_x64::Cache cache;
cache.Initialize(2, type);
const size_t lineSize = bits(reg, 0, 7);
@ -109,9 +111,9 @@ static x86_x64_Cache L2Cache(u32 reg, x86_x64_Cache::Type type)
}
// (same as L2 except for the size)
static x86_x64_Cache L3Cache(u32 reg, x86_x64_Cache::Type type)
static x86_x64::Cache L3Cache(u32 reg, x86_x64::Cache::Type type)
{
x86_x64_Cache cache;
x86_x64::Cache cache;
cache.Initialize(3, type);
const size_t lineSize = bits(reg, 0, 7);
@ -128,9 +130,9 @@ static x86_x64_Cache L3Cache(u32 reg, x86_x64_Cache::Type type)
return cache;
}
static x86_x64_Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Cache::Type type)
static x86_x64::Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
{
x86_x64_Cache cache;
x86_x64::Cache cache;
cache.Initialize(1, type);
const size_t numEntries = bits(reg, bitOffset+0, bitOffset+ 7);
@ -145,9 +147,9 @@ static x86_x64_Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Ca
return cache;
}
static x86_x64_Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Cache::Type type)
static x86_x64::Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type)
{
x86_x64_Cache cache;
x86_x64::Cache cache;
cache.Initialize(2, type);
const size_t numEntries = bits(reg, bitOffset+ 0, bitOffset+11);
@ -164,11 +166,11 @@ static x86_x64_Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Ca
static void AddTLB2Pair(u32 reg, size_t pageSize)
{
x86_x64_Cache::Type type = x86_x64_Cache::kUnified;
x86_x64::Cache::Type type = x86_x64::Cache::kUnified;
if(bits(reg, 16, 31) != 0) // not unified
{
AddTLB(TLB2(reg, 16, pageSize, x86_x64_Cache::kData));
type = x86_x64_Cache::kInstruction;
AddTLB(TLB2(reg, 16, pageSize, x86_x64::Cache::kData));
type = x86_x64::Cache::kInstruction;
}
AddTLB(TLB2(reg, 0, pageSize, type));
}
@ -177,25 +179,25 @@ static void AddTLB2Pair(u32 reg, size_t pageSize)
// "reserved". cache characteristics are returned via ext. functions.
static void DetectCacheAndTLB()
{
x86_x64_CpuidRegs regs = { 0 };
x86_x64::CpuidRegs regs = { 0 };
regs.eax = 0x80000005;
if(x86_x64_cpuid(&regs))
if(x86_x64::cpuid(&regs))
{
AddCache(L1Cache(regs.ecx, x86_x64_Cache::kData));
AddCache(L1Cache(regs.edx, x86_x64_Cache::kInstruction));
AddCache(L1Cache(regs.ecx, x86_x64::Cache::kData));
AddCache(L1Cache(regs.edx, x86_x64::Cache::kInstruction));
AddTLB(TLB1(regs.eax, 0, 2*MiB, x86_x64_Cache::kInstruction));
AddTLB(TLB1(regs.eax, 16, 2*MiB, x86_x64_Cache::kData));
AddTLB(TLB1(regs.ebx, 0, 4*KiB, x86_x64_Cache::kInstruction));
AddTLB(TLB1(regs.ebx, 16, 4*KiB, x86_x64_Cache::kData));
AddTLB(TLB1(regs.eax, 0, 2*MiB, x86_x64::Cache::kInstruction));
AddTLB(TLB1(regs.eax, 16, 2*MiB, x86_x64::Cache::kData));
AddTLB(TLB1(regs.ebx, 0, 4*KiB, x86_x64::Cache::kInstruction));
AddTLB(TLB1(regs.ebx, 16, 4*KiB, x86_x64::Cache::kData));
}
regs.eax = 0x80000006;
if(x86_x64_cpuid(&regs))
if(x86_x64::cpuid(&regs))
{
AddCache(L2Cache(regs.ecx, x86_x64_Cache::kUnified));
AddCache(L3Cache(regs.edx, x86_x64_Cache::kUnified));
AddCache(L2Cache(regs.ecx, x86_x64::Cache::kUnified));
AddCache(L3Cache(regs.edx, x86_x64::Cache::kUnified));
AddTLB2Pair(regs.eax, 2*MiB);
AddTLB2Pair(regs.ebx, 4*KiB);
@ -215,21 +217,21 @@ static bool DetectCache()
// note: level order is unspecified (see Intel AP-485)
for(u32 count = 0; ; count++)
{
x86_x64_CpuidRegs regs = { 0 };
x86_x64::CpuidRegs regs = { 0 };
regs.eax = 4;
regs.ecx = count;
if(!x86_x64_cpuid(&regs))
if(!x86_x64::cpuid(&regs))
return false;
const x86_x64_Cache::Type type = (x86_x64_Cache::Type)bits(regs.eax, 0, 4);
if(type == x86_x64_Cache::kNull) // no more remaining
const x86_x64::Cache::Type type = (x86_x64::Cache::Type)bits(regs.eax, 0, 4);
if(type == x86_x64::Cache::kNull) // no more remaining
break;
const size_t level = (size_t)bits(regs.eax, 5, 7);
const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1;
const size_t sets = (size_t)bits(regs.ecx, 0, 31)+1;
x86_x64_Cache cache;
x86_x64::Cache cache;
cache.Initialize(level, type);
cache.entrySize = (size_t)bits(regs.ebx, 0, 11)+1; // (yes, this also uses +1 encoding)
cache.associativity = (size_t)bits(regs.ebx, 22, 31)+1;
@ -274,9 +276,9 @@ static Descriptors GetDescriptors()
const uintptr_t firstProcessor = allProcessors & -intptr_t(allProcessors);
const uintptr_t prevAffinityMask = os_cpu_SetThreadAffinityMask(firstProcessor);
x86_x64_CpuidRegs regs = { 0 };
x86_x64::CpuidRegs regs = { 0 };
regs.eax = 2;
if(!x86_x64_cpuid(&regs))
if(!x86_x64::cpuid(&regs))
return Descriptors();
Descriptors descriptors;
@ -290,7 +292,7 @@ static Descriptors GetDescriptors()
if(--iterations == 0)
break;
regs.eax = 2;
const bool ok = x86_x64_cpuid(&regs);
const bool ok = x86_x64::cpuid(&regs);
ENSURE(ok);
}
@ -321,19 +323,19 @@ enum Flags
// (there are > 100 descriptors, so we squeeze all fields into 8 bytes.)
struct Characteristics // POD
{
x86_x64_Cache::Type Type() const
x86_x64::Cache::Type Type() const
{
switch(flags & U)
{
case D:
return x86_x64_Cache::kData;
return x86_x64::Cache::kData;
case I:
return x86_x64_Cache::kInstruction;
return x86_x64::Cache::kInstruction;
case U:
return x86_x64_Cache::kUnified;
return x86_x64::Cache::kUnified;
default:
DEBUG_WARN_ERR(ERR::LOGIC);
return x86_x64_Cache::kNull;
return x86_x64::Cache::kNull;
}
}
@ -365,7 +367,7 @@ struct Characteristics // POD
u32 flags; // level, type, largeSize
};
static const u8 F = x86_x64_Cache::fullyAssociative;
static const u8 F = x86_x64::Cache::fullyAssociative;
#define CACHE(descriptor, flags, totalSize, assoc, entrySize) { descriptor, assoc, -entrySize, flags | ((totalSize)/(entrySize)) }
#define TLB(descriptor, flags, entrySize, assoc, numEntries) { descriptor, assoc, numEntries, flags | (entrySize) }
@ -586,7 +588,7 @@ static void DetectCacheAndTLB(size_t& descriptorFlags)
if((descriptorFlags & SKIP_CACHE_DESCRIPTORS) && !characteristics->IsTLB())
continue;
x86_x64_Cache cache;
x86_x64::Cache cache;
cache.Initialize(characteristics->Level(), characteristics->Type());
cache.numEntries = characteristics->NumEntries();
cache.entrySize = characteristics->EntrySize();
@ -605,13 +607,13 @@ static void DetectCacheAndTLB(size_t& descriptorFlags)
static Status DetectCacheAndTLB()
{
// ensure all cache entries are initialized (DetectCache* might not set them all)
for(size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; idxLevel++)
for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++)
{
caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64_Cache::kData);
caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64_Cache::kInstruction);
caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kData);
caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kInstruction);
}
if(x86_x64_Vendor() == X86_X64_VENDOR_AMD)
if(x86_x64::Vendor() == x86_x64::VENDOR_AMD)
AMD::DetectCacheAndTLB();
else
{
@ -622,13 +624,13 @@ static Status DetectCacheAndTLB()
}
// sanity checks
for(size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; idxLevel++)
for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++)
{
ENSURE(caches[L1D+idxLevel].type == x86_x64_Cache::kData || caches[L1D+idxLevel].type == x86_x64_Cache::kUnified);
ENSURE(caches[L1D+idxLevel].type == x86_x64::Cache::kData || caches[L1D+idxLevel].type == x86_x64::Cache::kUnified);
ENSURE(caches[L1D+idxLevel].level == idxLevel+1);
ENSURE(caches[L1D+idxLevel].Validate() == true);
ENSURE(caches[L1I+idxLevel].type == x86_x64_Cache::kInstruction || caches[L1I+idxLevel].type == x86_x64_Cache::kUnified);
ENSURE(caches[L1I+idxLevel].type == x86_x64::Cache::kInstruction || caches[L1I+idxLevel].type == x86_x64::Cache::kUnified);
ENSURE(caches[L1I+idxLevel].level == idxLevel+1);
ENSURE(caches[L1I+idxLevel].Validate() == true);
}
@ -638,7 +640,7 @@ static Status DetectCacheAndTLB()
return INFO::OK;
}
const x86_x64_Cache* x86_x64_Caches(size_t idxCache)
const x86_x64::Cache* x86_x64::Caches(size_t idxCache)
{
static ModuleInitState initState;
ModuleInit(&initState, DetectCacheAndTLB);
@ -648,3 +650,5 @@ const x86_x64_Cache* x86_x64_Caches(size_t idxCache)
return &caches[idxCache];
}
} // namespace x86_x64

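Aside: the leaf-4 fields decoded in DetectCache combine into the total cache size. A sketch with illustrative values typical of a 32 KiB 8-way L1D (not read from hardware):

static size_t TotalCacheSizeExample()
{
	// all four fields use +1 encoding in CPUID leaf 4
	const size_t associativity = 8;		// EBX bits 22..31, +1
	const size_t partitions    = 1;		// EBX bits 12..21, +1
	const size_t lineSize      = 64;	// EBX bits 0..11, +1 (= entrySize above)
	const size_t sets          = 64;	// ECX bits 0..31, +1
	return associativity * partitions * lineSize * sets;	// 32768 = 32 KiB
}
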
View File

@ -23,7 +23,9 @@
#ifndef INCLUDED_X86_X64_CACHE
#define INCLUDED_X86_X64_CACHE
struct x86_x64_Cache // POD (may be used before static constructors)
namespace x86_x64 {
struct Cache // POD (may be used before static constructors)
{
enum Type
{
@ -133,6 +135,8 @@ enum IdxCache
* @return 0 if idxCache >= TLB+numTLBs, otherwise a valid pointer to
* a Cache whose numEntries is 0 if disabled / not present.
**/
LIB_API const x86_x64_Cache* x86_x64_Caches(size_t idxCache);
LIB_API const Cache* Caches(size_t idxCache);
} // namespace x86_x64
#endif // #ifndef INCLUDED_X86_X64_CACHE

View File

@ -31,7 +31,7 @@ namespace MSR {
bool IsAccessible()
{
if(!x86_x64_Cap(X86_X64_CAP_MSR))
if(!x86_x64::Cap(x86_x64::CAP_MSR))
return false;
// only read/writable from ring 0, so we need the driver.
@ -49,13 +49,13 @@ bool HasEnergyPerfBias()
// this, lest we provoke a GPF.
return false;
#else
if(x86_x64_Vendor() != X86_X64_VENDOR_INTEL)
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
return false;
if(x86_x64_Family() < 6)
if(x86_x64::Family() < 6)
return false;
if(x86_x64_Model() < 0xE)
if(x86_x64::Model() < 0xE)
return false;
return true;
@ -65,32 +65,33 @@ bool HasEnergyPerfBias()
bool HasPlatformInfo()
{
if(x86_x64_Vendor() != X86_X64_VENDOR_INTEL)
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
return false;
if(x86_x64_Family() != 6)
if(x86_x64::Family() != 6)
return false;
switch(x86_x64_Model())
switch(x86_x64::Model())
{
// Xeon 5500 / i7 (section B.4 in 253669-037US)
case 0x1A: // Bloomfield, Gainestown
case 0x1E: // Clarksfield, Lynnfield, Jasper Forest
case 0x1F:
// section 34.4 in 253665-041US
case x86_x64::MODEL_NEHALEM_EP:
case x86_x64::MODEL_NEHALEM_EP_2:
case x86_x64::MODEL_NEHALEM_EX:
case x86_x64::MODEL_I7_I5:
return true;
// Xeon 7500 (section B.4.2)
case 0x2E:
// section 34.5
case x86_x64::MODEL_CLARKDALE:
case x86_x64::MODEL_WESTMERE_EP:
return true;
// Xeon 5600 / Westmere (section B.5)
case 0x25: // Clarkdale, Arrandale
case 0x2C: // Gulftown
// section 34.6
case x86_x64::MODEL_WESTMERE_EX:
return true;
// Xeon 2xxx / Sandy Bridge (section B.6)
case 0x2A:
case 0x2D:
// section 34.7
case x86_x64::MODEL_SANDY_BRIDGE:
case x86_x64::MODEL_SANDY_BRIDGE_2:
return true;
default:
@ -101,13 +102,13 @@ bool HasPlatformInfo()
bool HasUncore()
{
if(x86_x64_Vendor() != X86_X64_VENDOR_INTEL)
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
return false;
if(x86_x64_Family() != 6)
if(x86_x64::Family() != 6)
return false;
switch(x86_x64_Model())
switch(x86_x64::Model())
{
// Xeon 5500 / i7 (section B.4.1 in 253669-037US)
case 0x1A: // Bloomfield, Gainestown

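A hedged usage sketch of these predicates, mirroring the NominalFrequency change later in this commit; MSR::Read requires the aken driver, hence the IsAccessible guard:

static void ReadMaxNonTurboRatio()
{
	if(MSR::IsAccessible() && MSR::HasPlatformInfo())
	{
		const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
		const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);	// bus-clock multiplier
		UNUSED2(maxNonTurboRatio);
	}
}
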
View File

@ -29,8 +29,8 @@ class TestTopology : public CxxTest::TestSuite
public:
void test_run()
{
TS_ASSERT_LESS_THAN_EQUALS(1u, cpu_topology_NumPackages());
TS_ASSERT_LESS_THAN_EQUALS(1u, cpu_topology_CoresPerPackage());
TS_ASSERT_LESS_THAN_EQUALS(1u, cpu_topology_LogicalPerCore());
TS_ASSERT_LESS_THAN_EQUALS(1u, topology::NumPackages());
TS_ASSERT_LESS_THAN_EQUALS(1u, topology::CoresPerPackage());
TS_ASSERT_LESS_THAN_EQUALS(1u, topology::LogicalPerCore());
}
};

View File

@ -38,6 +38,7 @@
#include "lib/sysdep/arch/x86_x64/cache.h"
#include "lib/sysdep/arch/x86_x64/apic.h"
namespace topology {
//---------------------------------------------------------------------------------------------------------------------
// detect *maximum* number of cores/packages/caches.
@ -49,19 +50,19 @@ static size_t MaxCoresPerPackage()
// assume single-core unless one of the following applies:
size_t maxCoresPerPackage = 1;
x86_x64_CpuidRegs regs = { 0 };
switch(x86_x64_Vendor())
x86_x64::CpuidRegs regs = { 0 };
switch(x86_x64::Vendor())
{
case X86_X64_VENDOR_INTEL:
case x86_x64::VENDOR_INTEL:
regs.eax = 4;
regs.ecx = 0;
if(x86_x64_cpuid(&regs))
if(x86_x64::cpuid(&regs))
maxCoresPerPackage = bits(regs.eax, 26, 31)+1;
break;
case X86_X64_VENDOR_AMD:
case x86_x64::VENDOR_AMD:
regs.eax = 0x80000008;
if(x86_x64_cpuid(&regs))
if(x86_x64::cpuid(&regs))
maxCoresPerPackage = bits(regs.ecx, 0, 7)+1;
break;
@ -80,13 +81,13 @@ static size_t MaxLogicalPerCore()
bool operator()() const
{
// definitely not
if(!x86_x64_cap(X86_X64_CAP_HT))
if(!x86_x64::Cap(x86_x64::CAP_HT))
return false;
// multi-core AMD systems falsely set the HT bit for reasons of
// compatibility. we'll just ignore it, because clearing it might
// confuse other callers.
if(x86_x64_Vendor() == X86_X64_VENDOR_AMD && x86_x64_cap(X86_X64_CAP_AMD_CMP_LEGACY))
if(x86_x64::Vendor() == x86_x64::VENDOR_AMD && x86_x64::Cap(x86_x64::CAP_AMD_CMP_LEGACY))
return false;
return true;
@ -94,9 +95,9 @@ static size_t MaxLogicalPerCore()
};
if(IsHyperthreadingCapable()())
{
x86_x64_CpuidRegs regs = { 0 };
x86_x64::CpuidRegs regs = { 0 };
regs.eax = 1;
if(!x86_x64_cpuid(&regs))
if(!x86_x64::cpuid(&regs))
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
const size_t logicalPerPackage = bits(regs.ebx, 16, 23);
const size_t maxCoresPerPackage = MaxCoresPerPackage();
@ -112,7 +113,7 @@ static size_t MaxLogicalPerCore()
static size_t MaxLogicalPerCache()
{
return x86_x64_Caches(L2D)->sharedBy;
return x86_x64::Caches(x86_x64::L2D)->sharedBy;
}
@ -240,44 +241,44 @@ static Status InitCpuTopology()
}
size_t cpu_topology_NumPackages()
size_t NumPackages()
{
ModuleInit(&cpuInitState, InitCpuTopology);
return cpuTopology.numPackages;
}
size_t cpu_topology_CoresPerPackage()
size_t CoresPerPackage()
{
ModuleInit(&cpuInitState, InitCpuTopology);
return cpuTopology.coresPerPackage;
}
size_t cpu_topology_LogicalPerCore()
size_t LogicalPerCore()
{
ModuleInit(&cpuInitState, InitCpuTopology);
return cpuTopology.logicalPerCore;
}
size_t cpu_topology_LogicalFromApicId(size_t apicId)
size_t LogicalFromApicId(ApicId apicId)
{
const size_t contiguousId = ContiguousIdFromApicId(apicId);
return contiguousId % cpuTopology.logicalPerCore;
}
size_t cpu_topology_CoreFromApicId(size_t apicId)
size_t CoreFromApicId(ApicId apicId)
{
const size_t contiguousId = ContiguousIdFromApicId(apicId);
return (contiguousId / cpuTopology.logicalPerCore) % cpuTopology.coresPerPackage;
}
size_t cpu_topology_PackageFromApicId(size_t apicId)
size_t PackageFromApicId(ApicId apicId)
{
const size_t contiguousId = ContiguousIdFromApicId(apicId);
return contiguousId / (cpuTopology.logicalPerCore * cpuTopology.coresPerPackage);
}
size_t cpu_topology_ApicId(size_t idxLogical, size_t idxCore, size_t idxPackage)
ApicId ApicIdFromIndices(size_t idxLogical, size_t idxCore, size_t idxPackage)
{
ModuleInit(&cpuInitState, InitCpuTopology);
@ -450,22 +451,24 @@ static Status InitCacheTopology()
return INFO::OK;
}
size_t cache_topology_NumCaches()
size_t NumCaches()
{
ModuleInit(&cacheInitState, InitCacheTopology);
return cacheTopology.numCaches;
}
size_t cache_topology_CacheFromProcessor(size_t processor)
size_t CacheFromProcessor(size_t processor)
{
ModuleInit(&cacheInitState, InitCacheTopology);
ENSURE(processor < os_cpu_NumProcessors());
return cacheTopology.processorsCache[processor];
}
uintptr_t cache_topology_ProcessorMaskFromCache(size_t cache)
uintptr_t ProcessorMaskFromCache(size_t cache)
{
ModuleInit(&cacheInitState, InitCacheTopology);
ENSURE(cache < cacheTopology.numCaches);
return cacheTopology.cachesProcessorMask[cache];
}
} // namespace topology

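Usage note: the three *enabled* counts multiply out to the total number of logical processors, which makes a cheap sanity check. A sketch, assuming no processors are disabled:

static void CheckTopologyCounts()
{
	const size_t numLogical = topology::NumPackages()
		* topology::CoresPerPackage()
		* topology::LogicalPerCore();
	ENSURE(numLogical == os_cpu_NumProcessors());	// holds unless processors are disabled
}
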
View File

@ -25,8 +25,12 @@
* thread-safe, no explicit initialization is required.
*/
#ifndef INCLUDED_TOPOLOGY
#define INCLUDED_TOPOLOGY
#ifndef INCLUDED_X86_X64_TOPOLOGY
#define INCLUDED_X86_X64_TOPOLOGY
#include "lib/sysdep/arch/x86_x64/apic.h" // ApicId
namespace topology {
//-----------------------------------------------------------------------------
// cpu
@ -41,25 +45,40 @@
/**
* @return number of *enabled* CPU packages / sockets.
**/
LIB_API size_t cpu_topology_NumPackages();
LIB_API size_t NumPackages();
/**
* @return number of *enabled* CPU cores per package.
* (2 on dual-core systems)
**/
LIB_API size_t cpu_topology_CoresPerPackage();
LIB_API size_t CoresPerPackage();
/**
* @return number of *enabled* logical processors (aka Hyperthreads)
* per core. (2 on P4 EE)
**/
LIB_API size_t cpu_topology_LogicalPerCore();
LIB_API size_t LogicalPerCore();
LIB_API size_t cpu_topology_PackageFromApicId(size_t apicId);
LIB_API size_t cpu_topology_CoreFromApicId(size_t apicId);
LIB_API size_t cpu_topology_LogicalFromApicId(size_t apicId);
/**
* @return index of processor package/socket in [0, NumPackages())
**/
LIB_API size_t PackageFromApicId(ApicId apicId);
LIB_API size_t cpu_topology_ApicId(size_t idxPackage, size_t idxCore, size_t idxLogical);
/**
* @return index of processor core in [0, CoresPerPackage())
**/
LIB_API size_t CoreFromApicId(ApicId apicId);
/**
* @return index of logical processor in [0, LogicalPerCore())
**/
LIB_API size_t LogicalFromApicId(ApicId apicId);
/**
* @param idxPackage, idxCore, idxLogical return values of *FromApicId
* @return APIC ID (see note at AreApicIdsReliable)
**/
LIB_API ApicId ApicIdFromIndices(size_t idxPackage, size_t idxCore, size_t idxLogical);
//-----------------------------------------------------------------------------
@ -75,16 +94,18 @@ LIB_API size_t cpu_topology_ApicId(size_t idxPackage, size_t idxCore, size_t idx
/**
* @return number of distinct L2 caches.
**/
LIB_API size_t cache_topology_NumCaches();
LIB_API size_t NumCaches();
/**
* @return L2 cache number (zero-based) to which the given processor belongs.
**/
LIB_API size_t cache_topology_CacheFromProcessor(size_t processor);
LIB_API size_t CacheFromProcessor(size_t processor);
/**
* @return bit-mask of all processors sharing the given cache.
**/
LIB_API uintptr_t cache_topology_ProcessorMaskFromCache(size_t cache);
LIB_API uintptr_t ProcessorMaskFromCache(size_t cache);
#endif // #ifndef INCLUDED_TOPOLOGY
} // namespace topology
#endif // #ifndef INCLUDED_X86_X64_TOPOLOGY

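A hedged sketch of the cache-topology queries: enumerating the processors that share processor 0's L2 cache (printf is used for brevity and is not part of this API):

#include <cstdio>

static void PrintL2Sharers()
{
	const size_t cache = topology::CacheFromProcessor(0);
	const uintptr_t mask = topology::ProcessorMaskFromCache(cache);
	for(size_t p = 0; p < os_cpu_NumProcessors(); p++)
	{
		if(mask & (uintptr_t(1) << p))
			printf("processor %d shares L2 cache %d\n", (int)p, (int)cache);
	}
}
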
View File

@ -44,6 +44,8 @@
# include <intrin.h> // __rdtsc
#endif
namespace x86_x64 {
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729
// VC10+ and VC9 SP1: __cpuidex is already available
#elif GCC_VERSION
@ -65,11 +67,10 @@
// call a public function (that re-enters ModuleInit), so each
// function gets its own initState.
//-----------------------------------------------------------------------------
// CPUID
static void cpuid(x86_x64_CpuidRegs* regs)
static void Invoke_cpuid(CpuidRegs* regs)
{
cassert(sizeof(regs->eax) == sizeof(int));
cassert(sizeof(*regs) == 4*sizeof(int));
@ -81,20 +82,20 @@ static u32 cpuid_maxExtendedFunction;
static Status InitCpuid()
{
x86_x64_CpuidRegs regs = { 0 };
CpuidRegs regs = { 0 };
regs.eax = 0;
cpuid(&regs);
Invoke_cpuid(&regs);
cpuid_maxFunction = regs.eax;
regs.eax = 0x80000000;
cpuid(&regs);
Invoke_cpuid(&regs);
cpuid_maxExtendedFunction = regs.eax;
return INFO::OK;
}
bool x86_x64_cpuid(x86_x64_CpuidRegs* regs)
bool cpuid(CpuidRegs* regs)
{
static ModuleInitState initState;
ModuleInit(&initState, InitCpuid);
@ -105,7 +106,7 @@ bool x86_x64_cpuid(x86_x64_CpuidRegs* regs)
if(function < 0x80000000 && function > cpuid_maxFunction)
return false;
cpuid(regs);
Invoke_cpuid(regs);
return true;
}
@ -114,22 +115,22 @@ bool x86_x64_cpuid(x86_x64_CpuidRegs* regs)
// capability bits
// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
// keep in sync with enum x86_x64_Cap!
// keep in sync with enum Cap!
static u32 caps[4];
static ModuleInitState capsInitState;
static Status InitCaps()
{
x86_x64_CpuidRegs regs = { 0 };
CpuidRegs regs = { 0 };
regs.eax = 1;
if(x86_x64_cpuid(&regs))
if(cpuid(&regs))
{
caps[0] = regs.ecx;
caps[1] = regs.edx;
}
regs.eax = 0x80000001;
if(x86_x64_cpuid(&regs))
if(cpuid(&regs))
{
caps[2] = regs.ecx;
caps[3] = regs.edx;
@ -138,7 +139,7 @@ static Status InitCaps()
return INFO::OK;
}
bool x86_x64_cap(x86_x64_Cap cap)
bool Cap(Caps cap)
{
ModuleInit(&capsInitState, InitCaps);
@ -152,7 +153,7 @@ bool x86_x64_cap(x86_x64_Cap cap)
return IsBitSet(caps[index], bit);
}
void x86_x64_caps(u32* d0, u32* d1, u32* d2, u32* d3)
void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3)
{
ModuleInit(&capsInitState, InitCaps);
@ -166,13 +167,13 @@ void x86_x64_caps(u32* d0, u32* d1, u32* d2, u32* d3)
//-----------------------------------------------------------------------------
// vendor
static x86_x64_Vendors vendor;
static Vendors vendor;
static Status InitVendor()
{
x86_x64_CpuidRegs regs = { 0 };
CpuidRegs regs = { 0 };
regs.eax = 0;
if(!x86_x64_cpuid(&regs))
if(!cpuid(&regs))
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
// copy regs to string
@ -184,19 +185,19 @@ static Status InitVendor()
vendorString[12] = '\0'; // 0-terminate
if(!strcmp(vendorString, "AuthenticAMD"))
vendor = X86_X64_VENDOR_AMD;
vendor = x86_x64::VENDOR_AMD;
else if(!strcmp(vendorString, "GenuineIntel"))
vendor = X86_X64_VENDOR_INTEL;
vendor = x86_x64::VENDOR_INTEL;
else
{
DEBUG_WARN_ERR(ERR::CPU_UNKNOWN_VENDOR);
vendor = X86_X64_VENDOR_UNKNOWN;
vendor = x86_x64::VENDOR_UNKNOWN;
}
return INFO::OK;
}
x86_x64_Vendors x86_x64_Vendor()
Vendors Vendor()
{
static ModuleInitState initState;
ModuleInit(&initState, InitVendor);
@ -213,9 +214,9 @@ static ModuleInitState signatureInitState;
static Status InitSignature()
{
x86_x64_CpuidRegs regs = { 0 };
CpuidRegs regs = { 0 };
regs.eax = 1;
if(!x86_x64_cpuid(&regs))
if(!cpuid(&regs))
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
model = bits(regs.eax, 4, 7);
family = bits(regs.eax, 8, 11);
@ -223,18 +224,18 @@ static Status InitSignature()
const size_t extendedFamily = bits(regs.eax, 20, 27);
if(family == 0xF)
family += extendedFamily;
if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6))
if(family == 0xF || (Vendor() == x86_x64::VENDOR_INTEL && family == 6))
model += extendedModel << 4;
return INFO::OK;
}
size_t x86_x64_Model()
size_t Model()
{
ModuleInit(&signatureInitState, InitSignature);
return model;
}
size_t x86_x64_Family()
size_t Family()
{
ModuleInit(&signatureInitState, InitSignature);
return family;
@ -285,9 +286,9 @@ static Status InitIdentifierString()
bool gotBrandString = true;
for(u32 function = 0x80000002; function <= 0x80000004; function++)
{
x86_x64_CpuidRegs regs = { 0 };
CpuidRegs regs = { 0 };
regs.eax = function;
gotBrandString &= x86_x64_cpuid(&regs);
gotBrandString &= cpuid(&regs);
memcpy(pos, &regs, 16);
pos += 16;
}
@ -300,11 +301,11 @@ static Status InitIdentifierString()
// doesn't recognize.
if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
{
const size_t family = x86_x64_Family();
const size_t model = x86_x64_Model();
switch(x86_x64_Vendor())
const size_t family = Family();
const size_t model = Model();
switch(Vendor())
{
case X86_X64_VENDOR_AMD:
case x86_x64::VENDOR_AMD:
// everything else is either too old, or should have a brand string.
if(family == 6)
{
@ -314,7 +315,7 @@ static Status InitIdentifierString()
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon");
else
{
if(x86_x64_cap(X86_X64_CAP_AMD_MP))
if(Cap(x86_x64::CAP_AMD_MP))
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon MP");
else
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon XP");
@ -322,7 +323,7 @@ static Status InitIdentifierString()
}
break;
case X86_X64_VENDOR_INTEL:
case x86_x64::VENDOR_INTEL:
// everything else is either too old, or should have a brand string.
if(family == 6)
{
@ -357,7 +358,7 @@ static Status InitIdentifierString()
return INFO::OK;
}
const char* cpu_IdentifierString()
static const char* IdentifierString()
{
static ModuleInitState initState;
ModuleInit(&initState, InitIdentifierString);
@ -368,25 +369,8 @@ const char* cpu_IdentifierString()
//-----------------------------------------------------------------------------
// miscellaneous stateless functions
// these routines do not call ModuleInit (because some of them are
// time-critical, e.g. cpu_Serialize) and should also avoid the
// other x86_x64* functions and their global state.
// in particular, use cpuid instead of x86_x64_cpuid.
u8 x86_x64_ApicId()
{
x86_x64_CpuidRegs regs = { 0 };
regs.eax = 1;
// note: CPUID function 1 is always supported, but only processors with
// an xAPIC (e.g. P4/Athlon XP) will return a nonzero ID.
cpuid(&regs);
const u8 apicId = (u8)bits(regs.ebx, 24, 31);
return apicId;
}
#if !MSC_VERSION // replaced by macro
u64 x86_x64_rdtsc()
#if !MSC_VERSION // ensure not already defined in header
u64 rdtsc()
{
#if GCC_VERSION
// GCC supports "portable" assembly for both x86 and x64
@ -398,7 +382,7 @@ u64 x86_x64_rdtsc()
#endif
void x86_x64_DebugBreak()
void DebugBreak()
{
#if MSC_VERSION
__debugbreak();
@ -411,14 +395,6 @@ void x86_x64_DebugBreak()
}
void cpu_Serialize()
{
x86_x64_CpuidRegs regs = { 0 };
regs.eax = 1;
cpuid(&regs); // CPUID serializes execution.
}
//-----------------------------------------------------------------------------
// CPU frequency
@ -450,13 +426,13 @@ private:
// note: this function uses timer.cpp!timer_Time, which is implemented via
// whrt.cpp on Windows.
double x86_x64_ClockFrequency()
double ClockFrequency()
{
// if the TSC isn't available, there's really no good way to count the
// actual CPU clocks per known time interval, so bail.
// note: loop iterations ("bogomips") are not a reliable measure due
// to differing IPC and compiler optimizations.
if(!x86_x64_cap(X86_X64_CAP_TSC))
if(!Cap(x86_x64::CAP_TSC))
return -1.0; // impossible value
// increase priority to reduce interference while measuring.
@ -465,7 +441,7 @@ double x86_x64_ClockFrequency()
// note: no need to "warm up" cpuid - it will already have been
// called several times by the time this code is reached.
// (background: it's used in x86_x64_rdtsc() to serialize instruction flow;
// (background: it's used in rdtsc() to serialize instruction flow;
// the first call is documented to be slower on Intel CPUs)
size_t numSamples = 16;
@ -488,27 +464,27 @@ double x86_x64_ClockFrequency()
do
{
// note: timer_Time effectively has a long delay (up to 5 us)
// before returning the time. we call it before x86_x64_rdtsc to
// before returning the time. we call it before rdtsc to
// minimize the delay between actually sampling time / TSC,
// thus decreasing the chance for interference.
// (if unavoidable background activity, e.g. interrupts,
// delays the second reading, inaccuracy is introduced).
t1 = timer_Time();
c1 = x86_x64_rdtsc();
c1 = rdtsc();
}
while(t1 == t0);
// .. wait until start of next tick and at least 1 ms elapsed.
do
{
const double t2 = timer_Time();
const u64 c2 = x86_x64_rdtsc();
const u64 c2 = rdtsc();
dc = (i64)(c2 - c1);
dt = t2 - t1;
}
while(dt < 1e-3);
// .. freq = (delta_clocks) / (delta_seconds);
// x86_x64_rdtsc/timer overhead is negligible.
// rdtsc/timer overhead is negligible.
const double freq = dc / dt;
samples[i] = freq;
}
@ -526,3 +502,11 @@ double x86_x64_ClockFrequency()
const double clockFrequency = sum / (hi-lo);
return clockFrequency;
}
} // namespace x86_x64
const char* cpu_IdentifierString()
{
return x86_x64::IdentifierString();
}

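A worked example of the signature decode in InitSignature: a Westmere EX (e.g. Xeon E7-4870) reports CPUID.1:EAX = 0x000206F2. The helper below is hypothetical, for illustration only:

static void DecodeWestmereExSignature()
{
	const u32 eax = 0x000206F2;	// CPUID.1:EAX of a Xeon E7-4870
	size_t model               = bits(eax, 4, 7);	// 0xF
	const size_t family        = bits(eax, 8, 11);	// 6
	const size_t extendedModel = bits(eax, 16, 19);	// 2
	if(family == 6)	// Intel family 6 => combine the fields
		model += extendedModel << 4;	// 0xF + (2 << 4) = 0x2F
	ENSURE(model == x86_x64::MODEL_WESTMERE_EX);
}
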
View File

@ -37,11 +37,13 @@
#include <intrin.h> // __rdtsc
#endif
namespace x86_x64 {
/**
* registers used/returned by x86_x64_cpuid
* registers used/returned by cpuid
**/
#pragma pack(push, 1) // (allows casting to int*)
struct x86_x64_CpuidRegs
struct CpuidRegs
{
u32 eax;
u32 ebx;
@ -60,90 +62,93 @@ struct x86_x64_CpuidRegs
* and allows graceful expansion to functions that require further inputs.
* @return true on success or false if the sub-function isn't supported.
**/
LIB_API bool x86_x64_cpuid(x86_x64_CpuidRegs* regs);
LIB_API bool cpuid(CpuidRegs* regs);
/**
* CPU vendor.
* (this is exposed because some CPUID functions are vendor-specific.)
* (an enum is easier to compare than the original string values.)
**/
enum x86_x64_Vendors
enum Vendors
{
X86_X64_VENDOR_UNKNOWN,
X86_X64_VENDOR_INTEL,
X86_X64_VENDOR_AMD
VENDOR_UNKNOWN,
VENDOR_INTEL,
VENDOR_AMD
};
LIB_API x86_x64_Vendors x86_x64_Vendor();
LIB_API Vendors Vendor();
LIB_API size_t x86_x64_Model();
enum Models
{
MODEL_NEHALEM_EP = 0x1A, // Bloomfield (X35xx), Gainestown (X55xx)
MODEL_NEHALEM_EP_2 = 0x1E, // Clarksfield, Lynnfield (X34xx), Jasper Forest (C35xx, C55xx)
MODEL_I7_I5 = 0x1F, // similar to 1E; mentioned in 253665-041US, no codename known
MODEL_CLARKDALE = 0x25, // Arrandale, Clarkdale (L34xx)
MODEL_WESTMERE_EP = 0x2C, // Gulftown (X36xx, X56xx)
MODEL_NEHALEM_EX = 0x2E, // Beckton (X75xx)
MODEL_WESTMERE_EX = 0x2F, // Gulftown uarch, Beckton package (E7-48xx)
MODEL_SANDY_BRIDGE = 0x2A, // (E3-12xx, E5-26xx)
MODEL_SANDY_BRIDGE_2 = 0x2D, // (E5-26xx, E5-46xx)
};
LIB_API size_t x86_x64_Family();
LIB_API size_t Model();
LIB_API size_t Family();
/**
* @return the colloquial processor generation
* (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron)
**/
LIB_API size_t x86_x64_Generation();
LIB_API size_t Generation();
/**
* bit indices of CPU capability flags (128 bits).
* values are defined by IA-32 CPUID feature flags - do not change!
**/
enum x86_x64_Cap
enum Caps
{
// standard (ecx) - currently only defined by Intel
X86_X64_CAP_SSE3 = 0+0, // Streaming SIMD Extensions 3
X86_X64_CAP_EST = 0+7, // Enhanced Speedstep Technology
X86_X64_CAP_SSSE3 = 0+9, // Supplemental Streaming SIMD Extensions 3
X86_X64_CAP_SSE41 = 0+19, // Streaming SIMD Extensions 4.1
X86_X64_CAP_SSE42 = 0+20, // Streaming SIMD Extensions 4.2
CAP_SSE3 = 0+0, // Streaming SIMD Extensions 3
CAP_EST = 0+7, // Enhanced Speedstep Technology
CAP_SSSE3 = 0+9, // Supplemental Streaming SIMD Extensions 3
CAP_SSE41 = 0+19, // Streaming SIMD Extensions 4.1
CAP_SSE42 = 0+20, // Streaming SIMD Extensions 4.2
// standard (edx)
X86_X64_CAP_FPU = 32+0, // Floating Point Unit
X86_X64_CAP_TSC = 32+4, // TimeStamp Counter
X86_X64_CAP_MSR = 32+5, // Model Specific Registers
X86_X64_CAP_CMOV = 32+15, // Conditional MOVe
X86_X64_CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock
X86_X64_CAP_MMX = 32+23, // MultiMedia eXtensions
X86_X64_CAP_SSE = 32+25, // Streaming SIMD Extensions
X86_X64_CAP_SSE2 = 32+26, // Streaming SIMD Extensions 2
X86_X64_CAP_HT = 32+28, // HyperThreading
CAP_FPU = 32+0, // Floating Point Unit
CAP_TSC = 32+4, // TimeStamp Counter
CAP_MSR = 32+5, // Model Specific Registers
CAP_CMOV = 32+15, // Conditional MOVe
CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock
CAP_MMX = 32+23, // MultiMedia eXtensions
CAP_SSE = 32+25, // Streaming SIMD Extensions
CAP_SSE2 = 32+26, // Streaming SIMD Extensions 2
CAP_HT = 32+28, // HyperThreading
// extended (ecx)
X86_X64_CAP_AMD_CMP_LEGACY = 64+1, // N-core and X86_X64_CAP_HT is falsely set
CAP_AMD_CMP_LEGACY = 64+1, // N-core and CAP_HT is falsely set
// extended (edx)
X86_X64_CAP_AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64
X86_X64_CAP_AMD_MMX_EXT = 96+22,
X86_X64_CAP_AMD_3DNOW_PRO = 96+30,
X86_X64_CAP_AMD_3DNOW = 96+31
CAP_AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64
CAP_AMD_MMX_EXT = 96+22,
CAP_AMD_3DNOW_PRO = 96+30,
CAP_AMD_3DNOW = 96+31
};
/**
* @return whether the CPU supports the indicated x86_x64_Cap / feature flag.
* @return whether the CPU supports the indicated Cap / feature flag.
**/
LIB_API bool x86_x64_cap(x86_x64_Cap cap);
LIB_API bool Cap(Caps cap);
LIB_API void x86_x64_caps(u32* d0, u32* d1, u32* d2, u32* d3);
LIB_API void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3);
//-----------------------------------------------------------------------------
// stateless
/**
* @return APIC ID of the currently executing processor or zero if the
* platform does not have an xAPIC (i.e. 7th generation x86 or below).
*
* rationale: the alternative of accessing the APIC mmio registers is not
* feasible - mahaf_MapPhysicalMemory only works reliably on WinXP. we also
* don't want to interfere with the OS's constant use of the APIC registers.
**/
LIB_API u8 x86_x64_ApicId();
/**
* @return the current value of the TimeStampCounter (a counter of
* CPU cycles since power-on, which is useful for high-resolution timing
@ -154,22 +159,24 @@ LIB_API u8 x86_x64_ApicId();
* - x64 RDTSC writes to edx:eax and clears the upper halves of rdx and rax.
**/
#if MSC_VERSION
#define x86_x64_rdtsc __rdtsc
static inline u64 rdtsc() { return __rdtsc(); }
#else
LIB_API u64 x86_x64_rdtsc();
LIB_API u64 rdtsc();
#endif
/**
* trigger a breakpoint inside this function when it is called.
**/
LIB_API void x86_x64_DebugBreak();
LIB_API void DebugBreak();
/**
* measure the CPU clock frequency via x86_x64_rdtsc and timer_Time.
* measure the CPU clock frequency via rdtsc and timer_Time.
* (it follows that this must not be called from WHRT init.)
* this takes several milliseconds (i.e. much longer than
* os_cpu_ClockFrequency) but delivers accurate measurements.
**/
LIB_API double x86_x64_ClockFrequency();
LIB_API double ClockFrequency();
} // namespace x86_x64
#endif // #ifndef INCLUDED_X86_X64

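Usage sketch, mirroring WriteSystemInfo later in this commit: prefer the cheap OS query and only fall back to measurement if it fails:

static double CpuClockFrequency()
{
	double hz = os_cpu_ClockFrequency();	// OS query; may fail and return <= 0
	if(hz <= 0.0)
		hz = x86_x64::ClockFrequency();	// measure via rdtsc (takes a few ms)
	return hz;
}
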
View File

@ -100,7 +100,7 @@
// Streaming SIMD Extensions (not supported by all GCC)
// this only ascertains compiler support; use x86_x64_cap to
// this only ascertains compiler support; use x86_x64::Cap to
// check whether the instructions are supported by the CPU.
#ifndef HAVE_SSE
# if GCC_VERSION && defined(__SSE__)

View File

@ -85,11 +85,6 @@ inline bool cpu_CAS(volatile T* location, T expected, T new_value)
LIB_API void cpu_Test();
/**
* enforce strict instruction ordering in the CPU pipeline.
**/
LIB_API void cpu_Serialize();
/**
* pause in spin-wait loops, as a performance optimisation.
**/

View File

@ -0,0 +1,87 @@
@ECHO OFF
"%systemroot%\system32\cacls.exe" "%systemroot%\system32\config\system" >nul 2>&1
IF ERRORLEVEL 1 GOTO relaunch
REM detect whether OS is 32/64 bit
IF "%ProgramW6432%" == "%ProgramFiles%" (
SET aken_bits=64
) ELSE (
SET aken_bits=32
)
IF "%1" == "enabletest" GOTO enabletest
IF "%1" == "disabletest" GOTO disabletest
IF "%1" == "install" GOTO install
IF "%1" == "remove" GOTO remove
GOTO usage
:enabletest
bcdedit.exe /set TESTSIGNING ON
GOTO end
:disabletest
bcdedit.exe /set TESTSIGNING OFF
GOTO end
:install
IF (%2) == () (
SET aken_path="%~p0\aken%aken_bits%.sys"
) ELSE (
echo %2\aken%aken_bits%.sys
SET aken_path=%2\aken%aken_bits%.sys
)
echo %aken_path%
IF NOT EXIST %aken_path% GOTO notfound
sc create Aken DisplayName= Aken type= kernel start= auto binpath= %aken_path%
REM error= normal is default
IF ERRORLEVEL 1 GOTO failed
sc start Aken
IF ERRORLEVEL 1 GOTO failed
ECHO Success!
GOTO end
:remove
sc stop Aken
sc delete Aken
IF ERRORLEVEL 1 GOTO failed
ECHO Success! (The previous line should read: [SC] DeleteService SUCCESS)
GOTO end
:usage
ECHO To install the driver, please first enable test mode:
ECHO %0 enabletest
ECHO (This is necessary because Vista/Win7 x64 require signing with
ECHO a Microsoft "cross certificate". The Fraunhofer code signing certificate
ECHO is not enough, even though its chain of trust is impeccable.
ECHO Going the WHQL route, perhaps as an "unclassified" driver, might work.
ECHO see http://www.freeotfe.org/docs/Main/impact_of_kernel_driver_signing.htm )
ECHO Then reboot (!) and install the driver:
ECHO %0 install ["path_to_directory_containing_aken*.sys"]
ECHO (If no path is given, we will use the directory of this batch file)
ECHO To remove the driver and disable test mode, execute the following:
ECHO %0 remove
ECHO %0 disabletest
PAUSE
GOTO end
:relaunch
SET aken_vbs="%temp%\aken_run.vbs"
ECHO Set UAC = CreateObject^("Shell.Application"^) > %aken_vbs%
ECHO UAC.ShellExecute "cmd.exe", "/k %~s0 %1 %2", "", "runas", 1 >> %aken_vbs%
ECHO "To re-run this batch file as admin, we have created %aken_vbs% with the following contents:"
type %aken_vbs%
PAUSE
cscript //Nologo %aken_vbs%
DEL %aken_vbs%
GOTO end
:notfound
ECHO Driver not found at specified path (%aken_path%)
GOTO end
:failed
ECHO Something went wrong -- see previous line
GOTO end
:end

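A typical first-time sequence on 64-bit Vista/Win7, assuming the file is saved as aken_install.bat (the name mentioned in mahaf.cpp below) next to aken64.sys:

aken_install.bat enabletest
REM reboot, then:
aken_install.bat install
REM later, to clean up:
aken_install.bat remove
aken_install.bat disabletest
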
View File

@ -202,14 +202,13 @@ static SC_HANDLE OpenServiceControlManager(DWORD access)
SC_HANDLE hSCM = OpenSCManagerW(machineName, databaseName, access);
if(!hSCM)
{
// administrator privileges are required for SC_MANAGER_CREATE_SERVICE.
// note: installing the service and having it start automatically would
// allow Least-Permission accounts to use it (after relaxing the
// service's DACL).
// ensure no other problems arose
ENSURE(GetLastError() == ERROR_ACCESS_DENIED);
// administrator privileges are required for SC_MANAGER_CREATE_SERVICE.
// this is a problem on Vista / Win7, so users will have to use the
// separate aken_install.bat
return 0;
}
@ -258,7 +257,7 @@ static void StartDriver(const OsPath& driverPathname)
return;
}
SC_HANDLE hService = OpenServiceW(hSCM, AKEN_NAME, GENERIC_READ);
SC_HANDLE hService = OpenServiceW(hSCM, AKEN_NAME, SERVICE_START);
// during development, we want to ensure the newest build is used, so
// unload and re-create the service if it's running/installed.
@ -281,22 +280,25 @@ static void StartDriver(const OsPath& driverPathname)
LPCWSTR startName = 0; // LocalSystem
// NB: Windows 7 seems to insist upon backslashes (i.e. external_file_string)
hService = CreateServiceW(hSCM, AKEN_NAME, AKEN_NAME,
SERVICE_START, SERVICE_KERNEL_DRIVER, SERVICE_DEMAND_START, SERVICE_ERROR_NORMAL,
SERVICE_START, SERVICE_KERNEL_DRIVER, SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
OsString(driverPathname).c_str(), 0, 0, 0, startName, 0);
ENSURE(hService != 0);
}
// start service
{
DWORD numArgs = 0;
BOOL ok = StartService(hService, numArgs, 0);
if(!ok)
{
if(GetLastError() != ERROR_SERVICE_ALREADY_RUNNING)
switch(GetLastError())
{
// starting failed. don't raise a warning because this
// always happens on least-permission user accounts.
//DEBUG_WARN_ERR(ERR::LOGIC);
case ERROR_SERVICE_ALREADY_RUNNING:
break; // ok, no action needed
case ERROR_ACCESS_DENIED:
break; // Win7, can't start service; must use aken_install.bat
default: // unexpected problem
DEBUG_WARN_ERR(ERR::LOGIC);
break;
}
}
}

View File

@ -34,17 +34,17 @@ public:
void test_rdtsc()
{
// must increase monotonically
const u64 c1 = x86_x64_rdtsc();
const u64 c2 = x86_x64_rdtsc();
const u64 c3 = x86_x64_rdtsc();
const u64 c1 = x86_x64::rdtsc();
const u64 c2 = x86_x64::rdtsc();
const u64 c3 = x86_x64::rdtsc();
TS_ASSERT(c1 < c2 && c2 < c3);
}
void test_ia32_cap()
{
// make sure the really common/basic caps end up reported as true
TS_ASSERT(x86_x64_cap(X86_X64_CAP_FPU));
TS_ASSERT(x86_x64_cap(X86_X64_CAP_TSC));
TS_ASSERT(x86_x64_cap(X86_X64_CAP_MMX));
TS_ASSERT(x86_x64::Cap(x86_x64::CAP_FPU));
TS_ASSERT(x86_x64::Cap(x86_x64::CAP_TSC));
TS_ASSERT(x86_x64::Cap(x86_x64::CAP_MMX));
}
};

View File

@ -36,7 +36,7 @@
#include "lib/sysdep/os/win/wutil.h"
#if ARCH_X86_X64
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
# include "lib/sysdep/arch/x86_x64/topology.h"
# include "lib/sysdep/arch/x86_x64/msr.h"
#endif
@ -46,9 +46,9 @@
static bool IsUniprocessor()
{
if(cpu_topology_NumPackages() != 1)
if(topology::NumPackages() != 1)
return false;
if(cpu_topology_CoresPerPackage() != 1)
if(topology::CoresPerPackage() != 1)
return false;
return true;
}
@ -57,11 +57,11 @@ static bool IsUniprocessor()
static bool IsInvariantTSC()
{
#if ARCH_X86_X64
// (we no longer need to check x86_x64_Vendor - Intel and AMD
// (we no longer need to check x86_x64::Vendor - Intel and AMD
// agreed on the definition of this feature check)
x86_x64_CpuidRegs regs = { 0 };
x86_x64::CpuidRegs regs = { 0 };
regs.eax = 0x80000007;
if(x86_x64_cpuid(&regs))
if(x86_x64::cpuid(&regs))
{
// TSC is invariant across P-state, C-state, turbo, and
// stop grant transitions (e.g. STPCLK)
@ -77,17 +77,17 @@ static bool IsInvariantTSC()
static bool IsThrottlingPossible()
{
#if ARCH_X86_X64
x86_x64_CpuidRegs regs = { 0 };
switch(x86_x64_Vendor())
x86_x64::CpuidRegs regs = { 0 };
switch(x86_x64::Vendor())
{
case X86_X64_VENDOR_INTEL:
if(x86_x64_cap(X86_X64_CAP_TM_SCC) || x86_x64_cap(X86_X64_CAP_EST))
case x86_x64::VENDOR_INTEL:
if(x86_x64::Cap(x86_x64::CAP_TM_SCC) || x86_x64::Cap(x86_x64::CAP_EST))
return true;
break;
case X86_X64_VENDOR_AMD:
case x86_x64::VENDOR_AMD:
regs.eax = 0x80000007;
if(x86_x64_cpuid(&regs))
if(x86_x64::cpuid(&regs))
{
enum AmdPowerNowFlags
{
@ -109,6 +109,18 @@ static bool IsThrottlingPossible()
}
static bool IsSandyBridge()
{
if(x86_x64::Vendor() != x86_x64::VENDOR_INTEL)
return false;
if(x86_x64::Model() == x86_x64::MODEL_SANDY_BRIDGE)
return true;
if(x86_x64::Model() == x86_x64::MODEL_SANDY_BRIDGE_2)
return true;
return false;
}
//-----------------------------------------------------------------------------
class CounterTSC : public ICounter
@ -122,7 +134,7 @@ public:
Status Activate()
{
#if ARCH_X86_X64
if(!x86_x64_cap(X86_X64_CAP_TSC))
if(!x86_x64::Cap(x86_x64::CAP_TSC))
return ERR::NOT_SUPPORTED; // NOWARN (CPU doesn't support RDTSC)
#endif
@ -177,7 +189,7 @@ public:
#if ARCH_X86_X64
// recent CPU:
//if(x86_x64_Generation() >= 7)
//if(x86_x64::Generation() >= 7)
{
// note: 8th generation CPUs support C1-clock ramping, which causes
// drift on multi-core systems, but those were excluded above.
@ -204,7 +216,7 @@ public:
u64 Counter() const
{
return x86_x64_rdtsc();
return x86_x64::rdtsc();
}
size_t CounterBits() const
@ -214,7 +226,7 @@ public:
double NominalFrequency() const
{
// WARNING: do not call x86_x64_ClockFrequency because it uses the
// WARNING: do not call x86_x64::ClockFrequency because it uses the
// HRT, which we're currently in the process of initializing.
// instead query CPU clock frequency via OS.
//
@ -224,9 +236,10 @@ public:
#if ARCH_X86_X64
if(MSR::IsAccessible() && MSR::HasPlatformInfo())
{
const i64 busFrequency = IsSandyBridge()? 100000000 : 133333333;
const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
return maxNonTurboRatio * 133.33e6f;
return double(maxNonTurboRatio) * busFrequency;
}
else
#endif

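Worked example of the computation above, with illustrative ratios read from PLATFORM_INFO bits 8..15: a Sandy Bridge part with maxNonTurboRatio = 34 yields 34 * 100000000 = 3.4 GHz, while a Westmere EP with ratio 24 yields 24 * 133333333, roughly 3.2 GHz. The old code would have mis-reported the Sandy Bridge part as 34 * 133.33 MHz, about 4.5 GHz.
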
View File

@ -37,7 +37,7 @@
#include "lib/module_init.h"
#include "lib/sysdep/cpu.h" // cpu_AtomicAdd
#include "lib/sysdep/numa.h"
#include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_ApicId
#include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::ApicId
#include "lib/sysdep/arch/x86_x64/apic.h" // ProcessorFromApicId
#include "lib/sysdep/os/win/wversion.h"
#include "lib/sysdep/os/win/winit.h"
@ -53,7 +53,7 @@ static WUTIL_FUNC(pVirtualAllocExNuma, LPVOID, (HANDLE, LPVOID, SIZE_T, DWORD, D
static DWORD WINAPI EmulateGetCurrentProcessorNumber(VOID)
{
const u8 apicId = x86_x64_ApicId();
const ApicId apicId = GetApicId();
const DWORD processor = (DWORD)ProcessorFromApicId(apicId);
ASSERT(processor < os_cpu_MaxProcessors);
return processor;

View File

@ -31,7 +31,7 @@
#include "lib/sysdep/cpu.h" // cpu_AtomicAdd
#if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
# include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
#endif
@ -142,7 +142,7 @@ private:
// since TIMER_ACCRUE et al. are called so often, we try to keep
// overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
// returned by x86_x64_rdtsc) instead of absolute time has two benefits:
// returned by x86_x64::rdtsc) instead of absolute time has two benefits:
// - no need to convert from raw->time on every call
// (instead, it's only done once when displaying the totals)
// - possibly less overhead to querying the time itself
@ -170,7 +170,7 @@ public:
void SetFromTimer()
{
m_cycles = x86_x64_rdtsc();
m_cycles = x86_x64::rdtsc();
}
void AddDifference(TimerUnit t0, TimerUnit t1)

View File

@ -45,14 +45,14 @@
static void ReportGLLimits(ScriptInterface& scriptInterface, CScriptValRooted settings);
#if ARCH_X86_X64
CScriptVal ConvertCaches(ScriptInterface& scriptInterface, IdxCache idxCache)
CScriptVal ConvertCaches(ScriptInterface& scriptInterface, x86_x64::IdxCache idxCache)
{
CScriptVal ret;
scriptInterface.Eval("[]", ret);
for (size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; ++idxLevel)
for (size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; ++idxLevel)
{
const x86_x64_Cache* pcache = x86_x64_Caches(idxCache+idxLevel);
if (pcache->type == x86_x64_Cache::kNull || pcache->numEntries == 0)
const x86_x64::Cache* pcache = x86_x64::Caches(idxCache+idxLevel);
if (pcache->type == x86_x64::Cache::kNull || pcache->numEntries == 0)
continue;
CScriptVal cache;
scriptInterface.Eval("({})", cache);
@ -73,7 +73,7 @@ CScriptVal ConvertTLBs(ScriptInterface& scriptInterface)
scriptInterface.Eval("[]", ret);
for(size_t i = 0; ; i++)
{
const x86_x64_Cache* ptlb = x86_x64_Caches(TLB+i);
const x86_x64::Cache* ptlb = x86_x64::Caches(x86_x64::TLB+i);
if (!ptlb)
break;
CScriptVal tlb;
@ -229,10 +229,10 @@ void RunHardwareDetection()
scriptInterface.SetProperty(settings.get(), "cpu_largepagesize", (u32)os_cpu_LargePageSize());
scriptInterface.SetProperty(settings.get(), "cpu_numprocs", (u32)os_cpu_NumProcessors());
#if ARCH_X86_X64
scriptInterface.SetProperty(settings.get(), "cpu_numpackages", (u32)cpu_topology_NumPackages());
scriptInterface.SetProperty(settings.get(), "cpu_coresperpackage", (u32)cpu_topology_CoresPerPackage());
scriptInterface.SetProperty(settings.get(), "cpu_logicalpercore", (u32)cpu_topology_LogicalPerCore());
scriptInterface.SetProperty(settings.get(), "cpu_numcaches", (u32)cache_topology_NumCaches());
scriptInterface.SetProperty(settings.get(), "cpu_numpackages", (u32)topology::NumPackages());
scriptInterface.SetProperty(settings.get(), "cpu_coresperpackage", (u32)topology::CoresPerPackage());
scriptInterface.SetProperty(settings.get(), "cpu_logicalpercore", (u32)topology::LogicalPerCore());
scriptInterface.SetProperty(settings.get(), "cpu_numcaches", (u32)topology::NumCaches());
#endif
scriptInterface.SetProperty(settings.get(), "numa_numnodes", (u32)numa_NumNodes());
@ -244,21 +244,21 @@ void RunHardwareDetection()
scriptInterface.SetProperty(settings.get(), "ram_free", (u32)os_cpu_MemoryAvailable());
#if ARCH_X86_X64
scriptInterface.SetProperty(settings.get(), "x86_frequency", x86_x64_ClockFrequency());
scriptInterface.SetProperty(settings.get(), "x86_frequency", x86_x64::ClockFrequency());
scriptInterface.SetProperty(settings.get(), "x86_vendor", (u32)x86_x64_Vendor());
scriptInterface.SetProperty(settings.get(), "x86_model", (u32)x86_x64_Model());
scriptInterface.SetProperty(settings.get(), "x86_family", (u32)x86_x64_Family());
scriptInterface.SetProperty(settings.get(), "x86_vendor", (u32)x86_x64::Vendor());
scriptInterface.SetProperty(settings.get(), "x86_model", (u32)x86_x64::Model());
scriptInterface.SetProperty(settings.get(), "x86_family", (u32)x86_x64::Family());
u32 caps0, caps1, caps2, caps3;
x86_x64_caps(&caps0, &caps1, &caps2, &caps3);
x86_x64::GetCapBits(&caps0, &caps1, &caps2, &caps3);
scriptInterface.SetProperty(settings.get(), "x86_caps[0]", caps0);
scriptInterface.SetProperty(settings.get(), "x86_caps[1]", caps1);
scriptInterface.SetProperty(settings.get(), "x86_caps[2]", caps2);
scriptInterface.SetProperty(settings.get(), "x86_caps[3]", caps3);
scriptInterface.SetProperty(settings.get(), "x86_icaches", ConvertCaches(scriptInterface, L1I));
scriptInterface.SetProperty(settings.get(), "x86_dcaches", ConvertCaches(scriptInterface, L1D));
scriptInterface.SetProperty(settings.get(), "x86_icaches", ConvertCaches(scriptInterface, x86_x64::L1I));
scriptInterface.SetProperty(settings.get(), "x86_dcaches", ConvertCaches(scriptInterface, x86_x64::L1D));
scriptInterface.SetProperty(settings.get(), "x86_tlbs", ConvertTLBs(scriptInterface));
#endif

View File

@ -99,10 +99,10 @@ void WriteSystemInfo()
fprintf(f, "OS : %s %s (%s)\n", un.sysname, un.release, un.version);
// CPU
fprintf(f, "CPU : %s, %s (%dx%dx%d)", un.machine, cpu_IdentifierString(), (int)cpu_topology_NumPackages(), (int)cpu_topology_CoresPerPackage(), (int)cpu_topology_LogicalPerCore());
fprintf(f, "CPU : %s, %s (%dx%dx%d)", un.machine, cpu_IdentifierString(), (int)topology::NumPackages(), (int)topology::CoresPerPackage(), (int)topology::LogicalPerCore());
double cpuClock = os_cpu_ClockFrequency(); // query OS (may fail)
if(cpuClock <= 0.0)
cpuClock = x86_x64_ClockFrequency(); // measure (takes a few ms)
cpuClock = x86_x64::ClockFrequency(); // measure (takes a few ms)
if(cpuClock > 0.0)
{
if(cpuClock < 1e9)

View File

@ -53,7 +53,7 @@ static bool g_EnableSSE = false;
void ModelRenderer::Init()
{
#if ARCH_X86_X64
if (x86_x64_cap(X86_X64_CAP_SSE))
if (x86_x64::Cap(x86_x64::CAP_SSE))
g_EnableSSE = true;
#endif
}