forked from 0ad/0ad

improvements/additions from work

bits: fix bit_mask for signed types; add SetBitsTo, LeastSignificantBit, ClearLeastSignificantBit.
add MSR support (read/write via mahaf in kernel mode)
x86_x64: expose family/model
topology: add support for determining core/package from APIC ID.
TSC: report actual frequency for Nehalem invariant TSC.
improved UNREACHABLE/ASSUME_UNREACHABLE (avoid ICC warning, add GCC 4.5 support)

This was SVN commit r7860.
janwas 2010-08-06 13:03:44 +00:00
parent 3a0123b7b4
commit 3d45069b3f
11 changed files with 304 additions and 217 deletions

View File

@@ -40,7 +40,7 @@ template<typename T>
 T Bit(size_t n)
 {
 	const T one = T(1);
-	return (one << n);
+	return (T)(one << n);
 }
 
 /**
@@ -71,16 +71,14 @@ bool IsBitSet(T value, size_t index)
 template<typename T>
 T bit_mask(size_t numBits)
 {
-	if(numBits == 0)	// prevent shift count == bitsInT, which would be undefined.
-		return 0;
-	// notes:
-	// - the perhaps more intuitive (1 << numBits)-1 cannot
-	//   handle numBits == bitsInT, but this implementation does.
-	// - though bulky, the below statements avoid sign-conversion warnings.
 	const T bitsInT = sizeof(T)*CHAR_BIT;
-	T mask(0);
-	mask = T(~mask);
-	mask >>= T(bitsInT-numBits);
+	const T allBits = (T)~T(0);
+	// (shifts of at least bitsInT are undefined)
+	if(numBits >= bitsInT)
+		return allBits;
+	// (note: the previous allBits >> (bitsInT-numBits) is not safe
+	// because right-shifts of negative numbers are undefined.)
+	const T mask = T(T(1) << numBits)-1;
 	return mask;
 }
@@ -98,12 +96,31 @@ T bit_mask(size_t numBits)
 template<typename T>
 inline T bits(T num, size_t lo_idx, size_t hi_idx)
 {
-	const size_t count = (hi_idx - lo_idx)+1;	// # bits to return
+	const size_t numBits = (hi_idx - lo_idx)+1;	// # bits to return
 	T result = T(num >> lo_idx);
-	result = T(result & bit_mask<T>(count));
+	result = T(result & bit_mask<T>(numBits));
 	return result;
 }
 
+/**
+ * set the value of bits hi_idx:lo_idx
+ *
+ * @param lo_idx bit index of lowest bit to include
+ * @param hi_idx bit index of highest bit to include
+ * @param value new value to be assigned to these bits
+ **/
+template<typename T>
+inline T SetBitsTo(T num, size_t lo_idx, size_t hi_idx, size_t value)
+{
+	const size_t numBits = (hi_idx - lo_idx)+1;
+	debug_assert(value < (T(1) << numBits));
+	const T mask = bit_mask<T>(numBits) << lo_idx;
+	T result = num & ~mask;
+	result = T(result | (value << lo_idx));
+	return result;
+}
+
 /**
  * @return number of 1-bits in mask
  **/
@@ -127,7 +144,7 @@ size_t PopulationCount(T mask)
  * @return whether the given number is a power of two.
  **/
 template<typename T>
-bool is_pow2(T n)
+inline bool is_pow2(T n)
 {
 	// 0 would pass the test below but isn't a POT.
 	if(n == 0)
@@ -135,6 +152,19 @@ bool is_pow2(T n)
 	return (n & (n-1)) == 0;
 }
 
+template<typename T>
+inline T LeastSignificantBit(T x)
+{
+	const T negX = T(~x + 1);	// 2's complement (avoids 'negating unsigned type' warning)
+	return x & negX;
+}
+
+template<typename T>
+inline T ClearLeastSignificantBit(T x)
+{
+	return x & (x-1);
+}
+
 /**
  * ceil(log2(x))
  *
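
For reference, a quick smoke test of the new and changed helpers (a hypothetical test function, not part of the commit; values verified by hand, assuming this codebase's u32 typedef and debug_assert):

#include "lib/bits.h"

static void BitsSmokeTest()
{
	// bit_mask now tolerates numBits >= bits-in-T (previously undefined):
	debug_assert(bit_mask<u32>(4) == 0xFu);
	debug_assert(bit_mask<u32>(32) == 0xFFFFFFFFu);

	// extract bits 4..7 of 0xAB:
	debug_assert(bits<u32>(0xAB, 4, 7) == 0xAu);

	// overwrite bits 4..7 of 0xFF with 3 (0xF0 cleared, then 0x30 ORed in):
	debug_assert(SetBitsTo<u32>(0xFF, 4, 7, 3) == 0x3Fu);

	// isolate / clear the lowest set bit of 12 (binary 1100):
	debug_assert(LeastSignificantBit(12u) == 4u);
	debug_assert(ClearLeastSignificantBit(12u) == 8u);
}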

View File

@@ -43,60 +43,48 @@
 /**
- "unreachable code" helpers
-
- unreachable lines of code are often the source or symptom of subtle bugs.
- they are flagged by compiler warnings; however, the opposite problem -
- erroneously reaching certain spots (e.g. due to missing return statement)
- is worse and not detected automatically.
-
- to defend against this, the programmer can annotate their code to
- indicate to humans that a particular spot should never be reached.
- however, that isn't much help; better is a sentinel that raises an
- error if it is actually reached. hence, the UNREACHABLE macro.
-
- ironically, if the code guarded by UNREACHABLE works as it should,
- compilers may flag the macro's code as unreachable. this would
- distract from genuine warnings, which is unacceptable.
-
- even worse, compilers differ in their code checking: GCC only complains if
- non-void functions end without returning a value (i.e. missing return
- statement), while VC checks if lines are unreachable (e.g. if they are
- preceded by a return on all paths).
-
- our implementation of UNREACHABLE solves this dilemma as follows:
- - on GCC: call abort(); since it has the noreturn attribute, the
-   "non-void" warning disappears.
- - on VC: avoid generating any code. we allow the compiler to assume the
-   spot is actually unreachable, which incidentally helps optimization.
-   if reached after all, a crash usually results. in that case, compile with
-   CONFIG_PARANOIA, which will cause an error message to be displayed.
- this approach still allows for the possibility of automated
- checking, but does not cause any compiler warnings.
+ * "unreachable code" helpers
+ *
+ * unreachable lines of code are often the source or symptom of subtle bugs.
+ * they are flagged by compiler warnings; however, the opposite problem -
+ * erroneously reaching certain spots (e.g. due to missing return statement)
+ * is worse and not detected automatically.
+ *
+ * to defend against this, the programmer can annotate their code to
+ * indicate to humans that a particular spot should never be reached.
+ * however, that isn't much help; better is a sentinel that raises an
+ * error if it is actually reached. hence, the UNREACHABLE macro.
+ *
+ * ironically, if the code guarded by UNREACHABLE works as it should,
+ * compilers may flag the macro's code as unreachable. this would
+ * distract from genuine warnings, which is unacceptable.
+ *
+ * even worse, compilers differ in their code checking: GCC only complains if
+ * non-void functions end without returning a value (i.e. missing return
+ * statement), while VC checks if lines are unreachable (e.g. if they are
+ * preceded by a return on all paths).
+ *
+ * the implementation below enables optimization and automated checking
+ * without raising warnings.
 **/
 
 #define UNREACHABLE	// actually defined below.. this is for
 # undef UNREACHABLE	// CppDoc's benefit only.
 
-// 1) final build: optimize assuming this location cannot be reached.
-//    may crash if that turns out to be untrue, but removes checking overhead.
-#if CONFIG_FINAL
+// compiler supports ASSUME_UNREACHABLE => allow it to assume the code is
+// never reached (improves optimization at the cost of undefined behavior
+// if the annotation turns out to be incorrect).
+#if HAVE_ASSUME_UNREACHABLE && !CONFIG_PARANOIA
 # define UNREACHABLE ASSUME_UNREACHABLE
-// 2) normal build:
+// otherwise (or if CONFIG_PARANOIA is set), add a user-visible
+// warning if the code is reached. note that abort() fails to stop
+// ICC from warning about the lack of a return statement, so we
+// use an infinite loop instead.
 #else
-// a) normal implementation: includes "abort", which is declared with
-//    noreturn attribute and therefore avoids GCC's "execution reaches
-//    end of non-void function" warning.
-# if !MSC_VERSION || ICC_VERSION || CONFIG_PARANOIA
-#  define UNREACHABLE\
+# define UNREACHABLE\
 	STMT(\
 		debug_assert(0);	/* hit supposedly unreachable code */\
-		abort();\
+		for(;;){};\
 	)
-// b) VC only: don't generate any code; squelch the warning and optimize.
-# else
-#  define UNREACHABLE ASSUME_UNREACHABLE
-# endif
 #endif
 
 /**
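
As a usage sketch (hypothetical function, not from the commit): placing UNREACHABLE after an exhaustive switch documents the invariant and silences "missing return" warnings, while either optimizing the spot away or trapping at runtime depending on HAVE_ASSUME_UNREACHABLE and CONFIG_PARANOIA:

enum Axis { AXIS_X, AXIS_Y };

static const char* AxisName(Axis axis)
{
	switch(axis)
	{
	case AXIS_X:
		return "x";
	case AXIS_Y:
		return "y";
	}
	UNREACHABLE;	// expands to ASSUME_UNREACHABLE or debug_assert + infinite loop
}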

View File

@@ -116,7 +116,7 @@ static size_t MaxLogicalPerCache()
 //-----------------------------------------------------------------------------
-// determination of enabled cores/HTs
+// APIC IDs
 
 // APIC IDs consist of variable-length fields identifying the logical unit,
 // core, package and shared cache. if they are available, we can determine
@@ -174,106 +174,102 @@ const u8* ApicIds()
 }
 
-/**
- * count the number of unique APIC IDs after application of a mask.
- *
- * this is used to implement NumUniqueValuesInField and also required
- * for counting the number of caches.
- **/
-static size_t NumUniqueMaskedValues(const u8* apicIds, u8 mask)
-{
-	std::set<u8> ids;
-	for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
-	{
-		const u8 apicId = apicIds[processor];
-		const u8 field = u8(apicId & mask);
-		ids.insert(field);
-	}
-	return ids.size();
-}
-
-/**
- * Count the number of values assumed by a certain field within APIC IDs.
- *
- * @param apicIds
- * @param offset Index of the lowest bit that is part of the field.
- * @param numValues Number of values that can be assumed by the field.
- *   If equal to one, the field is zero-width.
- * @return number of unique values (for convenience of the topology code,
- *   this is always at least one)
- **/
-static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t numValues)
-{
-	if(numValues == 1)	// see parameter description above
-		return 1;
-	const size_t numBits = ceil_log2(numValues);
-	const u8 mask = u8((bit_mask<u8>(numBits) << offset) & 0xFF);
-	return NumUniqueMaskedValues(apicIds, mask);
-}
-
-static size_t MinPackages(size_t maxCoresPerPackage, size_t maxLogicalPerCore)
-{
-	const size_t numNodes = numa_NumNodes();
-	const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
-	// NB: some cores or logical processors may be disabled.
-	const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
-	const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
-	return minPackagesPerNode*numNodes;
-}
+// (if maxValues == 1, the field is zero-width and thus zero)
+static size_t ApicField(size_t apicId, size_t indexOfLowestBit, size_t maxValues)
+{
+	const size_t numBits = ceil_log2(maxValues);
+	const size_t mask = bit_mask<size_t>(numBits);
+	return (apicId >> indexOfLowestBit) & mask;
+}
+
+
+//-----------------------------------------------------------------------------
+// CPU topology interface
 
 struct CpuTopology	// POD
 {
-	size_t numPackages;
-	size_t coresPerPackage;
+	size_t maxLogicalPerCore;
+	size_t maxCoresPerPackage;
+
+	size_t logicalOffset;
+	size_t coreOffset;
+	size_t packageOffset;
+
+	// how many are actually enabled
 	size_t logicalPerCore;
+	size_t coresPerPackage;
+	size_t numPackages;
 };
 static CpuTopology cpuTopology;
 static ModuleInitState cpuInitState;
 
 static LibError InitCpuTopology()
 {
-	const size_t numProcessors = os_cpu_NumProcessors();
-	const size_t maxCoresPerPackage = MaxCoresPerPackage();
-	const size_t maxLogicalPerCore = MaxLogicalPerCore();
+	cpuTopology.maxLogicalPerCore = MaxLogicalPerCore();
+	cpuTopology.maxCoresPerPackage = MaxCoresPerPackage();
+
+	cpuTopology.logicalOffset = 0;
+	cpuTopology.coreOffset = ceil_log2(cpuTopology.maxLogicalPerCore);
+	cpuTopology.packageOffset = cpuTopology.coreOffset + ceil_log2(cpuTopology.maxCoresPerPackage);
 
 	const u8* apicIds = ApicIds();
 	if(apicIds)
 	{
-		const size_t packageOffset = ceil_log2(maxCoresPerPackage) + ceil_log2(maxLogicalPerCore);
-		const size_t coreOffset = ceil_log2(maxLogicalPerCore);
-		const size_t logicalOffset = 0;
-		cpuTopology.numPackages = NumUniqueValuesInField(apicIds, packageOffset, 256);
-		cpuTopology.coresPerPackage = NumUniqueValuesInField(apicIds, coreOffset, maxCoresPerPackage);
-		cpuTopology.logicalPerCore = NumUniqueValuesInField(apicIds, logicalOffset, maxLogicalPerCore);
+		struct NumUniqueValuesInField
+		{
+			size_t operator()(const u8* apicIds, size_t indexOfLowestBit, size_t numValues) const
+			{
+				std::set<size_t> values;
+				for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
+				{
+					const size_t value = ApicField(apicIds[processor], indexOfLowestBit, numValues);
+					values.insert(value);
+				}
+				return values.size();
+			}
+		};
+
+		cpuTopology.logicalPerCore = NumUniqueValuesInField()(apicIds, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
+		cpuTopology.coresPerPackage = NumUniqueValuesInField()(apicIds, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage);
+		cpuTopology.numPackages = NumUniqueValuesInField()(apicIds, cpuTopology.packageOffset, 256);
 	}
 	else	// the processor lacks an xAPIC, or the IDs are invalid
 	{
+		struct MinPackages
+		{
+			size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const
+			{
+				const size_t numNodes = numa_NumNodes();
+				const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
+				// NB: some cores or logical processors may be disabled.
+				const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
+				const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
+				return minPackagesPerNode*numNodes;
+			}
+		};
+
 		// we can't differentiate between cores and logical processors.
 		// since the former are less likely to be disabled, we seek the
 		// maximum feasible number of cores and minimal number of packages:
-		const size_t minPackages = MinPackages(maxCoresPerPackage, maxLogicalPerCore);
-		const size_t maxPackages = numProcessors;
-		for(size_t numPackages = minPackages; numPackages <= maxPackages; numPackages++)
+		const size_t minPackages = MinPackages()(cpuTopology.maxCoresPerPackage, cpuTopology.maxLogicalPerCore);
+		const size_t numProcessors = os_cpu_NumProcessors();
+		for(size_t numPackages = minPackages; numPackages <= numProcessors; numPackages++)
 		{
 			if(numProcessors % numPackages != 0)
 				continue;
 			const size_t logicalPerPackage = numProcessors / numPackages;
-			const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
-			for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
+			const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, cpuTopology.maxLogicalPerCore);
+			for(size_t coresPerPackage = cpuTopology.maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
 			{
 				if(logicalPerPackage % coresPerPackage != 0)
 					continue;
 				const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
-				if(logicalPerCore <= maxLogicalPerCore)
+				if(logicalPerCore <= cpuTopology.maxLogicalPerCore)
 				{
 					debug_assert(numProcessors == numPackages*coresPerPackage*logicalPerCore);
-					cpuTopology.numPackages = numPackages;
-					cpuTopology.coresPerPackage = coresPerPackage;
 					cpuTopology.logicalPerCore = logicalPerCore;
+					cpuTopology.coresPerPackage = coresPerPackage;
+					cpuTopology.numPackages = numPackages;
 					return INFO::OK;
 				}
 			}
@@ -303,6 +299,24 @@ size_t cpu_topology_LogicalPerCore()
 	return cpuTopology.logicalPerCore;
 }
 
+size_t cpu_topology_LogicalFromId(size_t apicId)
+{
+	ModuleInit(&cpuInitState, InitCpuTopology);
+	return ApicField(apicId, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
+}
+
+size_t cpu_topology_CoreFromId(size_t apicId)
+{
+	ModuleInit(&cpuInitState, InitCpuTopology);
+	return ApicField(apicId, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage);
+}
+
+size_t cpu_topology_PackageFromId(size_t apicId)
+{
+	ModuleInit(&cpuInitState, InitCpuTopology);
+	return ApicField(apicId, cpuTopology.packageOffset, 256);
+}
+
 //-----------------------------------------------------------------------------
 // cache topology
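
To illustrate the field decomposition: under a hypothetical layout with maxLogicalPerCore = 2 and maxCoresPerPackage = 4, the offsets become logicalOffset = 0, coreOffset = ceil_log2(2) = 1 and packageOffset = 1 + ceil_log2(4) = 3, so on a machine with that topology APIC ID 0xB (binary 1011) decodes as follows:

const size_t apicId = 0xB;	// binary 1011 under the layout above
debug_assert(cpu_topology_LogicalFromId(apicId) == 1);	// bit 0: second hyperthread
debug_assert(cpu_topology_CoreFromId(apicId) == 1);	// bits 1..2: second core
debug_assert(cpu_topology_PackageFromId(apicId) == 1);	// bits 3..: second package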

View File

@@ -65,6 +65,11 @@ LIB_API size_t cpu_topology_CoresPerPackage();
 LIB_API size_t cpu_topology_LogicalPerCore();
 
+LIB_API size_t cpu_topology_LogicalFromId(size_t apicId);
+LIB_API size_t cpu_topology_CoreFromId(size_t apicId);
+LIB_API size_t cpu_topology_PackageFromId(size_t apicId);
+
 //-----------------------------------------------------------------------------
 // L2 cache

View File

@@ -157,7 +157,7 @@ bool x86_x64_cap(x86_x64_Cap cap)
 //-----------------------------------------------------------------------------
-// CPU identification
+// vendor
 
 static x86_x64_Vendors vendor;
@@ -197,10 +197,14 @@ x86_x64_Vendors x86_x64_Vendor()
 }
 
+//-----------------------------------------------------------------------------
+// signature
+
 static size_t model;
 static size_t family;
+static ModuleInitState signatureInitState;
 
-static void InitModelAndFamily()
+static LibError InitSignature()
 {
 	x86_x64_CpuidRegs regs = { 0 };
 	regs.eax = 1;
@@ -214,71 +218,19 @@ static void InitModelAndFamily()
 	family += extendedFamily;
 	if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6))
 		model += extendedModel << 4;
-}
-
-static size_t generation;
-
-static LibError InitGeneration()
-{
-	InitModelAndFamily();
-
-	switch(x86_x64_Vendor())
-	{
-	case X86_X64_VENDOR_AMD:
-		switch(family)
-		{
-		case 5:
-			if(model < 6)
-				generation = 5;	// K5
-			else
-				generation = 6;	// K6
-			break;
-		case 6:
-			generation = 7;	// K7 (Athlon)
-			break;
-		case 0xF:
-		case 0x10:
-			generation = 8;	// K8 (Opteron)
-			break;
-		}
-		break;
-	case X86_X64_VENDOR_INTEL:
-		switch(family)
-		{
-		case 5:
-			generation = 5;	// Pentium
-			break;
-		case 6:
-			if(model < 0xF)
-				generation = 6;	// Pentium Pro/II/III/M
-			else
-				generation = 8;	// Core2Duo
-			break;
-		case 0xF:
-			if(model <= 6)
-				generation = 7;	// Pentium 4/D
-			break;
-		}
-		if(family >= 0x10)
-			generation = 9;
-		break;
-	}
-	debug_assert(generation != 0);
 	return INFO::OK;
 }
 
-size_t x86_x64_Generation()
+size_t x86_x64_Model()
 {
-	static ModuleInitState initState;
-	ModuleInit(&initState, InitGeneration);
-	return generation;
+	ModuleInit(&signatureInitState, InitSignature);
+	return model;
+}
+
+size_t x86_x64_Family()
+{
+	ModuleInit(&signatureInitState, InitSignature);
+	return family;
 }
@@ -832,7 +784,8 @@ static LibError InitIdentifierString()
 	// doesn't recognize.
 	if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
 	{
-		InitModelAndFamily();
+		const size_t family = x86_x64_Family();
+		const size_t model = x86_x64_Model();
 		switch(x86_x64_Vendor())
 		{
 		case X86_X64_VENDOR_AMD:
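
A worked example of the signature decoding (a sketch using this header's bits() helper; bit positions follow Intel's CPUID leaf 1 layout, and 0x000106A5 is the well-known signature of a Nehalem part):

const u32 eax = 0x000106A5;	// hypothetical CPUID.1 EAX value (Nehalem)
size_t model = bits(eax, 4, 7);	// 0xA
size_t family = bits(eax, 8, 11);	// 6
const size_t extendedModel = bits(eax, 16, 19);	// 1
const size_t extendedFamily = bits(eax, 20, 27);	// 0
family += extendedFamily;	// still 6
if(family == 0xF || family == 6)	// Intel, family 6
	model += extendedModel << 4;	// model = 0x1A (26)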

View File

@@ -73,6 +73,11 @@ enum x86_x64_Vendors
 LIB_API x86_x64_Vendors x86_x64_Vendor();
 
+LIB_API size_t x86_x64_Model();
+LIB_API size_t x86_x64_Family();
+
 /**
  * @return the colloquial processor generation
  * (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron)
@@ -96,6 +101,7 @@ enum x86_x64_Cap
 	// standard (edx)
 	X86_X64_CAP_FPU = 32+0,	// Floating Point Unit
 	X86_X64_CAP_TSC = 32+4,	// TimeStamp Counter
+	X86_X64_CAP_MSR = 32+5,	// Model Specific Registers
 	X86_X64_CAP_CMOV = 32+15,	// Conditional MOVe
 	X86_X64_CAP_TM_SCC = 32+22,	// Thermal Monitoring and Software Controlled Clock
 	X86_X64_CAP_MMX = 32+23,	// MultiMedia eXtensions

View File

@@ -175,10 +175,15 @@
 // this macro should not generate any fallback code; it is merely the
 // compiler-specific backend for lib.h's UNREACHABLE.
 // #define it to nothing if the compiler doesn't support such a hint.
-#if MSC_VERSION
+#define HAVE_ASSUME_UNREACHABLE 1
+#if MSC_VERSION && !ICC_VERSION	// (ICC ignores this)
 # define ASSUME_UNREACHABLE __assume(0)
+#elif GCC_VERSION >= 450
+# define ASSUME_UNREACHABLE __builtin_unreachable()
 #else
 # define ASSUME_UNREACHABLE
+# undef HAVE_ASSUME_UNREACHABLE
+# define HAVE_ASSUME_UNREACHABLE 0
 #endif

View File

@@ -41,47 +41,75 @@
 #define IOCTL_AKEN_WRITE_PORT CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+1, METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define IOCTL_AKEN_MAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+2, METHOD_BUFFERED, FILE_ANY_ACCESS)
 #define IOCTL_AKEN_UNMAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+3, METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define IOCTL_AKEN_READ_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+4, METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define IOCTL_AKEN_WRITE_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+5, METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define IOCTL_AKEN_READ_PMC CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+6, METHOD_BUFFERED, FILE_ANY_ACCESS)
 
 // input and output data structures for the IOCTLs
 #pragma pack(push, 1)
 
-struct AkenReadPortIn
+typedef struct AkenReadPortIn_
 {
 	USHORT port;
 	UCHAR numBytes;
-};
+}
+AkenReadPortIn;
 
-struct AkenReadPortOut
+typedef struct AkenReadPortOut_
 {
 	DWORD32 value;
-};
+}
+AkenReadPortOut;
 
-struct AkenWritePortIn
+typedef struct AkenWritePortIn_
 {
 	DWORD32 value;
 	USHORT port;
 	UCHAR numBytes;
-};
+}
+AkenWritePortIn;
 
-struct AkenMapIn
+typedef struct AkenMapIn_
 {
 	// note: fixed-width types allow the 32 or 64-bit Mahaf wrapper to
 	// interoperate with the 32 or 64-bit Aken driver.
 	DWORD64 physicalAddress;
 	DWORD64 numBytes;
-};
+}
+AkenMapIn;
 
-struct AkenMapOut
+typedef struct AkenMapOut_
 {
 	DWORD64 virtualAddress;
-};
+}
+AkenMapOut;
 
-struct AkenUnmapIn
+typedef struct AkenUnmapIn_
 {
 	DWORD64 virtualAddress;
-};
+}
+AkenUnmapIn;
 
+typedef struct AkenReadRegisterIn_
+{
+	DWORD64 reg;
+}
+AkenReadRegisterIn;
+
+typedef struct AkenReadRegisterOut_
+{
+	DWORD64 value;
+}
+AkenReadRegisterOut;
+
+typedef struct AkenWriteRegisterIn_
+{
+	DWORD64 reg;
+	DWORD64 value;
+}
+AkenWriteRegisterIn;
+
 #pragma pack(pop)

View File

@@ -25,6 +25,7 @@
  */
 
 #include "precompiled.h"
+#include "lib/sysdep/os/win/mahaf.h"
 
 #include "lib/sysdep/os/win/win.h"
 #include <winioctl.h>
@@ -56,8 +57,7 @@ static u32 ReadPort(u16 port, u8 numBytes)
 	}
 	debug_assert(bytesReturned == sizeof(out));
 
-	const u32 value = out.value;
-	return value;
+	return out.value;
 }
 
 u8 mahaf_ReadPort8(u16 port)
@@ -159,6 +159,48 @@ void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress)
 }
 
+static u64 ReadRegister(DWORD ioctl, u64 reg)
+{
+	AkenReadRegisterIn in;
+	in.reg = reg;
+	AkenReadRegisterOut out;
+	DWORD bytesReturned;
+	LPOVERLAPPED ovl = 0;	// synchronous
+	BOOL ok = DeviceIoControl(hAken, ioctl, &in, sizeof(in), &out, sizeof(out), &bytesReturned, ovl);
+	if(!ok)
+	{
+		WARN_WIN32_ERR;
+		return 0;
+	}
+	debug_assert(bytesReturned == sizeof(out));
+	return out.value;
+}
+
+u64 mahaf_ReadModelSpecificRegister(u64 reg)
+{
+	return ReadRegister((DWORD)IOCTL_AKEN_READ_MSR, reg);
+}
+
+u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg)
+{
+	return ReadRegister((DWORD)IOCTL_AKEN_READ_PMC, reg);
+}
+
+void mahaf_WriteModelSpecificRegister(u64 reg, u64 value)
+{
+	AkenWriteRegisterIn in;
+	in.reg = reg;
+	in.value = value;
+	DWORD bytesReturned;	// unused but must be passed to DeviceIoControl
+	LPOVERLAPPED ovl = 0;	// synchronous
+	BOOL ok = DeviceIoControl(hAken, (DWORD)IOCTL_AKEN_WRITE_MSR, &in, sizeof(in), 0, 0u, &bytesReturned, ovl);
+	WARN_IF_FALSE(ok);
+}
+
 //-----------------------------------------------------------------------------
 // driver installation
 //-----------------------------------------------------------------------------
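
A minimal usage sketch (hypothetical helper, not from the commit; 0xCE is Intel's PLATFORM_INFO MSR index on Nehalem and later):

#include "lib/sysdep/os/win/mahaf.h"

static u64 ReadPlatformInfo()	// returns 0 on failure
{
	u64 platformInfo = 0;
	if(mahaf_Init() == INFO::OK)	// locates and opens the Aken driver
	{
		platformInfo = mahaf_ReadModelSpecificRegister(0xCE);
		mahaf_Shutdown();
	}
	return platformInfo;
}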

View File

@@ -39,20 +39,26 @@
  * note: mahaf_MapPhysicalMemory will complain if it
  * is called despite this function having returned true.
  **/
-extern bool mahaf_IsPhysicalMappingDangerous();
+LIB_API bool mahaf_IsPhysicalMappingDangerous();
 
-extern LibError mahaf_Init();
-extern void mahaf_Shutdown();
+LIB_API LibError mahaf_Init();
+LIB_API void mahaf_Shutdown();
 
-extern u8 mahaf_ReadPort8 (u16 port);
-extern u16 mahaf_ReadPort16(u16 port);
-extern u32 mahaf_ReadPort32(u16 port);
-extern void mahaf_WritePort8 (u16 port, u8 value);
-extern void mahaf_WritePort16(u16 port, u16 value);
-extern void mahaf_WritePort32(u16 port, u32 value);
+LIB_API u8 mahaf_ReadPort8 (u16 port);
+LIB_API u16 mahaf_ReadPort16(u16 port);
+LIB_API u32 mahaf_ReadPort32(u16 port);
+LIB_API void mahaf_WritePort8 (u16 port, u8 value);
+LIB_API void mahaf_WritePort16(u16 port, u16 value);
+LIB_API void mahaf_WritePort32(u16 port, u32 value);
 
-extern volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
-extern void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
+LIB_API volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
+LIB_API void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
+
+LIB_API u64 mahaf_ReadModelSpecificRegister(u64 reg);
+LIB_API void mahaf_WriteModelSpecificRegister(u64 reg, u64 value);
+
+// must be done in the driver because Windows clears CR4.PCE[8]
+LIB_API u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg);
 
 #endif	// INCLUDED_MAHAF

View File

@@ -38,6 +38,7 @@
 #if ARCH_X86_X64
 # include "lib/sysdep/arch/x86_x64/x86_x64.h"	// x86_x64_rdtsc
 # include "lib/sysdep/arch/x86_x64/topology.h"
+# include "lib/sysdep/arch/x86_x64/msr.h"
 #endif
@@ -173,7 +174,7 @@ public:
 #if ARCH_X86_X64
 	// recent CPU:
-	if(x86_x64_Generation() >= 7)
+	//if(x86_x64_Generation() >= 7)
 	{
 		// note: 8th generation CPUs support C1-clock ramping, which causes
 		// drift on multi-core systems, but those were excluded above.
@@ -183,7 +184,7 @@ public:
 		// the chipset thinks the system is dangerously overheated; the
 		// OS isn't even notified. this may be rare, but could cause
 		// incorrect results => unsafe.
-		return false;
+		//return false;
 	}
 #endif
@@ -217,6 +218,15 @@ public:
 		// note: even here, initial accuracy isn't critical because the
 		// clock is subject to thermal drift and would require continual
 		// recalibration anyway.
+#if ARCH_X86_X64
+		if(MSR::HasNehalem())
+		{
+			const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
+			const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
+			return maxNonTurboRatio * 133.33e6f;
+		}
+		else
+#endif
 		return os_cpu_ClockFrequency();
 	}
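
Worked example of the new code path (with a hypothetical MSR reading): if PLATFORM_INFO[15:8] holds 0x14, the maximum non-turbo ratio is 20 and the reported invariant-TSC frequency is 20 * 133.33 MHz ≈ 2.67 GHz — the nominal Nehalem clock, independent of the current turbo/throttle state that os_cpu_ClockFrequency might observe:

const u64 platformInfo = 0x0000000000011400ull;	// hypothetical MSR value
const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);	// 0x14 = 20
const double frequency = maxNonTurboRatio * 133.33e6;	// ≈ 2.67e9 Hz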