improvements/additions from work
bits: fix bit_mask for signed types; add SetBitsTo, LeastSignificantBit, ClearLeastSignificantBit. add MSR support (read/write via mahaf in kernel mode). x86_x64: expose family/model. topology: add support for determining core/package from APIC ID. TSC: report actual frequency for Nehalem invariant TSC. improved UNREACHABLE/ASSUME_UNREACHABLE (avoid ICC warning, add GCC 4.5 support). This was SVN commit r7860.
This commit is contained in:
parent
3a0123b7b4
commit
3d45069b3f
@ -40,7 +40,7 @@ template<typename T>
|
|||||||
T Bit(size_t n)
|
T Bit(size_t n)
|
||||||
{
|
{
|
||||||
const T one = T(1);
|
const T one = T(1);
|
||||||
return (one << n);
|
return (T)(one << n);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -71,16 +71,14 @@ bool IsBitSet(T value, size_t index)
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
T bit_mask(size_t numBits)
|
T bit_mask(size_t numBits)
|
||||||
{
|
{
|
||||||
if(numBits == 0) // prevent shift count == bitsInT, which would be undefined.
|
|
||||||
return 0;
|
|
||||||
// notes:
|
|
||||||
// - the perhaps more intuitive (1 << numBits)-1 cannot
|
|
||||||
// handle numBits == bitsInT, but this implementation does.
|
|
||||||
// - though bulky, the below statements avoid sign-conversion warnings.
|
|
||||||
const T bitsInT = sizeof(T)*CHAR_BIT;
|
const T bitsInT = sizeof(T)*CHAR_BIT;
|
||||||
T mask(0);
|
const T allBits = (T)~T(0);
|
||||||
mask = T(~mask);
|
// (shifts of at least bitsInT are undefined)
|
||||||
mask >>= T(bitsInT-numBits);
|
if(numBits >= bitsInT)
|
||||||
|
return allBits;
|
||||||
|
// (note: the previous allBits >> (bitsInT-numBits) is not safe
|
||||||
|
// because right-shifts of negative numbers are implementation-defined.)
|
||||||
|
const T mask = T(T(1) << numBits)-1;
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,12 +96,31 @@ T bit_mask(size_t numBits)
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
inline T bits(T num, size_t lo_idx, size_t hi_idx)
|
inline T bits(T num, size_t lo_idx, size_t hi_idx)
|
||||||
{
|
{
|
||||||
const size_t count = (hi_idx - lo_idx)+1; // # bits to return
|
const size_t numBits = (hi_idx - lo_idx)+1; // # bits to return
|
||||||
T result = T(num >> lo_idx);
|
T result = T(num >> lo_idx);
|
||||||
result = T(result & bit_mask<T>(count));
|
result = T(result & bit_mask<T>(numBits));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set the value of bits hi_idx:lo_idx
|
||||||
|
*
|
||||||
|
* @param lo_idx bit index of lowest bit to include
|
||||||
|
* @param hi_idx bit index of highest bit to include
|
||||||
|
* @param value new value to be assigned to these bits
|
||||||
|
**/
|
||||||
|
template<typename T>
|
||||||
|
inline T SetBitsTo(T num, size_t lo_idx, size_t hi_idx, size_t value)
|
||||||
|
{
|
||||||
|
const size_t numBits = (hi_idx - lo_idx)+1;
|
||||||
|
debug_assert(value < (T(1) << numBits));
|
||||||
|
const T mask = bit_mask<T>(numBits) << lo_idx;
|
||||||
|
T result = num & ~mask;
|
||||||
|
result = T(result | (value << lo_idx));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return number of 1-bits in mask
|
* @return number of 1-bits in mask
|
||||||
**/
|
**/
|
||||||
@ -127,7 +144,7 @@ size_t PopulationCount(T mask)
|
|||||||
* @return whether the given number is a power of two.
|
* @return whether the given number is a power of two.
|
||||||
**/
|
**/
|
||||||
template<typename T>
|
template<typename T>
|
||||||
bool is_pow2(T n)
|
inline bool is_pow2(T n)
|
||||||
{
|
{
|
||||||
// 0 would pass the test below but isn't a POT.
|
// 0 would pass the test below but isn't a POT.
|
||||||
if(n == 0)
|
if(n == 0)
|
||||||
@ -135,6 +152,19 @@ bool is_pow2(T n)
|
|||||||
return (n & (n-1)) == 0;
|
return (n & (n-1)) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
inline T LeastSignificantBit(T x)
|
||||||
|
{
|
||||||
|
const T negX = T(~x + 1); // 2's complement (avoids 'negating unsigned type' warning)
|
||||||
|
return x & negX;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
inline T ClearLeastSignificantBit(T x)
|
||||||
|
{
|
||||||
|
return x & (x-1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ceil(log2(x))
|
* ceil(log2(x))
|
||||||
*
|
*
|
||||||
|
@ -43,60 +43,48 @@
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
"unreachable code" helpers
|
* "unreachable code" helpers
|
||||||
|
*
|
||||||
unreachable lines of code are often the source or symptom of subtle bugs.
|
* unreachable lines of code are often the source or symptom of subtle bugs.
|
||||||
they are flagged by compiler warnings; however, the opposite problem -
|
* they are flagged by compiler warnings; however, the opposite problem -
|
||||||
erroneously reaching certain spots (e.g. due to missing return statement)
|
* erroneously reaching certain spots (e.g. due to missing return statement)
|
||||||
is worse and not detected automatically.
|
* is worse and not detected automatically.
|
||||||
|
*
|
||||||
to defend against this, the programmer can annotate their code to
|
* to defend against this, the programmer can annotate their code to
|
||||||
indicate to humans that a particular spot should never be reached.
|
* indicate to humans that a particular spot should never be reached.
|
||||||
however, that isn't much help; better is a sentinel that raises an
|
* however, that isn't much help; better is a sentinel that raises an
|
||||||
error if if it is actually reached. hence, the UNREACHABLE macro.
|
* error if it is actually reached. hence, the UNREACHABLE macro.
|
||||||
|
*
|
||||||
ironically, if the code guarded by UNREACHABLE works as it should,
|
* ironically, if the code guarded by UNREACHABLE works as it should,
|
||||||
compilers may flag the macro's code as unreachable. this would
|
* compilers may flag the macro's code as unreachable. this would
|
||||||
distract from genuine warnings, which is unacceptable.
|
* distract from genuine warnings, which is unacceptable.
|
||||||
|
*
|
||||||
even worse, compilers differ in their code checking: GCC only complains if
|
* even worse, compilers differ in their code checking: GCC only complains if
|
||||||
non-void functions end without returning a value (i.e. missing return
|
* non-void functions end without returning a value (i.e. missing return
|
||||||
statement), while VC checks if lines are unreachable (e.g. if they are
|
* statement), while VC checks if lines are unreachable (e.g. if they are
|
||||||
preceded by a return on all paths).
|
* preceded by a return on all paths).
|
||||||
|
*
|
||||||
our implementation of UNREACHABLE solves this dilemna as follows:
|
* the implementation below enables optimization and automated checking
|
||||||
- on GCC: call abort(); since it has the noreturn attributes, the
|
* without raising warnings.
|
||||||
"non-void" warning disappears.
|
**/
|
||||||
- on VC: avoid generating any code. we allow the compiler to assume the
|
|
||||||
spot is actually unreachable, which incidentally helps optimization.
|
|
||||||
if reached after all, a crash usually results. in that case, compile with
|
|
||||||
CONFIG_PARANOIA, which will cause an error message to be displayed.
|
|
||||||
|
|
||||||
this approach still allows for the possiblity of automated
|
|
||||||
checking, but does not cause any compiler warnings.
|
|
||||||
**/
|
|
||||||
#define UNREACHABLE // actually defined below.. this is for
|
#define UNREACHABLE // actually defined below.. this is for
|
||||||
# undef UNREACHABLE // CppDoc's benefit only.
|
# undef UNREACHABLE // CppDoc's benefit only.
|
||||||
|
|
||||||
// 1) final build: optimize assuming this location cannot be reached.
|
// compiler supports ASSUME_UNREACHABLE => allow it to assume the code is
|
||||||
// may crash if that turns out to be untrue, but removes checking overhead.
|
// never reached (improves optimization at the cost of undefined behavior
|
||||||
#if CONFIG_FINAL
|
// if the annotation turns out to be incorrect).
|
||||||
|
#if HAVE_ASSUME_UNREACHABLE && !CONFIG_PARANOIA
|
||||||
# define UNREACHABLE ASSUME_UNREACHABLE
|
# define UNREACHABLE ASSUME_UNREACHABLE
|
||||||
// 2) normal build:
|
// otherwise (or if CONFIG_PARANOIA is set), add a user-visible
|
||||||
|
// warning if the code is reached. note that abort() fails to stop
|
||||||
|
// ICC from warning about the lack of a return statement, so we
|
||||||
|
// use an infinite loop instead.
|
||||||
#else
|
#else
|
||||||
// a) normal implementation: includes "abort", which is declared with
|
# define UNREACHABLE\
|
||||||
// noreturn attribute and therefore avoids GCC's "execution reaches
|
|
||||||
// end of non-void function" warning.
|
|
||||||
# if !MSC_VERSION || ICC_VERSION || CONFIG_PARANOIA
|
|
||||||
# define UNREACHABLE\
|
|
||||||
STMT(\
|
STMT(\
|
||||||
debug_assert(0); /* hit supposedly unreachable code */\
|
debug_assert(0); /* hit supposedly unreachable code */\
|
||||||
abort();\
|
for(;;){};\
|
||||||
)
|
)
|
||||||
// b) VC only: don't generate any code; squelch the warning and optimize.
|
|
||||||
# else
|
|
||||||
# define UNREACHABLE ASSUME_UNREACHABLE
|
|
||||||
# endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -116,7 +116,7 @@ static size_t MaxLogicalPerCache()
|
|||||||
|
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// determination of enabled cores/HTs
|
// APIC IDs
|
||||||
|
|
||||||
// APIC IDs consist of variable-length fields identifying the logical unit,
|
// APIC IDs consist of variable-length fields identifying the logical unit,
|
||||||
// core, package and shared cache. if they are available, we can determine
|
// core, package and shared cache. if they are available, we can determine
|
||||||
@ -174,106 +174,102 @@ const u8* ApicIds()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
// (if maxValues == 1, the field is zero-width and thus zero)
|
||||||
* count the number of unique APIC IDs after application of a mask.
|
static size_t ApicField(size_t apicId, size_t indexOfLowestBit, size_t maxValues)
|
||||||
*
|
|
||||||
* this is used to implement NumUniqueValuesInField and also required
|
|
||||||
* for counting the number of caches.
|
|
||||||
**/
|
|
||||||
static size_t NumUniqueMaskedValues(const u8* apicIds, u8 mask)
|
|
||||||
{
|
{
|
||||||
std::set<u8> ids;
|
const size_t numBits = ceil_log2(maxValues);
|
||||||
for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
|
const size_t mask = bit_mask<size_t>(numBits);
|
||||||
{
|
return (apicId >> indexOfLowestBit) & mask;
|
||||||
const u8 apicId = apicIds[processor];
|
|
||||||
const u8 field = u8(apicId & mask);
|
|
||||||
ids.insert(field);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ids.size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
//-----------------------------------------------------------------------------
|
||||||
* Count the number of values assumed by a certain field within APIC IDs.
|
// CPU topology interface
|
||||||
*
|
|
||||||
* @param apicIds
|
|
||||||
* @param offset Index of the lowest bit that is part of the field.
|
|
||||||
* @param numValues Number of values that can be assumed by the field.
|
|
||||||
* If equal to one, the field is zero-width.
|
|
||||||
* @return number of unique values (for convenience of the topology code,
|
|
||||||
* this is always at least one)
|
|
||||||
**/
|
|
||||||
static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t numValues)
|
|
||||||
{
|
|
||||||
if(numValues == 1) // see parameter description above
|
|
||||||
return 1;
|
|
||||||
const size_t numBits = ceil_log2(numValues);
|
|
||||||
const u8 mask = u8((bit_mask<u8>(numBits) << offset) & 0xFF);
|
|
||||||
return NumUniqueMaskedValues(apicIds, mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static size_t MinPackages(size_t maxCoresPerPackage, size_t maxLogicalPerCore)
|
|
||||||
{
|
|
||||||
const size_t numNodes = numa_NumNodes();
|
|
||||||
const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
|
|
||||||
// NB: some cores or logical processors may be disabled.
|
|
||||||
const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
|
|
||||||
const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
|
|
||||||
return minPackagesPerNode*numNodes;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
struct CpuTopology // POD
|
struct CpuTopology // POD
|
||||||
{
|
{
|
||||||
size_t numPackages;
|
size_t maxLogicalPerCore;
|
||||||
size_t coresPerPackage;
|
size_t maxCoresPerPackage;
|
||||||
|
|
||||||
|
size_t logicalOffset;
|
||||||
|
size_t coreOffset;
|
||||||
|
size_t packageOffset;
|
||||||
|
|
||||||
|
// how many are actually enabled
|
||||||
size_t logicalPerCore;
|
size_t logicalPerCore;
|
||||||
|
size_t coresPerPackage;
|
||||||
|
size_t numPackages;
|
||||||
};
|
};
|
||||||
static CpuTopology cpuTopology;
|
static CpuTopology cpuTopology;
|
||||||
static ModuleInitState cpuInitState;
|
static ModuleInitState cpuInitState;
|
||||||
|
|
||||||
static LibError InitCpuTopology()
|
static LibError InitCpuTopology()
|
||||||
{
|
{
|
||||||
const size_t numProcessors = os_cpu_NumProcessors();
|
cpuTopology.maxLogicalPerCore = MaxLogicalPerCore();
|
||||||
const size_t maxCoresPerPackage = MaxCoresPerPackage();
|
cpuTopology.maxCoresPerPackage = MaxCoresPerPackage();
|
||||||
const size_t maxLogicalPerCore = MaxLogicalPerCore();
|
|
||||||
|
cpuTopology.logicalOffset = 0;
|
||||||
|
cpuTopology.coreOffset = ceil_log2(cpuTopology.maxLogicalPerCore);
|
||||||
|
cpuTopology.packageOffset = cpuTopology.coreOffset + ceil_log2(cpuTopology.maxCoresPerPackage);
|
||||||
|
|
||||||
const u8* apicIds = ApicIds();
|
const u8* apicIds = ApicIds();
|
||||||
if(apicIds)
|
if(apicIds)
|
||||||
{
|
{
|
||||||
const size_t packageOffset = ceil_log2(maxCoresPerPackage) + ceil_log2(maxLogicalPerCore);
|
struct NumUniqueValuesInField
|
||||||
const size_t coreOffset = ceil_log2(maxLogicalPerCore);
|
{
|
||||||
const size_t logicalOffset = 0;
|
size_t operator()(const u8* apicIds, size_t indexOfLowestBit, size_t numValues) const
|
||||||
cpuTopology.numPackages = NumUniqueValuesInField(apicIds, packageOffset, 256);
|
{
|
||||||
cpuTopology.coresPerPackage = NumUniqueValuesInField(apicIds, coreOffset, maxCoresPerPackage);
|
std::set<size_t> values;
|
||||||
cpuTopology.logicalPerCore = NumUniqueValuesInField(apicIds, logicalOffset, maxLogicalPerCore);
|
for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
|
||||||
|
{
|
||||||
|
const size_t value = ApicField(apicIds[processor], indexOfLowestBit, numValues);	// NB: argument order is (apicId, indexOfLowestBit, maxValues)
|
||||||
|
values.insert(value);
|
||||||
|
}
|
||||||
|
return values.size();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
cpuTopology.logicalPerCore = NumUniqueValuesInField()(apicIds, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
|
||||||
|
cpuTopology.coresPerPackage = NumUniqueValuesInField()(apicIds, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage);
|
||||||
|
cpuTopology.numPackages = NumUniqueValuesInField()(apicIds, cpuTopology.packageOffset, 256);
|
||||||
}
|
}
|
||||||
else // the processor lacks an xAPIC, or the IDs are invalid
|
else // the processor lacks an xAPIC, or the IDs are invalid
|
||||||
{
|
{
|
||||||
|
struct MinPackages
|
||||||
|
{
|
||||||
|
size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const
|
||||||
|
{
|
||||||
|
const size_t numNodes = numa_NumNodes();
|
||||||
|
const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0));
|
||||||
|
// NB: some cores or logical processors may be disabled.
|
||||||
|
const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore;
|
||||||
|
const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage);
|
||||||
|
return minPackagesPerNode*numNodes;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// we can't differentiate between cores and logical processors.
|
// we can't differentiate between cores and logical processors.
|
||||||
// since the former are less likely to be disabled, we seek the
|
// since the former are less likely to be disabled, we seek the
|
||||||
// maximum feasible number of cores and minimal number of packages:
|
// maximum feasible number of cores and minimal number of packages:
|
||||||
const size_t minPackages = MinPackages(maxCoresPerPackage, maxLogicalPerCore);
|
const size_t minPackages = MinPackages()(cpuTopology.maxCoresPerPackage, cpuTopology.maxLogicalPerCore);
|
||||||
const size_t maxPackages = numProcessors;
|
const size_t numProcessors = os_cpu_NumProcessors();
|
||||||
for(size_t numPackages = minPackages; numPackages <= maxPackages; numPackages++)
|
for(size_t numPackages = minPackages; numPackages <= numProcessors; numPackages++)
|
||||||
{
|
{
|
||||||
if(numProcessors % numPackages != 0)
|
if(numProcessors % numPackages != 0)
|
||||||
continue;
|
continue;
|
||||||
const size_t logicalPerPackage = numProcessors / numPackages;
|
const size_t logicalPerPackage = numProcessors / numPackages;
|
||||||
const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
|
const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, cpuTopology.maxLogicalPerCore);
|
||||||
for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
|
for(size_t coresPerPackage = cpuTopology.maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
|
||||||
{
|
{
|
||||||
if(logicalPerPackage % coresPerPackage != 0)
|
if(logicalPerPackage % coresPerPackage != 0)
|
||||||
continue;
|
continue;
|
||||||
const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
|
const size_t logicalPerCore = logicalPerPackage / coresPerPackage;
|
||||||
if(logicalPerCore <= maxLogicalPerCore)
|
if(logicalPerCore <= cpuTopology.maxLogicalPerCore)
|
||||||
{
|
{
|
||||||
debug_assert(numProcessors == numPackages*coresPerPackage*logicalPerCore);
|
debug_assert(numProcessors == numPackages*coresPerPackage*logicalPerCore);
|
||||||
cpuTopology.numPackages = numPackages;
|
|
||||||
cpuTopology.coresPerPackage = coresPerPackage;
|
|
||||||
cpuTopology.logicalPerCore = logicalPerCore;
|
cpuTopology.logicalPerCore = logicalPerCore;
|
||||||
|
cpuTopology.coresPerPackage = coresPerPackage;
|
||||||
|
cpuTopology.numPackages = numPackages;
|
||||||
return INFO::OK;
|
return INFO::OK;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -303,6 +299,24 @@ size_t cpu_topology_LogicalPerCore()
|
|||||||
return cpuTopology.logicalPerCore;
|
return cpuTopology.logicalPerCore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t cpu_topology_LogicalFromId(size_t apicId)
|
||||||
|
{
|
||||||
|
ModuleInit(&cpuInitState, InitCpuTopology);
|
||||||
|
return ApicField(apicId, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t cpu_topology_CoreFromId(size_t apicId)
|
||||||
|
{
|
||||||
|
ModuleInit(&cpuInitState, InitCpuTopology);
|
||||||
|
return ApicField(apicId, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t cpu_topology_PackageFromId(size_t apicId)
|
||||||
|
{
|
||||||
|
ModuleInit(&cpuInitState, InitCpuTopology);
|
||||||
|
return ApicField(apicId, cpuTopology.packageOffset, 256);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// cache topology
|
// cache topology
|
||||||
|
@ -65,6 +65,11 @@ LIB_API size_t cpu_topology_CoresPerPackage();
|
|||||||
LIB_API size_t cpu_topology_LogicalPerCore();
|
LIB_API size_t cpu_topology_LogicalPerCore();
|
||||||
|
|
||||||
|
|
||||||
|
LIB_API size_t cpu_topology_LogicalFromId(size_t apicId);
|
||||||
|
LIB_API size_t cpu_topology_CoreFromId(size_t apicId);
|
||||||
|
LIB_API size_t cpu_topology_PackageFromId(size_t apicId);
|
||||||
|
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// L2 cache
|
// L2 cache
|
||||||
|
|
||||||
|
@ -157,7 +157,7 @@ bool x86_x64_cap(x86_x64_Cap cap)
|
|||||||
|
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// CPU identification
|
// vendor
|
||||||
|
|
||||||
static x86_x64_Vendors vendor;
|
static x86_x64_Vendors vendor;
|
||||||
|
|
||||||
@ -197,10 +197,14 @@ x86_x64_Vendors x86_x64_Vendor()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
// signature
|
||||||
|
|
||||||
static size_t model;
|
static size_t model;
|
||||||
static size_t family;
|
static size_t family;
|
||||||
|
static ModuleInitState signatureInitState;
|
||||||
|
|
||||||
static void InitModelAndFamily()
|
static LibError InitSignature()
|
||||||
{
|
{
|
||||||
x86_x64_CpuidRegs regs = { 0 };
|
x86_x64_CpuidRegs regs = { 0 };
|
||||||
regs.eax = 1;
|
regs.eax = 1;
|
||||||
@ -214,71 +218,19 @@ static void InitModelAndFamily()
|
|||||||
family += extendedFamily;
|
family += extendedFamily;
|
||||||
if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6))
|
if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6))
|
||||||
model += extendedModel << 4;
|
model += extendedModel << 4;
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static size_t generation;
|
|
||||||
|
|
||||||
static LibError InitGeneration()
|
|
||||||
{
|
|
||||||
InitModelAndFamily();
|
|
||||||
|
|
||||||
switch(x86_x64_Vendor())
|
|
||||||
{
|
|
||||||
case X86_X64_VENDOR_AMD:
|
|
||||||
switch(family)
|
|
||||||
{
|
|
||||||
case 5:
|
|
||||||
if(model < 6)
|
|
||||||
generation = 5; // K5
|
|
||||||
else
|
|
||||||
generation = 6; // K6
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 6:
|
|
||||||
generation = 7; // K7 (Athlon)
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 0xF:
|
|
||||||
case 0x10:
|
|
||||||
generation = 8; // K8 (Opteron)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case X86_X64_VENDOR_INTEL:
|
|
||||||
switch(family)
|
|
||||||
{
|
|
||||||
case 5:
|
|
||||||
generation = 5; // Pentium
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 6:
|
|
||||||
if(model < 0xF)
|
|
||||||
generation = 6; // Pentium Pro/II/III/M
|
|
||||||
else
|
|
||||||
generation = 8; // Core2Duo
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 0xF:
|
|
||||||
if(model <= 6)
|
|
||||||
generation = 7; // Pentium 4/D
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if(family >= 0x10)
|
|
||||||
generation = 9;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
debug_assert(generation != 0);
|
|
||||||
return INFO::OK;
|
return INFO::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t x86_x64_Generation()
|
size_t x86_x64_Model()
|
||||||
{
|
{
|
||||||
static ModuleInitState initState;
|
ModuleInit(&signatureInitState, InitSignature);
|
||||||
ModuleInit(&initState, InitGeneration);
|
return model;
|
||||||
return generation;
|
}
|
||||||
|
|
||||||
|
size_t x86_x64_Family()
|
||||||
|
{
|
||||||
|
ModuleInit(&signatureInitState, InitSignature);
|
||||||
|
return family;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -832,7 +784,8 @@ static LibError InitIdentifierString()
|
|||||||
// doesn't recognize.
|
// doesn't recognize.
|
||||||
if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
|
if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
|
||||||
{
|
{
|
||||||
InitModelAndFamily();
|
const size_t family = x86_x64_Family();
|
||||||
|
const size_t model = x86_x64_Model();
|
||||||
switch(x86_x64_Vendor())
|
switch(x86_x64_Vendor())
|
||||||
{
|
{
|
||||||
case X86_X64_VENDOR_AMD:
|
case X86_X64_VENDOR_AMD:
|
||||||
|
@ -73,6 +73,11 @@ enum x86_x64_Vendors
|
|||||||
LIB_API x86_x64_Vendors x86_x64_Vendor();
|
LIB_API x86_x64_Vendors x86_x64_Vendor();
|
||||||
|
|
||||||
|
|
||||||
|
LIB_API size_t x86_x64_Model();
|
||||||
|
|
||||||
|
LIB_API size_t x86_x64_Family();
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the colloquial processor generation
|
* @return the colloquial processor generation
|
||||||
* (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron)
|
* (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron)
|
||||||
@ -96,6 +101,7 @@ enum x86_x64_Cap
|
|||||||
// standard (edx)
|
// standard (edx)
|
||||||
X86_X64_CAP_FPU = 32+0, // Floating Point Unit
|
X86_X64_CAP_FPU = 32+0, // Floating Point Unit
|
||||||
X86_X64_CAP_TSC = 32+4, // TimeStamp Counter
|
X86_X64_CAP_TSC = 32+4, // TimeStamp Counter
|
||||||
|
X86_X64_CAP_MSR = 32+5, // Model Specific Registers
|
||||||
X86_X64_CAP_CMOV = 32+15, // Conditional MOVe
|
X86_X64_CAP_CMOV = 32+15, // Conditional MOVe
|
||||||
X86_X64_CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock
|
X86_X64_CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock
|
||||||
X86_X64_CAP_MMX = 32+23, // MultiMedia eXtensions
|
X86_X64_CAP_MMX = 32+23, // MultiMedia eXtensions
|
||||||
|
@ -175,10 +175,15 @@
|
|||||||
// this macro should not generate any fallback code; it is merely the
|
// this macro should not generate any fallback code; it is merely the
|
||||||
// compiler-specific backend for lib.h's UNREACHABLE.
|
// compiler-specific backend for lib.h's UNREACHABLE.
|
||||||
// #define it to nothing if the compiler doesn't support such a hint.
|
// #define it to nothing if the compiler doesn't support such a hint.
|
||||||
#if MSC_VERSION
|
#define HAVE_ASSUME_UNREACHABLE 1
|
||||||
|
#if MSC_VERSION && !ICC_VERSION // (ICC ignores this)
|
||||||
# define ASSUME_UNREACHABLE __assume(0)
|
# define ASSUME_UNREACHABLE __assume(0)
|
||||||
|
#elif GCC_VERSION >= 450
|
||||||
|
# define ASSUME_UNREACHABLE __builtin_unreachable()
|
||||||
#else
|
#else
|
||||||
# define ASSUME_UNREACHABLE
|
# define ASSUME_UNREACHABLE
|
||||||
|
# undef HAVE_ASSUME_UNREACHABLE
|
||||||
|
# define HAVE_ASSUME_UNREACHABLE 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,47 +41,75 @@
|
|||||||
#define IOCTL_AKEN_WRITE_PORT CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+1, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
#define IOCTL_AKEN_WRITE_PORT CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+1, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
||||||
#define IOCTL_AKEN_MAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+2, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
#define IOCTL_AKEN_MAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+2, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
||||||
#define IOCTL_AKEN_UNMAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+3, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
#define IOCTL_AKEN_UNMAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+3, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
||||||
|
#define IOCTL_AKEN_READ_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+4, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
||||||
|
#define IOCTL_AKEN_WRITE_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+5, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
||||||
|
#define IOCTL_AKEN_READ_PMC CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+6, METHOD_BUFFERED, FILE_ANY_ACCESS)
|
||||||
|
|
||||||
|
|
||||||
// input and output data structures for the IOCTLs
|
// input and output data structures for the IOCTLs
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
struct AkenReadPortIn
|
typedef struct AkenReadPortIn_
|
||||||
{
|
{
|
||||||
USHORT port;
|
USHORT port;
|
||||||
UCHAR numBytes;
|
UCHAR numBytes;
|
||||||
};
|
}
|
||||||
|
AkenReadPortIn;
|
||||||
|
|
||||||
struct AkenReadPortOut
|
typedef struct AkenReadPortOut_
|
||||||
{
|
{
|
||||||
DWORD32 value;
|
DWORD32 value;
|
||||||
};
|
}
|
||||||
|
AkenReadPortOut;
|
||||||
|
|
||||||
struct AkenWritePortIn
|
typedef struct AkenWritePortIn_
|
||||||
{
|
{
|
||||||
DWORD32 value;
|
DWORD32 value;
|
||||||
USHORT port;
|
USHORT port;
|
||||||
UCHAR numBytes;
|
UCHAR numBytes;
|
||||||
};
|
}
|
||||||
|
AkenWritePortIn;
|
||||||
|
|
||||||
struct AkenMapIn
|
typedef struct AkenMapIn_
|
||||||
{
|
{
|
||||||
// note: fixed-width types allow the 32 or 64-bit Mahaf wrapper to
|
// note: fixed-width types allow the 32 or 64-bit Mahaf wrapper to
|
||||||
// interoperate with the 32 or 64-bit Aken driver.
|
// interoperate with the 32 or 64-bit Aken driver.
|
||||||
DWORD64 physicalAddress;
|
DWORD64 physicalAddress;
|
||||||
DWORD64 numBytes;
|
DWORD64 numBytes;
|
||||||
};
|
}
|
||||||
|
AkenMapIn;
|
||||||
|
|
||||||
struct AkenMapOut
|
typedef struct AkenMapOut_
|
||||||
{
|
{
|
||||||
DWORD64 virtualAddress;
|
DWORD64 virtualAddress;
|
||||||
};
|
}
|
||||||
|
AkenMapOut;
|
||||||
|
|
||||||
struct AkenUnmapIn
|
typedef struct AkenUnmapIn_
|
||||||
{
|
{
|
||||||
DWORD64 virtualAddress;
|
DWORD64 virtualAddress;
|
||||||
};
|
}
|
||||||
|
AkenUnmapIn;
|
||||||
|
|
||||||
|
typedef struct AkenReadRegisterIn_
|
||||||
|
{
|
||||||
|
DWORD64 reg;
|
||||||
|
}
|
||||||
|
AkenReadRegisterIn;
|
||||||
|
|
||||||
|
typedef struct AkenReadRegisterOut_
|
||||||
|
{
|
||||||
|
DWORD64 value;
|
||||||
|
}
|
||||||
|
AkenReadRegisterOut;
|
||||||
|
|
||||||
|
typedef struct AkenWriteRegisterIn_
|
||||||
|
{
|
||||||
|
DWORD64 reg;
|
||||||
|
DWORD64 value;
|
||||||
|
}
|
||||||
|
AkenWriteRegisterIn;
|
||||||
|
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "precompiled.h"
|
#include "precompiled.h"
|
||||||
|
#include "lib/sysdep/os/win/mahaf.h"
|
||||||
|
|
||||||
#include "lib/sysdep/os/win/win.h"
|
#include "lib/sysdep/os/win/win.h"
|
||||||
#include <winioctl.h>
|
#include <winioctl.h>
|
||||||
@ -56,8 +57,7 @@ static u32 ReadPort(u16 port, u8 numBytes)
|
|||||||
}
|
}
|
||||||
|
|
||||||
debug_assert(bytesReturned == sizeof(out));
|
debug_assert(bytesReturned == sizeof(out));
|
||||||
const u32 value = out.value;
|
return out.value;
|
||||||
return value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 mahaf_ReadPort8(u16 port)
|
u8 mahaf_ReadPort8(u16 port)
|
||||||
@ -159,6 +159,48 @@ void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static u64 ReadRegister(DWORD ioctl, u64 reg)
|
||||||
|
{
|
||||||
|
AkenReadRegisterIn in;
|
||||||
|
in.reg = reg;
|
||||||
|
AkenReadRegisterOut out;
|
||||||
|
|
||||||
|
DWORD bytesReturned;
|
||||||
|
LPOVERLAPPED ovl = 0; // synchronous
|
||||||
|
BOOL ok = DeviceIoControl(hAken, ioctl, &in, sizeof(in), &out, sizeof(out), &bytesReturned, ovl);
|
||||||
|
if(!ok)
|
||||||
|
{
|
||||||
|
WARN_WIN32_ERR;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
debug_assert(bytesReturned == sizeof(out));
|
||||||
|
return out.value;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 mahaf_ReadModelSpecificRegister(u64 reg)
|
||||||
|
{
|
||||||
|
return ReadRegister((DWORD)IOCTL_AKEN_READ_MSR, reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg)
|
||||||
|
{
|
||||||
|
return ReadRegister((DWORD)IOCTL_AKEN_READ_PMC, reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void mahaf_WriteModelSpecificRegister(u64 reg, u64 value)
|
||||||
|
{
|
||||||
|
AkenWriteRegisterIn in;
|
||||||
|
in.reg = reg;
|
||||||
|
in.value = value;
|
||||||
|
|
||||||
|
DWORD bytesReturned; // unused but must be passed to DeviceIoControl
|
||||||
|
LPOVERLAPPED ovl = 0; // synchronous
|
||||||
|
BOOL ok = DeviceIoControl(hAken, (DWORD)IOCTL_AKEN_WRITE_MSR, &in, sizeof(in), 0, 0u, &bytesReturned, ovl);
|
||||||
|
WARN_IF_FALSE(ok);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// driver installation
|
// driver installation
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
|
@ -39,20 +39,26 @@
|
|||||||
* note: mahaf_MapPhysicalMemory will complain if it
|
* note: mahaf_MapPhysicalMemory will complain if it
|
||||||
* is called despite this function having returned true.
|
* is called despite this function having returned true.
|
||||||
**/
|
**/
|
||||||
extern bool mahaf_IsPhysicalMappingDangerous();
|
LIB_API bool mahaf_IsPhysicalMappingDangerous();
|
||||||
|
|
||||||
|
|
||||||
extern LibError mahaf_Init();
|
LIB_API LibError mahaf_Init();
|
||||||
extern void mahaf_Shutdown();
|
LIB_API void mahaf_Shutdown();
|
||||||
|
|
||||||
extern u8 mahaf_ReadPort8 (u16 port);
|
LIB_API u8 mahaf_ReadPort8 (u16 port);
|
||||||
extern u16 mahaf_ReadPort16(u16 port);
|
LIB_API u16 mahaf_ReadPort16(u16 port);
|
||||||
extern u32 mahaf_ReadPort32(u16 port);
|
LIB_API u32 mahaf_ReadPort32(u16 port);
|
||||||
extern void mahaf_WritePort8 (u16 port, u8 value);
|
LIB_API void mahaf_WritePort8 (u16 port, u8 value);
|
||||||
extern void mahaf_WritePort16(u16 port, u16 value);
|
LIB_API void mahaf_WritePort16(u16 port, u16 value);
|
||||||
extern void mahaf_WritePort32(u16 port, u32 value);
|
LIB_API void mahaf_WritePort32(u16 port, u32 value);
|
||||||
|
|
||||||
extern volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
|
LIB_API volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes);
|
||||||
extern void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
|
LIB_API void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress);
|
||||||
|
|
||||||
|
LIB_API u64 mahaf_ReadModelSpecificRegister(u64 reg);
|
||||||
|
LIB_API void mahaf_WriteModelSpecificRegister(u64 reg, u64 value);
|
||||||
|
|
||||||
|
// must be done in the driver because Windows clears CR4.PCE[8]
|
||||||
|
LIB_API u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg);
|
||||||
|
|
||||||
#endif // INCLUDED_MAHAF
|
#endif // INCLUDED_MAHAF
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
#if ARCH_X86_X64
|
#if ARCH_X86_X64
|
||||||
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
|
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
|
||||||
# include "lib/sysdep/arch/x86_x64/topology.h"
|
# include "lib/sysdep/arch/x86_x64/topology.h"
|
||||||
|
# include "lib/sysdep/arch/x86_x64/msr.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -173,7 +174,7 @@ public:
|
|||||||
|
|
||||||
#if ARCH_X86_X64
|
#if ARCH_X86_X64
|
||||||
// recent CPU:
|
// recent CPU:
|
||||||
if(x86_x64_Generation() >= 7)
|
//if(x86_x64_Generation() >= 7)
|
||||||
{
|
{
|
||||||
// note: 8th generation CPUs support C1-clock ramping, which causes
|
// note: 8th generation CPUs support C1-clock ramping, which causes
|
||||||
// drift on multi-core systems, but those were excluded above.
|
// drift on multi-core systems, but those were excluded above.
|
||||||
@ -183,7 +184,7 @@ public:
|
|||||||
// the chipset thinks the system is dangerously overheated; the
|
// the chipset thinks the system is dangerously overheated; the
|
||||||
// OS isn't even notified. this may be rare, but could cause
|
// OS isn't even notified. this may be rare, but could cause
|
||||||
// incorrect results => unsafe.
|
// incorrect results => unsafe.
|
||||||
return false;
|
//return false;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -217,6 +218,15 @@ public:
|
|||||||
// note: even here, initial accuracy isn't critical because the
|
// note: even here, initial accuracy isn't critical because the
|
||||||
// clock is subject to thermal drift and would require continual
|
// clock is subject to thermal drift and would require continual
|
||||||
// recalibration anyway.
|
// recalibration anyway.
|
||||||
|
#if ARCH_X86_X64
|
||||||
|
if(MSR::HasNehalem())
|
||||||
|
{
|
||||||
|
const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO);
|
||||||
|
const u8 maxNonTurboRatio = bits(platformInfo, 8, 15);
|
||||||
|
return maxNonTurboRatio * 133.33e6f;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
return os_cpu_ClockFrequency();
|
return os_cpu_ClockFrequency();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user