forked from 0ad/0ad

further refactoring of new cache detect code

- add invariant: level/type are always valid; numEntries indicates
  whether the cache is actually present and not disabled
- handle pseudo-descriptors 0xF0 and 0xF1 (avoids a warning)

This was SVN commit r9083.
janwas 2011-03-19 12:48:56 +00:00
parent 4ae8cfe858
commit fb499ff0d0
2 changed files with 186 additions and 134 deletions
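The invariant in the first bullet can be illustrated with a minimal, self-contained sketch (a simplified recreation using the field names from the diff below; not the verbatim 0ad source):

// Sketch of the new invariant: after Initialize(), level/type are always
// valid, and numEntries == 0 marks a cache that is absent or disabled.
#include <cassert>
#include <cstddef>

struct Cache
{
	static const size_t maxLevels = 3;
	enum Type { kData, kInstruction, kUnified };

	size_t level;	// invariant: always 1..maxLevels
	Type type;	// invariant: always a concrete type, never a null marker
	size_t numEntries;	// 0 <=> not present or disabled
	size_t entrySize;
	size_t associativity;
	size_t sharedBy;

	void Initialize(size_t level_, Type type_)
	{
		level = level_;
		type = type_;
		numEntries = entrySize = associativity = sharedBy = 0;
	}

	bool Validate() const
	{
		if(level < 1 || level > maxLevels)
			return false;
		if(numEntries == 0)	// disabled => all other fields must be zero
			return entrySize == 0 && associativity == 0 && sharedBy == 0;
		return entrySize != 0 && associativity != 0 && sharedBy != 0;
	}
};

int main()
{
	Cache cache;
	cache.Initialize(1, Cache::kData);
	assert(cache.Validate());	// holds even before detection fills in the fields
	return 0;
}

Detection code then only fills in the remaining fields when the hardware actually reports the cache; a disabled cache validates as-is.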


@@ -37,7 +37,7 @@ static x86_x64_Cache caches[numCaches];
 static void AddCache(const x86_x64_Cache& cache)
 {
-	debug_assert(1 <= cache.level && cache.level <= x86_x64_Cache::maxLevels);
+	debug_assert(cache.Validate());
 	if(cache.type == x86_x64_Cache::kData || cache.type == x86_x64_Cache::kUnified)
 		caches[L1D + cache.level-1] = cache;
@@ -48,6 +48,7 @@ static void AddCache(const x86_x64_Cache& cache)
 static void AddTLB(const x86_x64_Cache& tlb)
 {
+	debug_assert(tlb.Validate());
 	debug_assert(tlb.level == 1 || tlb.level == 2);	// see maxTLBs
 	debug_assert(numTLBs < maxTLBs);
@@ -66,19 +67,23 @@ namespace AMD
 static x86_x64_Cache L1Cache(u32 reg, x86_x64_Cache::Type type)
 {
 	x86_x64_Cache cache;
-	memset(&cache, 0, sizeof(cache));
-	cache.type = type;
-	cache.level = 1;
-	cache.associativity = bits(reg, 16, 23);
-	cache.entrySize = bits(reg, 0, 7);
-	cache.sharedBy = 1;
-	if (cache.entrySize)
-		cache.numEntries = bits(reg, 24, 31)*KiB / cache.entrySize;
+	cache.Initialize(1, type);
+	const size_t lineSize = bits(reg, 0, 7);
+	const size_t associativity = bits(reg, 16, 23);	// 0 = reserved
+	const size_t totalSize = bits(reg, 24, 31)*KiB;
+	if(lineSize != 0 && associativity != 0 && totalSize != 0)
+	{
+		cache.numEntries = totalSize / lineSize;
+		cache.entrySize = lineSize;
+		cache.associativity = associativity;
+		cache.sharedBy = 1;
+	}
 	return cache;
 }

 // applies to L2, L3 and TLB2
-static const size_t associativities[16] =
+static const size_t associativityTable[16] =
 {
 	0, 1, 2, 0, 4, 0, 8, 0,
 	16, 0, 32, 48, 64, 96, 128, x86_x64_Cache::fullyAssociative
@@ -87,69 +92,72 @@ static const size_t associativities[16] =
 static x86_x64_Cache L2Cache(u32 reg, x86_x64_Cache::Type type)
 {
 	x86_x64_Cache cache;
-	memset(&cache, 0, sizeof(cache));
-	const size_t associativityIndex = bits(reg, 12, 15);
-	if(associativityIndex == 0)	// disabled
+	cache.Initialize(2, type);
+	const size_t lineSize = bits(reg, 0, 7);
+	const size_t idxAssociativity = bits(reg, 12, 15);	// 0 = disabled
+	const size_t totalSize = bits(reg, 16, 31)*KiB;
+	if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0)
 	{
-		cache.type = x86_x64_Cache::kNull;
-		cache.associativity = 0;
+		cache.numEntries = totalSize / lineSize;
+		cache.entrySize = lineSize;
+		cache.associativity = associativityTable[idxAssociativity];
+		cache.sharedBy = 1;
 	}
-	else
-	{
-		cache.type = type;
-		cache.associativity = associativities[associativityIndex];
-		debug_assert(cache.associativity != 0);	// else: encoding is "reserved"
-	}
-	cache.level = 2;
-	cache.entrySize = bits(reg, 0, 7);
-	cache.sharedBy = 1;
-	if (cache.entrySize)
-		cache.numEntries = bits(reg, 16, 31)*KiB / cache.entrySize;
 	return cache;
 }

-// (same as L2 except for the size)
 static x86_x64_Cache L3Cache(u32 reg, x86_x64_Cache::Type type)
 {
-	x86_x64_Cache cache = L2Cache(reg, type);
-	cache.level = 3;
-	if (cache.entrySize)
-		cache.numEntries = bits(reg, 18, 31)*512*KiB / cache.entrySize;	// (rounded down)
+	x86_x64_Cache cache;
+	cache.Initialize(3, type);
+	const size_t lineSize = bits(reg, 0, 7);
+	const size_t idxAssociativity = bits(reg, 12, 15);	// 0 = disabled
+	const size_t totalSize = bits(reg, 18, 31)*512*KiB;	// (rounded down)
+	// NB: some Athlon 64 X2 models have no L3 cache
+	if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0)
+	{
+		cache.numEntries = totalSize / lineSize;
+		cache.entrySize = lineSize;
+		cache.associativity = associativityTable[idxAssociativity];
+		cache.sharedBy = 1;
+	}
 	return cache;
 }

 static x86_x64_Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Cache::Type type)
 {
 	x86_x64_Cache cache;
-	memset(&cache, 0, sizeof(cache));
-	cache.type = type;
-	cache.level = 1;
-	cache.associativity = bits(reg, bitOffset+8, bitOffset+15);
-	cache.entrySize = pageSize;
-	cache.sharedBy = 1;
-	cache.numEntries = bits(reg, bitOffset, bitOffset+7);
+	cache.Initialize(1, type);
+	const size_t numEntries = bits(reg, bitOffset+0, bitOffset+ 7);
+	const size_t associativity = bits(reg, bitOffset+8, bitOffset+15);	// 0 = reserved
+	if(numEntries != 0 && associativity != 0)
+	{
+		cache.numEntries = numEntries;
+		cache.entrySize = pageSize;
+		cache.associativity = associativity;
+		cache.sharedBy = 1;
+	}
 	return cache;
 }

 static x86_x64_Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64_Cache::Type type)
 {
 	x86_x64_Cache cache;
-	memset(&cache, 0, sizeof(cache));
-	const size_t associativityIndex = bits(reg, bitOffset+12, bitOffset+15);
-	if(associativityIndex == 0)	// disabled
+	cache.Initialize(2, type);
+	const size_t numEntries = bits(reg, bitOffset+ 0, bitOffset+11);
+	const size_t idxAssociativity = bits(reg, bitOffset+12, bitOffset+15);	// 0 = disabled
+	if(numEntries != 0 && idxAssociativity != 0)
 	{
-		cache.type = x86_x64_Cache::kNull;
-		cache.associativity = 0;
+		cache.numEntries = numEntries;
+		cache.entrySize = pageSize;
+		cache.associativity = associativityTable[idxAssociativity];
+		cache.sharedBy = 1;
 	}
-	else
-	{
-		cache.type = type;
-		cache.associativity = associativities[associativityIndex];
-	}
-	cache.level = 2;
-	cache.entrySize = pageSize;
-	cache.sharedBy = 1;
-	cache.numEntries = bits(reg, bitOffset, bitOffset+11);
 	return cache;
 }
@@ -187,7 +195,6 @@ static void DetectCacheAndTLB()
 {
 	AddCache(L2Cache(regs.ecx, x86_x64_Cache::kUnified));
 	AddCache(L3Cache(regs.edx, x86_x64_Cache::kUnified));
-	// NB: some Athlon 64 X2 models report L3 type == null
 	AddTLB2Pair(regs.eax, 2*MiB);
 	AddTLB2Pair(regs.ebx, 4*KiB);
@@ -213,22 +220,21 @@ static bool DetectCache()
 		if(!x86_x64_cpuid(&regs))
 			return false;

-		const x86_x64_Cache::Type type = (x86_x64_Cache::Type)bits(regs.eax, 0, 4);
+		const x86_x64_Cache::Type type = (x86_x64_Cache::Type)bits(regs.eax, 0, 4);
-		if(type == x86_x64_Cache::kNull)	// no more remaining
-			break;
 		const size_t level = (size_t)bits(regs.eax, 5, 7);
 		const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1;
 		const size_t sets = (size_t)bits(regs.ecx, 0, 31)+1;
+		if(type == x86_x64_Cache::kNull)	// no more remaining
+			break;

 		x86_x64_Cache cache;
-		memset(&cache, 0, sizeof(cache));
-		cache.type = type;
-		cache.level = level;
-		cache.associativity = (size_t)bits(regs.ebx, 22, 31)+1;
-		cache.entrySize = (size_t)bits(regs.ebx, 0, 11)+1;	// (yes, this also uses +1 encoding)
-		cache.sharedBy = (size_t)bits(regs.eax, 14, 25)+1;
+		cache.Initialize(level, type);
 		cache.numEntries = cache.associativity * partitions * sets;
+		cache.entrySize = (size_t)bits(regs.ebx, 0, 11)+1;	// (yes, this also uses +1 encoding)
+		cache.associativity = (size_t)bits(regs.ebx, 22, 31)+1;
+		cache.sharedBy = (size_t)bits(regs.eax, 14, 25)+1;
 		AddCache(cache);
 	}
@@ -342,16 +348,16 @@ struct Characteristics // POD
 		return smallSize >= 0;
 	}

-	size_t EntrySize() const
-	{
-		return IsTLB()? (flags & ~0xF) : -smallSize;
-	}
-
 	size_t NumEntries() const
 	{
 		return IsTLB()? smallSize : (flags & ~0xF);
 	}

+	size_t EntrySize() const
+	{
+		return IsTLB()? (flags & ~0xF) : -smallSize;
+	}
+
 	u8 descriptor;
 	u8 associativity;
 	i16 smallSize;	// negative cache entrySize or TLB numEntries
@@ -526,26 +532,49 @@ static const Characteristics* CharacteristicsFromDescriptor(Descriptor descriptor)
 enum DescriptorFlags
 {
 	SKIP_CACHE_DESCRIPTORS = 1,
-	NO_LAST_LEVEL_CACHE = 2
+	NO_LAST_LEVEL_CACHE = 2,
+	PREFETCH64 = 64,
+	PREFETCH128 = 128,
 };

+static bool HandleSpecialDescriptor(Descriptor descriptor, size_t& descriptorFlags)
+{
+	switch(descriptor)
+	{
+	case 0:	// carries no information
+		return true;
+
+	case 0x40:
+		descriptorFlags |= NO_LAST_LEVEL_CACHE;
+		return true;
+
+	case 0xF0:
+		descriptorFlags |= PREFETCH64;
+		return true;
+
+	case 0xF1:
+		descriptorFlags |= PREFETCH128;
+		return true;
+
+	case 0xFF:	// descriptors don't include caches (use CPUID.4 instead)
+		descriptorFlags |= SKIP_CACHE_DESCRIPTORS;
+		return true;
+
+	default:
+		return false;
+	}
+}
+
 static void DetectCacheAndTLB(size_t& descriptorFlags)
 {
 	const Descriptors descriptors = GetDescriptors();
 	for(Descriptors::const_iterator it = descriptors.begin(); it != descriptors.end(); ++it)
 	{
 		const Descriptor descriptor = *it;
-		switch(descriptor)
-		{
-		case 0:	// carries no information
+		if(HandleSpecialDescriptor(descriptor, descriptorFlags))
 			continue;
-
-		case 0x40:
-			descriptorFlags |= NO_LAST_LEVEL_CACHE;
-			continue;
-
-		case 0xFF:	// descriptors don't include caches (use CPUID.4 instead)
-			descriptorFlags |= SKIP_CACHE_DESCRIPTORS;
-			continue;
-		}

 		const Characteristics* characteristics = CharacteristicsFromDescriptor(*it);
 		if(!characteristics)
 			continue;
@@ -554,13 +583,11 @@ static void DetectCacheAndTLB(size_t& descriptorFlags)
 			continue;

 		x86_x64_Cache cache;
-		memset(&cache, 0, sizeof(cache));
-		cache.type = characteristics->Type();
-		cache.level = characteristics->Level();
-		cache.associativity = characteristics->associativity;
-		cache.entrySize = characteristics->EntrySize();
-		cache.sharedBy = 1;	// (safe default)
+		cache.Initialize(characteristics->Level(), characteristics->Type());
 		cache.numEntries = characteristics->NumEntries();
+		cache.entrySize = characteristics->EntrySize();
+		cache.associativity = characteristics->associativity;
+		cache.sharedBy = 1;	// (safe default)
 		if(characteristics->IsTLB())
 			AddTLB(cache);
 		else
@@ -573,8 +600,12 @@ static void DetectCacheAndTLB(size_t& descriptorFlags)
 static LibError DetectCacheAndTLB()
 {
-	for(size_t i = 0; i < ARRAY_SIZE(caches); i++)
-		caches[i].Init();
+	// ensure all cache entries are initialized (DetectCache* might not set them all)
+	for(size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; idxLevel++)
+	{
+		caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64_Cache::kData);
+		caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64_Cache::kInstruction);
+	}

 	if(x86_x64_Vendor() == X86_X64_VENDOR_AMD)
 		AMD::DetectCacheAndTLB();
@@ -587,18 +618,18 @@ static LibError DetectCacheAndTLB()
 	}

 	// sanity checks
-	for(size_t i = 0; i < x86_x64_Cache::maxLevels; i++)
+	for(size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; idxLevel++)
 	{
-		debug_assert(caches[L1D+i].type != x86_x64_Cache::kInstruction);
-		if(caches[L1D+i].type != x86_x64_Cache::kNull)
-			debug_assert(caches[L1D+i].level == i+1);
-		debug_assert(caches[L1D+i].Validate() == true);
+		debug_assert(caches[L1D+idxLevel].type == x86_x64_Cache::kData || caches[L1D+idxLevel].type == x86_x64_Cache::kUnified);
+		debug_assert(caches[L1D+idxLevel].level == idxLevel+1);
+		debug_assert(caches[L1D+idxLevel].Validate() == true);

-		debug_assert(caches[L1I+i].type != x86_x64_Cache::kData);
-		if(caches[L1I+i].type != x86_x64_Cache::kNull)
-			debug_assert(caches[L1I+i].level == i+1);
-		debug_assert(caches[L1I+i].Validate() == true);
+		debug_assert(caches[L1I+idxLevel].type == x86_x64_Cache::kInstruction || caches[L1I+idxLevel].type == x86_x64_Cache::kUnified);
+		debug_assert(caches[L1I+idxLevel].level == idxLevel+1);
+		debug_assert(caches[L1I+idxLevel].Validate() == true);
 	}
 	for(size_t i = 0; i < numTLBs; i++)
 		debug_assert(caches[TLB+i].Validate() == true);

 	return INFO::OK;
 }

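The second bullet of the commit message is implemented by HandleSpecialDescriptor above: 0xF0/0xF1 are CPUID.2 pseudo-descriptors that describe the prefetch size (64 and 128 bytes) rather than a cache or TLB, and previously triggered a warning. A self-contained sketch of that dispatch (simplified; assumes Descriptor is a plain byte, as suggested by the u8 descriptor field above):

#include <cstddef>

typedef unsigned char Descriptor;

enum DescriptorFlags
{
	SKIP_CACHE_DESCRIPTORS = 1,
	NO_LAST_LEVEL_CACHE = 2,
	PREFETCH64 = 64,
	PREFETCH128 = 128
};

// returns true if the descriptor carries no cache/TLB characteristics
// and has been fully handled here (by setting a flag if applicable)
static bool HandleSpecialDescriptor(Descriptor descriptor, size_t& descriptorFlags)
{
	switch(descriptor)
	{
	case 0x00:	// carries no information
		return true;
	case 0x40:
		descriptorFlags |= NO_LAST_LEVEL_CACHE;
		return true;
	case 0xF0:	// 64-byte prefetching
		descriptorFlags |= PREFETCH64;
		return true;
	case 0xF1:	// 128-byte prefetching
		descriptorFlags |= PREFETCH128;
		return true;
	case 0xFF:	// cache parameters are reported via CPUID.4 instead
		descriptorFlags |= SKIP_CACHE_DESCRIPTORS;
		return true;
	default:
		return false;	// ordinary descriptor; caller looks up its characteristics
	}
}

int main()
{
	size_t flags = 0;
	HandleSpecialDescriptor(0xF0, flags);	// now recognized instead of warned about
	return (flags & PREFETCH64)? 0 : 1;
}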

@@ -37,29 +37,77 @@ struct x86_x64_Cache // POD (may be used before static constructors)
 	static const size_t maxLevels = 3;
-	static const size_t fullyAssociative = 0xFF;
+	static const size_t fullyAssociative = 0xFF;	// (CPUID.4 definition)

-	void Init()
+	/**
+	 * 1..maxLevels
+	 **/
+	size_t level;
+
+	/**
+	 * never kNull
+	 **/
+	Type type;
+
+	/**
+	 * if 0, the cache is disabled and all other values are zero
+	 **/
+	size_t numEntries;
+
+	/**
+	 * NB: cache entries are lines, TLB entries are pages
+	 **/
+	size_t entrySize;
+
+	/**
+	 * = fullyAssociative or the actual ways of associativity
+	 **/
+	size_t associativity;
+
+	/**
+	 * how many logical processors share this cache?
+	 **/
+	size_t sharedBy;
+
+	void Initialize(size_t level, Type type)
 	{
-		type = kNull;
-		level = 0;
+		this->level = level;
+		this->type = type;
+		numEntries = 0;
+		entrySize = 0;
 		associativity = 0;
-		entrySize = 0;
-		sharedBy = 0;
-		numEntries = 0;
+		sharedBy = 0;
+		debug_assert(Validate());
 	}

 	bool Validate() const
 	{
-		if(type == kNull)
-			return true;
 		if(!(1 <= level && level <= maxLevels))
 			return false;
-		if(entrySize == 0)
+		if(type == kNull)
 			return false;
+
+		if(numEntries == 0)	// disabled
+		{
+			if(entrySize != 0)
+				return false;
+			if(associativity != 0)
+				return false;
+			if(sharedBy != 0)
+				return false;
+		}
+		else
+		{
+			if(entrySize == 0)
+				return false;
+			if(associativity == 0 || associativity > fullyAssociative)
+				return false;
+			if(sharedBy == 0)
+				return false;
+		}
+
 		return true;
 	}
@@ -67,33 +115,6 @@ struct x86_x64_Cache // POD (may be used before static constructors)
 	{
 		return u64(numEntries)*entrySize;
 	}
-
-	/**
-	 * if kNull, all other values are invalid.
-	 **/
-	Type type;
-
-	/**
-	 * 1..maxLevels
-	 **/
-	size_t level;
-
-	/**
-	 * = fullyAssociative or the actual ways of associativity
-	 **/
-	size_t associativity;
-
-	/**
-	 * NB: cache entries are lines, TLB entries are pages
-	 **/
-	size_t entrySize;
-
-	/**
-	 * how many logical processors share this cache?
-	 **/
-	size_t sharedBy;
-
-	size_t numEntries;
 };

 enum IdxCache
@@ -110,7 +131,7 @@ enum IdxCache
 /**
  * @return 0 if idxCache >= TLB+numTLBs, otherwise a valid pointer to
- * a Cache whose type is null if not present.
+ * a Cache whose numEntries is 0 if disabled / not present.
  **/
 LIB_API const x86_x64_Cache* x86_x64_Caches(size_t idxCache);
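With the new header semantics, callers test numEntries instead of comparing type against a null value. A hypothetical consumer, assuming only the declarations visible in the diff above (x86_x64_Caches, the L1D index, maxLevels, TotalSize()):

#include <cstddef>
#include <cstdio>

// prints every data/unified cache level that is present and enabled
static void PrintDataCaches()
{
	for(size_t idxLevel = 0; idxLevel < x86_x64_Cache::maxLevels; idxLevel++)
	{
		const x86_x64_Cache* cache = x86_x64_Caches(L1D + idxLevel);
		// level/type are always valid; numEntries == 0 means disabled/not present
		if(!cache || cache->numEntries == 0)
			continue;
		printf("L%d: %u KiB, %u-way, shared by %u processor(s)\n",
			(int)cache->level, (unsigned)(cache->TotalSize()/1024),
			(unsigned)cache->associativity, (unsigned)cache->sharedBy);
	}
}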