#ifndef ADTS_H__
#define ADTS_H__

#include "lib.h"

#include <cstdlib>	// calloc, free
#include <cstring>	// strcmp
#include <cfloat>	// FLT_MAX
#include <new>		// std::bad_alloc
#include <iterator>	// iterator category tags
#include <utility>	// std::pair, std::make_pair
#include <list>
#include <map>


//
// expansible hash table (linear probing)
//

template<typename Key, typename T> class DHT_Traits
{
public:
	static const size_t initial_entries = 16;
	size_t hash(Key key) const;
	bool equal(Key k1, Key k2) const;
	Key get_key(T t) const;
};

template<> class DHT_Traits<const char*, const char*>
{
public:
	static const size_t initial_entries = 512;
	size_t hash(const char* key) const
	{
		return (size_t)fnv_lc_hash(key);
	}
	bool equal(const char* k1, const char* k2) const
	{
		return !strcmp(k1, k2);
	}
	const char* get_key(const char* t) const
	{
		return t;
	}
};


// intended for pointer types
template<typename Key, typename T, typename Traits = DHT_Traits<Key, T> > class DynHashTbl
{
	T* tbl;
	u16 num_entries;
	u16 max_entries;	// when initialized, = 2**n for faster modulo
	Traits tr;

	T& get_slot(Key key) const
	{
		size_t hash = tr.hash(key);
		debug_assert(max_entries != 0);	// otherwise, mask will be incorrect
		const uint mask = max_entries-1;
		for(;;)
		{
			T& t = tbl[hash & mask];
			// empty slot encountered => not found
			if(!t)
				return t;
			// keys are actually equal => found it
			if(tr.equal(key, tr.get_key(t)))
				return t;
			// keep going (linear probing)
			hash++;
		}
	}

	void expand_tbl()
	{
		// alloc a new table (but don't assign it to <tbl> unless successful)
		T* old_tbl = tbl;
		tbl = (T*)calloc(max_entries*2, sizeof(T));
		if(!tbl)
		{
			tbl = old_tbl;
			throw std::bad_alloc();
		}

		max_entries += max_entries;	// must be set before get_slot

		// newly initialized, nothing to copy - done
		if(!old_tbl)
			return;

		// re-hash from the old table into the new one
		for(size_t i = 0; i < max_entries/2u; i++)
		{
			T t = old_tbl[i];
			if(t)
				get_slot(tr.get_key(t)) = t;
		}
		free(old_tbl);
	}

public:
	DynHashTbl()
	{
		tbl = 0;
		num_entries = 0;
		max_entries = tr.initial_entries/2;	// will be doubled in expand_tbl
		debug_assert(is_pow2(max_entries));
		expand_tbl();
	}

	~DynHashTbl()
	{
		clear();
	}

	void clear()
	{
		free(tbl);
		tbl = 0;
		num_entries = 0;
		// rationale: must not set to 0 because expand_tbl only doubles the size.
		// don't keep the previous size because it may have become huge and
		// there is no provision for shrinking.
		max_entries = tr.initial_entries/2;	// will be doubled in expand_tbl
	}

	void insert(const Key key, const T t)
	{
		// more than 75% full - increase table size.
		// do so before determining the slot; this invalidates any
		// previously obtained pointers into the table.
		if(num_entries*4 >= max_entries*3)
			expand_tbl();
		T& slot = get_slot(key);
		debug_assert(slot == 0);	// not already present
		slot = t;
		num_entries++;
	}

	T find(Key key) const
	{
		return get_slot(key);
	}

	size_t size() const
	{
		return num_entries;
	}


	class iterator
	{
	public:
		typedef std::forward_iterator_tag iterator_category;
		typedef T value_type;
		typedef ptrdiff_t difference_type;
		typedef const T* pointer;
		typedef const T& reference;

		iterator()
		{
		}
		iterator(T* pos_, T* end_) : pos(pos_), end(end_)
		{
		}
		T& operator*() const
		{
			return *pos;
		}
		iterator& operator++()	// pre
		{
			do
				pos++;
			while(pos != end && *pos == 0);
			return (*this);
		}
		bool operator==(const iterator& rhs) const
		{
			return pos == rhs.pos;
		}
		bool operator<(const iterator& rhs) const
		{
			return (pos < rhs.pos);
		}

		// derived
		const T* operator->() const
		{
			return &**this;
		}
		bool operator!=(const iterator& rhs) const
		{
			return !(*this == rhs);
		}
		iterator operator++(int)	// post
		{
			iterator tmp = *this;
			++*this;
			return tmp;
		}

	protected:
		T* pos;
		T* end;	// only used when incrementing (avoid going beyond end of table)
	};

	iterator begin() const
	{
		T* pos = tbl;
		while(pos != tbl+max_entries && *pos == 0)
			pos++;
		return iterator(pos, tbl+max_entries);
	}
	iterator end() const
	{
		return iterator(tbl+max_entries, 0);
	}
};
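
// illustrative sketch (not part of the original interface): typical use of
// DynHashTbl with the const char* traits specialization above. the table
// stores only pointers, so the strings must outlive it. the function name
// and values are made up; wrapped in #if 0 because it is only an example.
#if 0
static void example_DynHashTbl()
{
	// key and item are the same pointer here, as allowed by
	// DHT_Traits<const char*, const char*>::get_key.
	DynHashTbl<const char*, const char*> tbl;
	tbl.insert("water", "water");
	tbl.insert("fire", "fire");

	const char* found   = tbl.find("water");	// => "water"
	const char* missing = tbl.find("earth");	// => 0 (empty slot reached)
	(void)found; (void)missing;

	// iterate over all stored items (order is unspecified)
	for(DynHashTbl<const char*, const char*>::iterator it = tbl.begin(); it != tbl.end(); ++it)
		(void)*it;

	tbl.clear();
}
#endif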

//
// Cache for items of variable size and value/"cost".
// currently uses the Landlord algorithm.
//

#define LL_OPT_MINCREDIT
#define LL_OPT_RECIP
#define LL_OPT_DELAYCHARGE

template<typename Key, typename T> class Cache
{
public:
	Cache()
	{
#ifdef LL_OPT_MINCREDIT
		min_credit_density = FLT_MAX;
#endif
	}

	void add(Key key, T item, size_t size, uint cost)
	{
		typedef std::pair<CacheMapIt, bool> PairIB;
		typename CacheMap::value_type val = std::make_pair(key, CacheEntry(item, size, cost));
		PairIB ret = map.insert(val);
		debug_assert(ret.second);	// must not already be in map

#ifdef LL_OPT_MINCREDIT
		// adding a new item - min_credit_density may decrease
		const CacheEntry& new_entry = ret.first->second;
		notify_credit_reduced(new_entry);
#endif
	}

	// remove the entry identified by <key>. expected usage is to check
	// if present and determine size via retrieve(), so no need to
	// do anything else here.
	// useful for invalidating single cache entries.
	void remove(Key key)
	{
		CacheMapIt it = map.find(key);
		if(it == map.end())
		{
			debug_warn("Cache: item to be removed not found");
			return;
		}

#ifdef LL_OPT_MINCREDIT
		// we're removing <item>. if this one had the smallest
		// density, recalculate.
		const bool need_recalc = is_min_entry(it->second);
#endif

		map.erase(it);

#ifdef LL_OPT_MINCREDIT
		if(need_recalc)
			recalc_min_credit_density();
#endif
	}

	// if there is no entry for <key> in the cache, return 0 with
	// *psize unchanged. otherwise, return its item and
	// optionally pass back its size.
	T retrieve(Key key, size_t* psize = 0, bool refill_credit = true)
	{
		CacheMapIt it = map.find(key);
		if(it == map.end())
			return 0;
		CacheEntry& entry = it->second;

		if(psize)
			*psize = entry.size;

		if(refill_credit)
		{
#ifdef LL_OPT_MINCREDIT
			// we're increasing credit. if this one had the smallest
			// density, recalculate.
			const bool need_recalc = is_min_entry(entry);
#endif
			// the Landlord algorithm calls for credit to be reset to anything
			// between its current value and the cost.
			const float gain = 0.75f;	// restore most credit
			entry.credit = gain*entry.cost + (1.0f-gain)*entry.credit;
#ifdef LL_OPT_MINCREDIT
			if(need_recalc)
				recalc_min_credit_density();
#endif
		}

		return entry.item;
	}

	// remove the least valuable item and optionally indicate
	// how big it was (useful for statistics).
	// returns 0 if the cache is empty.
	T remove_least_valuable(size_t* psize = 0)
	{
		if(map.empty())
			return 0;

#ifdef LL_OPT_DELAYCHARGE
		// determine who has least density via priqueue
		// remove it
		// add its delta to accumulator
#endif

#ifndef LL_OPT_MINCREDIT
		// not implicitly updated: we need to calculate min_credit_density now.
		recalc_min_credit_density();
#endif

		// latch the current delta value to avoid it changing during the loop
		// (due to notify_* calls). this ensures fairness.
		const float delta = min_credit_density;

		// one iteration ought to suffice to evict someone due to the
		// definition of min_credit_density, but we provide for
		// repeating in case of floating-point imprecision.
		// (goto vs. loop avoids nesting and emphasizes rarity)
again:
		// charge everyone rent (proportional to delta and size)
		for(CacheMapIt it = map.begin(); it != map.end(); ++it)
		{
			CacheEntry& entry = it->second;
			entry.credit -= delta * entry.size;
#ifdef LL_OPT_MINCREDIT
			// reducing credit - min_credit_density may decrease
			notify_credit_reduced(entry);
#endif

			// evict immediately if credit is exhausted
			// (note: the Landlord algorithm calls for 'any subset' of
			// these items to be evicted. since we need to return
			// information about the item, we can only discard one.)
			//
			// this means every call will end up charging more than
			// intended, but we compensate by resetting credit
			// fairly high upon cache hit.
			if(entry.credit <= 0.01f)	// a bit of tolerance
			{
				T item = entry.item;
				if(psize)
					*psize = entry.size;
				map.erase(it);
#ifdef LL_OPT_MINCREDIT
				// this item had the least density, else it wouldn't
				// have been removed. recalculate.
				recalc_min_credit_density();
#endif
				return item;
			}
		}

		// none were evicted - do it all again.
		goto again;
	}

	bool empty()
	{
		return map.empty();
	}

private:
	struct CacheEntry
	{
		T item;
		size_t size;
#ifdef LL_OPT_RECIP
		float size_reciprocal;
#endif
		uint cost;
		float credit;

		CacheEntry(T item_, size_t size_, uint cost_)
			: item(item_)
		{
			size = size_;
#ifdef LL_OPT_RECIP
			size_reciprocal = 1.0f / size;
#endif
			cost = cost_;
			credit = cost;
		}
	};

	// note: use hash_map instead of map for better locality
	// (relevant when iterating over all items in remove_least_valuable)
	typedef STL_HASH_MAP<Key, CacheEntry> CacheMap;
	typedef typename CacheMap::iterator CacheMapIt;
	CacheMap map;

	// = \delta in [Young02] (needed for the charge step).
	// this is cached to avoid having to iterate over the whole map.
	float min_credit_density;

	float credit_density(const CacheEntry& entry)
	{
#ifdef LL_OPT_RECIP
		return entry.credit * entry.size_reciprocal;
#else
		return entry.credit / entry.size;
#endif
	}

	void recalc_min_credit_density()
	{
		min_credit_density = FLT_MAX;
		for(CacheMapIt it = map.begin(); it != map.end(); ++it)
			min_credit_density = MIN(min_credit_density, credit_density(it->second));
	}

#ifdef LL_OPT_MINCREDIT
	void notify_credit_reduced(const CacheEntry& entry)
	{
		min_credit_density = MIN(min_credit_density, credit_density(entry));
	}

	bool is_min_entry(const CacheEntry& entry)
	{
		return feq(min_credit_density, credit_density(entry));
	}
#endif
};
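
// illustrative sketch (not part of the original interface): driving the
// Landlord cache above. the keys, item values and sizes are made up; any
// pointer-like T that can be tested against 0 works.
#if 0
static void example_Cache()
{
	Cache<uint, const char*> c;
	c.add(1u, "itemA", 100u, 1u);	// key, item, size [bytes], cost
	c.add(2u, "itemB", 400u, 1u);

	// cache hit: refills most of the entry's credit (see retrieve)
	size_t size;
	const char* item = c.retrieve(1u, &size);
	if(!item)
	{
		// cache miss: load the item, then add() it
	}

	// make room: evict entries with the least credit density (credit/size)
	// until enough space has been freed
	size_t freed = 0;
	while(!c.empty() && freed < 400)
	{
		size_t evicted_size = 0;
		(void)c.remove_least_valuable(&evicted_size);
		freed += evicted_size;
	}
}
#endif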

//
// FIFO bit queue
//

struct BitBuf
{
	ulong buf;
	ulong cur;	// bit to be appended (toggled by add())
	ulong len;	// |buf| [bits]

	void reset()
	{
		buf = 0;
		cur = 0;
		len = 0;
	}

	// toggle the current bit if desired, and add it to the buffer
	// (the new bit becomes the LSB)
	void add(ulong toggle)
	{
		cur ^= toggle;
		buf <<= 1;
		buf |= cur;
		len++;
	}

	// extract the least significant n bits
	uint extract(ulong n)
	{
		ulong i = buf & ((1ul << n) - 1);
		buf >>= n;
		return i;
	}
};
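
// illustrative sketch (not part of the original interface): how BitBuf's
// toggle-and-append scheme behaves. the values in the comments follow
// directly from add()/extract() above.
#if 0
static void example_BitBuf()
{
	BitBuf bb;
	bb.reset();
	bb.add(1);	// cur: 0 -> 1; buf = ..1
	bb.add(0);	// cur stays 1; buf = .11
	bb.add(1);	// cur: 1 -> 0; buf = 110
	const uint low2 = bb.extract(2);	// => 2 (binary 10); buf is now 1
	(void)low2;
}
#endif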

//
// ring buffer - static array, accessible modulo n
//

template<class T, size_t n> class RingBuf
{
	size_t size_;	// # of entries in buffer
	size_t head;	// index of oldest item
	size_t tail;	// index of newest item
	T data[n];

public:
	RingBuf() : data() { clear(); }
	void clear() { size_ = 0; head = 0; tail = n-1; }

	size_t size() { return size_; }
	bool empty() { return size_ == 0; }

	const T& operator[](int ofs) const
	{
		debug_assert(!empty());
		size_t idx = (size_t)(head + ofs);
		return data[idx % n];
	}
	T& operator[](int ofs)
	{
		debug_assert(!empty());
		size_t idx = (size_t)(head + ofs);
		return data[idx % n];
	}

	T& front()
	{
		debug_assert(!empty());
		return data[head];
	}
	const T& front() const
	{
		debug_assert(!empty());
		return data[head];
	}
	T& back()
	{
		debug_assert(!empty());
		return data[tail];
	}
	const T& back() const
	{
		debug_assert(!empty());
		return data[tail];
	}

	void push_back(const T& item)
	{
		if(size_ < n)
			size_++;
		// do not complain - overwriting old values is legit
		// (e.g. sliding window).
		else
			head = (head + 1) % n;

		tail = (tail + 1) % n;
		data[tail] = item;
	}

	void pop_front()
	{
		if(size_ != 0)
		{
			size_--;
			head = (head + 1) % n;
		}
		else
			debug_warn("underflow");
	}


	class iterator
	{
	public:
		typedef std::random_access_iterator_tag iterator_category;
		typedef T value_type;
		typedef ptrdiff_t difference_type;
		typedef T* pointer;
		typedef T& reference;

		iterator() : data(0), pos(0)
		{
		}
		iterator(T* data_, size_t pos_) : data(data_), pos(pos_)
		{
		}
		T& operator[](int idx) const
		{
			return data[(pos+idx) % n];
		}
		T& operator*() const
		{
			return data[pos % n];
		}
		T* operator->() const
		{
			return &**this;
		}
		iterator& operator++()	// pre
		{
			++pos;
			return (*this);
		}
		iterator operator++(int)	// post
		{
			iterator tmp = *this;
			++*this;
			return tmp;
		}
		bool operator==(const iterator& rhs) const
		{
			return data == rhs.data && pos == rhs.pos;
		}
		bool operator!=(const iterator& rhs) const
		{
			return !(*this == rhs);
		}
		bool operator<(const iterator& rhs) const
		{
			return (pos < rhs.pos);
		}
		iterator& operator+=(difference_type ofs)
		{
			pos += ofs;
			return *this;
		}
		iterator& operator-=(difference_type ofs)
		{
			return (*this += -ofs);
		}
		iterator operator+(difference_type ofs) const
		{
			iterator tmp = *this;
			return (tmp += ofs);
		}
		iterator operator-(difference_type ofs) const
		{
			iterator tmp = *this;
			return (tmp -= ofs);
		}
		difference_type operator-(const iterator right) const
		{
			return (difference_type)(pos - right.pos);
		}

	protected:
		T* data;
		size_t pos;	// not mod-N so that begin != end when buffer is full.
	};

	class const_iterator
	{
	public:
		typedef std::random_access_iterator_tag iterator_category;
		typedef T value_type;
		typedef ptrdiff_t difference_type;
		typedef const T* pointer;
		typedef const T& reference;

		const_iterator() : data(0), pos(0)
		{
		}
		const_iterator(const T* data_, size_t pos_) : data(data_), pos(pos_)
		{
		}
		const T& operator[](int idx) const
		{
			return data[(pos+idx) % n];
		}
		const T& operator*() const
		{
			return data[pos % n];
		}
		const T* operator->() const
		{
			return &**this;
		}
		const_iterator& operator++()	// pre
		{
			++pos;
			return (*this);
		}
		const_iterator operator++(int)	// post
		{
			const_iterator tmp = *this;
			++*this;
			return tmp;
		}
		bool operator==(const const_iterator& rhs) const
		{
			return data == rhs.data && pos == rhs.pos;
		}
		bool operator!=(const const_iterator& rhs) const
		{
			return !(*this == rhs);
		}
		bool operator<(const const_iterator& rhs) const
		{
			return (pos < rhs.pos);
		}
		const_iterator& operator+=(difference_type ofs)
		{
			pos += ofs;
			return *this;
		}
		const_iterator& operator-=(difference_type ofs)
		{
			return (*this += -ofs);
		}
		const_iterator operator+(difference_type ofs) const
		{
			const_iterator tmp = *this;
			return (tmp += ofs);
		}
		const_iterator operator-(difference_type ofs) const
		{
			const_iterator tmp = *this;
			return (tmp -= ofs);
		}
		difference_type operator-(const const_iterator right) const
		{
			return (difference_type)(pos - right.pos);
		}

	protected:
		const T* data;
		size_t pos;	// not mod-N so that begin != end when buffer is full.
	};

	iterator begin()
	{
		return iterator(data, (size_ < n)? 0 : head);
	}
	const_iterator begin() const
	{
		return const_iterator(data, (size_ < n)? 0 : head);
	}
	iterator end()
	{
		return iterator(data, (size_ < n)? size_ : head+n);
	}
	const_iterator end() const
	{
		return const_iterator(data, (size_ < n)? size_ : head+n);
	}
};
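
// illustrative sketch (not part of the original interface): a RingBuf used
// as a sliding window. the capacity and values are made up for illustration.
#if 0
static void example_RingBuf()
{
	RingBuf<int, 4> rb;	// holds at most 4 items
	for(int i = 0; i < 6; i++)
		rb.push_back(i);	// 0 and 1 are silently overwritten

	// rb now holds 2,3,4,5: front() is the oldest item, back() the newest,
	// and operator[] indexes relative to the oldest.
	const int oldest = rb.front();	// => 2
	const int newest = rb.back();	// => 5
	const int second = rb[1];	// => 3
	(void)oldest; (void)newest; (void)second;

	// iteration visits items in insertion order (oldest to newest)
	for(RingBuf<int, 4>::iterator it = rb.begin(); it != rb.end(); ++it)
		(void)*it;	// 2, 3, 4, 5

	rb.pop_front();	// discard the oldest item; rb now holds 3,4,5
}
#endif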

//
// cache
//
// owns a pool of resources (Entry-s), associated with a 64 bit id.
// typical use: add all available resources to the cache via grow();
// assign() ids to the resources, and update the resource data if necessary;
// retrieve() the resource, given id.

template<class Entry> class LRUCache
{
public:
	// 'give' Entry to the cache.
	int grow(Entry& e)
	{
		// add to the front of the LRU list, but not the index
		// (since we don't have an id yet)
		lru_list.push_front(Line(e));
		return 0;
	}

	// find the least-recently used line; associate id with it,
	// and return its Entry. fails (returns 0) if id is already
	// associated, or all lines are locked.
	Entry* assign(u64 id)
	{
		if(find_line(id))
		{
			debug_warn("assign: id already in cache!");
			return 0;
		}

		// scan in least->most recently used order for the first non-locked entry
		List_iterator l = lru_list.end();
		while(l != lru_list.begin())
		{
			--l;
			if(l->refs == 0)
				goto have_line;
		}

		// all are locked and cannot be displaced.
		// caller should grow() enough lines so that this never happens.
		debug_warn("assign: all lines locked - grow() more lines");
		return 0;

have_line:
		// update the mapping (index): drop the line's previous id (if any),
		// then associate the new id with it.
		idx.erase(l->id);
		idx[id] = l;
		l->id = id;
		return &l->ent;
	}

	// find the line identified by id; return its entry or 0 if not in cache.
	Entry* retrieve(u64 id)
	{
		// invalid: id 0 denotes not-yet-associated lines
		if(id == 0)
		{
			debug_warn("retrieve: id 0 not allowed");
			return 0;
		}

		Line* l = find_line(id);
		return l? &l->ent : 0;
	}

	// add/release a reference to a line, to protect it against
	// displacement via assign(). we verify refs >= 0.
	int lock(u64 id, bool locked)
	{
		Line* l = find_line(id);
		if(!l)
			return -1;

		if(locked)
			l->refs++;
		else
		{
			debug_assert(l->refs > 0);
			l->refs--;
		}
		return 0;
	}

private:
	// implementation:
	// cache lines are stored in a list, most recently used in front.
	// a map finds the list entry containing a given id in log-time.

	struct Line
	{
		u64 id;
		Entry ent;
		int refs;	// protect from displacement if > 0

		Line(Entry& _ent)
		{
			id   = 0;
			ent  = _ent;
			refs = 0;
		}
	};

	typedef std::list<Line> List;
	typedef typename List::iterator List_iterator;
	List lru_list;

	typedef std::map<u64, List_iterator> Map;
	Map idx;


	// return the line identified by id, or 0 if not in cache.
	// mark it as the most recently used line.
	Line* find_line(u64 id)
	{
		typename Map::const_iterator i = idx.find(id);
		// not found
		if(i == idx.end())
			return 0;

		// the index points us to the list entry
		List_iterator l = i->second;

		// mark l as the most recently used line.
		lru_list.splice(lru_list.begin(), lru_list, l);
		idx[l->id] = l;

		return &*l;
	}
};
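
// illustrative sketch (not part of the original interface): the grow/assign/
// retrieve/lock protocol described above. ExampleEntry and the ids are made
// up for illustration.
#if 0
struct ExampleEntry
{
	int handle;
	ExampleEntry() : handle(0) {}
};

static void example_LRUCache()
{
	LRUCache<ExampleEntry> cache;

	// hand the whole resource pool to the cache up-front
	ExampleEntry e1, e2;
	cache.grow(e1);
	cache.grow(e2);

	// claim the least-recently-used unlocked line for id 100 and fill it in
	ExampleEntry* ent = cache.assign(100);
	if(ent)
		ent->handle = 42;

	// later: look it up by id and lock it while in use so that assign()
	// cannot displace it
	ExampleEntry* in_use = cache.retrieve(100);
	if(in_use)
	{
		cache.lock(100, true);
		// ... use in_use->handle ...
		cache.lock(100, false);
	}
}
#endif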

// from VFS, not currently needed
#if 0

template<class T> class StringMap
{
public:
	T* add(const char* fn, T& t)
	{
		const FnHash fn_hash = fnv_hash(fn);

		t.name = fn;

		std::pair<FnHash, T> item = std::make_pair(fn_hash, t);
		std::pair<MapIt, bool> res;
		res = map.insert(item);

		if(!res.second)
		{
			debug_warn("add: already in container");
			return 0;
		}

		// return the address of the user data (T) inserted into the container.
		return &((res.first)->second);
	}

	T* find(const char* fn)
	{
		const FnHash fn_hash = fnv_hash(fn);
		MapIt it = map.find(fn_hash);	// O(log(size))
		if(it == map.end())
			return 0;
		return &it->second;
	}

	size_t size() const
	{
		return map.size();
	}

	void clear()
	{
		map.clear();
	}

private:
	typedef std::map<FnHash, T> Map;
	typedef typename Map::iterator MapIt;
	Map map;

public:
	class iterator
	{
	public:
		iterator()
		{
		}
		iterator(typename StringMap::MapIt _it)
		{
			it = _it;
		}
		T& operator*() const
		{
			return it->second;
		}
		T* operator->() const
		{
			return &**this;
		}
		iterator& operator++()	// pre
		{
			++it;
			return (*this);
		}
		bool operator==(const iterator& rhs) const
		{
			return it == rhs.it;
		}
		bool operator!=(const iterator& rhs) const
		{
			return !(*this == rhs);
		}
	protected:
		typename StringMap::MapIt it;
	};

	iterator begin()
	{
		return iterator(map.begin());
	}
	iterator end()
	{
		return iterator(map.end());
	}
};


template<class Key, class Data> class PriMap
{
public:
	int add(Key key, uint pri, Data& data)
	{
		Item item = std::make_pair(pri, data);
		MapEntry ent = std::make_pair(key, item);
		std::pair<MapIt, bool> ret;
		ret = map.insert(ent);
		// already in map
		if(!ret.second)
		{
			MapIt it = ret.first;
			Item& old_item = it->second;
			const uint old_pri = old_item.first;
			Data& old_data     = old_item.second;

			// new data is of higher priority; replace the older data
			if(old_pri <= pri)
			{
				old_data = data;
				return 0;
			}
			// new data is of lower priority; don't add
			else
				return 1;
		}

		return 0;
	}

	Data* find(Key key)
	{
		MapIt it = map.find(key);
		if(it == map.end())
			return 0;

		return &it->second.second;
	}

	void clear()
	{
		map.clear();
	}

private:
	typedef std::pair<uint, Data> Item;
	typedef std::pair<Key, Item> MapEntry;
	typedef std::map<Key, Item> Map;
	typedef typename Map::iterator MapIt;
	Map map;
};

#endif	// #if 0

#endif	// #ifndef ADTS_H__