From e07622b56aad56191819e42deedeb7573b38822a Mon Sep 17 00:00:00 2001 From: janwas Date: Sat, 28 Jan 2006 22:19:42 +0000 Subject: [PATCH] Cache: implement meat of landlord algorithm and add remove() allocators: add freelist capability to Bucket; add provision for variable XOR fixed size allocs archive: re-tag file buffers if reading uncompressed from archive; improve LFH fixup logic file_cache: add cache line invalidation; lock down pages (readonly) when IO finished file_io: cleanup+docs; properly cut off at EOF without breaking alignment. file_stats: add seek accounting (WIP) vfs_optimizer: also record file_buf_free in the trace. initial implementation of archive builder (WIP) zip: lfh_fixup now more efficient (does not involve buffer manager - instead it grabs LFH from temp blocks) tex: plug FileIOBuf leak. avoid writing to tex.hm because that is a read-only file_buf. This was SVN commit r3428. --- source/lib/adts.h | 57 ++- source/lib/allocators.cpp | 154 +++++--- source/lib/allocators.h | 106 +++--- source/lib/res/file/archive.cpp | 14 +- source/lib/res/file/archive.h | 2 +- source/lib/res/file/compression.cpp | 3 +- source/lib/res/file/compression.h | 1 + source/lib/res/file/file.cpp | 1 + source/lib/res/file/file_cache.cpp | 458 +++++++++++++++--------- source/lib/res/file/file_cache.h | 28 +- source/lib/res/file/file_internal.h | 4 +- source/lib/res/file/file_io.cpp | 180 +++++----- source/lib/res/file/file_io.h | 3 +- source/lib/res/file/file_stats.cpp | 21 +- source/lib/res/file/file_stats.h | 9 +- source/lib/res/file/vfs.cpp | 13 +- source/lib/res/file/vfs_optimizer.cpp | 485 ++++++++++++++++---------- source/lib/res/file/vfs_optimizer.h | 32 +- source/lib/res/file/zip.cpp | 103 ++++-- source/lib/res/file/zip.h | 7 + source/lib/res/graphics/tex.cpp | 49 ++- 21 files changed, 1117 insertions(+), 613 deletions(-) diff --git a/source/lib/adts.h b/source/lib/adts.h index e081734e39..4ccbd596ba 100755 --- a/source/lib/adts.h +++ b/source/lib/adts.h @@ -230,7 +230,19 @@ public: debug_assert(ret.second); // must not already be in map } - T retrieve(Key key, size_t* psize = 0) + // remove the entry identified by . expected usage is to check + // if present and determine size via retrieve(), so no need to + // do anything else here. + // useful for invalidating single cache entries. + void remove(Key key) + { + map.erase(key); + } + + // if there is no entry for in the cache, return 0 with + // psize unchanged. otherwise, return its item and + // optionally pass back its size. + T retrieve(Key key, size_t* psize = 0, bool refill_credit = true) { CacheMapIt it = map.find(key); if(it == map.end()) @@ -238,22 +250,54 @@ public: CacheEntry& entry = it->second; if(psize) *psize = entry.size; -// increase credit + + if(refill_credit) + { + // Landlord algorithm calls for credit to be reset to anything + // between its current value and the cost. + const float gain = 0.75f; // restore most credit + entry.credit = gain*entry.cost + (1.0f-gain)*entry.credit; + } + return entry.item; } + // remove the least valuable item and optionally indicate + // how big it was (useful for statistics). T remove_least_valuable(size_t* psize = 0) { CacheMapIt it; -again: // until we find someone to evict + // one iteration ought to suffice to evict someone due to + // definition of min_density, but we provide for repeating + // in case of floating-point imprecision. + // (goto vs. 
loop avoids nesting and emphasizes rarity) +again: - // foreach entry: decrease credit and evict if <= 0 + // find minimum credit density (needed for charge step) + float min_density = 1e10; // = \delta in [Young02] for( it = map.begin(); it != map.end(); ++it) { CacheEntry& entry = it->second; - // found someone we can evict + const float density = entry.credit / entry.size; + min_density = MIN(density, min_density); + } + + // .. charge everyone rent (proportional to min_density and size) + for( it = map.begin(); it != map.end(); ++it) + { + CacheEntry& entry = it->second; + entry.credit -= min_density * entry.size; + + // evict immediately if credit is exhausted + // (note: Landlord algorithm calls for 'any subset' of + // these items to be evicted. since we need to return + // information about the item, we can only discard one.) + // + // this means every call will end up charging more than + // intended, but we compensate by resetting credit + // fairly high upon cache hit. if(entry.credit <= 0.0f) { T item = entry.item; @@ -264,8 +308,7 @@ again: // until we find someone to evict } } - // none were evicted -// charge rent + // none were evicted - do it all again. goto again; } diff --git a/source/lib/allocators.cpp b/source/lib/allocators.cpp index 7afbc8cde1..a547e3d36b 100644 --- a/source/lib/allocators.cpp +++ b/source/lib/allocators.cpp @@ -364,6 +364,7 @@ LibError da_append(DynArray* da, const void* data, size_t size) // - doesn't preallocate the entire pool; // - returns sequential addresses. + // "freelist" is a pointer to the first unused element (0 if there are none); // its memory holds a pointer to the next free one in list. @@ -386,7 +387,8 @@ static void* freelist_pop(void** pfreelist) } -static const size_t POOL_CHUNK = 4*KiB; +// elements returned are aligned to this many bytes: +static const size_t ALIGN = 8; // ready
<p>
for use. is the upper limit [bytes] on @@ -396,15 +398,10 @@ static const size_t POOL_CHUNK = 4*KiB; // (which cannot be freed individually); // otherwise, it specifies the number of bytes that will be // returned by pool_alloc (whose size parameter is then ignored). -// in the latter case, size must at least be enough for a pointer -// (due to freelist implementation). LibError pool_create(Pool* p, size_t max_size, size_t el_size) { - if(el_size != 0 && el_size < sizeof(void*)) - WARN_RETURN(ERR_INVALID_PARAM); - + p->el_size = round_up(el_size, ALIGN); RETURN_ERR(da_alloc(&p->da, max_size)); - p->el_size = el_size; return ERR_OK; } @@ -446,7 +443,7 @@ void* pool_alloc(Pool* p, size_t size) { // if pool allows variable sizes, go with the size parameter, // otherwise the pool el_size setting. - const size_t el_size = p->el_size? p->el_size : size; + const size_t el_size = p->el_size? p->el_size : round_up(size, ALIGN); // note: this can never happen in pools with variable-sized elements // because they disallow pool_free. @@ -470,17 +467,19 @@ have_el: } -// make available for reuse in the given pool. +// make available for reuse in the given Pool. // -// this is not allowed if the pool was set up for variable-size elements. -// (copying with fragmentation would defeat the point of a pool - simplicity) -// we could allow this, but instead warn and bail to make sure it -// never happens inadvertently (leaking memory in the pool). +// this is not allowed if created for variable-size elements. +// rationale: avoids having to pass el_size here and compare with size when +// allocating; also prevents fragmentation and leaking memory. void pool_free(Pool* p, void* el) { + // only allowed to free items if we were initialized with + // fixed el_size. (this avoids having to pass el_size here and + // check if requested_size matches that when allocating) if(p->el_size == 0) { - debug_warn("pool is set up for variable-size items"); + debug_warn("cannot free variable-size items"); return; } @@ -506,9 +505,8 @@ void pool_free_all(Pool* p) //----------------------------------------------------------------------------- // design goals: -// - variable-sized allocations; -// - no reuse of allocations, can only free all at once; -// - no init necessary; +// - fixed- XOR variable-sized blocks; +// - allow freeing individual blocks if they are all fixed-size; // - never relocates; // - no fixed limit. @@ -518,46 +516,41 @@ void pool_free_all(Pool* p) // basically a combination of region and heap, where frees go to the heap and // allocs exhaust that memory first and otherwise use the region. -// must be constant and power-of-2 to allow fast modulo. -const size_t BUCKET_SIZE = 4*KiB; +// power-of-2 isn't required; value is arbitrary. +const size_t BUCKET_SIZE = 4000; -// allocate bytes of memory from the given Bucket object. -// must initially be zeroed (e.g. by defining it as static data). -void* bucket_alloc(Bucket* b, size_t size) +// ready for use. +// +// can be 0 to allow variable-sized allocations +// (which cannot be freed individually); +// otherwise, it specifies the number of bytes that will be +// returned by bucket_alloc (whose size parameter is then ignored). +LibError bucket_create(Bucket* b, size_t el_size) { - // would overflow a bucket - if(size > BUCKET_SIZE-sizeof(u8*)) + b->freelist = 0; + b->el_size = round_up(el_size, ALIGN); + + // note: allocating here avoids the is-this-the-first-time check + // in bucket_alloc, which speeds things up. 
+ b->bucket = (u8*)malloc(BUCKET_SIZE); + if(!b->bucket) { - debug_warn("size doesn't fit in a bucket"); - return 0; + // cause next bucket_alloc to retry the allocation + b->pos = BUCKET_SIZE; + b->num_buckets = 0; + return ERR_NO_MEM; } - // make sure the next item will be aligned - size = round_up(size, 8); - - // if there's not enough space left or no bucket yet (first call), - // close it and allocate another. - if(b->pos+size > BUCKET_SIZE || !b->bucket) - { - u8* bucket = (u8*)malloc(BUCKET_SIZE); - if(!bucket) - return 0; - *(u8**)bucket = b->bucket; - b->bucket = bucket; - // skip bucket list field and align to 8 bytes (note: malloc already - // aligns to at least 8 bytes, so don't take b->bucket into account) - b->pos = round_up(sizeof(u8*), 8); - b->num_buckets++; - } - - void* ret = b->bucket+b->pos; - b->pos += size; - return ret; + *(u8**)b->bucket = 0; // terminate list + b->pos = round_up(sizeof(u8*), ALIGN); + b->num_buckets = 1; + return ERR_OK; } -// free all allocations that ensued from the given Bucket. -void bucket_free_all(Bucket* b) +// free all memory that ensued from . +// future alloc and free calls on this Bucket will fail. +void bucket_destroy(Bucket* b) { while(b->bucket) { @@ -568,6 +561,69 @@ void bucket_free_all(Bucket* b) } debug_assert(b->num_buckets == 0); + + // poison pill: cause subsequent alloc and free to fail + b->freelist = 0; + b->el_size = BUCKET_SIZE; +} + + +// return an entry from the bucket, or 0 if another would have to be +// allocated and there isn't enough memory to do so. +// exhausts the freelist before returning new entries to improve locality. +// +// if the bucket was set up with fixed-size elements, is ignored; +// otherwise, bytes are allocated. +void* bucket_alloc(Bucket* b, size_t size) +{ + size_t el_size = b->el_size? b->el_size : round_up(size, ALIGN); + // must fit in a bucket + debug_assert(el_size <= BUCKET_SIZE-sizeof(u8*)); + + // try to satisfy alloc from freelist + void* el = freelist_pop(&b->freelist); + if(el) + return el; + + // if there's not enough space left, close current bucket and + // allocate another. + if(b->pos+el_size > BUCKET_SIZE) + { + u8* bucket = (u8*)malloc(BUCKET_SIZE); + if(!bucket) + return 0; + *(u8**)bucket = b->bucket; + b->bucket = bucket; + // skip bucket list field and align (note: malloc already + // aligns to at least 8 bytes, so don't take b->bucket into account) + b->pos = round_up(sizeof(u8*), ALIGN); + b->num_buckets++; + } + + void* ret = b->bucket+b->pos; + b->pos += el_size; + return ret; +} + + +// make available for reuse in . +// +// this is not allowed if created for variable-size elements. +// rationale: avoids having to pass el_size here and compare with size when +// allocating; also prevents fragmentation and leaking memory. +void bucket_free(Bucket* b, void* el) +{ + if(b->el_size == 0) + { + debug_warn("cannot free variable-size items"); + return; + } + + freelist_push(&b->freelist, el); + + // note: checking if was actually allocated from is difficult: + // it may not be in the currently open bucket, so we'd have to + // iterate over the list - too much work. } diff --git a/source/lib/allocators.h b/source/lib/allocators.h index 0d82eb716b..6cf19073a5 100644 --- a/source/lib/allocators.h +++ b/source/lib/allocators.h @@ -164,8 +164,6 @@ const size_t POOL_VARIABLE_ALLOCS = 0; // (which cannot be freed individually); // otherwise, it specifies the number of bytes that will be // returned by pool_alloc (whose size parameter is then ignored). 
-// in the latter case, size must at least be enough for a pointer -// (due to freelist implementation). extern LibError pool_create(Pool* p, size_t max_size, size_t el_size); // free all memory that ensued from
<p>
. all elements are made unusable @@ -185,12 +183,11 @@ extern bool pool_contains(Pool* p, void* el); // otherwise, bytes are allocated. extern void* pool_alloc(Pool* p, size_t size); -// make available for reuse in the given pool. +// make available for reuse in the given Pool. // -// this is not allowed if the pool was set up for variable-size elements. -// (copying with fragmentation would defeat the point of a pool - simplicity) -// we could allow this, but instead warn and bail to make sure it -// never happens inadvertently (leaking memory in the pool). +// this is not allowed if created for variable-size elements. +// rationale: avoids having to pass el_size here and compare with size when +// allocating; also prevents fragmentation and leaking memory. extern void pool_free(Pool* p, void* el); // "free" all allocations that ensued from the given Pool. @@ -204,40 +201,61 @@ extern void pool_free_all(Pool* p); // // design goals: -// - variable-sized allocations; -// - no reuse of allocations, can only free all at once; -// - no init necessary; +// - fixed- XOR variable-sized blocks; +// - allow freeing individual blocks if they are all fixed-size; // - never relocates; // - no fixed limit. // note: this type of allocator is called "region-based" in the literature. // see "Reconsidering Custom Memory Allocation" (Berger, Zorn, McKinley). -// if individual elements must be freeable, consider "reaps": +// if individual variable-size elements must be freeable, consider "reaps": // basically a combination of region and heap, where frees go to the heap and // allocs exhaust that memory first and otherwise use the region. // opaque! do not read/write any fields! struct Bucket { - // currently open bucket. must be initialized to 0. + // currently open bucket. u8* bucket; // offset of free space at end of current bucket (i.e. # bytes in use). - // must be initialized to 0. size_t pos; - // records # buckets allocated; used to check if the list of them - // isn't corrupted. must be initialized to 0. - uint num_buckets; + void* freelist; + + size_t el_size : 16; + + // records # buckets allocated; verifies the list of buckets is correct. + uint num_buckets : 16; }; -// allocate bytes of memory from the given Bucket object. -// must initially be zeroed (e.g. by defining it as static data). +// ready for use. +// +// can be 0 to allow variable-sized allocations +// (which cannot be freed individually); +// otherwise, it specifies the number of bytes that will be +// returned by bucket_alloc (whose size parameter is then ignored). +extern LibError bucket_create(Bucket* b, size_t el_size); + +// free all memory that ensued from . +// future alloc and free calls on this Bucket will fail. +extern void bucket_destroy(Bucket* b); + +// return an entry from the bucket, or 0 if another would have to be +// allocated and there isn't enough memory to do so. +// exhausts the freelist before returning new entries to improve locality. +// +// if the bucket was set up with fixed-size elements, is ignored; +// otherwise, bytes are allocated. extern void* bucket_alloc(Bucket* b, size_t size); -// free all allocations that ensued from the given Bucket. -extern void bucket_free_all(Bucket* b); +// make available for reuse in . +// +// this is not allowed if created for variable-size elements. +// rationale: avoids having to pass el_size here and compare with size when +// allocating; also prevents fragmentation and leaking memory. 
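// example usage (fixed-size elements; <Node> is a hypothetical element type
// used only for illustration, and error handling is abbreviated):
//
//   struct Node { int data; Node* next; };
//   Bucket b;
//   if(bucket_create(&b, sizeof(Node)) == ERR_OK)
//   {
//       Node* n = (Node*)bucket_alloc(&b, 0); // size param ignored (fixed el_size)
//       // .. use <n>
//       bucket_free(&b, n);     // element goes onto the freelist for reuse
//       bucket_destroy(&b);     // releases all buckets at once
//   }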
+extern void bucket_free(Bucket* b, void* el); // @@ -267,25 +285,29 @@ extern void matrix_free(void** matrix); // overrun protection // -// this class wraps an arbitrary object in DynArray memory and can detect -// inadvertent writes to it. this is useful for tracking down memory overruns. -// -// the basic idea is to require users to request access to the object and -// notify us when done; memory access permission is temporarily granted. -// (similar in principle to Software Transaction Memory). -// -// since this is quite slow, the protection is disabled unless -// CONFIG_OVERRUN_PROTECTION == 1; this avoids having to remove the -// wrapper code in release builds and re-write when looking for overruns. -// -// example usage: -// OverrunProtector your_class_wrapper; -// .. -// your_class* yc = your_class_wrapper.get(); -// if(!yc) abort(); // not enough memory to allocate a your_class instance -// // access/write to -// your_class_wrapper.lock(); // disallow further access -// .. +/* +OverrunProtector wraps an arbitrary object in DynArray memory and can detect +inadvertent writes to it. this is useful for tracking down memory overruns. + +the basic idea is to require users to request access to the object and +notify us when done; memory access permission is temporarily granted. +(similar in principle to Software Transaction Memory). + +since this is quite slow, the protection is disabled unless +CONFIG_OVERRUN_PROTECTION == 1; this avoids having to remove the +wrapper code in release builds and re-write when looking for overruns. + +example usage: +OverrunProtector your_class_wrapper; +.. +your_class* yc = your_class_wrapper.get(); // unlock, make ready for use +if(!yc) // your_class_wrapper's one-time alloc of a your_class- + abort(); // instance had failed - can't continue. +doSomethingWith(yc); // read/write access +your_class_wrapper.lock(); // disallow further access until next .get() +.. +*/ + template class OverrunProtector { DynArray da; @@ -322,11 +344,9 @@ private: void init() { - const size_t size = 4096; - cassert(sizeof(T) <= size); - if(da_alloc(&da, size) < 0) + if(da_alloc(&da, sizeof(T)) < 0) goto fail; - if(da_set_size(&da, size) < 0) + if(da_set_size(&da, sizeof(T)) < 0) goto fail; #include "nommgr.h" diff --git a/source/lib/res/file/archive.cpp b/source/lib/res/file/archive.cpp index 2fef7da27a..d9cbb8b337 100644 --- a/source/lib/res/file/archive.cpp +++ b/source/lib/res/file/archive.cpp @@ -281,11 +281,7 @@ LibError afile_open(const Handle ha, const char* fn, uintptr_t memento, int flag // => need to copy ArchiveEntry fields into AFile. RETURN_ERR(archive_get_file_info(a, atom_fn, memento, ent)); - if(ent->flags & ZIP_LFH_FIXUP_NEEDED) - { - zip_fixup_lfh(&a->f, ent); - ent->flags &= ~ZIP_LFH_FIXUP_NEEDED; - } + zip_fixup_lfh(&a->f, ent); uintptr_t ctx = 0; // slight optimization: do not allocate context if not compressed @@ -517,8 +513,14 @@ ssize_t afile_read(AFile* af, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB H_DEREF(af->ha, Archive, a); if(!is_compressed(af)) + { + bool we_allocated = (pbuf != FILE_BUF_TEMP) && (*pbuf == FILE_BUF_ALLOC); // no need to set last_cofs - only checked if compressed. 
- return file_io(&a->f, af->ofs+ofs, size, pbuf, cb, cb_ctx); + RETURN_ERR(file_io(&a->f, af->ofs+ofs, size, pbuf, cb, cb_ctx)); + if(we_allocated) + (void)file_buf_set_real_fn(*pbuf, af->fc.atom_fn); + return ERR_OK; + } debug_assert(af->ctx != 0); diff --git a/source/lib/res/file/archive.h b/source/lib/res/file/archive.h index 79a7ec17ae..7967f179e3 100644 --- a/source/lib/res/file/archive.h +++ b/source/lib/res/file/archive.h @@ -174,7 +174,7 @@ struct ArchiveEntry time_t mtime; // used in IO - off_t ofs; // bit 31 set if fixup needed + off_t ofs; off_t csize; CompressionMethod method; diff --git a/source/lib/res/file/compression.cpp b/source/lib/res/file/compression.cpp index e411aed2f0..9c43658b2e 100644 --- a/source/lib/res/file/compression.cpp +++ b/source/lib/res/file/compression.cpp @@ -392,9 +392,9 @@ uintptr_t comp_alloc(ContextType type, CompressionMethod method) return 0; Compressor* c; +#include "nommgr.h" // protect placement new and free() from macros switch(method) { -#include "nommgr.h" #ifndef NO_ZLIB case CM_DEFLATE: cassert(sizeof(ZLibCompressor) <= MAX_COMPRESSOR_SIZE); @@ -407,6 +407,7 @@ uintptr_t comp_alloc(ContextType type, CompressionMethod method) return 0; #include "mmgr.h" } +#include "mmgr.h" c->init(); return (uintptr_t)c; diff --git a/source/lib/res/file/compression.h b/source/lib/res/file/compression.h index 59d7630963..b9b5e0b1b2 100644 --- a/source/lib/res/file/compression.h +++ b/source/lib/res/file/compression.h @@ -26,6 +26,7 @@ extern ssize_t comp_feed(uintptr_t ctx, const void* in, size_t in_size); extern LibError comp_finish(uintptr_t ctx, void** out, size_t* out_size); +extern LibError comp_reset(uintptr_t ctx); extern void comp_free(uintptr_t ctx); #endif // #ifndef COMPRESSION_H__ diff --git a/source/lib/res/file/file.cpp b/source/lib/res/file/file.cpp index 700e77d0f8..6a36a2c600 100755 --- a/source/lib/res/file/file.cpp +++ b/source/lib/res/file/file.cpp @@ -824,6 +824,7 @@ LibError file_init() { atom_init(); file_cache_init(); + file_io_init(); return ERR_OK; } diff --git a/source/lib/res/file/file_cache.cpp b/source/lib/res/file/file_cache.cpp index 1c9fd75f5f..49bb1e1b80 100644 --- a/source/lib/res/file/file_cache.cpp +++ b/source/lib/res/file/file_cache.cpp @@ -7,17 +7,214 @@ #include "lib/adts.h" #include "file_internal.h" -// strategy: -// policy: -// - allocation: use all available mem first, then look at freelist -// - freelist: good fit, address-ordered, always split -// - free: immediately coalesce -// mechanism: -// - coalesce: boundary tags in freed memory -// - freelist: 2**n segregated doubly-linked, address-ordered +//----------------------------------------------------------------------------- + +// block cache: intended to cache raw compressed data, since files aren't aligned +// in the archive; alignment code would force a read of the whole block, +// which would be a slowdown unless we keep them in memory. +// +// keep out of async code (although extra work for sync: must not issue/wait +// if was cached) to simplify things. disadvantage: problems if same block +// is issued twice, before the first call completes (via wait_io). +// that won't happen though unless we have threaded file_ios => +// rare enough not to worry about performance. +// +// since sync code allocates the (temp) buffer, it's guaranteed +// to remain valid. 
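// usage sketch of the block cache API (hypothetical caller; <atom_fn> and
// <ofs> are assumed to be supplied by it):
//
//   BlockId id = block_cache_make_id(atom_fn, ofs);
//   void* block = block_cache_find(id);     // hit: also takes a reference
//   if(block)
//   {
//       // .. decompress/copy out of <block>, then drop the reference:
//       block_cache_release(id);
//   }
//   else
//   {
//       block = block_cache_alloc(id);      // recycles oldest block if full
//       // .. issue and wait for IO into <block>; once the data is valid:
//       block_cache_mark_completed(id);     // now visible to block_cache_find
//   }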
+// + +class BlockMgr +{ + static const size_t MAX_BLOCKS = 32; + enum BlockStatus + { + BS_PENDING, + BS_COMPLETE, + BS_INVALID + }; + struct Block + { + BlockId id; + void* mem; + BlockStatus status; + int refs; + + Block() {} // for RingBuf + Block(BlockId id_, void* mem_) + : id(id_), mem(mem_), status(BS_PENDING), refs(0) {} + }; + RingBuf blocks; + typedef RingBuf::iterator BlockIt; + + // use Pool to allocate mem for all blocks because it guarantees + // page alignment (required for IO) and obviates manually aligning. + Pool pool; + +public: + void init() + { + (void)pool_create(&pool, MAX_BLOCKS*FILE_BLOCK_SIZE, FILE_BLOCK_SIZE); + } + + void shutdown() + { + (void)pool_destroy(&pool); + } + + void* alloc(BlockId id) + { + if(blocks.size() == MAX_BLOCKS) + { + Block& b = blocks.front(); + // if this block is still locked, big trouble.. + // (someone forgot to free it and we can't reuse it) + debug_assert(b.status != BS_PENDING && b.refs == 0); + pool_free(&pool, b.mem); + blocks.pop_front(); + } + void* mem = pool_alloc(&pool, FILE_BLOCK_SIZE); // can't fail + blocks.push_back(Block(id, mem)); + return mem; + } + + void mark_completed(BlockId id) + { + for(BlockIt it = blocks.begin(); it != blocks.end(); ++it) + { + if(block_eq(it->id, id)) + it->status = BS_COMPLETE; + } + } + + void* find(BlockId id) + { + // linear search is ok, since we only keep a few blocks. + for(BlockIt it = blocks.begin(); it != blocks.end(); ++it) + { + if(block_eq(it->id, id) && it->status == BS_COMPLETE) + { + it->refs++; + return it->mem; + } + } + return 0; // not found + } + + void release(BlockId id) + { + for(BlockIt it = blocks.begin(); it != blocks.end(); ++it) + { + if(block_eq(it->id, id)) + { + it->refs--; + debug_assert(it->refs >= 0); + return; + } + } + debug_warn("release: block not found, but ought still to be in cache"); + } + + + void invalidate(const char* atom_fn) + { + for(BlockIt it = blocks.begin(); it != blocks.end(); ++it) + { + if(it->id.atom_fn == atom_fn) + { + if(it->refs) + debug_warn("invalidating block that is currently in-use"); + it->status = BS_INVALID; + } + } + } +}; +static BlockMgr block_mgr; + + +bool block_eq(BlockId b1, BlockId b2) +{ + return b1.atom_fn == b2.atom_fn && b1.block_num == b2.block_num; +} + +// create an id for use with the cache that uniquely identifies +// the block from the file starting at . +BlockId block_cache_make_id(const char* atom_fn, const off_t ofs) +{ + // is guaranteed to be unique (see file_make_unique_fn_copy). + // block_num should always fit in 32 bits (assuming maximum file size + // = 2^32 * FILE_BLOCK_SIZE ~= 2^48 -- plenty). we don't bother + // checking this. + const u32 block_num = (u32)(ofs / FILE_BLOCK_SIZE); + BlockId id = { atom_fn, block_num }; + return id; +} + +void* block_cache_alloc(BlockId id) +{ + return block_mgr.alloc(id); +} + +void block_cache_mark_completed(BlockId id) +{ + block_mgr.mark_completed(id); +} + +void* block_cache_find(BlockId id) +{ + return block_mgr.find(id); +} + +void block_cache_release(BlockId id) +{ + return block_mgr.release(id); +} + + +//----------------------------------------------------------------------------- + +// >= AIO_SECTOR_SIZE or else waio will have to realign. +// chosen as exactly 1 page: this allows write-protecting file buffers +// without worrying about their (non-page-aligned) borders. +// internal fragmentation is considerable but acceptable. +static const size_t BUF_ALIGN = 4*KiB; + +/* +CacheAllocator + +the biggest worry of a file cache is fragmentation. 
there are 2 +basic approaches to combat this: +1) 'defragment' periodically - move blocks around to increase + size of available 'holes'. +2) prevent fragmentation from occurring at all via + deliberate alloc/free policy. + +file_io returns cache blocks directly to the user (zero-copy IO), +so only currently unreferenced blocks can be moved (while holding a +lock, to boot). it is believed that this would severely hamper +defragmentation; we therefore go with the latter approach. + +basic insight is: fragmentation occurs when a block is freed whose +neighbors are not free (thus preventing coalescing). this can be +prevented by allocating objects of similar lifetimes together. +typical workloads (uniform access frequency) already show such behavior: +the Landlord cache manager evicts files in an LRU manner, which matches +the allocation policy. + +references: +"The Memory Fragmentation Problem - Solved?" (Johnstone and Wilson) +"Dynamic Storage Allocation - A Survey and Critical Review" (Johnstone and Wilson) + +policy: +- allocation: use all available mem first, then look at freelist +- freelist: good fit, address-ordered, always split blocks +- free: immediately coalesce +mechanism: +- coalesce: boundary tags in freed memory with magic value +- freelist: 2**n segregated doubly-linked, address-ordered +*/ class CacheAllocator { - static const size_t MAX_CACHE_SIZE = 64*MiB; + static const size_t MAX_CACHE_SIZE = 32*MiB; public: void init() @@ -34,27 +231,41 @@ public: void* alloc(size_t size) { - const size_t size_pa = round_up(size, AIO_SECTOR_SIZE); - - // use all available space first - void* p = pool_alloc(&pool, size_pa); - if(p) - return p; + const size_t size_pa = round_up(size, BUF_ALIGN); + void* p; // try to reuse a freed entry const uint size_class = size_class_of(size_pa); p = alloc_from_class(size_class, size_pa); if(p) - return p; + goto have_p; + + // grab more space from pool + p = pool_alloc(&pool, size_pa); + if(p) + goto have_p; + + // last resort: split a larger element p = alloc_from_larger_class(size_class, size_pa); if(p) - return p; + goto have_p; // failed - can no longer expand and nothing big enough was // found in freelists. // file cache will decide which elements are least valuable, // free() those and call us again. return 0; + +have_p: + // make sure range is writable + (void)mprotect(p, size_pa, PROT_READ|PROT_WRITE); + return p; + } + + void make_read_only(u8* p, size_t size) + { + const size_t size_pa = round_up(size, BUF_ALIGN); + (void)mprotect(p, size_pa, PROT_READ); } #include "nommgr.h" @@ -63,11 +274,11 @@ public: { if(!pool_contains(&pool, p)) { - debug_warn("not in arena"); + debug_warn("invalid pointer"); return; } - size_t size_pa = round_up(size, AIO_SECTOR_SIZE); + size_t size_pa = round_up(size, BUF_ALIGN); coalesce(p, size_pa); freelist_add(p, size_pa); } @@ -92,8 +303,8 @@ private: u32 magic1; u32 magic2; }; - // must be enough room to stash header+footer in the freed page. - cassert(AIO_SECTOR_SIZE >= 2*sizeof(FreePage)); + // must be enough room to stash 2 FreePage instances in the freed page. 
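// (illustrative layout of one freed region, as the boundary-tag scheme
// described above implies: a FreePage record is stashed at both the start
// and the end of the region so coalesce() can examine the preceding and
// following neighbors. the exact footer offset is an assumption - only the
// "room for 2 instances" requirement is verified below.)
//
//   | FreePage (header) | ................ | FreePage (footer) |
//   p                                        p + size_pa - sizeof(FreePage)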
+ cassert(BUF_ALIGN >= 2*sizeof(FreePage)); FreePage* freed_page_at(u8* p, size_t ofs) { @@ -105,7 +316,7 @@ private: FreePage* page = (FreePage*)p; if(page->magic1 != MAGIC1 || page->magic2 != MAGIC2) return 0; - debug_assert(page->size_pa % AIO_SECTOR_SIZE == 0); + debug_assert(page->size_pa % BUF_ALIGN == 0); return page; } @@ -275,19 +486,19 @@ public: extant_bufs.push_back(ExtantBuf(buf, size, atom_fn)); } - bool includes(FileIOBuf buf) + const char* get_owner_filename(FileIOBuf buf) { debug_assert(buf != 0); for(size_t i = 0; i < extant_bufs.size(); i++) { ExtantBuf& eb = extant_bufs[i]; if(matches(eb, buf)) - return true; + return eb.atom_fn; } - return false; + return 0; } - void find_and_remove(FileIOBuf buf, size_t* size) + void find_and_remove(FileIOBuf buf, size_t* size, const char** atom_fn) { debug_assert(buf != 0); for(size_t i = 0; i < extant_bufs.size(); i++) @@ -296,6 +507,7 @@ public: if(matches(eb, buf)) { *size = eb.size; + *atom_fn = eb.atom_fn; eb.buf = 0; eb.size = 0; eb.atom_fn = 0; @@ -356,7 +568,7 @@ FileIOBuf file_buf_alloc(size_t size, const char* atom_fn) extant_bufs.add(buf, size, atom_fn); - stats_buf_alloc(size, round_up(size, AIO_SECTOR_SIZE)); + stats_buf_alloc(size, round_up(size, BUF_ALIGN)); return buf; } @@ -395,38 +607,69 @@ LibError file_buf_free(FileIOBuf buf) if(!buf) return ERR_OK; - stats_buf_free(); + size_t size; const char* atom_fn; + extant_bufs.find_and_remove(buf, &size, &atom_fn); + + stats_buf_free(); + trace_notify_free(atom_fn); - size_t size; - extant_bufs.find_and_remove(buf, &size); return ERR_OK; } +// mark as belonging to the file . this is done after +// reading uncompressed data from archive: file_io.cpp must allocate the +// buffer, since only it knows how much padding is needed; however, +// archive.cpp knows the real filename (as opposed to that of the archive, +// which is what the file buffer is associated with). therefore, +// we fix up the filename afterwards. +LibError file_buf_set_real_fn(FileIOBuf buf, const char* atom_fn) +{ + // remove and reinsert into list instead of replacing atom_fn + // in-place for simplicity (speed isn't critical, since there + // should only be a few active bufs). + size_t size; const char* old_atom_fn; + extant_bufs.find_and_remove(buf, &size, &old_atom_fn); + extant_bufs.add(buf, size, atom_fn); + return ERR_OK; +} + + + + LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn) { // decide (based on flags) if buf is to be cached; set cost uint cost = 1; + cache_allocator.make_read_only((u8*)buf, size); file_cache.add(atom_fn, buf, size, cost); return ERR_OK; } -FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size) +FileIOBuf file_cache_find(const char* atom_fn, size_t* size) +{ + return file_cache.retrieve(atom_fn, size, false); +} + + +FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* psize) { // note: do not query extant_bufs - reusing that doesn't make sense // (why would someone issue a second IO for the entire file while // still referencing the previous instance?) - return file_cache.retrieve(atom_fn, size); + FileIOBuf buf = file_cache.retrieve(atom_fn, psize); + + CacheRet cr = buf? CR_HIT : CR_MISS; + stats_cache(cr, *psize, atom_fn); + + return buf; } - - - /* a) FileIOBuf is opaque type with getter FileIOBuf buf; <--------------------- how to initialize?? 
@@ -459,147 +702,24 @@ file_buf_free and there are only a few active at a time ( < 10) -//----------------------------------------------------------------------------- - -// block cache: intended to cache raw compressed data, since files aren't aligned -// in the archive; alignment code would force a read of the whole block, -// which would be a slowdown unless we keep them in memory. -// -// keep out of async code (although extra work for sync: must not issue/wait -// if was cached) to simplify things. disadvantage: problems if same block -// is issued twice, before the first call completes (via wait_io). -// that won't happen though unless we have threaded file_ios => -// rare enough not to worry about performance. -// -// since sync code allocates the (temp) buffer, it's guaranteed -// to remain valid. -// - -class BlockMgr -{ - static const size_t MAX_BLOCKS = 32; - enum BlockStatus - { - BS_PENDING, - BS_COMPLETE, - BS_INVALID - }; - struct Block - { - BlockId id; - void* mem; - BlockStatus status; - - Block() {} // for RingBuf - Block(BlockId id_, void* mem_) - : id(id_), mem(mem_), status(BS_PENDING) {} - }; - RingBuf blocks; - typedef RingBuf::iterator BlockIt; - - // use Pool to allocate mem for all blocks because it guarantees - // page alignment (required for IO) and obviates manually aligning. - Pool pool; - -public: - void init() - { - (void)pool_create(&pool, MAX_BLOCKS*FILE_BLOCK_SIZE, FILE_BLOCK_SIZE); - } - - void shutdown() - { - (void)pool_destroy(&pool); - } - - void* alloc(BlockId id) - { - if(blocks.size() == MAX_BLOCKS) - { - Block& b = blocks.front(); - // if this block is still locked, big trouble.. - // (someone forgot to free it and we can't reuse it) - debug_assert(b.status != BS_PENDING); - pool_free(&pool, b.mem); - blocks.pop_front(); - } - void* mem = pool_alloc(&pool, FILE_BLOCK_SIZE); // can't fail - blocks.push_back(Block(id, mem)); - return mem; - } - - void mark_completed(BlockId id) - { - for(BlockIt it = blocks.begin(); it != blocks.end(); ++it) - { - if(it->id == id) - it->status = BS_COMPLETE; - } - } - - void* find(BlockId id) - { - // linear search is ok, since we only keep a few blocks. - for(BlockIt it = blocks.begin(); it != blocks.end(); ++it) - { - if(it->status == BS_COMPLETE && it->id == id) - return it->mem; - } - return 0; // not found - } - - void invalidate(const char* atom_fn) - { - for(BlockIt it = blocks.begin(); it != blocks.end(); ++it) - if((const char*)(it->id >> 32) == atom_fn) - it->status = BS_INVALID; - } -}; -static BlockMgr block_mgr; - - -// create an id for use with the cache that uniquely identifies -// the block from the file starting at (aligned). -BlockId block_cache_make_id(const char* atom_fn, const off_t ofs) -{ - cassert(sizeof(atom_fn) == 4); - // format: filename atom | block number - // 63 32 31 0 - // - // is guaranteed to be unique (see file_make_unique_fn_copy). - // - // block_num should always fit in 32 bits (assuming maximum file size - // = 2^32 * FILE_BLOCK_SIZE ~= 2^48 -- plenty). we don't bother - // checking this. - - const size_t block_num = ofs / FILE_BLOCK_SIZE; - return u64_from_u32((u32)(uintptr_t)atom_fn, (u32)block_num); -} - -void* block_cache_alloc(BlockId id) -{ - return block_mgr.alloc(id); -} - -void block_cache_mark_completed(BlockId id) -{ - block_mgr.mark_completed(id); -} - -void* block_cache_find(BlockId id) -{ - return block_mgr.find(id); -} - - -//----------------------------------------------------------------------------- // remove all blocks loaded from the file . 
used when reloading the file. LibError file_cache_invalidate(const char* P_fn) { const char* atom_fn = file_make_unique_fn_copy(P_fn, 0); + + // mark all blocks from the file as invalid block_mgr.invalidate(atom_fn); + // file was cached: remove it and free that memory + size_t size; + FileIOBuf cached_buf = file_cache.retrieve(atom_fn, &size); + if(cached_buf) + { + file_cache.remove(atom_fn); + cache_allocator.free((u8*)cached_buf, size); + } + return ERR_OK; } diff --git a/source/lib/res/file/file_cache.h b/source/lib/res/file/file_cache.h index 269ea7369b..d562643822 100644 --- a/source/lib/res/file/file_cache.h +++ b/source/lib/res/file/file_cache.h @@ -1,15 +1,14 @@ -extern LibError file_buf_get(FileIOBuf* pbuf, size_t size, - const char* atom_fn, bool is_write, FileIOCB cb); +struct BlockId +{ + const char* atom_fn; + u32 block_num; +}; -extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size); -extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn); - - -typedef u64 BlockId; +extern bool block_eq(BlockId b1, BlockId b2); // create an id for use with the cache that uniquely identifies -// the block from the file starting at (aligned). +// the block from the file starting at . extern BlockId block_cache_make_id(const char* atom_fn, const off_t ofs); extern void* block_cache_alloc(BlockId id); @@ -17,6 +16,19 @@ extern void* block_cache_alloc(BlockId id); extern void block_cache_mark_completed(BlockId id); extern void* block_cache_find(BlockId id); +extern void block_cache_release(BlockId id); + + + + +extern LibError file_buf_get(FileIOBuf* pbuf, size_t size, + const char* atom_fn, bool is_write, FileIOCB cb); + +extern LibError file_buf_set_real_fn(FileIOBuf buf, const char* atom_fn); + +extern FileIOBuf file_cache_find(const char* atom_fn, size_t* size); +extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size); +extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn); extern void file_cache_init(); diff --git a/source/lib/res/file/file_internal.h b/source/lib/res/file/file_internal.h index 2e7eed65eb..b1692ce68d 100644 --- a/source/lib/res/file/file_internal.h +++ b/source/lib/res/file/file_internal.h @@ -1,9 +1,9 @@ -#include "file_stats.h" - #include "file.h" #include "file_cache.h" #include "file_io.h" +#include "file_stats.h" // must come after file and file_cache + #include "compression.h" #include "zip.h" #include "archive.h" diff --git a/source/lib/res/file/file_io.cpp b/source/lib/res/file/file_io.cpp index 0c98b803f8..1d02ef5158 100644 --- a/source/lib/res/file/file_io.cpp +++ b/source/lib/res/file/file_io.cpp @@ -13,88 +13,90 @@ // async I/O //----------------------------------------------------------------------------- +// we don't do any caching or alignment here - this is just a thin AIO wrapper. // rationale: -// asynchronous IO routines don't cache; they're just a thin AIO wrapper. -// it's taken care of by file_io, which splits transfers into blocks -// and keeps temp buffers in memory (not user-allocated, because they -// might pull the rug out from under us at any time). -// -// caching here would be more complicated: would have to handle "forwarding", -// i.e. recognizing that the desired block has been issued, but isn't yet -// complete. file_io also knows more about whether a block should be cached. +// - aligning the transfer isn't possible here since we have no control +// over the buffer, i.e. we cannot read more data than requested. +// instead, this is done in file_io. 
+// - transfer sizes here are arbitrary (viz. not block-aligned); +// that means the cache would have to handle this or also split them up +// into blocks, which is redundant (already done by file_io). +// - if caching here, we'd also have to handle "forwarding" (i.e. +// desired block has been issued but isn't yet complete). again, it +// is easier to let the synchronous file_io manager handle this. +// - finally, file_io knows more about whether the block should be cached +// (e.g. whether another block request will follow), but we don't +// currently make use of this. // // disadvantages: // - streamed data will always be read from disk. no problem, because // such data (e.g. music, long speech) is unlikely to be used again soon. -// - prefetching (issuing the next few blocks from an archive during idle -// time, so that future out-of-order reads don't need to seek) isn't -// possible in the background (unless via thread, but that's discouraged). -// the utility is questionable, though: how to prefetch so as not to delay -// real IOs? can't determine "idle time" without completion notification, -// which is hard. -// we could get the same effect by bridging small gaps in file_io, -// and rearranging files in the archive in order of access. +// - prefetching (issuing the next few blocks from archive/file during +// idle time to satisfy potential future IOs) requires extra buffers; +// this is a bit more complicated than just using the cache as storage. - -static Pool aiocb_pool; - -static inline void aiocb_pool_init() +// FileIO must reference an aiocb, which is used to pass IO params to the OS. +// unfortunately it is 144 bytes on Linux - too much to put in FileIO, +// since that is stored in a 'resource control block' (see h_mgr.h). +// we therefore allocate dynamically, but via suballocator to avoid +// hitting the heap on every IO. +class AiocbAllocator { - (void)pool_create(&aiocb_pool, 32*sizeof(aiocb), sizeof(aiocb)); -} - -static inline void aiocb_pool_shutdown() -{ - (void)pool_destroy(&aiocb_pool); -} - -static inline aiocb* aiocb_pool_alloc() -{ - ONCE(aiocb_pool_init()); - return (aiocb*)pool_alloc(&aiocb_pool, 0); -} - -static inline void aiocb_pool_free(void* cb) -{ - pool_free(&aiocb_pool, cb); -} + Pool pool; +public: + void init() + { + (void)pool_create(&pool, 32*sizeof(aiocb), sizeof(aiocb)); + } + void shutdown() + { + (void)pool_destroy(&pool); + } + aiocb* alloc() + { + return (aiocb*)pool_alloc(&pool, 0); + } + // weird name to avoid trouble with mem tracker macros + // (renaming is less annoying than #include "nommgr.h") + void free_(void* cb) + { + pool_free(&pool, cb); + } +}; +static AiocbAllocator aiocb_allocator; // starts transferring to/from the given buffer. // no attempt is made at aligning or padding the transfer. LibError file_io_issue(File* f, off_t ofs, size_t size, void* p, FileIo* io) { + debug_printf("FILE| issue ofs=%d size=%d\n", ofs, size); + // zero output param in case we fail below. memset(io, 0, sizeof(FileIo)); - debug_printf("FILE| issue ofs=%d size=%d\n", ofs, size); - - - // // check params - // - CHECK_FILE(f); - if(!size || !p || !io) WARN_RETURN(ERR_INVALID_PARAM); - const bool is_write = (f->fc.flags & FILE_WRITE) != 0; - - // cut off at EOF. 
- if(!is_write) - { - const off_t bytes_left = f->fc.size - ofs; - if(bytes_left < 0) - WARN_RETURN(ERR_EOF); - size = MIN(size, (size_t)bytes_left); - size = round_up(size, AIO_SECTOR_SIZE); - } + // note: cutting off at EOF is necessary to avoid transfer errors, + // but makes size no longer sector-aligned, which would force + // waio to realign (slow). we want to pad back to sector boundaries + // afterwards (to avoid realignment), but that is not possible here + // since we have no control over the buffer (there might not be + // enough room in it). hence, do cut-off in IOManager. + // + // example: 200-byte file. IOManager issues 16KB chunks; that is way + // beyond EOF, so ReadFile fails. limiting size to 200 bytes works, + // but causes waio to pad the transfer and use align buffer (slow). + // rounding up to 512 bytes avoids realignment and does not fail + // (apparently since NTFS files are sector-padded anyway?) // (we can't store the whole aiocb directly - glibc's version is // 144 bytes large) - aiocb* cb = aiocb_pool_alloc(); + aiocb* cb = aiocb_allocator.alloc(); io->cb = cb; if(!cb) return ERR_NO_MEM; @@ -153,10 +155,12 @@ LibError file_io_wait(FileIo* io, void*& p, size_t& size) const ssize_t bytes_transferred = aio_return(cb); debug_printf("FILE| bytes_transferred=%d aio_nbytes=%u\n", bytes_transferred, cb->aio_nbytes); -// disabled: we no longer clamp to EOF -// // (size was clipped to EOF in file_io => this is an actual IO error) -// if(bytes_transferred < (ssize_t)cb->aio_nbytes) -// return ERR_IO; + // see if actual transfer count matches requested size. + // note: most callers clamp to EOF but round back up to sector size + // (see explanation in file_io_issue). since we're not sure what + // the exact sector size is (only waio knows), we can only warn of + // too small transfer counts (not return error). + debug_assert(bytes_transferred >= (ssize_t)(cb->aio_nbytes-AIO_SECTOR_SIZE)); p = (void*)cb->aio_buf; // cast from volatile void* size = bytes_transferred; @@ -167,7 +171,7 @@ LibError file_io_wait(FileIo* io, void*& p, size_t& size) LibError file_io_discard(FileIo* io) { memset(io->cb, 0, sizeof(aiocb)); // prevent further use. - aiocb_pool_free(io->cb); + aiocb_allocator.free_(io->cb); io->cb = 0; return ERR_OK; } @@ -239,7 +243,7 @@ class IOManager const void* cached_block; - u64 block_id; + BlockId block_id; // needed so that we can add the block to the cache when // its IO is complete. if we add it when issuing, we'd no longer be // thread-safe: someone else might find it in the cache before its @@ -257,7 +261,7 @@ class IOManager { memset(&io, 0, sizeof(io)); temp_buf = 0; - block_id = 0; + memset(&block_id, 0, sizeof(block_id)); cached_block = 0; } }; @@ -350,6 +354,16 @@ class IOManager ofs_misalign = start_ofs % FILE_BLOCK_SIZE; start_ofs -= (off_t)ofs_misalign; size = round_up(ofs_misalign + user_size, FILE_BLOCK_SIZE); + + // but cut off at EOF (necessary to prevent IO error). + const off_t bytes_left = f->fc.size - start_ofs; + if(bytes_left < 0) + WARN_RETURN(ERR_EOF); + size = MIN(size, (size_t)bytes_left); + + // and round back up to sector size. + // see rationale in file_io_issue. + size = round_up(size, AIO_SECTOR_SIZE); } RETURN_ERR(file_buf_get(pbuf, size, f->fc.atom_fn, is_write, cb)); @@ -360,16 +374,11 @@ class IOManager void issue(IOSlot& slot) { const off_t ofs = start_ofs+(off_t)total_issued; - size_t issue_size; - - // write: must not issue beyond end of data. 
- if(is_write) - issue_size = MIN(FILE_BLOCK_SIZE, size - total_issued); - // read: always grab whole blocks so we can put them in the cache. - // any excess data (can only be within first or last block) is - // discarded in wait(). - else - issue_size = FILE_BLOCK_SIZE; + // for both reads and writes, do not issue beyond end of file/data + const size_t issue_size = MIN(FILE_BLOCK_SIZE, size - total_issued); +// try to grab whole blocks (so we can put them in the cache). +// any excess data (can only be within first or last) is +// discarded in wait(). // check if in cache slot.block_id = block_cache_make_id(f->fc.atom_fn, ofs); @@ -441,11 +450,14 @@ class IOManager // pending transfers to complete. } - if(!slot.cached_block) + if(slot.cached_block) + block_cache_release(slot.block_id); + else + { file_io_discard(&slot.io); - - if(!slot.cached_block && pbuf == FILE_BUF_TEMP) - block_cache_mark_completed(slot.block_id); + if(pbuf == FILE_BUF_TEMP) + block_cache_mark_completed(slot.block_id); + } } @@ -539,9 +551,11 @@ ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t ctx) // optional { debug_printf("FILE| io: size=%u ofs=%u fn=%s\n", size, ofs, f->fc.atom_fn); - CHECK_FILE(f); + // note: do not update stats/trace here: this includes Zip IOs, + // which shouldn't be reported. + IOManager mgr(f, ofs, size, pbuf, cb, ctx); return mgr.run(); } @@ -549,7 +563,13 @@ ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, +void file_io_init() +{ + aiocb_allocator.init(); +} + + void file_io_shutdown() { - aiocb_pool_shutdown(); + aiocb_allocator.shutdown(); } diff --git a/source/lib/res/file/file_io.h b/source/lib/res/file/file_io.h index 158f2a49df..dd3ca45616 100644 --- a/source/lib/res/file/file_io.h +++ b/source/lib/res/file/file_io.h @@ -1 +1,2 @@ -extern void file_io_shutdown(); +extern void file_io_init(); +extern void file_io_shutdown(); diff --git a/source/lib/res/file/file_stats.cpp b/source/lib/res/file/file_stats.cpp index 41feaf777a..8894c67818 100644 --- a/source/lib/res/file/file_stats.cpp +++ b/source/lib/res/file/file_stats.cpp @@ -48,7 +48,8 @@ static uint user_ios; static double user_io_size_total; static double io_actual_size_total[FI_MAX_IDX][2]; static double io_elapsed_time[FI_MAX_IDX][2]; -static BlockId io_disk_head_pos; +static double io_process_time_total; +static BlockId io_disk_pos_cur; static uint io_seeks; // file_cache @@ -148,13 +149,19 @@ void stats_user_io(size_t user_size) user_io_size_total += user_size; } -void stats_io_start(FileIOImplentation fi, FileOp fo, size_t actual_size, double* start_time_storage) +void stats_io_start(FileIOImplentation fi, FileOp fo, size_t actual_size, + BlockId disk_pos, double* start_time_storage) { debug_assert(fi < FI_MAX_IDX); debug_assert(fo == FO_READ || FO_WRITE); io_actual_size_total[fi][fo] += actual_size; + if(disk_pos.atom_fn != io_disk_pos_cur.atom_fn || + disk_pos.block_num != io_disk_pos_cur.block_num+1) + io_seeks++; + io_disk_pos_cur = disk_pos; + timer_start(start_time_storage); } @@ -166,6 +173,16 @@ void stats_io_finish(FileIOImplentation fi, FileOp fo, double* start_time_storag io_elapsed_time[fi][fo] += timer_reset(start_time_storage); } +void stats_cb_start() +{ + timer_start(); +} + +void stats_cb_finish() +{ + io_process_time_total += timer_reset(); +} + // // file_cache diff --git a/source/lib/res/file/file_stats.h b/source/lib/res/file/file_stats.h index bb5100d131..328ef8d504 100644 --- a/source/lib/res/file/file_stats.h +++ 
b/source/lib/res/file/file_stats.h @@ -28,8 +28,11 @@ extern void stats_buf_free(); // file_io extern void stats_user_io(size_t user_size); -extern void stats_io_start(FileIOImplentation fi, FileOp fo, size_t actual_size, double* start_time_storage); +extern void stats_io_start(FileIOImplentation fi, FileOp fo, + size_t actual_size, BlockId disk_pos, double* start_time_storage); extern void stats_io_finish(FileIOImplentation fi, FileOp fo, double* start_time_storage); +extern void stats_cb_start(); +extern void stats_cb_finish(); // file_cache extern void stats_cache(CacheRet cr, size_t size, const char* atom_fn); @@ -49,8 +52,10 @@ extern void stats_dump(); #define stats_buf_alloc(user_size, padded_size) #define stats_buf_free() #define stats_user_io(user_size) -#define stats_io_start(fi, fo, actual_size, start_time_storage) +#define stats_io_start(fi, fo, actual_size, disk_pos, start_time_storage) #define stats_io_finish(fi, fo, start_time_storage) +#define stats_cb_start() +#define stats_cb_finish() #define stats_cache(cr, size, atom_fn) #define stats_block_cache(cr) #define stats_dump() diff --git a/source/lib/res/file/vfs.cpp b/source/lib/res/file/vfs.cpp index 05e94aa3f2..a90027e477 100755 --- a/source/lib/res/file/vfs.cpp +++ b/source/lib/res/file/vfs.cpp @@ -326,8 +326,6 @@ static LibError VFile_reload(VFile* vf, const char* V_path, Handle) if(x_is_open(&vf->xf)) return ERR_OK; - trace_add(V_path); - TFile* tf; uint lf = (flags & FILE_WRITE)? LF_CREATE_MISSING : 0; LibError err = tree_lookup(V_path, &tf, lf); @@ -425,6 +423,10 @@ ssize_t vfs_io(const Handle hf, const size_t size, FileIOBuf* pbuf, debug_printf("VFS| io: size=%d\n", size); H_DEREF(hf, VFile, vf); + FileCommon* fc = &vf->xf.u.fc; + + stats_user_io(size); + trace_notify_load(fc->atom_fn, fc->flags); off_t ofs = vf->ofs; vf->ofs += (off_t)size; @@ -445,7 +447,8 @@ LibError vfs_load(const char* V_fn, FileIOBuf& buf, size_t& size, uint flags /* buf = file_cache_retrieve(atom_fn, &size); if(buf) { - stats_cache(CR_HIT, size, atom_fn); + stats_user_io(size); + trace_notify_load(atom_fn, flags); return ERR_OK; } @@ -459,10 +462,6 @@ LibError vfs_load(const char* V_fn, FileIOBuf& buf, size_t& size, uint flags /* H_DEREF(hf, VFile, vf); size = x_size(&vf->xf); - // only now can we report misses, since we need to know the size for - // statistics purposes. that means vfs_load on nonexistant files will - // not show up in cache misses, which is fine. - stats_cache(CR_MISS, size, atom_fn); buf = FILE_BUF_ALLOC; ssize_t nread = vfs_io(hf, size, &buf); diff --git a/source/lib/res/file/vfs_optimizer.cpp b/source/lib/res/file/vfs_optimizer.cpp index 071677137f..41e225f2a5 100644 --- a/source/lib/res/file/vfs_optimizer.cpp +++ b/source/lib/res/file/vfs_optimizer.cpp @@ -4,114 +4,58 @@ #include "lib/timer.h" #include "file_internal.h" - -enum TraceState -{ - TS_UNINITIALIZED, - TS_DISABLED, - TS_ENABLED, - TS_ERROR, - TS_SHUTDOWN -}; -static uintptr_t trace_state = TS_UNINITIALIZED; // values from TraceState; type for use with CAS - - +static uintptr_t trace_initialized; // set via CAS static Pool trace_pool; - +// call at before using trace_pool. no-op if called more than once. 
+static inline void trace_init() +{ + if(CAS(&trace_initialized, 0, 1)) + (void)pool_create(&trace_pool, 4*MiB, sizeof(TraceEntry)); +} void trace_shutdown() { - if(trace_state == TS_DISABLED || trace_state == TS_ENABLED) - { + if(CAS(&trace_initialized, 1, 2)) (void)pool_destroy(&trace_pool); - trace_state = TS_SHUTDOWN; - } } + +static bool trace_enabled; + void trace_enable(bool want_enabled) { - if(trace_state == TS_SHUTDOWN || trace_state == TS_ERROR) - WARN_ERR_RETURN(ERR_LOGIC); - - if(CAS(&trace_state, TS_UNINITIALIZED, TS_ERROR)) - { - if(pool_create(&trace_pool, 4*MiB, sizeof(TraceEntry)) < 0) - return; // leave trace_state set to TS_ERROR - } - - trace_state = want_enabled? TS_ENABLED : TS_DISABLED; + trace_enabled = want_enabled; } -void trace_add(const char* P_fn) +static void trace_add(TraceOp op, const char* P_fn, uint flags = 0, double timestamp = 0.0) { - if(trace_state == TS_DISABLED || trace_state == TS_UNINITIALIZED) + trace_init(); + if(!trace_enabled) return; - if(trace_state != TS_ENABLED) - WARN_ERR_RETURN(ERR_LOGIC); + + if(timestamp == 0.0) + timestamp = get_time(); TraceEntry* t = (TraceEntry*)pool_alloc(&trace_pool, 0); if(!t) return; - t->timestamp = get_time(); + t->timestamp = timestamp; t->atom_fn = file_make_unique_fn_copy(P_fn, 0); + t->op = op; + t->flags = flags; } -LibError trace_write_to_file(const char* trace_filename) +void trace_notify_load(const char* P_fn, uint flags) { - if(trace_state == TS_UNINITIALIZED) - return ERR_OK; - if(trace_state != TS_ENABLED && trace_state != TS_DISABLED) - WARN_RETURN(ERR_LOGIC); - - char N_fn[PATH_MAX]; - RETURN_ERR(file_make_full_native_path(trace_filename, N_fn)); - FILE* f = fopen(N_fn, "wt"); - if(!f) - return ERR_FILE_ACCESS; - - Trace t; - trace_get(&t); - for(size_t i = 0; i < t.num_ents; i++) - fprintf(f, "%#010f: %s\n", t.ents[i].timestamp, t.ents[i].atom_fn); - - (void)fclose(f); - return ERR_OK; + trace_add(TO_LOAD, P_fn, flags); } - -LibError trace_load_from_file(const char* trace_filename) +void trace_notify_free(const char* P_fn) { - char N_fn[PATH_MAX]; - RETURN_ERR(file_make_full_native_path(trace_filename, N_fn)); - FILE* f = fopen(N_fn, "rt"); - if(!f) - return ERR_FILE_NOT_FOUND; - - // parse lines and stuff them in trace_pool - // (as if they had been trace_add-ed; replaces any existing data) - pool_free_all(&trace_pool); - char fmt[20]; - snprintf(fmt, ARRAY_SIZE(fmt), "%%f: %%%ds\n", PATH_MAX); - for(;;) - { - double timestamp; char P_path[PATH_MAX]; - int ret = fscanf(f, fmt, ×tamp, P_path); - if(ret == EOF) - break; - if(ret != 2) - debug_warn("invalid line in trace file"); - - TraceEntry* ent = (TraceEntry*)pool_alloc(&trace_pool, 0); - debug_assert(ent != 0); // was written to file from same pool => must fit - ent->timestamp = timestamp; - ent->atom_fn = file_make_unique_fn_copy(P_path, 0); - } - - fclose(f); - return ERR_OK; + trace_add(TO_FREE, P_fn); } @@ -121,123 +65,314 @@ void trace_get(Trace* t) t->num_ents = (uint)(trace_pool.da.pos / sizeof(TraceEntry)); } - -/////////////////////////////////////////////////////////////////////////////// - - - - -#if 0 - -struct FileList +LibError trace_write_to_file(const char* trace_filename) { - const char* atom_fns; - size_t num_files; + char N_fn[PATH_MAX]; + RETURN_ERR(file_make_full_native_path(trace_filename, N_fn)); + FILE* f = fopen(N_fn, "wt"); + if(!f) + WARN_RETURN(ERR_FILE_ACCESS); + + Trace t; + trace_get(&t); + const TraceEntry* ent = t.ents; + for(size_t i = 0; i < t.num_ents; i++, ent++) + { + char opcode = '?'; + switch(ent->op) 
+ { + case TO_LOAD: opcode = 'L'; break; + case TO_FREE: opcode = 'F'; break; + default: debug_warn("invalid TraceOp"); + } + + if(ent->op == TO_LOAD) + fprintf(f, "%#010f: %c %s %d\n", ent->timestamp, opcode, ent->atom_fn, ent->flags); + else + { + debug_assert(ent->op == TO_FREE); + fprintf(f, "%#010f: %c %s\n", ent->timestamp, opcode, ent->atom_fn); + } + } + + (void)fclose(f); + return ERR_OK; +} + + +LibError trace_read_from_file(const char* trace_filename, Trace* t) +{ + char N_fn[PATH_MAX]; + RETURN_ERR(file_make_full_native_path(trace_filename, N_fn)); + FILE* f = fopen(N_fn, "rt"); + if(!f) + WARN_RETURN(ERR_FILE_NOT_FOUND); + + // parse lines and stuff them in trace_pool + // (as if they had been trace_add-ed; replaces any existing data) + pool_free_all(&trace_pool); + char fmt[20]; + snprintf(fmt, ARRAY_SIZE(fmt), "%%f: %%c %%%ds %%02x\n", PATH_MAX); + for(;;) + { + double timestamp; char opcode; char P_path[PATH_MAX]; + uint flags = 0; // optional + int ret = fscanf(f, fmt, ×tamp, &opcode, P_path); + if(ret == EOF) + break; + if(ret != 3 && ret != 4) + debug_warn("invalid line in trace file"); + + TraceOp op = TO_LOAD; // default in case file is garbled + switch(opcode) + { + case 'L': op = TO_LOAD; break; + case 'F': op = TO_FREE; break; + default: debug_warn("invalid TraceOp"); + } + + trace_add(op, P_path, flags, timestamp); + } + + fclose(f); + + trace_get(t); + return ERR_OK; +} + + +enum SimulateFlags +{ + SF_SYNC_TO_TIMESTAMP = 1 }; -static LibError filelist_build(Trace* t, FileList* fl) +LibError trace_simulate(const char* trace_filename, uint flags) { -} + // prevent the actions we carry out below from generating + // trace_add-s. + trace_enabled = false; -static LibError filelist_get(FileList* fl, uint i, const char* path) -{ - return ERR_DIR_END; -} + Trace t; + RETURN_ERR(trace_read_from_file(trace_filename, &t)); + const double start_time = get_time(); + const double first_timestamp = t.ents[0].timestamp; + const TraceEntry* ent = t.ents; + for(uint i = 0; i < t.num_ents; i++, ent++) + { + // wait until time for next entry if caller requested this + if(flags & SF_SYNC_TO_TIMESTAMP) + { + while(get_time()-start_time < ent->timestamp-first_timestamp) + { + // busy-wait (don't sleep - can skew results) + } + } -static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed) -{ - uintptr_t ctx = cb_ctx; - - *bytes_processed = comp_feed(ctx, block, size); - return INFO_CB_CONTINUE; -} - -static LibError read_and_compress_file(uintptr_t ctx, ZipEntry* ze) -{ - const char* fn = ze->path; - - struct stat s; - RETURN_ERR(file_stat(fn, &s)); - const size_t ucsize = s.st_size; - - RETURN_ERR(comp_reset(ctx)); - RETURN_ERR(comp_alloc_output(ctx, ucsize)); - - File f; - RETURN_ERR(file_open(fn, 0, &f)); - FileIOBuf buf = FILE_BUF_ALLOC; - uintptr_t cb_ctx = ctx; - ssize_t cbytes_output = file_io(&f, 0, ucsize, &buf, compress_cb, cb_ctx); - (void)file_close(&f); - - void* cdata; size_t csize; - RETURN_ERR(comp_finish(ctx, &cdata, &csize)); - debug_assert(cbytes_output <= csize); - - RETURN_ERR(cbytes_output); - -// decide if it was better compressed or not - - ze->ucsize = ucsize; - ze->mtime = s.st_mtime; - ze->method = CM_DEFLATE; - ze->csize = csize; - ze->cdata = cdata; - - zip_archive_add(&za, &ze); + // carry out this entry's operation + FileIOBuf buf; size_t size; + switch(ent->op) + { + case TO_LOAD: + (void)vfs_load(ent->atom_fn, buf, size, ent->flags); + break; + case TO_FREE: + buf = file_cache_find(ent->atom_fn, &size); + 
-static void build_optimized_archive(const char* trace_file, const char* zip_filename)
+
+//-----------------------------------------------------------------------------
+
+struct FileList
+{
+    const char** atom_fns;
+    size_t num_files;
+    size_t i;
+};
+
+
+static LibError filelist_build(Trace* t, FileList* fl)
+{
+    // count # files
+    fl->num_files = 0;
+    for(size_t i = 0; i < t->num_ents; i++)
+        if(t->ents[i].op == TO_LOAD)
+            fl->num_files++;
+
+    fl->atom_fns = new const char*[fl->num_files];
+
+    size_t ti = 0;
+    for(size_t i = 0; i < fl->num_files; i++)
+    {
+        // find next trace entry that is a load (must exist)
+        while(t->ents[ti].op != TO_LOAD)
+            ti++;
+        fl->atom_fns[i] = t->ents[ti++].atom_fn;
+    }
+
+    fl->i = 0;
+    return ERR_OK;
+}
+
+
+static const char* filelist_get_next(FileList* fl)
+{
+    if(fl->i == fl->num_files)
+        return 0;
+    return fl->atom_fns[fl->i++];
+}
+
+
+//-----------------------------------------------------------------------------
+
+static inline bool file_type_is_uncompressible(const char* fn)
+{
+    const char* ext = strrchr(fn, '.');
+    // no extension? bail; don't waste time attempting to compress it.
+    if(!ext)
+        return true;
+
+    // this is a selection of file types that are certainly not
+    // further compressible. we need not include every type under the sun -
+    // this is only a slight optimization that avoids wasting time
+    // compressing files. the real decision as to cmethod is made based
+    // on attained compression ratio.
+    static const char* uncompressible_exts[] =
+    {
+        "zip", "rar",
+        "jpg", "jpeg", "png",
+        "ogg", "mp3"
+    };
+
+    for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
+    {
+        if(!stricmp(ext+1, uncompressible_exts[i]))
+            return true;
+    }
+
+    return false;
+}
+
+
+struct CompressParams
+{
+    bool attempt_compress;
+    uintptr_t ctx;
+};
+
+static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
+{
+    const CompressParams* p = (const CompressParams*)cb_ctx;
+
+    // comp_feed already makes note of total #bytes fed, and we need
+    // vfs_io to return the uc size (to check if all data was read).
+    *bytes_processed = size;
+
+    if(p->attempt_compress)
+        (void)comp_feed(p->ctx, block, size);
+    return INFO_CB_CONTINUE;
+}
+
+
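compress_cb above and lfh_copier_cb in zip.cpp follow the same io-callback contract: the callback is handed each block as it arrives, reports how much of it it consumed via bytes_processed, and returns INFO_CB_CONTINUE to keep the transfer going. A minimal sketch of that contract (byte_count_cb is hypothetical and not part of this patch):

    static LibError byte_count_cb(uintptr_t cb_ctx, const void* UNUSED(block), size_t size, size_t* bytes_processed)
    {
        size_t* total = (size_t*)cb_ctx;    // caller passes &total as cb_ctx
        *total += size;
        *bytes_processed = size;            // as in compress_cb: "consume" the whole block
        return INFO_CB_CONTINUE;            // keep feeding us subsequent blocks
    }
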
+static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
+    ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf)    // out
+{
+    struct stat s;
+    RETURN_ERR(file_stat(atom_fn, &s));
+    const size_t ucsize = s.st_size;
+
+    const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
+    if(attempt_compress)
+    {
+        RETURN_ERR(comp_reset(ctx));
+        RETURN_ERR(comp_alloc_output(ctx, ucsize));
+    }
+
+    // read file into newly allocated buffer. if attempt_compress, also
+    // compress the file into another buffer while waiting for IOs.
+    Handle hf = vfs_open(atom_fn, 0);
+    RETURN_ERR(hf);
+    buf = FILE_BUF_ALLOC;
+    const CompressParams params = { attempt_compress, ctx };
+    ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)&params);
+    debug_assert(ucsize_read == (ssize_t)ucsize);
+    (void)vfs_close(hf);
+
+    // if we compressed the file trial-wise, check results and
+    // decide whether to store as such or not (based on compression ratio)
+    bool store_compressed = false;
+    void* cdata = 0; size_t csize = 0;
+    if(attempt_compress)
+    {
+        RETURN_ERR(comp_finish(ctx, &cdata, &csize));
+
+        const float ratio = (float)ucsize / csize;
+        const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
+        if(ratio > 1.05f && bytes_saved > 200)
+            store_compressed = true;
+    }
+
+    // store file info
+    ent.ucsize = (off_t)ucsize;
+    ent.mtime = s.st_mtime;
+    // .. ent.ofs is set by zip_archive_add_file
+    ent.flags = 0;
+    ent.atom_fn = atom_fn;
+    if(store_compressed)
+    {
+        ent.method = CM_DEFLATE;
+        ent.csize = (off_t)csize;
+        file_contents = cdata;
+    }
+    else
+    {
+        ent.method = CM_NONE;
+        ent.csize = (off_t)ucsize;
+        file_contents = (void*)buf;
+    }
+
+    // note: no need to free cdata - it is owned by the
+    // compression context and can be reused.
+
+    return ERR_OK;
+}
+
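The accept/reject rule above reads: keep the compressed form only if it is at least ~5% smaller and saves more than 200 bytes, so tiny files and already-compressed data fall back to CM_NONE. Pulled out for illustration (should_store_compressed is hypothetical; the logic lives inline above):

    static bool should_store_compressed(size_t ucsize, size_t csize)
    {
        const float ratio = (float)ucsize / csize;
        const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
        // e.g. 100000 -> 90000 B: ratio 1.11, 10000 B saved => store compressed;
        //         500 ->   450 B: ratio 1.11, but only 50 B saved => store as-is.
        return ratio > 1.05f && bytes_saved > 200;
    }
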
+static LibError build_optimized_archive(const char* trace_filename, const char* zip_filename)
 {
     FileList fl;
     {
         Trace t;
-        RETURN_ERR(trace_load_from_file(trace_filename, &t));
-        filelist_build(&t, &fl);
+        RETURN_ERR(trace_read_from_file(trace_filename, &t));
+        RETURN_ERR(filelist_build(&t, &fl));
     }
 
-    ZipArchive za;
-    zip_archive_create(zip_filename, &za);
-
-    uintptr_t ctx = comp_alloc();
-    uint trace_i = 0;
-    uint queued_files = 0, committed_files = 0;
+    ZipArchive* za;
+    RETURN_ERR(zip_archive_create(zip_filename, &za));
+    uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
 
     for(;;)
     {
-
-/*
-document: zlib layer is ok to allocate. caller shouldnt do so from a pool:
-    when the next file is going to be loaded and decompressed but our pool is full,
-    we need to wait for the archive write to finish and mark pool as reclaimed.
-    this is better done with heap; also, memory isn't bottleneck for readqueue size
-*/
-
-        ZipEntry ze;    // TODO: QUEUE
-        const int max_readqueue_depth = 1;
-        for(uint i = 0; i < max_readqueue_depth; i++)
-        {
-            LibError ret = trace_get_next_file(trace, trace_i, ze.path);
-            if(ret == ERR_DIR_END)
-                break;
-
-            WARN_ERR(read_and_compress_file(ctx, &ze));
-            queued_files++;
-        }
-
-        if(committed_files == queued_files)
+        const char* atom_fn = filelist_get_next(&fl);
+        if(!atom_fn)
             break;
-        zip_archive_add(&za, &ze);
-        committed_files++;
+
+        ArchiveEntry ent; void* file_contents; FileIOBuf buf;
+        if(read_and_compress_file(atom_fn, ctx, ent, file_contents, buf) == ERR_OK)
+        {
+            (void)zip_archive_add_file(za, &ent, file_contents);
+            (void)file_buf_free(buf);
+        }
     }
 
-    comp_free(ctx);
-
-    zip_archive_finish(&za);
+    comp_free(ctx);
+    (void)zip_archive_finish(za);
+    return ERR_OK;
 }
-#endif
diff --git a/source/lib/res/file/vfs_optimizer.h b/source/lib/res/file/vfs_optimizer.h
index c8889b1278..ac19a48d35 100644
--- a/source/lib/res/file/vfs_optimizer.h
+++ b/source/lib/res/file/vfs_optimizer.h
@@ -2,24 +2,42 @@
 #define VFS_OPTIMIZER_H__
 
 extern void trace_enable(bool want_enabled);
-extern void trace_add(const char* P_fn);
+extern void trace_shutdown();
 
-extern LibError trace_write_to_file(const char* trace_filename);
-extern LibError trace_read_from_file(const char* trace_filename);
+extern void trace_notify_load(const char* P_fn, uint flags);
+extern void trace_notify_free(const char* P_fn);
 
+// TraceEntry operation type.
+// note: rather than only a list of accessed files, we also need to
+// know the application's behavior WRT caching (e.g. when it releases
+// cached buffers). this is necessary so that our simulation can
+// yield the same results.
+enum TraceOp
+{
+    TO_LOAD,
+    TO_FREE
+};
+
+// stores one event that is relevant for file IO / caching.
+//
+// size-optimized a bit since these are all kept in memory
+// (to prevent trace file writes from affecting other IOs)
 struct TraceEntry
 {
-    double timestamp;
-    const char* atom_fn;
+    double timestamp;       // returned by get_time before operation starts
+    const char* atom_fn;    // path+name of affected file
+    uint op : 8;            // operation - see TraceOp
+    uint flags : 24;        // misc, e.g. file_io flags.
 };
 
 struct Trace
 {
     const TraceEntry* ents;
-    uint num_ents;
+    size_t num_ents;
 };
 
 extern void trace_get(Trace* t);
 
-extern void trace_shutdown();
+extern LibError trace_write_to_file(const char* trace_filename);
+extern LibError trace_read_from_file(const char* trace_filename, Trace* t);
 
 #endif  // #ifndef VFS_OPTIMIZER_H__
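To make the "size-optimized" comment concrete: op and flags share a single 32-bit uint via bit-fields, so on a typical 32-bit build each entry is 8 (double) + 4 (pointer) + 4 = 16 bytes. A purely illustrative C++03-style compile-time check, valid only under that layout assumption:

    // fails to compile if the 16-byte packing assumption is violated
    typedef int TraceEntry_is_16_bytes[sizeof(TraceEntry) == 16 ? 1 : -1];
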
diff --git a/source/lib/res/file/zip.cpp b/source/lib/res/file/zip.cpp
index a6793ec220..b061ee27a2 100755
--- a/source/lib/res/file/zip.cpp
+++ b/source/lib/res/file/zip.cpp
@@ -266,28 +266,62 @@ static LibError za_extract_cdfh(const CDFH* cdfh,
 }
 
 
+// this code grabs an LFH struct from file block(s) that are
+// passed to the callback. usually, one call copies the whole thing,
+// but the LFH may straddle a block boundary.
+//
+// rationale: this allows using temp buffers for zip_fixup_lfh,
+// which avoids involving the file buffer manager and thus
+// unclutters the trace and cache contents.
+struct LFH_Copier
+{
+    u8* lfh_dst;
+    size_t lfh_bytes_remaining;
+};
+
+static LibError lfh_copier_cb(uintptr_t ctx, const void* block, size_t size, size_t* bytes_processed)
+{
+    LFH_Copier* p = (LFH_Copier*)ctx;
+
-// find corresponding LFH, needed to calculate file offset
-// (its extra field may not match that reported by CDFH!).
+    debug_assert(size <= p->lfh_bytes_remaining);
+    memcpy2(p->lfh_dst, block, size);
+    p->lfh_dst += size;
+    p->lfh_bytes_remaining -= size;
+
+    *bytes_processed = size;
+    return INFO_CB_CONTINUE;
+}
+
+// ensures ent->ofs points to the actual file contents; it is initially
+// the offset of the LFH. we cannot use CDFH filename and extra field
+// lengths to skip past LFH since that may not mirror CDFH (has happened).
+//
+// this is called at file-open time instead of while mounting to
+// reduce seeks: since reading the file will typically follow, the
+// block cache entirely absorbs the IO cost.
 void zip_fixup_lfh(File* f, ArchiveEntry* ent)
 {
-    // improbable that this will be in cache - if this file had already
-    // been read, it would have been fixed up. only in cache if this
-    // file is in the same block as a previously read file (i.e. both small)
-    FileIOBuf buf = FILE_BUF_ALLOC;
-    file_io(f, ent->ofs, LFH_SIZE, &buf);
-    const LFH* lfh = (const LFH*)buf;
+    // already fixed up - done.
+    if(!(ent->flags & ZIP_LFH_FIXUP_NEEDED))
+        return;
 
-    debug_assert(lfh->magic == lfh_magic);
-    const size_t fn_len = read_le16(&lfh->fn_len);
-    const size_t e_len = read_le16(&lfh->e_len);
+    // performance note: this ends up reading one file block, which is
+    // only in the block cache if the file starts in the same block as a
+    // previously read file (i.e. both are small).
+    LFH lfh;
+    LFH_Copier params = { (u8*)&lfh, sizeof(LFH) };
+    ssize_t ret = file_io(f, ent->ofs, LFH_SIZE, FILE_BUF_TEMP, lfh_copier_cb, (uintptr_t)&params);
+    debug_assert(ret == sizeof(LFH));
+
+    debug_assert(lfh.magic == lfh_magic);
+    const size_t fn_len = read_le16(&lfh.fn_len);
+    const size_t e_len = read_le16(&lfh.e_len);
 
     ent->ofs += (off_t)(LFH_SIZE + fn_len + e_len);
     // LFH doesn't have a comment field!
 
-    file_buf_free(buf);
+    ent->flags &= ~ZIP_LFH_FIXUP_NEEDED;
 }
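A sketch of how a reader benefits from the fixup (read_archived_file_contents is hypothetical; the real call sites live elsewhere in the archive code): assuming the standard 30-byte fixed LFH, a 9-character filename and no extra field, ent->ofs advances by 39 and can then be handed straight to file_io.

    static ssize_t read_archived_file_contents(File* f, ArchiveEntry* ent, FileIOBuf* pbuf)
    {
        zip_fixup_lfh(f, ent);  // no-op if ZIP_LFH_FIXUP_NEEDED is already clear
        // ent->ofs now points past LFH + filename + extra field,
        // i.e. at the stored data itself (csize bytes of it).
        return file_io(f, ent->ofs, ent->csize, pbuf);
    }
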
@@ -393,21 +427,24 @@ struct ZipArchive
     uint cd_entries;
 };
 
-struct ZipEntry
-{
-    char path[PATH_MAX];
-    size_t ucsize;
-    time_t mtime;
-    ZipCompressionMethod method;
-    size_t csize;
-    const void* cdata;
-};
+// we don't want to expose ZipArchive to callers, so
+// allocate the storage here and return opaque pointer.
+static SingleAllocator za_mgr;
 
-LibError zip_archive_create(const char* zip_filename, ZipArchive* za)
+
+LibError zip_archive_create(const char* zip_filename, ZipArchive** pza)
 {
-    memset(za, 0, sizeof(*za));
-    RETURN_ERR(file_open(zip_filename, 0, &za->f));
-    RETURN_ERR(pool_create(&za->cdfhs, 10*MiB, 0));
+    // local za_copy simplifies things - if something fails, no cleanup is
+    // needed. upon success, we copy into the newly allocated real za.
+    ZipArchive za_copy;
+    RETURN_ERR(file_open(zip_filename, 0, &za_copy.f));
+    RETURN_ERR(pool_create(&za_copy.cdfhs, 10*MiB, 0));
+
+    ZipArchive* za = (ZipArchive*)za_mgr.alloc();
+    if(!za)
+        WARN_RETURN(ERR_NO_MEM);
+    *za = za_copy;
+    *pza = za;
     return ERR_OK;
 }
 
@@ -424,18 +461,14 @@ static inline u16 u16_from_size_t(size_t x)
     return (u16)(x & 0xFFFF);
 }
 
-
-LibError zip_archive_add(ZipArchive* za, const ZipEntry* ze)
+LibError zip_archive_add_file(ZipArchive* za, const ArchiveEntry* ze, void* file_contents)
 {
-    FileIOBuf buf;
-
-    const char* fn = ze->path;
+    const char* fn = ze->atom_fn;
     const size_t fn_len = strlen(fn);
     const size_t ucsize = ze->ucsize;
     const u32 fat_mtime = FAT_from_time_t(ze->mtime);
     const u16 method = (u16)ze->method;
     const size_t csize = ze->csize;
-    const void* cdata = ze->cdata;
 
     const off_t lfh_ofs = za->cur_file_size;
 
@@ -454,11 +487,12 @@ LibError zip_archive_add(ZipArchive* za, const ZipEntry* ze)
         u16_from_size_t(fn_len), 0  // e_len
     };
+    FileIOBuf buf;
     buf = (FileIOBuf)&lfh;
-    file_io(&za->f, lfh_ofs, lfh_size, &buf);
+    file_io(&za->f, lfh_ofs, lfh_size, &buf);
     buf = (FileIOBuf)fn;
-    file_io(&za->f, lfh_ofs+lfh_size, fn_len, &buf);
-    buf = (FileIOBuf)cdata;
+    file_io(&za->f, lfh_ofs+lfh_size, fn_len, &buf);
+    buf = (FileIOBuf)file_contents;
     file_io(&za->f, lfh_ofs+(off_t)(lfh_size+fn_len), csize, &buf);
 
     za->cur_file_size += (off_t)(lfh_size+fn_len+csize);
 
@@ -511,6 +545,7 @@ LibError zip_archive_finish(ZipArchive* za)
 
     (void)file_close(&za->f);
     (void)pool_destroy(&za->cdfhs);
+    za_mgr.free(za);
     return ERR_OK;
 }
diff --git a/source/lib/res/file/zip.h b/source/lib/res/file/zip.h
index e135c1135e..3d5286c94d 100755
--- a/source/lib/res/file/zip.h
+++ b/source/lib/res/file/zip.h
@@ -8,4 +8,11 @@ extern LibError zip_populate_archive(Archive* a, File* f);
 
 extern void zip_fixup_lfh(File* f, ArchiveEntry* ent);
 
+
+struct ZipArchive;
+extern LibError zip_archive_create(const char* zip_filename, ZipArchive** pza);
+extern LibError zip_archive_add_file(ZipArchive* za, const ArchiveEntry* ze, void* file_contents);
+extern LibError zip_archive_finish(ZipArchive* za);
+
+
 #endif  // #ifndef ZIP_H__
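A minimal usage sketch of the archive-writing API declared above (the wrapper function, the output name and the pre-filled ArchiveEntry are illustrative only):

    static LibError write_single_file_archive(const char* zip_filename,
        const ArchiveEntry* ent, void* file_contents)
    {
        ZipArchive* za;     // opaque; storage comes from zip_archive_create
        RETURN_ERR(zip_archive_create(zip_filename, &za));
        RETURN_ERR(zip_archive_add_file(za, ent, file_contents));
        return zip_archive_finish(za);  // finalizes, closes the file, frees za
    }
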
diff --git a/source/lib/res/graphics/tex.cpp b/source/lib/res/graphics/tex.cpp
index 5f303d3124..ba5cb34901 100755
--- a/source/lib/res/graphics/tex.cpp
+++ b/source/lib/res/graphics/tex.cpp
@@ -213,27 +213,30 @@ TIMER_ACCRUE(tc_plain_transform);
     if(!transforms)
         return ERR_OK;
 
+    // allocate copy of the image data.
+    // rationale: L1 cache is typically A2 => swapping in-place with a
+    // line buffer leads to thrashing. we'll assume the whole texture*2
+    // fits in cache, allocate a copy, and transfer directly from there.
+    //
+    // this is necessary even when not flipping because the initial Tex.hm
+    // (which is a FileIOBuf) is read-only.
+    Handle hm;
+    void* new_data = mem_alloc(data_size, 4*KiB, 0, &hm);
+    if(!new_data)
+        return ERR_NO_MEM;
+    memcpy2(new_data, data, data_size);
+
     // setup row source/destination pointers (simplifies outer loop)
-    u8* dst = data;
-    const u8* src = data;
+    u8* dst = (u8*)new_data;
+    const u8* src = (const u8*)new_data;
     const size_t pitch = w * bpp/8;
+    // .. avoid y*pitch multiply in row loop; instead, add row_ofs.
     ssize_t row_ofs = (ssize_t)pitch;
-    // avoid y*pitch multiply in row loop; instead, add row_ofs.
-    void* clone_data = 0;
 
+    // flipping rows (0,1,2 -> 2,1,0)
     if(transforms & TEX_ORIENTATION)
     {
-        // L1 cache is typically A2 => swapping in-place with a line buffer
-        // leads to thrashing. we'll assume the whole texture*2 fits in cache,
-        // allocate a copy, and transfer directly from there.
-        //
-        // note: we don't want to return a new buffer: the user assumes
-        // buffer address will remain unchanged.
-        clone_data = mem_alloc(data_size, 4*KiB);
-        if(!clone_data)
-            return ERR_NO_MEM;
-        memcpy2(clone_data, data, data_size);
-        src = (const u8*)clone_data+data_size-pitch;    // last row
+        src = (const u8*)data+data_size-pitch;  // last row
         row_ofs = -(ssize_t)pitch;
     }
 
@@ -280,8 +283,9 @@ TIMER_ACCRUE(tc_plain_transform);
         }
     }
 
-    if(clone_data)
-        (void)mem_free(clone_data);
+    mem_free_h(t->hm);
+    t->hm = hm;
+    t->ofs = 0;
 
     if(!(t->flags & TEX_MIPMAPS) && transforms & TEX_MIPMAPS)
     {
@@ -296,10 +300,11 @@ TIMER_ACCRUE(tc_plain_transform);
         const u8* mipmap_data = (const u8*)mem_alloc(mipmap_size, 4*KiB, 0, &hm);
         if(!mipmap_data)
             return ERR_NO_MEM;
-        CreateLevelData cld = { bpp/8, w, h, data, data_size };
+        CreateLevelData cld = { bpp/8, w, h, (const u8*)new_data, data_size };
         tex_util_foreach_mipmap(w, h, bpp, mipmap_data, 0, 1, create_level, &cld);
         mem_free_h(t->hm);
         t->hm = hm;
+        t->ofs = 0;
     }
 
     CHECK_TEX(t);
@@ -450,6 +455,12 @@ static LibError tex_load_impl(FileIOBuf file_, size_t file_size, Tex* t)
 }
 
 
+// MEM_DTOR -> file_buf_free adapter (used for mem_wrap-ping FileIOBuf)
+static void file_buf_dtor(void* p, size_t UNUSED(size), uintptr_t UNUSED(ctx))
+{
+    (void)file_buf_free((FileIOBuf)p);
+}
+
 // load the specified image from file into the given Tex object.
 // currently supports BMP, TGA, JPG, JP2, PNG, DDS.
 LibError tex_load(const char* fn, Tex* t)
 {
@@ -460,7 +471,7 @@ LibError tex_load(const char* fn, Tex* t)
     // must be protected against being accidentally free-d in that case.
     RETURN_ERR(vfs_load(fn, file, file_size));
 
-    Handle hm = mem_wrap((void*)file, file_size, 0, 0, 0, 0, 0, (void*)tex_load);
+    Handle hm = mem_wrap((void*)file, file_size, 0, 0, 0, file_buf_dtor, 0, (void*)tex_load);
     t->hm = hm;
     LibError ret = tex_load_impl(file, file_size, t);
     if(ret < 0)
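For context, a minimal sketch of the row loop those dst/src/row_ofs pointers feed (the real transform loop lives elsewhere in tex.cpp and may also convert pixel formats; this only illustrates how row_ofs lets one loop handle both orientations):

    // dst walks forward through new_data; src walks forward (+pitch) or
    // backward (-pitch) depending on whether rows are being flipped.
    // (when not flipping, src == dst and the copy stands in for the
    // actual per-pixel transform.)
    for(size_t y = 0; y < h; y++)
    {
        memcpy2(dst, src, pitch);
        dst += pitch;
        src += row_ofs;
    }
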