forked from 0ad/0ad
Cache: implement meat of landlord algorithm and add remove()
allocators: add freelist capability to Bucket; add provision for variable XOR fixed-size allocs
archive: re-tag file buffers if reading uncompressed from archive; improve LFH fixup logic
file_cache: add cache line invalidation; lock down pages (read-only) when IO finished
file_io: cleanup + docs; properly cut off at EOF without breaking alignment
file_stats: add seek accounting (WIP)
vfs_optimizer: also record file_buf_free in the trace
initial implementation of archive builder (WIP)
zip: lfh_fixup now more efficient (does not involve buffer manager - instead it grabs LFH from temp blocks)
tex: plug FileIOBuf leak; avoid writing to tex.hm because that is a read-only file_buf

This was SVN commit r3428.
parent 3b4295a177
commit e07622b56a
@ -230,7 +230,19 @@ public:
		debug_assert(ret.second);	// must not already be in map
	}

	T retrieve(Key key, size_t* psize = 0)
	// remove the entry identified by <key>. expected usage is to check
	// if present and determine size via retrieve(), so no need to
	// do anything else here.
	// useful for invalidating single cache entries.
	void remove(Key key)
	{
		map.erase(key);
	}

	// if there is no entry for <key> in the cache, return 0 with
	// psize unchanged. otherwise, return its item and
	// optionally pass back its size.
	T retrieve(Key key, size_t* psize = 0, bool refill_credit = true)
	{
		CacheMapIt it = map.find(key);
		if(it == map.end())
@ -238,22 +250,54 @@ public:
		CacheEntry& entry = it->second;
		if(psize)
			*psize = entry.size;
		// increase credit

		if(refill_credit)
		{
			// Landlord algorithm calls for credit to be reset to anything
			// between its current value and the cost.
			const float gain = 0.75f;	// restore most credit
			entry.credit = gain*entry.cost + (1.0f-gain)*entry.credit;
		}

		return entry.item;
	}


	// remove the least valuable item and optionally indicate
	// how big it was (useful for statistics).
	T remove_least_valuable(size_t* psize = 0)
	{
		CacheMapIt it;

again:	// until we find someone to evict
		// one iteration ought to suffice to evict someone due to
		// definition of min_density, but we provide for repeating
		// in case of floating-point imprecision.
		// (goto vs. loop avoids nesting and emphasizes rarity)
again:

		// foreach entry: decrease credit and evict if <= 0
		// find minimum credit density (needed for charge step)
		float min_density = 1e10;	// = \delta in [Young02]
		for( it = map.begin(); it != map.end(); ++it)
		{
			CacheEntry& entry = it->second;
			// found someone we can evict
			const float density = entry.credit / entry.size;
			min_density = MIN(density, min_density);
		}

		// .. charge everyone rent (proportional to min_density and size)
		for( it = map.begin(); it != map.end(); ++it)
		{
			CacheEntry& entry = it->second;
			entry.credit -= min_density * entry.size;

			// evict immediately if credit is exhausted
			// (note: Landlord algorithm calls for 'any subset' of
			// these items to be evicted. since we need to return
			// information about the item, we can only discard one.)
			//
			// this means every call will end up charging more than
			// intended, but we compensate by resetting credit
			// fairly high upon cache hit.
			if(entry.credit <= 0.0f)
			{
				T item = entry.item;
@ -264,8 +308,7 @@ again: // until we find someone to evict
			}
		}

		// none were evicted
		// charge rent
		// none were evicted - do it all again.
		goto again;
	}

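For readers unfamiliar with Landlord, the same credit/rent mechanics in a self-contained sketch (simplified, illustrative types and names; this is not the engine's Cache template):

#include <algorithm>
#include <map>
#include <string>

struct Entry { float credit; float cost; size_t size; };
typedef std::map<std::string, Entry> Map;

// evict one entry per the Landlord policy; returns its key ("" if empty).
std::string landlord_evict(Map& m)
{
	while(!m.empty())
	{
		// delta = minimum credit density over all entries
		float delta = 1e10f;
		for(Map::iterator it = m.begin(); it != m.end(); ++it)
			delta = std::min(delta, it->second.credit / it->second.size);

		// charge every entry rent proportional to its size and evict
		// the first one whose credit is exhausted.
		for(Map::iterator it = m.begin(); it != m.end(); ++it)
		{
			it->second.credit -= delta * it->second.size;
			if(it->second.credit <= 0.0f)
			{
				const std::string key = it->first;
				m.erase(it);
				return key;
			}
		}
		// only reached due to floating-point imprecision (cf. the goto above)
	}
	return std::string();
}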
@ -364,6 +364,7 @@ LibError da_append(DynArray* da, const void* data, size_t size)
// - doesn't preallocate the entire pool;
// - returns sequential addresses.

// "freelist" is a pointer to the first unused element (0 if there are none);
// its memory holds a pointer to the next free one in list.
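A minimal sketch of that intrusive scheme (the project's actual freelist_push/freelist_pop may differ in details such as debug checks):

static void freelist_push(void** pfreelist, void* el)
{
	// the freed element itself stores the link to the previous head
	*(void**)el = *pfreelist;
	*pfreelist = el;
}

static void* freelist_pop(void** pfreelist)
{
	void* el = *pfreelist;
	if(el)	// non-empty: unlink the head element
		*pfreelist = *(void**)el;
	return el;	// 0 if the list was empty
}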
|
||||
|
||||
@ -386,7 +387,8 @@ static void* freelist_pop(void** pfreelist)
|
||||
}
|
||||
|
||||
|
||||
static const size_t POOL_CHUNK = 4*KiB;
// elements returned are aligned to this many bytes:
static const size_t ALIGN = 8;


// ready <p> for use. <max_size> is the upper limit [bytes] on
@ -396,15 +398,10 @@ static const size_t POOL_CHUNK = 4*KiB;
// (which cannot be freed individually);
// otherwise, it specifies the number of bytes that will be
// returned by pool_alloc (whose size parameter is then ignored).
// in the latter case, size must at least be enough for a pointer
// (due to freelist implementation).
LibError pool_create(Pool* p, size_t max_size, size_t el_size)
{
	if(el_size != 0 && el_size < sizeof(void*))
		WARN_RETURN(ERR_INVALID_PARAM);

	p->el_size = round_up(el_size, ALIGN);
	RETURN_ERR(da_alloc(&p->da, max_size));
	p->el_size = el_size;
	return ERR_OK;
}
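A hedged usage sketch of the two modes this sets up (Node is an illustrative type; error handling abbreviated; POOL_VARIABLE_ALLOCS is the el_size = 0 case declared in the header):

// fixed-size mode: elements can later be freed individually (freelist)
Pool node_pool;
(void)pool_create(&node_pool, 16*MiB, sizeof(Node));
Node* n = (Node*)pool_alloc(&node_pool, 0);	// size parameter ignored
pool_free(&node_pool, n);

// variable-size mode: individual frees are disallowed; release all at once
Pool scratch;
(void)pool_create(&scratch, 16*MiB, POOL_VARIABLE_ALLOCS);
void* p = pool_alloc(&scratch, 100);	// rounded up to ALIGN internally
pool_free_all(&scratch);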
|
||||
|
||||
@ -446,7 +443,7 @@ void* pool_alloc(Pool* p, size_t size)
|
||||
{
|
||||
// if pool allows variable sizes, go with the size parameter,
|
||||
// otherwise the pool el_size setting.
|
||||
const size_t el_size = p->el_size? p->el_size : size;
|
||||
const size_t el_size = p->el_size? p->el_size : round_up(size, ALIGN);
|
||||
|
||||
// note: this can never happen in pools with variable-sized elements
|
||||
// because they disallow pool_free.
|
||||
@ -470,17 +467,19 @@ have_el:
|
||||
}
|
||||
|
||||
|
||||
// make <el> available for reuse in the given pool.
|
||||
// make <el> available for reuse in the given Pool.
|
||||
//
|
||||
// this is not allowed if the pool was set up for variable-size elements.
|
||||
// (copying with fragmentation would defeat the point of a pool - simplicity)
|
||||
// we could allow this, but instead warn and bail to make sure it
|
||||
// never happens inadvertently (leaking memory in the pool).
|
||||
// this is not allowed if created for variable-size elements.
|
||||
// rationale: avoids having to pass el_size here and compare with size when
|
||||
// allocating; also prevents fragmentation and leaking memory.
|
||||
void pool_free(Pool* p, void* el)
|
||||
{
|
||||
// only allowed to free items if we were initialized with
|
||||
// fixed el_size. (this avoids having to pass el_size here and
|
||||
// check if requested_size matches that when allocating)
|
||||
if(p->el_size == 0)
|
||||
{
|
||||
debug_warn("pool is set up for variable-size items");
|
||||
debug_warn("cannot free variable-size items");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -506,9 +505,8 @@ void pool_free_all(Pool* p)
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// design goals:
|
||||
// - variable-sized allocations;
|
||||
// - no reuse of allocations, can only free all at once;
|
||||
// - no init necessary;
|
||||
// - fixed- XOR variable-sized blocks;
|
||||
// - allow freeing individual blocks if they are all fixed-size;
|
||||
// - never relocates;
|
||||
// - no fixed limit.
|
||||
|
||||
@ -518,46 +516,41 @@ void pool_free_all(Pool* p)
|
||||
// basically a combination of region and heap, where frees go to the heap and
|
||||
// allocs exhaust that memory first and otherwise use the region.
|
||||
|
||||
// must be constant and power-of-2 to allow fast modulo.
|
||||
const size_t BUCKET_SIZE = 4*KiB;
|
||||
// power-of-2 isn't required; value is arbitrary.
|
||||
const size_t BUCKET_SIZE = 4000;
|
||||
|
||||
// allocate <size> bytes of memory from the given Bucket object.
|
||||
// <b> must initially be zeroed (e.g. by defining it as static data).
|
||||
void* bucket_alloc(Bucket* b, size_t size)
|
||||
// ready <b> for use.
|
||||
//
|
||||
// <el_size> can be 0 to allow variable-sized allocations
|
||||
// (which cannot be freed individually);
|
||||
// otherwise, it specifies the number of bytes that will be
|
||||
// returned by bucket_alloc (whose size parameter is then ignored).
|
||||
LibError bucket_create(Bucket* b, size_t el_size)
|
||||
{
|
||||
// would overflow a bucket
|
||||
if(size > BUCKET_SIZE-sizeof(u8*))
|
||||
b->freelist = 0;
|
||||
b->el_size = round_up(el_size, ALIGN);
|
||||
|
||||
// note: allocating here avoids the is-this-the-first-time check
|
||||
// in bucket_alloc, which speeds things up.
|
||||
b->bucket = (u8*)malloc(BUCKET_SIZE);
|
||||
if(!b->bucket)
|
||||
{
|
||||
debug_warn("size doesn't fit in a bucket");
|
||||
return 0;
|
||||
// cause next bucket_alloc to retry the allocation
|
||||
b->pos = BUCKET_SIZE;
|
||||
b->num_buckets = 0;
|
||||
return ERR_NO_MEM;
|
||||
}
|
||||
|
||||
// make sure the next item will be aligned
|
||||
size = round_up(size, 8);
|
||||
|
||||
// if there's not enough space left or no bucket yet (first call),
|
||||
// close it and allocate another.
|
||||
if(b->pos+size > BUCKET_SIZE || !b->bucket)
|
||||
{
|
||||
u8* bucket = (u8*)malloc(BUCKET_SIZE);
|
||||
if(!bucket)
|
||||
return 0;
|
||||
*(u8**)bucket = b->bucket;
|
||||
b->bucket = bucket;
|
||||
// skip bucket list field and align to 8 bytes (note: malloc already
|
||||
// aligns to at least 8 bytes, so don't take b->bucket into account)
|
||||
b->pos = round_up(sizeof(u8*), 8);
|
||||
b->num_buckets++;
|
||||
}
|
||||
|
||||
void* ret = b->bucket+b->pos;
|
||||
b->pos += size;
|
||||
return ret;
|
||||
*(u8**)b->bucket = 0; // terminate list
|
||||
b->pos = round_up(sizeof(u8*), ALIGN);
|
||||
b->num_buckets = 1;
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
// free all allocations that ensued from the given Bucket.
|
||||
void bucket_free_all(Bucket* b)
|
||||
// free all memory that ensued from <b>.
|
||||
// future alloc and free calls on this Bucket will fail.
|
||||
void bucket_destroy(Bucket* b)
|
||||
{
|
||||
while(b->bucket)
|
||||
{
|
||||
@ -568,6 +561,69 @@ void bucket_free_all(Bucket* b)
|
||||
}
|
||||
|
||||
debug_assert(b->num_buckets == 0);
|
||||
|
||||
// poison pill: cause subsequent alloc and free to fail
|
||||
b->freelist = 0;
|
||||
b->el_size = BUCKET_SIZE;
|
||||
}
|
||||
|
||||
|
||||
// return an entry from the bucket, or 0 if another would have to be
|
||||
// allocated and there isn't enough memory to do so.
|
||||
// exhausts the freelist before returning new entries to improve locality.
|
||||
//
|
||||
// if the bucket was set up with fixed-size elements, <size> is ignored;
|
||||
// otherwise, <size> bytes are allocated.
|
||||
void* bucket_alloc(Bucket* b, size_t size)
|
||||
{
|
||||
size_t el_size = b->el_size? b->el_size : round_up(size, ALIGN);
|
||||
// must fit in a bucket
|
||||
debug_assert(el_size <= BUCKET_SIZE-sizeof(u8*));
|
||||
|
||||
// try to satisfy alloc from freelist
|
||||
void* el = freelist_pop(&b->freelist);
|
||||
if(el)
|
||||
return el;
|
||||
|
||||
// if there's not enough space left, close current bucket and
|
||||
// allocate another.
|
||||
if(b->pos+el_size > BUCKET_SIZE)
|
||||
{
|
||||
u8* bucket = (u8*)malloc(BUCKET_SIZE);
|
||||
if(!bucket)
|
||||
return 0;
|
||||
*(u8**)bucket = b->bucket;
|
||||
b->bucket = bucket;
|
||||
// skip bucket list field and align (note: malloc already
|
||||
// aligns to at least 8 bytes, so don't take b->bucket into account)
|
||||
b->pos = round_up(sizeof(u8*), ALIGN);
|
||||
b->num_buckets++;
|
||||
}
|
||||
|
||||
void* ret = b->bucket+b->pos;
|
||||
b->pos += el_size;
|
||||
return ret;
|
||||
}
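A hedged end-to-end example of the new fixed-size Bucket mode (Record is an illustrative element type; return values should be checked in real code):

struct Record { u32 key; u32 value; };	// illustrative

Bucket b;
(void)bucket_create(&b, sizeof(Record));

Record* r = (Record*)bucket_alloc(&b, 0);	// size ignored in fixed mode
if(r)
{
	r->key = 1; r->value = 2;
	bucket_free(&b, r);	// returns the element to the freelist
}

bucket_destroy(&b);	// frees all buckets; further calls will fail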
|
||||
|
||||
|
||||
// make <el> available for reuse in <b>.
|
||||
//
|
||||
// this is not allowed if created for variable-size elements.
|
||||
// rationale: avoids having to pass el_size here and compare with size when
|
||||
// allocating; also prevents fragmentation and leaking memory.
|
||||
void bucket_free(Bucket* b, void* el)
|
||||
{
|
||||
if(b->el_size == 0)
|
||||
{
|
||||
debug_warn("cannot free variable-size items");
|
||||
return;
|
||||
}
|
||||
|
||||
freelist_push(&b->freelist, el);
|
||||
|
||||
// note: checking if <el> was actually allocated from <b> is difficult:
|
||||
// it may not be in the currently open bucket, so we'd have to
|
||||
// iterate over the list - too much work.
|
||||
}
|
||||
|
||||
|
||||
|
@ -164,8 +164,6 @@ const size_t POOL_VARIABLE_ALLOCS = 0;
|
||||
// (which cannot be freed individually);
|
||||
// otherwise, it specifies the number of bytes that will be
|
||||
// returned by pool_alloc (whose size parameter is then ignored).
|
||||
// in the latter case, size must at least be enough for a pointer
|
||||
// (due to freelist implementation).
|
||||
extern LibError pool_create(Pool* p, size_t max_size, size_t el_size);
|
||||
|
||||
// free all memory that ensued from <p>. all elements are made unusable
|
||||
@ -185,12 +183,11 @@ extern bool pool_contains(Pool* p, void* el);
|
||||
// otherwise, <size> bytes are allocated.
|
||||
extern void* pool_alloc(Pool* p, size_t size);
|
||||
|
||||
// make <el> available for reuse in the given pool.
|
||||
// make <el> available for reuse in the given Pool.
|
||||
//
|
||||
// this is not allowed if the pool was set up for variable-size elements.
|
||||
// (copying with fragmentation would defeat the point of a pool - simplicity)
|
||||
// we could allow this, but instead warn and bail to make sure it
|
||||
// never happens inadvertently (leaking memory in the pool).
|
||||
// this is not allowed if created for variable-size elements.
|
||||
// rationale: avoids having to pass el_size here and compare with size when
|
||||
// allocating; also prevents fragmentation and leaking memory.
|
||||
extern void pool_free(Pool* p, void* el);
|
||||
|
||||
// "free" all allocations that ensued from the given Pool.
|
||||
@ -204,40 +201,61 @@ extern void pool_free_all(Pool* p);
|
||||
//
|
||||
|
||||
// design goals:
|
||||
// - variable-sized allocations;
|
||||
// - no reuse of allocations, can only free all at once;
|
||||
// - no init necessary;
|
||||
// - fixed- XOR variable-sized blocks;
|
||||
// - allow freeing individual blocks if they are all fixed-size;
|
||||
// - never relocates;
|
||||
// - no fixed limit.
|
||||
|
||||
// note: this type of allocator is called "region-based" in the literature.
|
||||
// see "Reconsidering Custom Memory Allocation" (Berger, Zorn, McKinley).
|
||||
// if individual elements must be freeable, consider "reaps":
|
||||
// if individual variable-size elements must be freeable, consider "reaps":
|
||||
// basically a combination of region and heap, where frees go to the heap and
|
||||
// allocs exhaust that memory first and otherwise use the region.
|
||||
|
||||
// opaque! do not read/write any fields!
|
||||
struct Bucket
|
||||
{
|
||||
// currently open bucket. must be initialized to 0.
|
||||
// currently open bucket.
|
||||
u8* bucket;
|
||||
|
||||
// offset of free space at end of current bucket (i.e. # bytes in use).
|
||||
// must be initialized to 0.
|
||||
size_t pos;
|
||||
|
||||
// records # buckets allocated; used to check if the list of them
|
||||
// isn't corrupted. must be initialized to 0.
|
||||
uint num_buckets;
|
||||
void* freelist;
|
||||
|
||||
size_t el_size : 16;
|
||||
|
||||
// records # buckets allocated; verifies the list of buckets is correct.
|
||||
uint num_buckets : 16;
|
||||
};
|
||||
|
||||
|
||||
// allocate <size> bytes of memory from the given Bucket object.
|
||||
// <b> must initially be zeroed (e.g. by defining it as static data).
|
||||
// ready <b> for use.
|
||||
//
|
||||
// <el_size> can be 0 to allow variable-sized allocations
|
||||
// (which cannot be freed individually);
|
||||
// otherwise, it specifies the number of bytes that will be
|
||||
// returned by bucket_alloc (whose size parameter is then ignored).
|
||||
extern LibError bucket_create(Bucket* b, size_t el_size);
|
||||
|
||||
// free all memory that ensued from <b>.
|
||||
// future alloc and free calls on this Bucket will fail.
|
||||
extern void bucket_destroy(Bucket* b);
|
||||
|
||||
// return an entry from the bucket, or 0 if another would have to be
|
||||
// allocated and there isn't enough memory to do so.
|
||||
// exhausts the freelist before returning new entries to improve locality.
|
||||
//
|
||||
// if the bucket was set up with fixed-size elements, <size> is ignored;
|
||||
// otherwise, <size> bytes are allocated.
|
||||
extern void* bucket_alloc(Bucket* b, size_t size);
|
||||
|
||||
// free all allocations that ensued from the given Bucket.
|
||||
extern void bucket_free_all(Bucket* b);
|
||||
// make <el> available for reuse in <b>.
|
||||
//
|
||||
// this is not allowed if created for variable-size elements.
|
||||
// rationale: avoids having to pass el_size here and compare with size when
|
||||
// allocating; also prevents fragmentation and leaking memory.
|
||||
extern void bucket_free(Bucket* b, void* el);
|
||||
|
||||
|
||||
//
|
||||
@ -267,25 +285,29 @@ extern void matrix_free(void** matrix);
|
||||
// overrun protection
|
||||
//
|
||||
|
||||
// this class wraps an arbitrary object in DynArray memory and can detect
|
||||
// inadvertent writes to it. this is useful for tracking down memory overruns.
|
||||
//
|
||||
// the basic idea is to require users to request access to the object and
|
||||
// notify us when done; memory access permission is temporarily granted.
|
||||
// (similar in principle to Software Transaction Memory).
|
||||
//
|
||||
// since this is quite slow, the protection is disabled unless
|
||||
// CONFIG_OVERRUN_PROTECTION == 1; this avoids having to remove the
|
||||
// wrapper code in release builds and re-write when looking for overruns.
|
||||
//
|
||||
// example usage:
|
||||
// OverrunProtector<your_class> your_class_wrapper;
|
||||
// ..
|
||||
// your_class* yc = your_class_wrapper.get();
|
||||
// if(!yc) abort(); // not enough memory to allocate a your_class instance
|
||||
// // access/write to <yc>
|
||||
// your_class_wrapper.lock(); // disallow further access
|
||||
// ..
|
||||
/*
|
||||
OverrunProtector wraps an arbitrary object in DynArray memory and can detect
|
||||
inadvertent writes to it. this is useful for tracking down memory overruns.
|
||||
|
||||
the basic idea is to require users to request access to the object and
|
||||
notify us when done; memory access permission is temporarily granted.
|
||||
(similar in principle to Software Transaction Memory).
|
||||
|
||||
since this is quite slow, the protection is disabled unless
|
||||
CONFIG_OVERRUN_PROTECTION == 1; this avoids having to remove the
|
||||
wrapper code in release builds and re-write when looking for overruns.
|
||||
|
||||
example usage:
|
||||
OverrunProtector<your_class> your_class_wrapper;
|
||||
..
|
||||
your_class* yc = your_class_wrapper.get(); // unlock, make ready for use
|
||||
if(!yc) // your_class_wrapper's one-time alloc of a your_class-
|
||||
abort(); // instance had failed - can't continue.
|
||||
doSomethingWith(yc); // read/write access
|
||||
your_class_wrapper.lock(); // disallow further access until next .get()
|
||||
..
|
||||
*/
|
||||
|
||||
template<class T> class OverrunProtector
|
||||
{
|
||||
DynArray da;
|
||||
@ -322,11 +344,9 @@ private:
|
||||
|
||||
void init()
|
||||
{
|
||||
const size_t size = 4096;
|
||||
cassert(sizeof(T) <= size);
|
||||
if(da_alloc(&da, size) < 0)
|
||||
if(da_alloc(&da, sizeof(T)) < 0)
|
||||
goto fail;
|
||||
if(da_set_size(&da, size) < 0)
|
||||
if(da_set_size(&da, sizeof(T)) < 0)
|
||||
goto fail;
|
||||
|
||||
#include "nommgr.h"
|
||||
|
@ -281,11 +281,7 @@ LibError afile_open(const Handle ha, const char* fn, uintptr_t memento, int flag
|
||||
// => need to copy ArchiveEntry fields into AFile.
|
||||
RETURN_ERR(archive_get_file_info(a, atom_fn, memento, ent));
|
||||
|
||||
if(ent->flags & ZIP_LFH_FIXUP_NEEDED)
|
||||
{
|
||||
zip_fixup_lfh(&a->f, ent);
|
||||
ent->flags &= ~ZIP_LFH_FIXUP_NEEDED;
|
||||
}
|
||||
zip_fixup_lfh(&a->f, ent);
|
||||
|
||||
uintptr_t ctx = 0;
|
||||
// slight optimization: do not allocate context if not compressed
|
||||
@ -517,8 +513,14 @@ ssize_t afile_read(AFile* af, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB
|
||||
H_DEREF(af->ha, Archive, a);
|
||||
|
||||
if(!is_compressed(af))
|
||||
{
|
||||
bool we_allocated = (pbuf != FILE_BUF_TEMP) && (*pbuf == FILE_BUF_ALLOC);
|
||||
// no need to set last_cofs - only checked if compressed.
|
||||
return file_io(&a->f, af->ofs+ofs, size, pbuf, cb, cb_ctx);
|
||||
RETURN_ERR(file_io(&a->f, af->ofs+ofs, size, pbuf, cb, cb_ctx));
|
||||
if(we_allocated)
|
||||
(void)file_buf_set_real_fn(*pbuf, af->fc.atom_fn);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
debug_assert(af->ctx != 0);
|
||||
|
||||
|
@ -174,7 +174,7 @@ struct ArchiveEntry
|
||||
time_t mtime;
|
||||
|
||||
// used in IO
|
||||
off_t ofs; // bit 31 set if fixup needed
|
||||
off_t ofs;
|
||||
off_t csize;
|
||||
CompressionMethod method;
|
||||
|
||||
|
@ -392,9 +392,9 @@ uintptr_t comp_alloc(ContextType type, CompressionMethod method)
|
||||
return 0;
|
||||
Compressor* c;
|
||||
|
||||
#include "nommgr.h" // protect placement new and free() from macros
|
||||
switch(method)
|
||||
{
|
||||
#include "nommgr.h"
|
||||
#ifndef NO_ZLIB
|
||||
case CM_DEFLATE:
|
||||
cassert(sizeof(ZLibCompressor) <= MAX_COMPRESSOR_SIZE);
|
||||
@ -407,6 +407,7 @@ uintptr_t comp_alloc(ContextType type, CompressionMethod method)
|
||||
return 0;
|
||||
#include "mmgr.h"
|
||||
}
|
||||
#include "mmgr.h"
|
||||
|
||||
c->init();
|
||||
return (uintptr_t)c;
|
||||
|
@ -26,6 +26,7 @@ extern ssize_t comp_feed(uintptr_t ctx, const void* in, size_t in_size);
|
||||
|
||||
extern LibError comp_finish(uintptr_t ctx, void** out, size_t* out_size);
|
||||
|
||||
extern LibError comp_reset(uintptr_t ctx);
|
||||
extern void comp_free(uintptr_t ctx);
|
||||
|
||||
#endif // #ifndef COMPRESSION_H__
|
||||
|
@ -824,6 +824,7 @@ LibError file_init()
|
||||
{
|
||||
atom_init();
|
||||
file_cache_init();
|
||||
file_io_init();
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
@ -7,17 +7,214 @@
|
||||
#include "lib/adts.h"
|
||||
#include "file_internal.h"
|
||||
|
||||
// strategy:
|
||||
// policy:
|
||||
// - allocation: use all available mem first, then look at freelist
|
||||
// - freelist: good fit, address-ordered, always split
|
||||
// - free: immediately coalesce
|
||||
// mechanism:
|
||||
// - coalesce: boundary tags in freed memory
|
||||
// - freelist: 2**n segregated doubly-linked, address-ordered
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// block cache: intended to cache raw compressed data, since files aren't aligned
|
||||
// in the archive; alignment code would force a read of the whole block,
|
||||
// which would be a slowdown unless we keep them in memory.
|
||||
//
|
||||
// keep out of async code (although extra work for sync: must not issue/wait
|
||||
// if was cached) to simplify things. disadvantage: problems if same block
|
||||
// is issued twice, before the first call completes (via wait_io).
|
||||
// that won't happen though unless we have threaded file_ios =>
|
||||
// rare enough not to worry about performance.
|
||||
//
|
||||
// since sync code allocates the (temp) buffer, it's guaranteed
|
||||
// to remain valid.
|
||||
//
|
||||
|
||||
class BlockMgr
|
||||
{
|
||||
static const size_t MAX_BLOCKS = 32;
|
||||
enum BlockStatus
|
||||
{
|
||||
BS_PENDING,
|
||||
BS_COMPLETE,
|
||||
BS_INVALID
|
||||
};
|
||||
struct Block
|
||||
{
|
||||
BlockId id;
|
||||
void* mem;
|
||||
BlockStatus status;
|
||||
int refs;
|
||||
|
||||
Block() {} // for RingBuf
|
||||
Block(BlockId id_, void* mem_)
|
||||
: id(id_), mem(mem_), status(BS_PENDING), refs(0) {}
|
||||
};
|
||||
RingBuf<Block, MAX_BLOCKS> blocks;
|
||||
typedef RingBuf<Block, MAX_BLOCKS>::iterator BlockIt;
|
||||
|
||||
// use Pool to allocate mem for all blocks because it guarantees
|
||||
// page alignment (required for IO) and obviates manually aligning.
|
||||
Pool pool;
|
||||
|
||||
public:
|
||||
void init()
|
||||
{
|
||||
(void)pool_create(&pool, MAX_BLOCKS*FILE_BLOCK_SIZE, FILE_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
void shutdown()
|
||||
{
|
||||
(void)pool_destroy(&pool);
|
||||
}
|
||||
|
||||
void* alloc(BlockId id)
|
||||
{
|
||||
if(blocks.size() == MAX_BLOCKS)
|
||||
{
|
||||
Block& b = blocks.front();
|
||||
// if this block is still locked, big trouble..
|
||||
// (someone forgot to free it and we can't reuse it)
|
||||
debug_assert(b.status != BS_PENDING && b.refs == 0);
|
||||
pool_free(&pool, b.mem);
|
||||
blocks.pop_front();
|
||||
}
|
||||
void* mem = pool_alloc(&pool, FILE_BLOCK_SIZE); // can't fail
|
||||
blocks.push_back(Block(id, mem));
|
||||
return mem;
|
||||
}
|
||||
|
||||
void mark_completed(BlockId id)
|
||||
{
|
||||
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
|
||||
{
|
||||
if(block_eq(it->id, id))
|
||||
it->status = BS_COMPLETE;
|
||||
}
|
||||
}
|
||||
|
||||
void* find(BlockId id)
|
||||
{
|
||||
// linear search is ok, since we only keep a few blocks.
|
||||
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
|
||||
{
|
||||
if(block_eq(it->id, id) && it->status == BS_COMPLETE)
|
||||
{
|
||||
it->refs++;
|
||||
return it->mem;
|
||||
}
|
||||
}
|
||||
return 0; // not found
|
||||
}
|
||||
|
||||
void release(BlockId id)
|
||||
{
|
||||
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
|
||||
{
|
||||
if(block_eq(it->id, id))
|
||||
{
|
||||
it->refs--;
|
||||
debug_assert(it->refs >= 0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
debug_warn("release: block not found, but ought still to be in cache");
|
||||
}
|
||||
|
||||
|
||||
void invalidate(const char* atom_fn)
|
||||
{
|
||||
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
|
||||
{
|
||||
if(it->id.atom_fn == atom_fn)
|
||||
{
|
||||
if(it->refs)
|
||||
debug_warn("invalidating block that is currently in-use");
|
||||
it->status = BS_INVALID;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
static BlockMgr block_mgr;
|
||||
|
||||
|
||||
bool block_eq(BlockId b1, BlockId b2)
|
||||
{
|
||||
return b1.atom_fn == b2.atom_fn && b1.block_num == b2.block_num;
|
||||
}
|
||||
|
||||
// create an id for use with the cache that uniquely identifies
|
||||
// the block from the file <atom_fn> starting at <ofs>.
|
||||
BlockId block_cache_make_id(const char* atom_fn, const off_t ofs)
|
||||
{
|
||||
// <atom_fn> is guaranteed to be unique (see file_make_unique_fn_copy).
|
||||
// block_num should always fit in 32 bits (assuming maximum file size
|
||||
// = 2^32 * FILE_BLOCK_SIZE ~= 2^48 -- plenty). we don't bother
|
||||
// checking this.
|
||||
const u32 block_num = (u32)(ofs / FILE_BLOCK_SIZE);
|
||||
BlockId id = { atom_fn, block_num };
|
||||
return id;
|
||||
}
|
||||
|
||||
void* block_cache_alloc(BlockId id)
|
||||
{
|
||||
return block_mgr.alloc(id);
|
||||
}
|
||||
|
||||
void block_cache_mark_completed(BlockId id)
|
||||
{
|
||||
block_mgr.mark_completed(id);
|
||||
}
|
||||
|
||||
void* block_cache_find(BlockId id)
|
||||
{
|
||||
return block_mgr.find(id);
|
||||
}
|
||||
|
||||
void block_cache_release(BlockId id)
|
||||
{
|
||||
return block_mgr.release(id);
|
||||
}
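Putting the wrappers together, the intended call sequence from the synchronous IO path is roughly the following (a sketch only; the real caller is IOManager in file_io.cpp, and atom_fn/ofs stand in for its parameters):

BlockId id = block_cache_make_id(atom_fn, ofs);
void* p = block_cache_find(id);
if(p)
{
	// hit: use the cached block, then drop the reference
	// ... copy out the bytes we need ...
	block_cache_release(id);
}
else
{
	// miss: reserve space, issue the IO into it, then publish
	void* mem = block_cache_alloc(id);
	// ... read FILE_BLOCK_SIZE bytes at the block's offset into <mem> ...
	block_cache_mark_completed(id);
}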
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// >= AIO_SECTOR_SIZE or else waio will have to realign.
|
||||
// chosen as exactly 1 page: this allows write-protecting file buffers
|
||||
// without worrying about their (non-page-aligned) borders.
|
||||
// internal fragmentation is considerable but acceptable.
|
||||
static const size_t BUF_ALIGN = 4*KiB;
|
||||
|
||||
/*
|
||||
CacheAllocator
|
||||
|
||||
the biggest worry of a file cache is fragmentation. there are 2
|
||||
basic approaches to combat this:
|
||||
1) 'defragment' periodically - move blocks around to increase
|
||||
size of available 'holes'.
|
||||
2) prevent fragmentation from occurring at all via
|
||||
deliberate alloc/free policy.
|
||||
|
||||
file_io returns cache blocks directly to the user (zero-copy IO),
|
||||
so only currently unreferenced blocks can be moved (while holding a
|
||||
lock, to boot). it is believed that this would severely hamper
|
||||
defragmentation; we therefore go with the latter approach.
|
||||
|
||||
basic insight is: fragmentation occurs when a block is freed whose
|
||||
neighbors are not free (thus preventing coalescing). this can be
|
||||
prevented by allocating objects of similar lifetimes together.
|
||||
typical workloads (uniform access frequency) already show such behavior:
|
||||
the Landlord cache manager evicts files in an LRU manner, which matches
|
||||
the allocation policy.
|
||||
|
||||
references:
|
||||
"The Memory Fragmentation Problem - Solved?" (Johnstone and Wilson)
|
||||
"Dynamic Storage Allocation - A Survey and Critical Review" (Johnstone and Wilson)
|
||||
|
||||
policy:
|
||||
- allocation: use all available mem first, then look at freelist
|
||||
- freelist: good fit, address-ordered, always split blocks
|
||||
- free: immediately coalesce
|
||||
mechanism:
|
||||
- coalesce: boundary tags in freed memory with magic value
|
||||
- freelist: 2**n segregated doubly-linked, address-ordered
|
||||
*/
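The "2**n segregated" freelists imply a size-class computation along these lines (a sketch; the size_class_of/alloc_from_class functions used below are not shown in this hunk):

// map a padded size to its power-of-2 size class: class i covers
// sizes in (2^(i-1), 2^i].
static uint size_class_of(size_t size_pa)
{
	uint c = 0;
	while(((size_t)1 << c) < size_pa)
		c++;
	return c;
}
// one address-ordered, doubly-linked freelist per class would then be
// indexed as freelists[size_class_of(size_pa)].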
|
||||
class CacheAllocator
|
||||
{
|
||||
static const size_t MAX_CACHE_SIZE = 64*MiB;
|
||||
static const size_t MAX_CACHE_SIZE = 32*MiB;
|
||||
|
||||
public:
|
||||
void init()
|
||||
@ -34,27 +231,41 @@ public:
|
||||
|
||||
void* alloc(size_t size)
|
||||
{
|
||||
const size_t size_pa = round_up(size, AIO_SECTOR_SIZE);
|
||||
|
||||
// use all available space first
|
||||
void* p = pool_alloc(&pool, size_pa);
|
||||
if(p)
|
||||
return p;
|
||||
const size_t size_pa = round_up(size, BUF_ALIGN);
|
||||
void* p;
|
||||
|
||||
// try to reuse a freed entry
|
||||
const uint size_class = size_class_of(size_pa);
|
||||
p = alloc_from_class(size_class, size_pa);
|
||||
if(p)
|
||||
return p;
|
||||
goto have_p;
|
||||
|
||||
// grab more space from pool
|
||||
p = pool_alloc(&pool, size_pa);
|
||||
if(p)
|
||||
goto have_p;
|
||||
|
||||
// last resort: split a larger element
|
||||
p = alloc_from_larger_class(size_class, size_pa);
|
||||
if(p)
|
||||
return p;
|
||||
goto have_p;
|
||||
|
||||
// failed - can no longer expand and nothing big enough was
|
||||
// found in freelists.
|
||||
// file cache will decide which elements are least valuable,
|
||||
// free() those and call us again.
|
||||
return 0;
|
||||
|
||||
have_p:
|
||||
// make sure range is writable
|
||||
(void)mprotect(p, size_pa, PROT_READ|PROT_WRITE);
|
||||
return p;
|
||||
}
|
||||
|
||||
void make_read_only(u8* p, size_t size)
|
||||
{
|
||||
const size_t size_pa = round_up(size, BUF_ALIGN);
|
||||
(void)mprotect(p, size_pa, PROT_READ);
|
||||
}
|
||||
|
||||
#include "nommgr.h"
|
||||
@ -63,11 +274,11 @@ public:
|
||||
{
|
||||
if(!pool_contains(&pool, p))
|
||||
{
|
||||
debug_warn("not in arena");
|
||||
debug_warn("invalid pointer");
|
||||
return;
|
||||
}
|
||||
size_t size_pa = round_up(size, AIO_SECTOR_SIZE);
|
||||
|
||||
size_t size_pa = round_up(size, BUF_ALIGN);
|
||||
coalesce(p, size_pa);
|
||||
freelist_add(p, size_pa);
|
||||
}
|
||||
@ -92,8 +303,8 @@ private:
|
||||
u32 magic1;
|
||||
u32 magic2;
|
||||
};
|
||||
// must be enough room to stash header+footer in the freed page.
|
||||
cassert(AIO_SECTOR_SIZE >= 2*sizeof(FreePage));
|
||||
// must be enough room to stash 2 FreePage instances in the freed page.
|
||||
cassert(BUF_ALIGN >= 2*sizeof(FreePage));
|
||||
|
||||
FreePage* freed_page_at(u8* p, size_t ofs)
|
||||
{
|
||||
@ -105,7 +316,7 @@ private:
|
||||
FreePage* page = (FreePage*)p;
|
||||
if(page->magic1 != MAGIC1 || page->magic2 != MAGIC2)
|
||||
return 0;
|
||||
debug_assert(page->size_pa % AIO_SECTOR_SIZE == 0);
|
||||
debug_assert(page->size_pa % BUF_ALIGN == 0);
|
||||
return page;
|
||||
}
|
||||
|
||||
@ -275,19 +486,19 @@ public:
|
||||
extant_bufs.push_back(ExtantBuf(buf, size, atom_fn));
|
||||
}
|
||||
|
||||
bool includes(FileIOBuf buf)
|
||||
const char* get_owner_filename(FileIOBuf buf)
|
||||
{
|
||||
debug_assert(buf != 0);
|
||||
for(size_t i = 0; i < extant_bufs.size(); i++)
|
||||
{
|
||||
ExtantBuf& eb = extant_bufs[i];
|
||||
if(matches(eb, buf))
|
||||
return true;
|
||||
return eb.atom_fn;
|
||||
}
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void find_and_remove(FileIOBuf buf, size_t* size)
|
||||
void find_and_remove(FileIOBuf buf, size_t* size, const char** atom_fn)
|
||||
{
|
||||
debug_assert(buf != 0);
|
||||
for(size_t i = 0; i < extant_bufs.size(); i++)
|
||||
@ -296,6 +507,7 @@ public:
|
||||
if(matches(eb, buf))
|
||||
{
|
||||
*size = eb.size;
|
||||
*atom_fn = eb.atom_fn;
|
||||
eb.buf = 0;
|
||||
eb.size = 0;
|
||||
eb.atom_fn = 0;
|
||||
@ -356,7 +568,7 @@ FileIOBuf file_buf_alloc(size_t size, const char* atom_fn)
|
||||
|
||||
extant_bufs.add(buf, size, atom_fn);
|
||||
|
||||
stats_buf_alloc(size, round_up(size, AIO_SECTOR_SIZE));
|
||||
stats_buf_alloc(size, round_up(size, BUF_ALIGN));
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -395,38 +607,69 @@ LibError file_buf_free(FileIOBuf buf)
|
||||
if(!buf)
|
||||
return ERR_OK;
|
||||
|
||||
stats_buf_free();
|
||||
size_t size; const char* atom_fn;
|
||||
extant_bufs.find_and_remove(buf, &size, &atom_fn);
|
||||
|
||||
stats_buf_free();
|
||||
trace_notify_free(atom_fn);
|
||||
|
||||
size_t size;
|
||||
extant_bufs.find_and_remove(buf, &size);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
// mark <buf> as belonging to the file <atom_fn>. this is done after
|
||||
// reading uncompressed data from archive: file_io.cpp must allocate the
|
||||
// buffer, since only it knows how much padding is needed; however,
|
||||
// archive.cpp knows the real filename (as opposed to that of the archive,
|
||||
// which is what the file buffer is associated with). therefore,
|
||||
// we fix up the filename afterwards.
|
||||
LibError file_buf_set_real_fn(FileIOBuf buf, const char* atom_fn)
|
||||
{
|
||||
// remove and reinsert into list instead of replacing atom_fn
|
||||
// in-place for simplicity (speed isn't critical, since there
|
||||
// should only be a few active bufs).
|
||||
size_t size; const char* old_atom_fn;
|
||||
extant_bufs.find_and_remove(buf, &size, &old_atom_fn);
|
||||
extant_bufs.add(buf, size, atom_fn);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn)
|
||||
{
|
||||
// decide (based on flags) if buf is to be cached; set cost
|
||||
uint cost = 1;
|
||||
|
||||
cache_allocator.make_read_only((u8*)buf, size);
|
||||
file_cache.add(atom_fn, buf, size, cost);
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size)
|
||||
FileIOBuf file_cache_find(const char* atom_fn, size_t* size)
|
||||
{
|
||||
return file_cache.retrieve(atom_fn, size, false);
|
||||
}
|
||||
|
||||
|
||||
FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* psize)
|
||||
{
|
||||
// note: do not query extant_bufs - reusing that doesn't make sense
|
||||
// (why would someone issue a second IO for the entire file while
|
||||
// still referencing the previous instance?)
|
||||
|
||||
return file_cache.retrieve(atom_fn, size);
|
||||
FileIOBuf buf = file_cache.retrieve(atom_fn, psize);
|
||||
|
||||
CacheRet cr = buf? CR_HIT : CR_MISS;
|
||||
stats_cache(cr, *psize, atom_fn);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
a) FileIOBuf is opaque type with getter
|
||||
FileIOBuf buf; <--------------------- how to initialize??
|
||||
@ -459,147 +702,24 @@ file_buf_free and there are only a few active at a time ( < 10)
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// block cache: intended to cache raw compressed data, since files aren't aligned
|
||||
// in the archive; alignment code would force a read of the whole block,
|
||||
// which would be a slowdown unless we keep them in memory.
|
||||
//
|
||||
// keep out of async code (although extra work for sync: must not issue/wait
|
||||
// if was cached) to simplify things. disadvantage: problems if same block
|
||||
// is issued twice, before the first call completes (via wait_io).
|
||||
// that won't happen though unless we have threaded file_ios =>
|
||||
// rare enough not to worry about performance.
|
||||
//
|
||||
// since sync code allocates the (temp) buffer, it's guaranteed
|
||||
// to remain valid.
|
||||
//
|
||||
|
||||
class BlockMgr
|
||||
{
|
||||
static const size_t MAX_BLOCKS = 32;
|
||||
enum BlockStatus
|
||||
{
|
||||
BS_PENDING,
|
||||
BS_COMPLETE,
|
||||
BS_INVALID
|
||||
};
|
||||
struct Block
|
||||
{
|
||||
BlockId id;
|
||||
void* mem;
|
||||
BlockStatus status;
|
||||
|
||||
Block() {} // for RingBuf
|
||||
Block(BlockId id_, void* mem_)
|
||||
: id(id_), mem(mem_), status(BS_PENDING) {}
|
||||
};
|
||||
RingBuf<Block, MAX_BLOCKS> blocks;
|
||||
typedef RingBuf<Block, MAX_BLOCKS>::iterator BlockIt;
|
||||
|
||||
// use Pool to allocate mem for all blocks because it guarantees
|
||||
// page alignment (required for IO) and obviates manually aligning.
|
||||
Pool pool;
|
||||
|
||||
public:
|
||||
void init()
|
||||
{
|
||||
(void)pool_create(&pool, MAX_BLOCKS*FILE_BLOCK_SIZE, FILE_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
void shutdown()
|
||||
{
|
||||
(void)pool_destroy(&pool);
|
||||
}
|
||||
|
||||
void* alloc(BlockId id)
|
||||
{
|
||||
if(blocks.size() == MAX_BLOCKS)
|
||||
{
|
||||
Block& b = blocks.front();
|
||||
// if this block is still locked, big trouble..
|
||||
// (someone forgot to free it and we can't reuse it)
|
||||
debug_assert(b.status != BS_PENDING);
|
||||
pool_free(&pool, b.mem);
|
||||
blocks.pop_front();
|
||||
}
|
||||
void* mem = pool_alloc(&pool, FILE_BLOCK_SIZE); // can't fail
|
||||
blocks.push_back(Block(id, mem));
|
||||
return mem;
|
||||
}
|
||||
|
||||
void mark_completed(BlockId id)
|
||||
{
|
||||
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
|
||||
{
|
||||
if(it->id == id)
|
||||
it->status = BS_COMPLETE;
|
||||
}
|
||||
}
|
||||
|
||||
void* find(BlockId id)
|
||||
{
|
||||
// linear search is ok, since we only keep a few blocks.
|
||||
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
|
||||
{
|
||||
if(it->status == BS_COMPLETE && it->id == id)
|
||||
return it->mem;
|
||||
}
|
||||
return 0; // not found
|
||||
}
|
||||
|
||||
void invalidate(const char* atom_fn)
|
||||
{
|
||||
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
|
||||
if((const char*)(it->id >> 32) == atom_fn)
|
||||
it->status = BS_INVALID;
|
||||
}
|
||||
};
|
||||
static BlockMgr block_mgr;
|
||||
|
||||
|
||||
// create an id for use with the cache that uniquely identifies
|
||||
// the block from the file <atom_fn> starting at <ofs> (aligned).
|
||||
BlockId block_cache_make_id(const char* atom_fn, const off_t ofs)
|
||||
{
|
||||
cassert(sizeof(atom_fn) == 4);
|
||||
// format: filename atom | block number
|
||||
// 63 32 31 0
|
||||
//
|
||||
// <atom_fn> is guaranteed to be unique (see file_make_unique_fn_copy).
|
||||
//
|
||||
// block_num should always fit in 32 bits (assuming maximum file size
|
||||
// = 2^32 * FILE_BLOCK_SIZE ~= 2^48 -- plenty). we don't bother
|
||||
// checking this.
|
||||
|
||||
const size_t block_num = ofs / FILE_BLOCK_SIZE;
|
||||
return u64_from_u32((u32)(uintptr_t)atom_fn, (u32)block_num);
|
||||
}
|
||||
|
||||
void* block_cache_alloc(BlockId id)
|
||||
{
|
||||
return block_mgr.alloc(id);
|
||||
}
|
||||
|
||||
void block_cache_mark_completed(BlockId id)
|
||||
{
|
||||
block_mgr.mark_completed(id);
|
||||
}
|
||||
|
||||
void* block_cache_find(BlockId id)
|
||||
{
|
||||
return block_mgr.find(id);
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// remove all blocks loaded from the file <fn>. used when reloading the file.
|
||||
LibError file_cache_invalidate(const char* P_fn)
|
||||
{
|
||||
const char* atom_fn = file_make_unique_fn_copy(P_fn, 0);
|
||||
|
||||
// mark all blocks from the file as invalid
|
||||
block_mgr.invalidate(atom_fn);
|
||||
|
||||
// file was cached: remove it and free that memory
|
||||
size_t size;
|
||||
FileIOBuf cached_buf = file_cache.retrieve(atom_fn, &size);
|
||||
if(cached_buf)
|
||||
{
|
||||
file_cache.remove(atom_fn);
|
||||
cache_allocator.free((u8*)cached_buf, size);
|
||||
}
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
@ -1,15 +1,14 @@
extern LibError file_buf_get(FileIOBuf* pbuf, size_t size,
	const char* atom_fn, bool is_write, FileIOCB cb);

struct BlockId
{
	const char* atom_fn;
	u32 block_num;
};

extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size);
extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn);

typedef u64 BlockId;
extern bool block_eq(BlockId b1, BlockId b2);

// create an id for use with the cache that uniquely identifies
// the block from the file <atom_fn> starting at <ofs> (aligned).
// the block from the file <atom_fn> starting at <ofs>.
extern BlockId block_cache_make_id(const char* atom_fn, const off_t ofs);

extern void* block_cache_alloc(BlockId id);
@ -17,6 +16,19 @@ extern void* block_cache_alloc(BlockId id);
extern void block_cache_mark_completed(BlockId id);

extern void* block_cache_find(BlockId id);
extern void block_cache_release(BlockId id);


extern LibError file_buf_get(FileIOBuf* pbuf, size_t size,
	const char* atom_fn, bool is_write, FileIOCB cb);

extern LibError file_buf_set_real_fn(FileIOBuf buf, const char* atom_fn);

extern FileIOBuf file_cache_find(const char* atom_fn, size_t* size);
extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size);
extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn);

extern void file_cache_init();
@ -1,9 +1,9 @@
|
||||
#include "file_stats.h"
|
||||
|
||||
#include "file.h"
|
||||
#include "file_cache.h"
|
||||
#include "file_io.h"
|
||||
|
||||
#include "file_stats.h" // must come after file and file_cache
|
||||
|
||||
#include "compression.h"
|
||||
#include "zip.h"
|
||||
#include "archive.h"
|
||||
|
@ -13,88 +13,90 @@
|
||||
// async I/O
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// we don't do any caching or alignment here - this is just a thin AIO wrapper.
|
||||
// rationale:
|
||||
// asynchronous IO routines don't cache; they're just a thin AIO wrapper.
|
||||
// it's taken care of by file_io, which splits transfers into blocks
|
||||
// and keeps temp buffers in memory (not user-allocated, because they
|
||||
// might pull the rug out from under us at any time).
|
||||
//
|
||||
// caching here would be more complicated: would have to handle "forwarding",
|
||||
// i.e. recognizing that the desired block has been issued, but isn't yet
|
||||
// complete. file_io also knows more about whether a block should be cached.
|
||||
// - aligning the transfer isn't possible here since we have no control
|
||||
// over the buffer, i.e. we cannot read more data than requested.
|
||||
// instead, this is done in file_io.
|
||||
// - transfer sizes here are arbitrary (viz. not block-aligned);
|
||||
// that means the cache would have to handle this or also split them up
|
||||
// into blocks, which is redundant (already done by file_io).
|
||||
// - if caching here, we'd also have to handle "forwarding" (i.e.
|
||||
// desired block has been issued but isn't yet complete). again, it
|
||||
// is easier to let the synchronous file_io manager handle this.
|
||||
// - finally, file_io knows more about whether the block should be cached
|
||||
// (e.g. whether another block request will follow), but we don't
|
||||
// currently make use of this.
|
||||
//
|
||||
// disadvantages:
|
||||
// - streamed data will always be read from disk. no problem, because
|
||||
// such data (e.g. music, long speech) is unlikely to be used again soon.
|
||||
// - prefetching (issuing the next few blocks from an archive during idle
|
||||
// time, so that future out-of-order reads don't need to seek) isn't
|
||||
// possible in the background (unless via thread, but that's discouraged).
|
||||
// the utility is questionable, though: how to prefetch so as not to delay
|
||||
// real IOs? can't determine "idle time" without completion notification,
|
||||
// which is hard.
|
||||
// we could get the same effect by bridging small gaps in file_io,
|
||||
// and rearranging files in the archive in order of access.
|
||||
// - prefetching (issuing the next few blocks from archive/file during
|
||||
// idle time to satisfy potential future IOs) requires extra buffers;
|
||||
// this is a bit more complicated than just using the cache as storage.
|
||||
|
||||
|
||||
static Pool aiocb_pool;
|
||||
|
||||
static inline void aiocb_pool_init()
|
||||
// FileIO must reference an aiocb, which is used to pass IO params to the OS.
|
||||
// unfortunately it is 144 bytes on Linux - too much to put in FileIO,
|
||||
// since that is stored in a 'resource control block' (see h_mgr.h).
|
||||
// we therefore allocate dynamically, but via suballocator to avoid
|
||||
// hitting the heap on every IO.
|
||||
class AiocbAllocator
|
||||
{
|
||||
(void)pool_create(&aiocb_pool, 32*sizeof(aiocb), sizeof(aiocb));
|
||||
}
|
||||
|
||||
static inline void aiocb_pool_shutdown()
|
||||
{
|
||||
(void)pool_destroy(&aiocb_pool);
|
||||
}
|
||||
|
||||
static inline aiocb* aiocb_pool_alloc()
|
||||
{
|
||||
ONCE(aiocb_pool_init());
|
||||
return (aiocb*)pool_alloc(&aiocb_pool, 0);
|
||||
}
|
||||
|
||||
static inline void aiocb_pool_free(void* cb)
|
||||
{
|
||||
pool_free(&aiocb_pool, cb);
|
||||
}
|
||||
Pool pool;
|
||||
public:
|
||||
void init()
|
||||
{
|
||||
(void)pool_create(&pool, 32*sizeof(aiocb), sizeof(aiocb));
|
||||
}
|
||||
void shutdown()
|
||||
{
|
||||
(void)pool_destroy(&pool);
|
||||
}
|
||||
aiocb* alloc()
|
||||
{
|
||||
return (aiocb*)pool_alloc(&pool, 0);
|
||||
}
|
||||
// weird name to avoid trouble with mem tracker macros
|
||||
// (renaming is less annoying than #include "nommgr.h")
|
||||
void free_(void* cb)
|
||||
{
|
||||
pool_free(&pool, cb);
|
||||
}
|
||||
};
|
||||
static AiocbAllocator aiocb_allocator;
|
||||
|
||||
|
||||
// starts transferring to/from the given buffer.
|
||||
// no attempt is made at aligning or padding the transfer.
|
||||
LibError file_io_issue(File* f, off_t ofs, size_t size, void* p, FileIo* io)
|
||||
{
|
||||
debug_printf("FILE| issue ofs=%d size=%d\n", ofs, size);
|
||||
|
||||
// zero output param in case we fail below.
|
||||
memset(io, 0, sizeof(FileIo));
|
||||
|
||||
debug_printf("FILE| issue ofs=%d size=%d\n", ofs, size);
|
||||
|
||||
|
||||
//
|
||||
// check params
|
||||
//
|
||||
|
||||
CHECK_FILE(f);
|
||||
|
||||
if(!size || !p || !io)
|
||||
WARN_RETURN(ERR_INVALID_PARAM);
|
||||
|
||||
const bool is_write = (f->fc.flags & FILE_WRITE) != 0;
|
||||
|
||||
|
||||
// cut off at EOF.
|
||||
if(!is_write)
|
||||
{
|
||||
const off_t bytes_left = f->fc.size - ofs;
|
||||
if(bytes_left < 0)
|
||||
WARN_RETURN(ERR_EOF);
|
||||
size = MIN(size, (size_t)bytes_left);
|
||||
size = round_up(size, AIO_SECTOR_SIZE);
|
||||
}
|
||||
// note: cutting off at EOF is necessary to avoid transfer errors,
|
||||
// but makes size no longer sector-aligned, which would force
|
||||
// waio to realign (slow). we want to pad back to sector boundaries
|
||||
// afterwards (to avoid realignment), but that is not possible here
|
||||
// since we have no control over the buffer (there might not be
|
||||
// enough room in it). hence, do cut-off in IOManager.
|
||||
//
|
||||
// example: 200-byte file. IOManager issues 16KB chunks; that is way
|
||||
// beyond EOF, so ReadFile fails. limiting size to 200 bytes works,
|
||||
// but causes waio to pad the transfer and use align buffer (slow).
|
||||
// rounding up to 512 bytes avoids realignment and does not fail
|
||||
// (apparently since NTFS files are sector-padded anyway?)
|
||||
|
||||
// (we can't store the whole aiocb directly - glibc's version is
|
||||
// 144 bytes large)
|
||||
aiocb* cb = aiocb_pool_alloc();
|
||||
aiocb* cb = aiocb_allocator.alloc();
|
||||
io->cb = cb;
|
||||
if(!cb)
|
||||
return ERR_NO_MEM;
|
||||
@ -153,10 +155,12 @@ LibError file_io_wait(FileIo* io, void*& p, size_t& size)
|
||||
const ssize_t bytes_transferred = aio_return(cb);
|
||||
debug_printf("FILE| bytes_transferred=%d aio_nbytes=%u\n", bytes_transferred, cb->aio_nbytes);
|
||||
|
||||
// disabled: we no longer clamp to EOF
|
||||
// // (size was clipped to EOF in file_io => this is an actual IO error)
|
||||
// if(bytes_transferred < (ssize_t)cb->aio_nbytes)
|
||||
// return ERR_IO;
|
||||
// see if actual transfer count matches requested size.
|
||||
// note: most callers clamp to EOF but round back up to sector size
|
||||
// (see explanation in file_io_issue). since we're not sure what
|
||||
// the exact sector size is (only waio knows), we can only warn of
|
||||
// too small transfer counts (not return error).
|
||||
debug_assert(bytes_transferred >= (ssize_t)(cb->aio_nbytes-AIO_SECTOR_SIZE));
|
||||
|
||||
p = (void*)cb->aio_buf; // cast from volatile void*
|
||||
size = bytes_transferred;
|
||||
@ -167,7 +171,7 @@ LibError file_io_wait(FileIo* io, void*& p, size_t& size)
|
||||
LibError file_io_discard(FileIo* io)
|
||||
{
|
||||
memset(io->cb, 0, sizeof(aiocb)); // prevent further use.
|
||||
aiocb_pool_free(io->cb);
|
||||
aiocb_allocator.free_(io->cb);
|
||||
io->cb = 0;
|
||||
return ERR_OK;
|
||||
}
|
||||
@ -239,7 +243,7 @@ class IOManager
|
||||
const void* cached_block;
|
||||
|
||||
|
||||
u64 block_id;
|
||||
BlockId block_id;
|
||||
// needed so that we can add the block to the cache when
|
||||
// its IO is complete. if we add it when issuing, we'd no longer be
|
||||
// thread-safe: someone else might find it in the cache before its
|
||||
@ -257,7 +261,7 @@ class IOManager
|
||||
{
|
||||
memset(&io, 0, sizeof(io));
|
||||
temp_buf = 0;
|
||||
block_id = 0;
|
||||
memset(&block_id, 0, sizeof(block_id));
|
||||
cached_block = 0;
|
||||
}
|
||||
};
|
||||
@ -350,6 +354,16 @@ class IOManager
|
||||
ofs_misalign = start_ofs % FILE_BLOCK_SIZE;
|
||||
start_ofs -= (off_t)ofs_misalign;
|
||||
size = round_up(ofs_misalign + user_size, FILE_BLOCK_SIZE);
|
||||
|
||||
// but cut off at EOF (necessary to prevent IO error).
|
||||
const off_t bytes_left = f->fc.size - start_ofs;
|
||||
if(bytes_left < 0)
|
||||
WARN_RETURN(ERR_EOF);
|
||||
size = MIN(size, (size_t)bytes_left);
|
||||
|
||||
// and round back up to sector size.
|
||||
// see rationale in file_io_issue.
|
||||
size = round_up(size, AIO_SECTOR_SIZE);
|
||||
}
|
||||
|
||||
RETURN_ERR(file_buf_get(pbuf, size, f->fc.atom_fn, is_write, cb));
|
||||
@ -360,16 +374,11 @@ class IOManager
|
||||
void issue(IOSlot& slot)
|
||||
{
|
||||
const off_t ofs = start_ofs+(off_t)total_issued;
|
||||
size_t issue_size;
|
||||
|
||||
// write: must not issue beyond end of data.
|
||||
if(is_write)
|
||||
issue_size = MIN(FILE_BLOCK_SIZE, size - total_issued);
|
||||
// read: always grab whole blocks so we can put them in the cache.
|
||||
// any excess data (can only be within first or last block) is
|
||||
// discarded in wait().
|
||||
else
|
||||
issue_size = FILE_BLOCK_SIZE;
|
||||
// for both reads and writes, do not issue beyond end of file/data
|
||||
const size_t issue_size = MIN(FILE_BLOCK_SIZE, size - total_issued);
|
||||
// try to grab whole blocks (so we can put them in the cache).
|
||||
// any excess data (can only be within first or last) is
|
||||
// discarded in wait().
|
||||
|
||||
// check if in cache
|
||||
slot.block_id = block_cache_make_id(f->fc.atom_fn, ofs);
|
||||
@ -441,11 +450,14 @@ class IOManager
|
||||
// pending transfers to complete.
|
||||
}
|
||||
|
||||
if(!slot.cached_block)
|
||||
if(slot.cached_block)
|
||||
block_cache_release(slot.block_id);
|
||||
else
|
||||
{
|
||||
file_io_discard(&slot.io);
|
||||
|
||||
if(!slot.cached_block && pbuf == FILE_BUF_TEMP)
|
||||
block_cache_mark_completed(slot.block_id);
|
||||
if(pbuf == FILE_BUF_TEMP)
|
||||
block_cache_mark_completed(slot.block_id);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -539,9 +551,11 @@ ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf,
|
||||
FileIOCB cb, uintptr_t ctx) // optional
|
||||
{
|
||||
debug_printf("FILE| io: size=%u ofs=%u fn=%s\n", size, ofs, f->fc.atom_fn);
|
||||
|
||||
CHECK_FILE(f);
|
||||
|
||||
// note: do not update stats/trace here: this includes Zip IOs,
|
||||
// which shouldn't be reported.
|
||||
|
||||
IOManager mgr(f, ofs, size, pbuf, cb, ctx);
|
||||
return mgr.run();
|
||||
}
|
||||
@ -549,7 +563,13 @@ ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf,
|
||||
|
||||
|
||||
|
||||
void file_io_init()
|
||||
{
|
||||
aiocb_allocator.init();
|
||||
}
|
||||
|
||||
|
||||
void file_io_shutdown()
|
||||
{
|
||||
aiocb_pool_shutdown();
|
||||
aiocb_allocator.shutdown();
|
||||
}
|
||||
|
@ -1 +1,2 @@
extern void file_io_shutdown();
extern void file_io_init();
extern void file_io_shutdown();
@ -48,7 +48,8 @@ static uint user_ios;
|
||||
static double user_io_size_total;
|
||||
static double io_actual_size_total[FI_MAX_IDX][2];
|
||||
static double io_elapsed_time[FI_MAX_IDX][2];
|
||||
static BlockId io_disk_head_pos;
|
||||
static double io_process_time_total;
|
||||
static BlockId io_disk_pos_cur;
|
||||
static uint io_seeks;
|
||||
|
||||
// file_cache
|
||||
@ -148,13 +149,19 @@ void stats_user_io(size_t user_size)
|
||||
user_io_size_total += user_size;
|
||||
}
|
||||
|
||||
void stats_io_start(FileIOImplentation fi, FileOp fo, size_t actual_size, double* start_time_storage)
|
||||
void stats_io_start(FileIOImplentation fi, FileOp fo, size_t actual_size,
|
||||
BlockId disk_pos, double* start_time_storage)
|
||||
{
|
||||
debug_assert(fi < FI_MAX_IDX);
|
||||
debug_assert(fo == FO_READ || FO_WRITE);
|
||||
|
||||
io_actual_size_total[fi][fo] += actual_size;
|
||||
|
||||
if(disk_pos.atom_fn != io_disk_pos_cur.atom_fn ||
|
||||
disk_pos.block_num != io_disk_pos_cur.block_num+1)
|
||||
io_seeks++;
|
||||
io_disk_pos_cur = disk_pos;
|
||||
|
||||
timer_start(start_time_storage);
|
||||
}
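To illustrate the heuristic: only a read of the immediately following block of the same file counts as sequential; everything else increments io_seeks. For example:

// reads of "a.png" blocks 0, 1, 2, 7, 8 (in that order):
//   block 0: position differs from io_disk_pos_cur  -> io_seeks++
//   block 1: same file, block_num == previous+1     -> sequential
//   block 2: same file, block_num == previous+1     -> sequential
//   block 7: same file, but 7 != 2+1                -> io_seeks++
//   block 8: same file, block_num == previous+1     -> sequential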
|
||||
|
||||
@ -166,6 +173,16 @@ void stats_io_finish(FileIOImplentation fi, FileOp fo, double* start_time_storag
|
||||
io_elapsed_time[fi][fo] += timer_reset(start_time_storage);
|
||||
}
|
||||
|
||||
void stats_cb_start()
|
||||
{
|
||||
timer_start();
|
||||
}
|
||||
|
||||
void stats_cb_finish()
|
||||
{
|
||||
io_process_time_total += timer_reset();
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// file_cache
|
||||
|
@ -28,8 +28,11 @@ extern void stats_buf_free();
|
||||
|
||||
// file_io
|
||||
extern void stats_user_io(size_t user_size);
|
||||
extern void stats_io_start(FileIOImplentation fi, FileOp fo, size_t actual_size, double* start_time_storage);
|
||||
extern void stats_io_start(FileIOImplentation fi, FileOp fo,
|
||||
size_t actual_size, BlockId disk_pos, double* start_time_storage);
|
||||
extern void stats_io_finish(FileIOImplentation fi, FileOp fo, double* start_time_storage);
|
||||
extern void stats_cb_start();
|
||||
extern void stats_cb_finish();
|
||||
|
||||
// file_cache
|
||||
extern void stats_cache(CacheRet cr, size_t size, const char* atom_fn);
|
||||
@ -49,8 +52,10 @@ extern void stats_dump();
|
||||
#define stats_buf_alloc(user_size, padded_size)
|
||||
#define stats_buf_free()
|
||||
#define stats_user_io(user_size)
|
||||
#define stats_io_start(fi, fo, actual_size, start_time_storage)
|
||||
#define stats_io_start(fi, fo, actual_size, disk_pos, start_time_storage)
|
||||
#define stats_io_finish(fi, fo, start_time_storage)
|
||||
#define stats_cb_start()
|
||||
#define stats_cb_finish()
|
||||
#define stats_cache(cr, size, atom_fn)
|
||||
#define stats_block_cache(cr)
|
||||
#define stats_dump()
|
||||
|
@ -326,8 +326,6 @@ static LibError VFile_reload(VFile* vf, const char* V_path, Handle)
|
||||
if(x_is_open(&vf->xf))
|
||||
return ERR_OK;
|
||||
|
||||
trace_add(V_path);
|
||||
|
||||
TFile* tf;
|
||||
uint lf = (flags & FILE_WRITE)? LF_CREATE_MISSING : 0;
|
||||
LibError err = tree_lookup(V_path, &tf, lf);
|
||||
@ -425,6 +423,10 @@ ssize_t vfs_io(const Handle hf, const size_t size, FileIOBuf* pbuf,
|
||||
debug_printf("VFS| io: size=%d\n", size);
|
||||
|
||||
H_DEREF(hf, VFile, vf);
|
||||
FileCommon* fc = &vf->xf.u.fc;
|
||||
|
||||
stats_user_io(size);
|
||||
trace_notify_load(fc->atom_fn, fc->flags);
|
||||
|
||||
off_t ofs = vf->ofs;
|
||||
vf->ofs += (off_t)size;
|
||||
@ -445,7 +447,8 @@ LibError vfs_load(const char* V_fn, FileIOBuf& buf, size_t& size, uint flags /*
|
||||
buf = file_cache_retrieve(atom_fn, &size);
|
||||
if(buf)
|
||||
{
|
||||
stats_cache(CR_HIT, size, atom_fn);
|
||||
stats_user_io(size);
|
||||
trace_notify_load(atom_fn, flags);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
@ -459,10 +462,6 @@ LibError vfs_load(const char* V_fn, FileIOBuf& buf, size_t& size, uint flags /*
|
||||
H_DEREF(hf, VFile, vf);
|
||||
|
||||
size = x_size(&vf->xf);
|
||||
// only now can we report misses, since we need to know the size for
|
||||
// statistics purposes. that means vfs_load on nonexistent files will
|
||||
// not show up in cache misses, which is fine.
|
||||
stats_cache(CR_MISS, size, atom_fn);
|
||||
|
||||
buf = FILE_BUF_ALLOC;
|
||||
ssize_t nread = vfs_io(hf, size, &buf);
|
||||
|
@ -4,114 +4,58 @@
|
||||
#include "lib/timer.h"
|
||||
#include "file_internal.h"
|
||||
|
||||
|
||||
enum TraceState
|
||||
{
|
||||
TS_UNINITIALIZED,
|
||||
TS_DISABLED,
|
||||
TS_ENABLED,
|
||||
TS_ERROR,
|
||||
TS_SHUTDOWN
|
||||
};
|
||||
static uintptr_t trace_state = TS_UNINITIALIZED; // values from TraceState; type for use with CAS
|
||||
|
||||
|
||||
static uintptr_t trace_initialized; // set via CAS
|
||||
static Pool trace_pool;
|
||||
|
||||
|
||||
// call before using trace_pool; no-op if called more than once.
|
||||
static inline void trace_init()
|
||||
{
|
||||
if(CAS(&trace_initialized, 0, 1))
|
||||
(void)pool_create(&trace_pool, 4*MiB, sizeof(TraceEntry));
|
||||
}
|
||||
|
||||
void trace_shutdown()
|
||||
{
|
||||
if(trace_state == TS_DISABLED || trace_state == TS_ENABLED)
|
||||
{
|
||||
if(CAS(&trace_initialized, 1, 2))
|
||||
(void)pool_destroy(&trace_pool);
|
||||
trace_state = TS_SHUTDOWN;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool trace_enabled;
|
||||
|
||||
void trace_enable(bool want_enabled)
|
||||
{
|
||||
if(trace_state == TS_SHUTDOWN || trace_state == TS_ERROR)
|
||||
WARN_ERR_RETURN(ERR_LOGIC);
|
||||
|
||||
if(CAS(&trace_state, TS_UNINITIALIZED, TS_ERROR))
|
||||
{
|
||||
if(pool_create(&trace_pool, 4*MiB, sizeof(TraceEntry)) < 0)
|
||||
return; // leave trace_state set to TS_ERROR
|
||||
}
|
||||
|
||||
trace_state = want_enabled? TS_ENABLED : TS_DISABLED;
|
||||
trace_enabled = want_enabled;
|
||||
}
|
||||
|
||||
|
||||
void trace_add(const char* P_fn)
|
||||
static void trace_add(TraceOp op, const char* P_fn, uint flags = 0, double timestamp = 0.0)
|
||||
{
|
||||
if(trace_state == TS_DISABLED || trace_state == TS_UNINITIALIZED)
|
||||
trace_init();
|
||||
if(!trace_enabled)
|
||||
return;
|
||||
if(trace_state != TS_ENABLED)
|
||||
WARN_ERR_RETURN(ERR_LOGIC);
|
||||
|
||||
if(timestamp == 0.0)
|
||||
timestamp = get_time();
|
||||
|
||||
TraceEntry* t = (TraceEntry*)pool_alloc(&trace_pool, 0);
|
||||
if(!t)
|
||||
return;
|
||||
t->timestamp = get_time();
|
||||
t->timestamp = timestamp;
|
||||
t->atom_fn = file_make_unique_fn_copy(P_fn, 0);
|
||||
t->op = op;
|
||||
t->flags = flags;
|
||||
}
|
||||
|
||||
|
||||
LibError trace_write_to_file(const char* trace_filename)
|
||||
void trace_notify_load(const char* P_fn, uint flags)
|
||||
{
|
||||
if(trace_state == TS_UNINITIALIZED)
|
||||
return ERR_OK;
|
||||
if(trace_state != TS_ENABLED && trace_state != TS_DISABLED)
|
||||
WARN_RETURN(ERR_LOGIC);
|
||||
|
||||
char N_fn[PATH_MAX];
|
||||
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
|
||||
FILE* f = fopen(N_fn, "wt");
|
||||
if(!f)
|
||||
return ERR_FILE_ACCESS;
|
||||
|
||||
Trace t;
|
||||
trace_get(&t);
|
||||
for(size_t i = 0; i < t.num_ents; i++)
|
||||
fprintf(f, "%#010f: %s\n", t.ents[i].timestamp, t.ents[i].atom_fn);
|
||||
|
||||
(void)fclose(f);
|
||||
return ERR_OK;
|
||||
trace_add(TO_LOAD, P_fn, flags);
|
||||
}
|
||||
|
||||
|
||||
LibError trace_load_from_file(const char* trace_filename)
|
||||
void trace_notify_free(const char* P_fn)
|
||||
{
|
||||
char N_fn[PATH_MAX];
|
||||
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
|
||||
FILE* f = fopen(N_fn, "rt");
|
||||
if(!f)
|
||||
return ERR_FILE_NOT_FOUND;
|
||||
|
||||
// parse lines and stuff them in trace_pool
|
||||
// (as if they had been trace_add-ed; replaces any existing data)
|
||||
pool_free_all(&trace_pool);
|
||||
char fmt[20];
|
||||
snprintf(fmt, ARRAY_SIZE(fmt), "%%f: %%%ds\n", PATH_MAX);
|
||||
for(;;)
|
||||
{
|
||||
double timestamp; char P_path[PATH_MAX];
|
||||
int ret = fscanf(f, fmt, &timestamp, P_path);
|
||||
if(ret == EOF)
|
||||
break;
|
||||
if(ret != 2)
|
||||
debug_warn("invalid line in trace file");
|
||||
|
||||
TraceEntry* ent = (TraceEntry*)pool_alloc(&trace_pool, 0);
|
||||
debug_assert(ent != 0); // was written to file from same pool => must fit
|
||||
ent->timestamp = timestamp;
|
||||
ent->atom_fn = file_make_unique_fn_copy(P_path, 0);
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
return ERR_OK;
|
||||
trace_add(TO_FREE, P_fn);
|
||||
}
|
||||
|
||||
|
||||
@ -121,123 +65,314 @@ void trace_get(Trace* t)
|
||||
t->num_ents = (uint)(trace_pool.da.pos / sizeof(TraceEntry));
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
struct FileList
|
||||
LibError trace_write_to_file(const char* trace_filename)
|
||||
{
|
||||
const char* atom_fns;
|
||||
size_t num_files;
|
||||
char N_fn[PATH_MAX];
|
||||
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
|
||||
FILE* f = fopen(N_fn, "wt");
|
||||
if(!f)
|
||||
WARN_RETURN(ERR_FILE_ACCESS);
|
||||
|
||||
Trace t;
|
||||
trace_get(&t);
|
||||
const TraceEntry* ent = t.ents;
|
||||
for(size_t i = 0; i < t.num_ents; i++, ent++)
|
||||
{
|
||||
char opcode = '?';
|
||||
switch(ent->op)
|
||||
{
|
||||
case TO_LOAD: opcode = 'L'; break;
|
||||
case TO_FREE: opcode = 'F'; break;
|
||||
default: debug_warn("invalid TraceOp");
|
||||
}
|
||||
|
||||
if(ent->op == TO_LOAD)
|
||||
fprintf(f, "%#010f: %c %s %d\n", ent->timestamp, opcode, ent->atom_fn, ent->flags);
|
||||
else
|
||||
{
|
||||
debug_assert(ent->op == TO_FREE);
|
||||
fprintf(f, "%#010f: %c %s\n", ent->timestamp, opcode, ent->atom_fn);
|
||||
}
|
||||
}
|
||||
|
||||
(void)fclose(f);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
LibError trace_read_from_file(const char* trace_filename, Trace* t)
|
||||
{
|
||||
char N_fn[PATH_MAX];
|
||||
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
|
||||
FILE* f = fopen(N_fn, "rt");
|
||||
if(!f)
|
||||
WARN_RETURN(ERR_FILE_NOT_FOUND);
|
||||
|
||||
// parse lines and stuff them in trace_pool
|
||||
// (as if they had been trace_add-ed; replaces any existing data)
|
||||
pool_free_all(&trace_pool);
|
||||
char fmt[20];
|
||||
snprintf(fmt, ARRAY_SIZE(fmt), "%%f: %%c %%%ds %%02x\n", PATH_MAX);
|
||||
for(;;)
|
||||
{
|
||||
double timestamp; char opcode; char P_path[PATH_MAX];
|
||||
uint flags = 0; // optional
|
||||
int ret = fscanf(f, fmt, &timestamp, &opcode, P_path, &flags);
|
||||
if(ret == EOF)
|
||||
break;
|
||||
if(ret != 3 && ret != 4)
|
||||
debug_warn("invalid line in trace file");
|
||||
|
||||
TraceOp op = TO_LOAD; // default in case file is garbled
|
||||
switch(opcode)
|
||||
{
|
||||
case 'L': op = TO_LOAD; break;
|
||||
case 'F': op = TO_FREE; break;
|
||||
default: debug_warn("invalid TraceOp");
|
||||
}
|
||||
|
||||
trace_add(op, P_path, flags, timestamp);
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
||||
trace_get(t);
|
||||
return ERR_OK;
|
||||
}
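For illustration, one line of the trace file written and parsed above round-trips like this; the path and timestamp are made up, and the sscanf format mirrors (but is not) the one built via snprintf:

#include <cstdio>

int main()
{
	const char* line = "001.234567: L art/textures/ui/logo.png 0";	// hypothetical entry
	double timestamp; char opcode; char path[260]; unsigned flags = 0;
	int fields = sscanf(line, "%lf: %c %259s %x", &timestamp, &opcode, path, &flags);
	if(fields >= 3)	// flags are optional (TO_FREE lines omit them)
		printf("op=%c file=%s t=%f flags=%u\n", opcode, path, timestamp, flags);
	return 0;
}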
|
||||
|
||||
|
||||
enum SimulateFlags
|
||||
{
|
||||
SF_SYNC_TO_TIMESTAMP = 1
|
||||
};
|
||||
|
||||
static LibError filelist_build(Trace* t, FileList* fl)
|
||||
LibError trace_simulate(const char* trace_filename, uint flags)
|
||||
{
|
||||
}
|
||||
// prevent the actions we carry out below from generating
|
||||
// trace_add-s.
|
||||
trace_enabled = false;
|
||||
|
||||
static LibError filelist_get(FileList* fl, uint i, const char* path)
|
||||
{
|
||||
return ERR_DIR_END;
|
||||
}
|
||||
Trace t;
|
||||
RETURN_ERR(trace_read_from_file(trace_filename, &t));
|
||||
|
||||
const double start_time = get_time();
|
||||
const double first_timestamp = t.ents[0].timestamp;
|
||||
|
||||
const TraceEntry* ent = t.ents;
|
||||
for(uint i = 0; i < t.num_ents; i++, ent++)
|
||||
{
|
||||
// wait until time for next entry if caller requested this
|
||||
if(flags & SF_SYNC_TO_TIMESTAMP)
|
||||
{
|
||||
while(get_time()-start_time < ent->timestamp-first_timestamp)
|
||||
{
|
||||
// busy-wait (don't sleep - can skew results)
|
||||
}
|
||||
}
|
||||
|
||||
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
|
||||
{
|
||||
uintptr_t ctx = cb_ctx;
|
||||
|
||||
*bytes_processed = comp_feed(ctx, block, size);
|
||||
return INFO_CB_CONTINUE;
|
||||
}
|
||||
|
||||
static LibError read_and_compress_file(uintptr_t ctx, ZipEntry* ze)
|
||||
{
|
||||
const char* fn = ze->path;
|
||||
|
||||
struct stat s;
|
||||
RETURN_ERR(file_stat(fn, &s));
|
||||
const size_t ucsize = s.st_size;
|
||||
|
||||
RETURN_ERR(comp_reset(ctx));
|
||||
RETURN_ERR(comp_alloc_output(ctx, ucsize));
|
||||
|
||||
File f;
|
||||
RETURN_ERR(file_open(fn, 0, &f));
|
||||
FileIOBuf buf = FILE_BUF_ALLOC;
|
||||
uintptr_t cb_ctx = ctx;
|
||||
ssize_t cbytes_output = file_io(&f, 0, ucsize, &buf, compress_cb, cb_ctx);
|
||||
(void)file_close(&f);
|
||||
|
||||
void* cdata; size_t csize;
|
||||
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
|
||||
debug_assert(cbytes_output <= csize);
|
||||
|
||||
RETURN_ERR(cbytes_output);
|
||||
|
||||
// decide if it was better compressed or not
|
||||
|
||||
ze->ucsize = ucsize;
|
||||
ze->mtime = s.st_mtime;
|
||||
ze->method = CM_DEFLATE;
|
||||
ze->csize = csize;
|
||||
ze->cdata = cdata;
|
||||
|
||||
zip_archive_add(&za, &ze);
|
||||
// carry out this entry's operation
|
||||
FileIOBuf buf; size_t size;
|
||||
switch(ent->op)
|
||||
{
|
||||
case TO_LOAD:
|
||||
(void)vfs_load(ent->atom_fn, buf, size, ent->flags);
|
||||
break;
|
||||
case TO_FREE:
|
||||
buf = file_cache_find(ent->atom_fn, &size);
|
||||
(void)file_buf_free(buf);
|
||||
break;
|
||||
default:
|
||||
debug_warn("unknown TraceOp");
|
||||
}
|
||||
}
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
static void build_optimized_archive(const char* trace_file, const char* zip_filename)
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
struct FileList
|
||||
{
|
||||
const char** atom_fns;
|
||||
size_t num_files;
|
||||
size_t i;
|
||||
};
|
||||
|
||||
|
||||
static LibError filelist_build(Trace* t, FileList* fl)
|
||||
{
|
||||
// count # files
|
||||
fl->num_files = 0;
|
||||
for(size_t i = 0; i < t->num_ents; i++)
|
||||
if(t->ents[i].op == TO_LOAD)
|
||||
fl->num_files++;
|
||||
|
||||
fl->atom_fns = new const char*[fl->num_files];
|
||||
|
||||
size_t ti = 0;
|
||||
for(size_t i = 0; i < fl->num_files; i++)
|
||||
{
|
||||
// find next trace entry that is a load (must exist)
|
||||
while(t->ents[ti].op != TO_LOAD)
|
||||
ti++;
|
||||
fl->atom_fns[i] = t->ents[ti].atom_fn;
ti++;	// skip past this entry so the next iteration finds the following load
|
||||
}
|
||||
|
||||
fl->i = 0;
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
static const char* filelist_get_next(FileList* fl)
|
||||
{
|
||||
if(fl->i == fl->num_files)
|
||||
return 0;
|
||||
return fl->atom_fns[fl->i++];
|
||||
}
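A hypothetical caller of the FileList helpers above might walk the recorded load order like this (error handling and the trace filename are illustrative):

Trace t;
if(trace_read_from_file("trace.txt", &t) == ERR_OK)
{
	FileList fl;
	if(filelist_build(&t, &fl) == ERR_OK)
	{
		const char* atom_fn;
		while((atom_fn = filelist_get_next(&fl)) != 0)
			debug_printf("would archive: %s\n", atom_fn);
		delete[] fl.atom_fns;
	}
}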
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
static inline bool file_type_is_uncompressible(const char* fn)
|
||||
{
|
||||
const char* ext = strrchr(fn, '.');
|
||||
// no extension? bail; assume compressible
|
||||
if(!ext)
|
||||
return false;
|
||||
|
||||
// this is a selection of file types that are certainly not
|
||||
// further compressible. we need not include every type under the sun -
|
||||
// this is only a slight optimization that avoids wasting time
|
||||
// compressing files. the real decision as to cmethod is made based
|
||||
// on attained compression ratio.
|
||||
static const char* uncompressible_exts[] =
|
||||
{
|
||||
"zip", "rar",
|
||||
"jpg", "jpeg", "png",
|
||||
"ogg", "mp3"
|
||||
};
|
||||
|
||||
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
|
||||
{
|
||||
if(!stricmp(ext+1, uncompressible_exts[i]))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
struct CompressParams
|
||||
{
|
||||
bool attempt_compress;
|
||||
uintptr_t ctx;
|
||||
};
|
||||
|
||||
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
|
||||
{
|
||||
const CompressParams* p = (const CompressParams*)cb_ctx;
|
||||
|
||||
// comp_feed already makes note of total #bytes fed, and we need
|
||||
// vfs_io to return the uc size (to check if all data was read).
|
||||
*bytes_processed = size;
|
||||
|
||||
if(p->attempt_compress)
|
||||
(void)comp_feed(p->ctx, block, size);
|
||||
return INFO_CB_CONTINUE;
|
||||
}
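The *bytes_processed convention that compress_cb relies on can be shown in isolation; everything below is a standalone sketch, with a plain int return standing in for LibError/INFO_CB_CONTINUE:

#include <cstddef>
#include <cstdio>

static int count_cb(void* ctx, const void* block, size_t size, size_t* bytes_processed)
{
	(void)block;
	*bytes_processed = size;	// claim the whole block, as compress_cb does above
	*(size_t*)ctx += size;
	return 0;	// continue
}

int main()
{
	size_t total = 0;
	const char block[16] = {0};
	for(int i = 0; i < 4; i++)
	{
		size_t processed = 0;
		count_cb(&total, block, sizeof(block), &processed);
	}
	printf("%u bytes fed\n", (unsigned)total);	// 64
	return 0;
}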
|
||||
|
||||
|
||||
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
|
||||
ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf) // out
|
||||
{
|
||||
struct stat s;
|
||||
RETURN_ERR(file_stat(atom_fn, &s));
|
||||
const size_t ucsize = s.st_size;
|
||||
|
||||
const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
|
||||
if(attempt_compress)
|
||||
{
|
||||
RETURN_ERR(comp_reset(ctx));
|
||||
RETURN_ERR(comp_alloc_output(ctx, ucsize));
|
||||
}
|
||||
|
||||
// read file into newly allocated buffer. if attempt_compress, also
|
||||
// compress the file into another buffer while waiting for IOs.
|
||||
Handle hf = vfs_open(atom_fn, 0);
|
||||
RETURN_ERR(hf);
|
||||
buf = FILE_BUF_ALLOC;
|
||||
const CompressParams params = { attempt_compress, ctx };
|
||||
ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)&params);
|
||||
debug_assert(ucsize_read == (ssize_t)ucsize);
|
||||
(void)vfs_close(hf);
|
||||
|
||||
// if we compressed the file trial-wise, check results and
|
||||
// decide whether to store as such or not (based on compression ratio)
|
||||
bool store_compressed = false;
|
||||
void* cdata = 0; size_t csize = 0;
|
||||
if(attempt_compress)
|
||||
{
|
||||
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
|
||||
|
||||
const float ratio = (float)ucsize / csize;
|
||||
const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
|
||||
if(ratio > 1.05f && bytes_saved > 200)
|
||||
store_compressed = true;
|
||||
}
|
||||
|
||||
// store file info
|
||||
ent.ucsize = (off_t)ucsize;
|
||||
ent.mtime = s.st_mtime;
|
||||
// .. ent.ofs is set by zip_archive_add_file
|
||||
ent.flags = 0;
|
||||
ent.atom_fn = atom_fn;
|
||||
if(store_compressed)
|
||||
{
|
||||
ent.method = CM_DEFLATE;
|
||||
ent.csize = (off_t)csize;
|
||||
file_contents = cdata;
|
||||
}
|
||||
else
|
||||
{
|
||||
ent.method = CM_NONE;
|
||||
ent.csize = (off_t)ucsize;
|
||||
file_contents = (void*)buf;
|
||||
}
|
||||
|
||||
// note: no need to free cdata - it is owned by the
|
||||
// compression context and can be reused.
|
||||
|
||||
return ERR_OK;
|
||||
}
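The store-compressed decision above boils down to a small predicate; a sketch with the same 5% ratio and 200-byte thresholds (the function name is made up):

#include <cstddef>

static bool is_worth_storing_compressed(size_t ucsize, size_t csize)
{
	if(csize == 0 || csize >= ucsize)
		return false;
	const float ratio = (float)ucsize / (float)csize;
	const long long bytes_saved = (long long)ucsize - (long long)csize;
	return ratio > 1.05f && bytes_saved > 200;
}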
|
||||
|
||||
static LibError build_optimized_archive(const char* trace_filename, const char* zip_filename)
|
||||
{
|
||||
FileList fl;
|
||||
{
|
||||
Trace t;
|
||||
RETURN_ERR(trace_load_from_file(trace_filename, &t));
|
||||
filelist_build(&t, &fl);
|
||||
RETURN_ERR(trace_read_from_file(trace_filename, &t));
|
||||
RETURN_ERR(filelist_build(&t, &fl));
|
||||
}
|
||||
|
||||
ZipArchive za;
|
||||
zip_archive_create(zip_filename, &za);
|
||||
|
||||
uintptr_t ctx = comp_alloc();
|
||||
uint trace_i = 0;
|
||||
uint queued_files = 0, committed_files = 0;
|
||||
ZipArchive* za;
|
||||
RETURN_ERR(zip_archive_create(zip_filename, &za));
|
||||
uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
|
||||
/*
|
||||
document: zlib layer is ok to allocate. caller shouldn't do so from a pool:
|
||||
when the next file is going to be loaded and decompressed but our pool is full,
|
||||
we need to wait for the archive write to finish and mark pool as reclaimed.
|
||||
this is better done with the heap; also, memory isn't a bottleneck for the read-queue size
|
||||
*/
|
||||
|
||||
ZipEntry ze; // TODO: QUEUE
|
||||
const int max_readqueue_depth = 1;
|
||||
for(uint i = 0; i < max_readqueue_depth; i++)
|
||||
{
|
||||
LibError ret = trace_get_next_file(trace, trace_i, ze.path);
|
||||
if(ret == ERR_DIR_END)
|
||||
break;
|
||||
|
||||
WARN_ERR(read_and_compress_file(ctx, &ze));
|
||||
queued_files++;
|
||||
}
|
||||
|
||||
if(committed_files == queued_files)
|
||||
const char* atom_fn = filelist_get_next(&fl);
|
||||
if(!atom_fn)
|
||||
break;
|
||||
zip_archive_add(&za, &ze);
|
||||
committed_files++;
|
||||
|
||||
ArchiveEntry ent; void* file_contents; FileIOBuf buf;
|
||||
if(read_and_compress_file(atom_fn, ctx, ent, file_contents, buf) == ERR_OK)
|
||||
{
|
||||
(void)zip_archive_add_file(za, &ent, file_contents);
|
||||
(void)file_buf_free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
comp_free(ctx);
|
||||
|
||||
zip_archive_finish(&za);
|
||||
(void)zip_archive_finish(za);
|
||||
}
|
||||
#endif
|
||||
|
@ -2,24 +2,42 @@
|
||||
#define VFS_OPTIMIZER_H__
|
||||
|
||||
extern void trace_enable(bool want_enabled);
|
||||
extern void trace_add(const char* P_fn);
|
||||
extern void trace_shutdown();
|
||||
|
||||
extern LibError trace_write_to_file(const char* trace_filename);
|
||||
extern LibError trace_read_from_file(const char* trace_filename);
|
||||
extern void trace_notify_load(const char* P_fn, uint flags);
|
||||
extern void trace_notify_free(const char* P_fn);
|
||||
|
||||
// TraceEntry operation type.
|
||||
// note: rather than only a list of accessed files, we also need to
|
||||
// know the application's behavior WRT caching (e.g. when it releases
|
||||
// cached buffers). this is necessary so that our simulation can
|
||||
// yield the same results.
|
||||
enum TraceOp
|
||||
{
|
||||
TO_LOAD,
|
||||
TO_FREE
|
||||
};
|
||||
|
||||
// stores one event that is relevant for file IO / caching.
|
||||
//
|
||||
// size-optimized a bit since these are all kept in memory
|
||||
// (to prevent trace file writes from affecting other IOs)
|
||||
struct TraceEntry
|
||||
{
|
||||
double timestamp;
|
||||
const char* atom_fn;
|
||||
double timestamp; // returned by get_time before operation starts
|
||||
const char* atom_fn; // path+name of affected file
|
||||
uint op : 8; // operation - see TraceOp
|
||||
uint flags : 24; // misc, e.g. file_io flags.
|
||||
};
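Because entries stay resident for the whole run, the bitfield packing above matters; a quick standalone check of the layout (padding is compiler-dependent, but op and flags share one 32-bit word):

#include <cstdio>

struct TraceEntryExample
{
	double timestamp;
	const char* atom_fn;
	unsigned op : 8;
	unsigned flags : 24;
};

int main()
{
	printf("%u bytes per entry\n", (unsigned)sizeof(TraceEntryExample));	// typically 24 on x86-64
	return 0;
}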
|
||||
|
||||
struct Trace
|
||||
{
|
||||
const TraceEntry* ents;
|
||||
uint num_ents;
|
||||
size_t num_ents;
|
||||
};
|
||||
|
||||
extern void trace_get(Trace* t);
|
||||
extern void trace_shutdown();
|
||||
extern LibError trace_write_to_file(const char* trace_filename);
|
||||
extern LibError trace_read_from_file(const char* trace_filename, Trace* t);
|
||||
|
||||
#endif // #ifndef VFS_OPTIMIZER_H__
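A hedged usage sketch of the interface declared above; the filenames are illustrative, and the load/free pair merely stands in for whatever IO the application performs (the trace hooks in vfs_load/vfs_io and file_buf_free are assumed to record the actual events):

static void profile_one_session()
{
	trace_enable(true);

	FileIOBuf buf; size_t size;
	if(vfs_load("art/textures/ui/logo.png", buf, size, 0) == ERR_OK)	// recorded as TO_LOAD
		(void)file_buf_free(buf);	// recorded as TO_FREE

	(void)trace_write_to_file("profile/trace.txt");
	trace_shutdown();
}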
|
||||
|
@ -266,28 +266,62 @@ static LibError za_extract_cdfh(const CDFH* cdfh,
|
||||
}
|
||||
|
||||
|
||||
// this code grabs an LFH struct from file block(s) that are
|
||||
// passed to the callback. usually, one call copies the whole thing,
|
||||
// but the LFH may straddle a block boundary.
|
||||
//
|
||||
// rationale: this allows using temp buffers for zip_fixup_lfh,
|
||||
// which avoids involving the file buffer manager and thus
|
||||
// unclutters the trace and cache contents.
|
||||
|
||||
struct LFH_Copier
|
||||
{
|
||||
u8* lfh_dst;
|
||||
size_t lfh_bytes_remaining;
|
||||
};
|
||||
|
||||
static LibError lfh_copier_cb(uintptr_t ctx, const void* block, size_t size, size_t* bytes_processed)
|
||||
{
|
||||
LFH_Copier* p = (LFH_Copier*)ctx;
|
||||
|
||||
// find corresponding LFH, needed to calculate file offset
|
||||
// (its extra field may not match that reported by CDFH!).
|
||||
debug_assert(size <= p->lfh_bytes_remaining);
|
||||
memcpy2(p->lfh_dst, block, size);
|
||||
p->lfh_dst += size;
|
||||
p->lfh_bytes_remaining -= size;
|
||||
|
||||
*bytes_processed = size;
|
||||
return INFO_CB_CONTINUE;
|
||||
}
|
||||
|
||||
// ensures <ent.ofs> points to the actual file contents; it is initially
|
||||
// the offset of the LFH. we cannot use CDFH filename and extra field
|
||||
// lengths to skip past LFH since that may not mirror CDFH (has happened).
|
||||
//
|
||||
// this is called at file-open time instead of while mounting to
|
||||
// reduce seeks: since reading the file will typically follow, the
|
||||
// block cache entirely absorbs the IO cost.
|
||||
void zip_fixup_lfh(File* f, ArchiveEntry* ent)
|
||||
{
|
||||
// improbable that this will be in cache - if this file had already
|
||||
// been read, it would have been fixed up. only in cache if this
|
||||
// file is in the same block as a previously read file (i.e. both small)
|
||||
FileIOBuf buf = FILE_BUF_ALLOC;
|
||||
file_io(f, ent->ofs, LFH_SIZE, &buf);
|
||||
const LFH* lfh = (const LFH*)buf;
|
||||
// already fixed up - done.
|
||||
if(!(ent->flags & ZIP_LFH_FIXUP_NEEDED))
|
||||
return;
|
||||
|
||||
debug_assert(lfh->magic == lfh_magic);
|
||||
const size_t fn_len = read_le16(&lfh->fn_len);
|
||||
const size_t e_len = read_le16(&lfh->e_len);
|
||||
// performance note: this ends up reading one file block, which is
|
||||
// only in the block cache if the file starts in the same block as a
|
||||
// previously read file (i.e. both are small).
|
||||
LFH lfh;
|
||||
LFH_Copier params = { (u8*)&lfh, sizeof(LFH) };
|
||||
ssize_t ret = file_io(f, ent->ofs, LFH_SIZE, FILE_BUF_TEMP, lfh_copier_cb, (uintptr_t)&params);
|
||||
debug_assert(ret == sizeof(LFH));
|
||||
|
||||
debug_assert(lfh.magic == lfh_magic);
|
||||
const size_t fn_len = read_le16(&lfh.fn_len);
|
||||
const size_t e_len = read_le16(&lfh.e_len);
|
||||
|
||||
ent->ofs += (off_t)(LFH_SIZE + fn_len + e_len);
|
||||
// LFH doesn't have a comment field!
|
||||
|
||||
file_buf_free(buf);
|
||||
ent->flags &= ~ZIP_LFH_FIXUP_NEEDED;
|
||||
}
|
||||
|
||||
|
||||
@ -393,21 +427,24 @@ struct ZipArchive
|
||||
uint cd_entries;
|
||||
};
|
||||
|
||||
struct ZipEntry
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
size_t ucsize;
|
||||
time_t mtime;
|
||||
ZipCompressionMethod method;
|
||||
size_t csize;
|
||||
const void* cdata;
|
||||
};
|
||||
// we don't want to expose ZipArchive to callers, so
|
||||
// allocate the storage here and return opaque pointer.
|
||||
static SingleAllocator<ZipArchive> za_mgr;
|
||||
|
||||
LibError zip_archive_create(const char* zip_filename, ZipArchive* za)
|
||||
|
||||
LibError zip_archive_create(const char* zip_filename, ZipArchive** pza)
|
||||
{
|
||||
memset(za, 0, sizeof(*za));
|
||||
RETURN_ERR(file_open(zip_filename, 0, &za->f));
|
||||
RETURN_ERR(pool_create(&za->cdfhs, 10*MiB, 0));
|
||||
// local za_copy simplifies things - if something fails, no cleanup is
|
||||
// needed. upon success, we copy into the newly allocated real za.
|
||||
ZipArchive za_copy;
|
||||
RETURN_ERR(file_open(zip_filename, 0, &za_copy.f));
|
||||
RETURN_ERR(pool_create(&za_copy.cdfhs, 10*MiB, 0));
|
||||
|
||||
ZipArchive* za = (ZipArchive*)za_mgr.alloc();
|
||||
if(!za)
|
||||
WARN_RETURN(ERR_NO_MEM);
|
||||
*za = za_copy;
|
||||
*pza = za;
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
@ -424,18 +461,14 @@ static inline u16 u16_from_size_t(size_t x)
|
||||
return (u16)(x & 0xFFFF);
|
||||
}
|
||||
|
||||
|
||||
LibError zip_archive_add(ZipArchive* za, const ZipEntry* ze)
|
||||
LibError zip_archive_add_file(ZipArchive* za, const ArchiveEntry* ze, void* file_contents)
|
||||
{
|
||||
FileIOBuf buf;
|
||||
|
||||
const char* fn = ze->path;
|
||||
const char* fn = ze->atom_fn;
|
||||
const size_t fn_len = strlen(fn);
|
||||
const size_t ucsize = ze->ucsize;
|
||||
const u32 fat_mtime = FAT_from_time_t(ze->mtime);
|
||||
const u16 method = (u16)ze->method;
|
||||
const size_t csize = ze->csize;
|
||||
const void* cdata = ze->cdata;
|
||||
|
||||
const off_t lfh_ofs = za->cur_file_size;
|
||||
|
||||
@ -454,11 +487,12 @@ LibError zip_archive_add(ZipArchive* za, const ZipEntry* ze)
|
||||
u16_from_size_t(fn_len),
|
||||
0 // e_len
|
||||
};
|
||||
FileIOBuf buf;
|
||||
buf = (FileIOBuf)&lfh;
|
||||
file_io(&za->f, lfh_ofs, lfh_size, &buf);
|
||||
file_io(&za->f, lfh_ofs, lfh_size, &buf);
|
||||
buf = (FileIOBuf)fn;
|
||||
file_io(&za->f, lfh_ofs+lfh_size, fn_len, &buf);
|
||||
buf = (FileIOBuf)cdata;
|
||||
file_io(&za->f, lfh_ofs+lfh_size, fn_len, &buf);
|
||||
buf = (FileIOBuf)file_contents;
|
||||
file_io(&za->f, lfh_ofs+(off_t)(lfh_size+fn_len), csize, &buf);
|
||||
za->cur_file_size += (off_t)(lfh_size+fn_len+csize);
|
||||
|
||||
@ -511,6 +545,7 @@ LibError zip_archive_finish(ZipArchive* za)
|
||||
|
||||
(void)file_close(&za->f);
|
||||
(void)pool_destroy(&za->cdfhs);
|
||||
za_mgr.free(za);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
@ -8,4 +8,11 @@ extern LibError zip_populate_archive(Archive* a, File* f);
|
||||
|
||||
extern void zip_fixup_lfh(File* f, ArchiveEntry* ent);
|
||||
|
||||
|
||||
struct ZipArchive;
|
||||
extern LibError zip_archive_create(const char* zip_filename, ZipArchive** pza);
|
||||
extern LibError zip_archive_add_file(ZipArchive* za, const ArchiveEntry* ze, void* file_contents);
|
||||
extern LibError zip_archive_finish(ZipArchive* za);
|
||||
|
||||
|
||||
#endif // #ifndef ZIP_H__
|
||||
|
@ -213,27 +213,30 @@ TIMER_ACCRUE(tc_plain_transform);
|
||||
if(!transforms)
|
||||
return ERR_OK;
|
||||
|
||||
// allocate copy of the image data.
|
||||
// rationale: L1 cache is typically A2 => swapping in-place with a
|
||||
// line buffer leads to thrashing. we'll assume the whole texture*2
|
||||
// fits in cache, allocate a copy, and transfer directly from there.
|
||||
//
|
||||
// this is necessary even when not flipping because the initial Tex.hm
|
||||
// (which is a FileIOBuf) is read-only.
|
||||
Handle hm;
|
||||
void* new_data = mem_alloc(data_size, 4*KiB, 0, &hm);
|
||||
if(!new_data)
|
||||
return ERR_NO_MEM;
|
||||
memcpy2(new_data, data, data_size);
|
||||
|
||||
// setup row source/destination pointers (simplifies outer loop)
|
||||
u8* dst = data;
|
||||
const u8* src = data;
|
||||
u8* dst = (u8*)new_data;
|
||||
const u8* src = (const u8*)new_data;
|
||||
const size_t pitch = w * bpp/8;
|
||||
// .. avoid y*pitch multiply in row loop; instead, add row_ofs.
|
||||
ssize_t row_ofs = (ssize_t)pitch;
|
||||
// avoid y*pitch multiply in row loop; instead, add row_ofs.
|
||||
void* clone_data = 0;
|
||||
|
||||
// flipping rows (0,1,2 -> 2,1,0)
|
||||
if(transforms & TEX_ORIENTATION)
|
||||
{
|
||||
// L1 cache is typically A2 => swapping in-place with a line buffer
|
||||
// leads to thrashing. we'll assume the whole texture*2 fits in cache,
|
||||
// allocate a copy, and transfer directly from there.
|
||||
//
|
||||
// note: we don't want to return a new buffer: the user assumes
|
||||
// buffer address will remain unchanged.
|
||||
clone_data = mem_alloc(data_size, 4*KiB);
|
||||
if(!clone_data)
|
||||
return ERR_NO_MEM;
|
||||
memcpy2(clone_data, data, data_size);
|
||||
src = (const u8*)clone_data+data_size-pitch; // last row
|
||||
src = (const u8*)data+data_size-pitch; // last row
|
||||
row_ofs = -(ssize_t)pitch;
|
||||
}
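The negative row offset trick above can be isolated into a tiny routine; this is only a sketch assuming tightly packed rows (pitch = w*bpp/8) and made-up names:

#include <cstring>
#include <cstddef>

static void flip_rows(unsigned char* dst, const unsigned char* src_copy, size_t h, size_t pitch)
{
	const unsigned char* src = src_copy + (h-1)*pitch;	// last row of the source copy
	const ptrdiff_t row_ofs = -(ptrdiff_t)pitch;		// walk the source bottom-up
	for(size_t y = 0; y < h; y++)
	{
		memcpy(dst, src, pitch);	// write destination rows top-down
		dst += pitch;
		src += row_ofs;
	}
}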
|
||||
|
||||
@ -280,8 +283,9 @@ TIMER_ACCRUE(tc_plain_transform);
|
||||
}
|
||||
}
|
||||
|
||||
if(clone_data)
|
||||
(void)mem_free(clone_data);
|
||||
mem_free_h(t->hm);
|
||||
t->hm = hm;
|
||||
t->ofs = 0;
|
||||
|
||||
if(!(t->flags & TEX_MIPMAPS) && transforms & TEX_MIPMAPS)
|
||||
{
|
||||
@ -296,10 +300,11 @@ TIMER_ACCRUE(tc_plain_transform);
|
||||
const u8* mipmap_data = (const u8*)mem_alloc(mipmap_size, 4*KiB, 0, &hm);
|
||||
if(!mipmap_data)
|
||||
return ERR_NO_MEM;
|
||||
CreateLevelData cld = { bpp/8, w, h, data, data_size };
|
||||
CreateLevelData cld = { bpp/8, w, h, (const u8*)new_data, data_size };
|
||||
tex_util_foreach_mipmap(w, h, bpp, mipmap_data, 0, 1, create_level, &cld);
|
||||
mem_free_h(t->hm);
|
||||
t->hm = hm;
|
||||
t->ofs = 0;
|
||||
}
|
||||
|
||||
CHECK_TEX(t);
|
||||
@ -450,6 +455,12 @@ static LibError tex_load_impl(FileIOBuf file_, size_t file_size, Tex* t)
|
||||
}
|
||||
|
||||
|
||||
// MEM_DTOR -> file_buf_free adapter (used for mem_wrap-ping FileIOBuf)
|
||||
static void file_buf_dtor(void* p, size_t UNUSED(size), uintptr_t UNUSED(ctx))
|
||||
{
|
||||
(void)file_buf_free((FileIOBuf)p);
|
||||
}
|
||||
|
||||
// load the specified image from file into the given Tex object.
|
||||
// currently supports BMP, TGA, JPG, JP2, PNG, DDS.
|
||||
LibError tex_load(const char* fn, Tex* t)
|
||||
@ -460,7 +471,7 @@ LibError tex_load(const char* fn, Tex* t)
|
||||
// must be protected against being accidentally free-d in that case.
|
||||
|
||||
RETURN_ERR(vfs_load(fn, file, file_size));
|
||||
Handle hm = mem_wrap((void*)file, file_size, 0, 0, 0, 0, 0, (void*)tex_load);
|
||||
Handle hm = mem_wrap((void*)file, file_size, 0, 0, 0, file_buf_dtor, 0, (void*)tex_load);
|
||||
t->hm = hm;
|
||||
LibError ret = tex_load_impl(file, file_size, t);
|
||||
if(ret < 0)
|
||||