forked from 0ad/0ad
adts: remove_least_valuable safely handles empty cache
lib: add u32_hi/lo. archive: move archive-builder logic here from vfs_optimizer. file_cache: add flush() call, used when simulating the cache. vfs_optimizer: WIP, not yet functional — file gatherer and 50% of the TSP tour generator. This was SVN commit r3486.
This commit is contained in:
parent
56d8d31cd0
commit
21f8f8555e
@ -294,8 +294,12 @@ public:
|
||||
|
||||
// remove the least valuable item and optionally indicate
|
||||
// how big it was (useful for statistics).
|
||||
// returns 0 if cache is empty.
|
||||
T remove_least_valuable(size_t* psize = 0)
|
||||
{
|
||||
if(map.empty())
|
||||
return 0;
|
||||
|
||||
// one iteration ought to suffice to evict someone due to
|
||||
// definition of min_density, but we provide for repeating
|
||||
// in case of floating-point imprecision.
|
||||
|
@ -315,9 +315,21 @@ u32 u64_hi(u64 x)
|
||||
|
||||
// return the lower 32 bits of a 64-bit value.
// (bugfix vs. scraped diff: the old and new return lines were both
// present; keep the single new-style mask, matching u32_lo's casing.)
u32 u64_lo(u64 x)
{
	return (u32)(x & 0xFFFFFFFF);
}
|
||||
|
||||
// return the upper 16 bits of a 32-bit value.
u16 u32_hi(u32 x)
{
	const u32 shifted = x >> 16;
	return (u16)shifted;
}

// return the lower 16 bits of a 32-bit value.
u16 u32_lo(u32 x)
{
	const u32 masked = x & 0xFFFFu;
	return (u16)masked;
}
|
||||
|
||||
|
||||
|
||||
u64 u64_from_u32(u32 hi, u32 lo)
|
||||
{
|
||||
u64 x = (u64)hi;
|
||||
|
@ -283,6 +283,9 @@ extern uintptr_t round_down(uintptr_t n, uintptr_t multiple);
|
||||
// less than the bit width of the type).
|
||||
extern u32 u64_hi(u64 x);
|
||||
extern u32 u64_lo(u64 x);
|
||||
extern u16 u32_hi(u32 x);
|
||||
extern u16 u32_lo(u32 x);
|
||||
|
||||
|
||||
extern u64 u64_from_u32(u32 hi, u32 lo);
|
||||
extern u32 u32_from_u16(u16 hi, u16 lo);
|
||||
|
@ -604,3 +604,158 @@ LibError afile_unmap(AFile* af)
|
||||
H_DEREF(af->ha, Archive, a);
|
||||
return file_unmap(&a->f);
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// archive builder
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
static inline bool file_type_is_uncompressible(const char* fn)
|
||||
{
|
||||
const char* ext = strrchr(fn, '.');
|
||||
// no extension? bail; assume compressible
|
||||
if(!ext)
|
||||
return true;
|
||||
|
||||
// this is a selection of file types that are certainly not
|
||||
// further compressible. we need not include every type under the sun -
|
||||
// this is only a slight optimization that avoids wasting time
|
||||
// compressing files. the real decision as to cmethod is made based
|
||||
// on attained compression ratio.
|
||||
static const char* uncompressible_exts[] =
|
||||
{
|
||||
"zip", "rar",
|
||||
"jpg", "jpeg", "png",
|
||||
"ogg", "mp3"
|
||||
};
|
||||
|
||||
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
|
||||
{
|
||||
if(!stricmp(ext+1, uncompressible_exts[i]))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// parameters handed to compress_cb through vfs_io's callback context.
struct CompressParams
{
	bool attempt_compress;	// if set, stream data into the compressor
	uintptr_t ctx;			// compression context (comp_* API)
	u32 crc;				// running zlib CRC32 over all data seen
};

#include <zlib.h>

// vfs_io callback: checksums (and, if requested, compresses) each
// incoming block, overlapping that work with the pending IOs.
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
{
	CompressParams* params = (CompressParams*)cb_ctx;

	// comp_feed already makes note of total #bytes fed, and we need
	// vfs_io to return the uc size (to check if all data was read).
	*bytes_processed = size;

	// fold this block into the running checksum
	params->crc = crc32(params->crc, (const Bytef*)block, (uInt)size);

	if(params->attempt_compress)
		(void)comp_feed(params->ctx, block, size);
	return INFO_CB_CONTINUE;
}
|
||||
|
||||
|
||||
// read the file <atom_fn> into a newly allocated buffer and, unless its
// type rules out further compression, compress it on the fly while
// waiting for the IOs. fills <ent> with the archive-entry parameters and
// points <file_contents> at whichever representation (compressed or raw)
// should be stored. <buf> must be freed by the caller via file_buf_free.
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
	ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf)	// out
{
	struct stat s;
	RETURN_ERR(vfs_stat(atom_fn, &s));
	const size_t ucsize = s.st_size;

	const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
	if(attempt_compress)
	{
		RETURN_ERR(comp_reset(ctx));
		RETURN_ERR(comp_alloc_output(ctx, ucsize));
	}

	// read file into newly allocated buffer. if attempt_compress, also
	// compress the file into another buffer while waiting for IOs.
	Handle hf = vfs_open(atom_fn, 0);
	RETURN_ERR(hf);
	buf = FILE_BUF_ALLOC;
	CompressParams params = { attempt_compress, ctx, 0 };
	// (repairs mangled source: pass the address of params)
	ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)&params);
	debug_assert(ucsize_read == (ssize_t)ucsize);
	(void)vfs_close(hf);

	// if we compressed the file trial-wise, check results and
	// decide whether to store as such or not (based on compression ratio)
	bool store_compressed = false;
	void* cdata = 0; size_t csize = 0;
	if(attempt_compress)
	{
		RETURN_ERR(comp_finish(ctx, &cdata, &csize));

		// bugfix: guard against csize == 0 (e.g. empty file) to avoid
		// division by zero; such output can never be worth storing anyway.
		if(csize != 0)
		{
			const float ratio = (float)ucsize / (float)csize;
			const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
			if(ratio > 1.05f && bytes_saved > 200)
				store_compressed = true;
		}
	}

	// store file info
	ent.ucsize = (off_t)ucsize;
	ent.mtime = s.st_mtime;
	// .. ent.ofs is set by zip_archive_add_file
	ent.flags = 0;
	ent.atom_fn = atom_fn;
	ent.crc32 = params.crc;
	if(store_compressed)
	{
		ent.method = CM_DEFLATE;
		ent.csize = (off_t)csize;
		file_contents = cdata;
	}
	else
	{
		ent.method = CM_NONE;
		ent.csize = (off_t)ucsize;
		file_contents = (void*)buf;
	}

	// note: no need to free cdata - it is owned by the
	// compression context and can be reused.

	return ERR_OK;
}
|
||||
|
||||
|
||||
// create the archive <P_archive_filename> containing every file in the
// NULL-terminated list <V_fl> (VFS pathnames). files that fail to read
// are skipped; compression is decided per-file by read_and_compress_file.
LibError archive_build(const char* P_archive_filename, Filenames V_fl)
{
	ZipArchive* za;
	RETURN_ERR(zip_archive_create(P_archive_filename, &za));
	uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);

	const char* fn;	// declared outside loop for easier debugging
	for(size_t i = 0; (fn = V_fl[i]) != 0; i++)
	{
		ArchiveEntry ent; void* file_contents; FileIOBuf buf;
		if(read_and_compress_file(fn, ctx, ent, file_contents, buf) == ERR_OK)
		{
			(void)zip_archive_add_file(za, &ent, file_contents);
			(void)file_buf_free(buf);
		}
	}

	// note: this is currently known to fail if there are no files in the list
	// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
	// safe to ignore.
	comp_free(ctx);
	(void)zip_archive_finish(za);

	return ERR_OK;
}
|
@ -159,6 +159,18 @@ extern LibError afile_map(AFile* af, void*& p, size_t& size);
|
||||
extern LibError afile_unmap(AFile* af);
|
||||
|
||||
|
||||
//
|
||||
// archive builder
|
||||
//
|
||||
|
||||
// array of pointers to VFS filenames (including path), terminated by a
|
||||
// NULL entry.
|
||||
typedef const char** Filenames;
|
||||
|
||||
extern LibError archive_build(const char* P_archive_filename, Filenames V_fl);
|
||||
|
||||
|
||||
|
||||
|
||||
enum ArchiveFileFlags
|
||||
{
|
||||
|
@ -313,7 +313,7 @@ typedef const u8* FileIOBuf;
|
||||
FileIOBuf* const FILE_BUF_TEMP = (FileIOBuf*)1;
|
||||
const FileIOBuf FILE_BUF_ALLOC = (FileIOBuf)2;
|
||||
|
||||
extern FileIOBuf file_buf_alloc(size_t size, const char* atom_fn);
|
||||
extern FileIOBuf file_buf_alloc(size_t size, const char* atom_fn, bool long_lived);
|
||||
extern LibError file_buf_free(FileIOBuf buf);
|
||||
|
||||
|
||||
|
@ -786,13 +786,17 @@ LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn)
|
||||
// decide (based on flags) if buf is to be cached; set cost
|
||||
uint cost = 1;
|
||||
|
||||
cache_allocator.make_read_only((u8*)buf, size);
|
||||
if(buf)
|
||||
cache_allocator.make_read_only((u8*)buf, size);
|
||||
file_cache.add(atom_fn, buf, size, cost);
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// called by trace simulator to retrieve buffer address, given atom_fn.
|
||||
// must not change any cache state (e.g. notify stats or add ref).
|
||||
FileIOBuf file_cache_find(const char* atom_fn, size_t* size)
|
||||
@ -851,9 +855,6 @@ file_buf_free and there are only a few active at a time ( < 10)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// remove all blocks loaded from the file <fn>. used when reloading the file.
|
||||
LibError file_cache_invalidate(const char* P_fn)
|
||||
{
|
||||
@ -877,6 +878,23 @@ LibError file_cache_invalidate(const char* P_fn)
|
||||
}
|
||||
|
||||
|
||||
// empty the file cache, freeing every buffer it still holds.
// used when simulating cache behavior over a recorded trace.
void file_cache_flush()
{
	size_t size;
	FileIOBuf discarded;
	// keep evicting until the cache reports empty (returns 0)
	while((discarded = file_cache.remove_least_valuable(&size)) != 0)
	{
#include "nommgr.h"
		cache_allocator.free((u8*)discarded, size);
#include "mmgr.h"
	}
}
|
||||
|
||||
|
||||
|
||||
void file_cache_init()
|
||||
{
|
||||
}
|
||||
|
@ -30,6 +30,9 @@ extern FileIOBuf file_cache_find(const char* atom_fn, size_t* size);
|
||||
extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size);
|
||||
extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn);
|
||||
|
||||
extern LibError file_cache_invalidate(const char* fn);
|
||||
extern void file_cache_flush();
|
||||
|
||||
|
||||
extern void file_cache_init();
|
||||
extern void file_cache_shutdown();
|
||||
|
@ -353,6 +353,7 @@ static LibError add_ent(TDir* td, DirEnt* ent, const char* P_parent_path, const
|
||||
// prepend parent path to get complete pathname.
|
||||
char P_path[PATH_MAX];
|
||||
CHECK_ERR(vfs_path_append(P_path, P_parent_path, name));
|
||||
vfs_opt_notify_loose_file(P_path);
|
||||
|
||||
// it's a regular data file; add it to the directory.
|
||||
return tree_add_file(td, name, m, ent->size, ent->mtime, 0);
|
||||
|
@ -104,9 +104,7 @@ LibError trace_write_to_file(const char* trace_filename)
|
||||
|
||||
LibError trace_read_from_file(const char* trace_filename, Trace* t)
|
||||
{
|
||||
// we use trace_add, which is the same mechanism called by trace_notify*;
|
||||
// therefore, tracing needs to be enabled.
|
||||
trace_enabled = true;
|
||||
trace_clear();
|
||||
|
||||
char N_fn[PATH_MAX];
|
||||
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
|
||||
@ -114,9 +112,12 @@ LibError trace_read_from_file(const char* trace_filename, Trace* t)
|
||||
if(!f)
|
||||
WARN_RETURN(ERR_FILE_NOT_FOUND);
|
||||
|
||||
// we use trace_add, which is the same mechanism called by trace_notify*;
|
||||
// therefore, tracing needs to be enabled.
|
||||
trace_enabled = true;
|
||||
|
||||
// parse lines and stuff them in trace_pool
|
||||
// (as if they had been trace_add-ed; replaces any existing data)
|
||||
trace_clear();
|
||||
// .. bake PATH_MAX limit into string.
|
||||
char fmt[30];
|
||||
snprintf(fmt, ARRAY_SIZE(fmt), "%%lf: %%c \"%%%d[^\"]\" %%d %%04x\n", PATH_MAX);
|
||||
@ -204,213 +205,444 @@ LibError trace_simulate(const char* trace_filename, uint flags)
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
struct FileList
|
||||
|
||||
|
||||
// enough for 64K unique files - ought to suffice.
|
||||
typedef u16 FileId;
|
||||
static const FileId NULL_ID = 0;
|
||||
|
||||
class IdMgr
|
||||
{
|
||||
const char** atom_fns;
|
||||
size_t num_files;
|
||||
size_t i;
|
||||
FileId cur;
|
||||
typedef std::map<const char*, FileId> Map;
|
||||
Map map;
|
||||
public:
|
||||
FileId get(const char* atom_fn)
|
||||
{
|
||||
Map::iterator it = map.find(atom_fn);
|
||||
if(it != map.end())
|
||||
return it->second;
|
||||
FileId id = cur++;
|
||||
map[atom_fn] = id;
|
||||
return id;
|
||||
}
|
||||
void reset() { cur = NULL_ID+1; }
|
||||
IdMgr() { reset(); }
|
||||
};
|
||||
static IdMgr id_mgr;
|
||||
|
||||
|
||||
struct FileAccess
|
||||
{
|
||||
const char* atom_fn;
|
||||
FileId id;
|
||||
|
||||
FileId prev;
|
||||
FileId next;
|
||||
bool visited;
|
||||
|
||||
FileAccess(const char* atom_fn_)
|
||||
{
|
||||
atom_fn = atom_fn_;
|
||||
prev = next = NULL_ID;
|
||||
id = id_mgr.get(atom_fn);
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::vector<FileAccess> FileAccesses;
|
||||
|
||||
class FileGatherer
|
||||
{
|
||||
// put all entries in one trace file: easier to handle; obviates FS enum code
|
||||
// rationale: don't go through file in order; instead, process most recent
|
||||
// run first, to give more weight to it (TSP code should go with first entry
|
||||
// when #occurrences are equal)
|
||||
struct Run
|
||||
{
|
||||
const TraceEntry* first;
|
||||
uint count;
|
||||
|
||||
// note: up to caller to initialize count (that's done when
|
||||
// starting the next run
|
||||
Run(const TraceEntry* first_) : first(first_) {}
|
||||
};
|
||||
|
||||
FileAccesses& file_accesses;
|
||||
|
||||
// improvement: postprocess the trace and remove all IOs that would be
|
||||
// satisfied by our cache. often repeated IOs would otherwise potentially
|
||||
// be arranged badly.
|
||||
void extract_accesses_from_run(const Run& run)
|
||||
{
|
||||
file_cache_flush();
|
||||
|
||||
const TraceEntry* ent = run.first;
|
||||
for(uint i = 0; i < run.count; i++, ent++)
|
||||
{
|
||||
// simulate carrying out the entry's TraceOp to determine
|
||||
// whether this IO would be satisfied by the file_buf cache.
|
||||
FileIOBuf buf;
|
||||
size_t size = ent->size;
|
||||
const char* atom_fn = ent->atom_fn;
|
||||
switch(ent->op)
|
||||
{
|
||||
case TO_LOAD:
|
||||
buf = file_cache_retrieve(atom_fn, &size);
|
||||
// would not be in cache: add to list of real IOs
|
||||
if(!buf)
|
||||
{
|
||||
bool long_lived = (ent->flags & FILE_LONG_LIVED) != 0;
|
||||
buf = file_buf_alloc(size, atom_fn, long_lived);
|
||||
(void)file_cache_add(buf, size, atom_fn);
|
||||
|
||||
file_accesses.push_back(atom_fn);
|
||||
}
|
||||
break;
|
||||
case TO_FREE:
|
||||
buf = file_cache_find(atom_fn, &size);
|
||||
(void)file_buf_free(buf);
|
||||
break;
|
||||
default:
|
||||
debug_warn("unknown TraceOp");
|
||||
}
|
||||
} // foreach entry
|
||||
|
||||
file_cache_flush();
|
||||
}
|
||||
|
||||
|
||||
// note: passing i and comparing timestamp with previous timestamp
|
||||
// avoids having to keep an extra local cur_time variable.
|
||||
bool is_start_of_run(uint i, const TraceEntry* ent)
|
||||
{
|
||||
// first item is always start of a run (protects [-1] below)
|
||||
if(i == 0)
|
||||
return true;
|
||||
|
||||
// timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
|
||||
if(ent->timestamp < ent[-1].timestamp)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
typedef std::vector<Run> Runs;
|
||||
Runs runs;
|
||||
void split_trace_into_runs(const Trace* t)
|
||||
{
|
||||
uint cur_run_length = 0;
|
||||
const TraceEntry* cur_entry = t->ents;
|
||||
for(uint i = 0; i < t->num_ents; i++)
|
||||
{
|
||||
cur_run_length++;
|
||||
if(is_start_of_run(i, cur_entry))
|
||||
{
|
||||
if(!runs.empty())
|
||||
runs.back().count = cur_run_length;
|
||||
cur_run_length = 0;
|
||||
runs.push_back(Run(cur_entry));
|
||||
}
|
||||
cur_entry++;
|
||||
}
|
||||
// set the last run's length
|
||||
if(!runs.empty())
|
||||
runs.back().count = cur_run_length;
|
||||
}
|
||||
|
||||
public:
|
||||
FileGatherer(const char* trace_filename, Filenames required_fns,
|
||||
FileAccesses& file_accesses_)
|
||||
: file_accesses(file_accesses_)
|
||||
{
|
||||
Trace t;
|
||||
if(trace_read_from_file(trace_filename, &t) == 0)
|
||||
{
|
||||
split_trace_into_runs(&t);
|
||||
// extract accesses from each run (starting with most recent
|
||||
// first. this isn't critical, but may help a bit since
|
||||
// files that are equally strongly 'connected' are ordered
|
||||
// according to position in file_accesses. that means files from
|
||||
// more recent traces tend to go first, which is good.)
|
||||
for(Runs::iterator it = runs.begin(); it != runs.end(); ++it)
|
||||
extract_accesses_from_run(*it);
|
||||
}
|
||||
|
||||
// add all remaining files that are to be put in archive
|
||||
for(uint i = 0; required_fns[i] != 0; i++)
|
||||
file_accesses.push_back(required_fns[i]);
|
||||
}
|
||||
|
||||
// should never be copied; this also squelches warning
|
||||
private:
|
||||
FileGatherer(const FileGatherer& rhs);
|
||||
FileGatherer& operator=(const FileGatherer& rhs);
|
||||
};
|
||||
|
||||
|
||||
static LibError filelist_build(Trace* t, FileList* fl)
|
||||
class TourBuilder
|
||||
{
|
||||
// count # files
|
||||
fl->num_files = 0;
|
||||
for(size_t i = 0; i < t->num_ents; i++)
|
||||
if(t->ents[i].op == TO_LOAD)
|
||||
fl->num_files++;
|
||||
typedef u32 ConnectionId;
|
||||
cassert(sizeof(FileId)*2 <= sizeof(ConnectionId));
|
||||
ConnectionId cid_make(FileId prev, FileId next)
|
||||
{
|
||||
return u32_from_u16(prev, next);
|
||||
}
|
||||
FileId cid_prev(ConnectionId id)
|
||||
{
|
||||
return u32_hi(id);
|
||||
}
|
||||
FileId cid_next(ConnectionId id)
|
||||
{
|
||||
return u32_lo(id);
|
||||
}
|
||||
|
||||
if(!fl->num_files)
|
||||
struct Connection
|
||||
{
|
||||
ConnectionId id;
|
||||
// repeated edges ("connections") are reflected in
|
||||
// the 'occurrences' count; we optimize the ordering so that
|
||||
// files with frequent connections are nearby.
|
||||
uint occurrences;
|
||||
|
||||
Connection(ConnectionId id_)
|
||||
: id(id_), occurrences(1) {}
|
||||
};
|
||||
|
||||
struct decreasing_occurrences: public std::binary_function<const Connection&, const Connection&, bool>
|
||||
{
|
||||
bool operator()(const Connection& c1, const Connection& c2) const
|
||||
{
|
||||
return (c1.occurrences > c2.occurrences);
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::vector<Connection> Connections;
|
||||
Connections connections;
|
||||
|
||||
// not const because we change the graph-related members
|
||||
FileAccesses& file_accesses;
|
||||
|
||||
void build_connections()
|
||||
{
|
||||
// reserve memory for worst-case amount of connections (happens if
|
||||
// all accesses are unique). this is necessary because we store
|
||||
// pointers to Connection in the map, which would be invalidated if
|
||||
// connections[] ever expands.
|
||||
connections.reserve(file_accesses.size()-1);
|
||||
|
||||
// we need to check before inserting a new connection if it has
|
||||
// come up before (to increment occurrences). this map speeds
|
||||
// things up from n*n to n*log(n) (n = # files).
|
||||
typedef std::map<ConnectionId, Connection*> Map;
|
||||
Map map;
|
||||
|
||||
// for each file pair (i-1, i): set up a Connection
|
||||
for(uint i = 1; i < file_accesses.size(); i++)
|
||||
{
|
||||
const ConnectionId c_id = cid_make(file_accesses[i-1].id, file_accesses[i].id);
|
||||
|
||||
Map::iterator it = map.find(c_id);
|
||||
if(it != map.end())
|
||||
it->second->occurrences++;
|
||||
else
|
||||
{
|
||||
connections.push_back(Connection(c_id));
|
||||
map[c_id] = &connections.back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool has_cycle;
|
||||
void detect_cycleR(FileId node)
|
||||
{
|
||||
FileAccess* pnode = &file_accesses[node];
|
||||
pnode->visited = true;
|
||||
FileId next = pnode->next;
|
||||
if(next != NULL_ID)
|
||||
{
|
||||
FileAccess* pnext = &file_accesses[next];
|
||||
if(pnext->visited)
|
||||
has_cycle = true;
|
||||
else
|
||||
detect_cycleR(next);
|
||||
}
|
||||
}
|
||||
bool is_cycle_at(FileId node)
|
||||
{
|
||||
has_cycle = false;
|
||||
for(FileAccesses::iterator it = file_accesses.begin(); it != file_accesses.end(); ++it)
|
||||
it->visited = 0;
|
||||
detect_cycleR(node);
|
||||
return has_cycle;
|
||||
}
|
||||
|
||||
void add_edge(const Connection& c)
|
||||
{
|
||||
FileId prev_id = cid_prev(c.id);
|
||||
FileId next_id = cid_next(c.id);
|
||||
|
||||
FileAccess& prev = file_accesses[prev_id];
|
||||
FileAccess& next = file_accesses[next_id];
|
||||
if(prev.next != NULL_ID || next.prev != NULL_ID)
|
||||
return;
|
||||
|
||||
prev.next = next_id;
|
||||
next.prev = prev_id;
|
||||
|
||||
bool introduced_cycle = is_cycle_at(next_id);
|
||||
debug_assert(introduced_cycle == is_cycle_at(prev_id));
|
||||
if(introduced_cycle)
|
||||
{
|
||||
// undo
|
||||
prev.next = next.prev = NULL_ID;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
public:
	// build the tour: derive connections from consecutive accesses, then
	// greedily add edges in order of decreasing occurrence count.
	TourBuilder(FileAccesses& file_accesses_)
		: file_accesses(file_accesses_)
	{
		build_connections();
		std::sort(connections.begin(), connections.end(), decreasing_occurrences());
		for(Connections::iterator it = connections.begin(); it != connections.end(); ++it)
			add_edge(*it);


		// TODO: walk tour; make sure all nodes are covered
		// add each one to FileList
	}
|
||||
|
||||
// should never be copied; this also squelches warning
|
||||
private:
|
||||
TourBuilder(const TourBuilder& rhs);
|
||||
TourBuilder& operator=(const TourBuilder& rhs);
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
|
||||
static LibError determine_optimal_ordering(const char* trace_filename, Filenames* pfns)
|
||||
{
|
||||
|
||||
|
||||
*pfl = 0;
|
||||
|
||||
|
||||
// count # files
|
||||
uint num_files = 0;
|
||||
for(size_t i = 0; i < t.num_ents; i++)
|
||||
if(t.ents[i].op == TO_LOAD)
|
||||
num_files++;
|
||||
|
||||
if(!num_files)
|
||||
return ERR_DIR_END;
|
||||
|
||||
fl->atom_fns = new const char*[fl->num_files];
|
||||
Filenames fns = (Filenames)malloc((num_files+1)*sizeof(const char*));
|
||||
if(!fns)
|
||||
return ERR_NO_MEM;
|
||||
|
||||
size_t ti = 0;
|
||||
for(size_t i = 0; i < fl->num_files; i++)
|
||||
for(size_t i = 0; i < num_files; i++)
|
||||
{
|
||||
// find next trace entry that is a load (must exist)
|
||||
while(t->ents[ti].op != TO_LOAD)
|
||||
while(t.ents[ti].op != TO_LOAD)
|
||||
ti++;
|
||||
fl->atom_fns[i] = t->ents[ti].atom_fn;
|
||||
fns[i] = t.ents[ti].atom_fn;
|
||||
ti++;
|
||||
}
|
||||
|
||||
fl->i = 0;
|
||||
trace_clear();
|
||||
*pfl = fl;
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
||||
static const char* filelist_get_next(FileList* fl)
|
||||
{
|
||||
if(fl->i == fl->num_files)
|
||||
return 0;
|
||||
return fl->atom_fns[fl->i++];
|
||||
}
|
||||
|
||||
|
||||
static void filelist_free(FileList* fl)
|
||||
{
|
||||
delete[] fl->atom_fns;
|
||||
fl->atom_fns = 0;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
typedef std::vector<const char*> FnVector;
// loose (not-yet-archived) files noticed since startup.
static FnVector loose_files;

// record that a loose file exists; the autobuild logic later uses the
// accumulated count to decide when to (re)build archives.
void vfs_opt_notify_loose_file(const char* atom_fn)
{
	// we could stop adding to loose_files if it's already got more than
	// REBUILD_MAIN_ARCHIVE_THRESHOLD entries, but don't bother
	// (it's ok to waste a bit of mem - this is rare)

	loose_files.push_back(atom_fn);
}
|
||||
|
||||
|
||||
// rebuild the main archive in <P_archive_dst_dir> using the
// trace-optimized file ordering. WIP - currently non-functional.
LibError vfs_opt_rebuild_main_archive(const char* P_archive_dst_dir)
{
	debug_warn("currently non-functional");

	// for each mount point (with VFS_MOUNT_ARCHIVE flag set):
	// get list of all files
	Filenames required_fns = 0;

	FileAccesses file_accesses;
	FileGatherer gatherer("../logs/trace.txt", required_fns, file_accesses);

	TourBuilder builder(file_accesses);
	// builder.store_list(pfns);
	Filenames fns = 0;
	// (Filenames)malloc((num_files+1)*sizeof(const char*));
	// if(!fns)
	//	return ERR_NO_MEM;

	char P_path[VFS_MAX_PATH];
	RETURN_ERR(vfs_path_append(P_path, P_archive_dst_dir, "main.zip"));
	// bugfix: pass the full path just built instead of the bare
	// filename, which ignored P_archive_dst_dir entirely.
	LibError ret = archive_build(P_path, fns);
	// TODO: delete all loose files in list
	free(fns);
	// TODO: delete all archives in P_dst_path
	return ret;
}
|
||||
|
||||
|
||||
//
|
||||
// autobuild logic: decides when to (re)build an archive.
|
||||
//
|
||||
|
||||
static const size_t REBUILD_MAIN_ARCHIVE_THRESHOLD = 100;
|
||||
static const size_t BUILD_MINI_ARCHIVE_THRESHOLD = 30;
|
||||
|
||||
static bool should_rebuild_main_archive()
|
||||
{
|
||||
if(loose_files.size() >= REBUILD_MAIN_ARCHIVE_THRESHOLD)
|
||||
return true;
|
||||
|
||||
// this is a selection of file types that are certainly not
|
||||
// further compressible. we need not include every type under the sun -
|
||||
// this is only a slight optimization that avoids wasting time
|
||||
// compressing files. the real decision as to cmethod is made based
|
||||
// on attained compression ratio.
|
||||
static const char* uncompressible_exts[] =
|
||||
{
|
||||
"zip", "rar",
|
||||
"jpg", "jpeg", "png",
|
||||
"ogg", "mp3"
|
||||
};
|
||||
// more than 3 mini archives
|
||||
|
||||
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
|
||||
{
|
||||
if(!stricmp(ext+1, uncompressible_exts[i]))
|
||||
return true;
|
||||
}
|
||||
// development build only: archive is more than 2 weeks old
|
||||
#ifndef FINAL
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
struct CompressParams
|
||||
static bool should_build_mini_archive()
|
||||
{
|
||||
bool attempt_compress;
|
||||
uintptr_t ctx;
|
||||
u32 crc;
|
||||
};
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
|
||||
{
|
||||
CompressParams* p = (CompressParams*)cb_ctx;
|
||||
|
||||
// comp_feed already makes note of total #bytes fed, and we need
|
||||
// vfs_io to return the uc size (to check if all data was read).
|
||||
*bytes_processed = size;
|
||||
|
||||
// update checksum
|
||||
p->crc = crc32(p->crc, (const Bytef*)block, (uInt)size);
|
||||
|
||||
if(p->attempt_compress)
|
||||
(void)comp_feed(p->ctx, block, size);
|
||||
return INFO_CB_CONTINUE;
|
||||
if(loose_files.size() >= BUILD_MINI_ARCHIVE_THRESHOLD)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
|
||||
ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf) // out
|
||||
LibError vfs_opt_auto_build_archive(const char* P_dst_path)
|
||||
{
|
||||
struct stat s;
|
||||
RETURN_ERR(vfs_stat(atom_fn, &s));
|
||||
const size_t ucsize = s.st_size;
|
||||
|
||||
const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
|
||||
if(attempt_compress)
|
||||
if(should_rebuild_main_archive())
|
||||
return vfs_opt_rebuild_main_archive(P_dst_path);
|
||||
else if(should_build_mini_archive())
|
||||
{
|
||||
RETURN_ERR(comp_reset(ctx));
|
||||
RETURN_ERR(comp_alloc_output(ctx, ucsize));
|
||||
loose_files.push_back(0);
|
||||
// get new unused mini archive name
|
||||
const char* archive_filename = 0;
|
||||
RETURN_ERR(archive_build(archive_filename, &loose_files[0]));
|
||||
// delete all newly added loose files
|
||||
}
|
||||
|
||||
// read file into newly allocated buffer. if attempt_compress, also
|
||||
// compress the file into another buffer while waiting for IOs.
|
||||
Handle hf = vfs_open(atom_fn, 0);
|
||||
RETURN_ERR(hf);
|
||||
buf = FILE_BUF_ALLOC;
|
||||
CompressParams params = { attempt_compress, ctx, 0 };
|
||||
ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)¶ms);
|
||||
debug_assert(ucsize_read == (ssize_t)ucsize);
|
||||
(void)vfs_close(hf);
|
||||
|
||||
// if we compressed the file trial-wise, check results and
|
||||
// decide whether to store as such or not (based on compression ratio)
|
||||
bool store_compressed = false;
|
||||
void* cdata = 0; size_t csize = 0;
|
||||
if(attempt_compress)
|
||||
{
|
||||
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
|
||||
|
||||
const float ratio = (float)ucsize / csize;
|
||||
const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
|
||||
if(ratio > 1.05f && bytes_saved > 200)
|
||||
store_compressed = true;
|
||||
}
|
||||
|
||||
// store file info
|
||||
ent.ucsize = (off_t)ucsize;
|
||||
ent.mtime = s.st_mtime;
|
||||
// .. ent.ofs is set by zip_archive_add_file
|
||||
ent.flags = 0;
|
||||
ent.atom_fn = atom_fn;
|
||||
ent.crc32 = params.crc;
|
||||
if(store_compressed)
|
||||
{
|
||||
ent.method = CM_DEFLATE;
|
||||
ent.csize = (off_t)csize;
|
||||
file_contents = cdata;
|
||||
}
|
||||
else
|
||||
{
|
||||
ent.method = CM_NONE;
|
||||
ent.csize = (off_t)ucsize;
|
||||
file_contents = (void*)buf;
|
||||
}
|
||||
|
||||
// note: no need to free cdata - it is owned by the
|
||||
// compression context and can be reused.
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
LibError build_optimized_archive(const char* trace_filename, const char* zip_filename)
|
||||
{
|
||||
FileList fl;
|
||||
{
|
||||
Trace t;
|
||||
RETURN_ERR(trace_read_from_file(trace_filename, &t));
|
||||
RETURN_ERR(filelist_build(&t, &fl));
|
||||
trace_clear();
|
||||
}
|
||||
|
||||
ZipArchive* za;
|
||||
RETURN_ERR(zip_archive_create(zip_filename, &za));
|
||||
uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
|
||||
|
||||
const char* atom_fn; // declare outside loop for easier debugging
|
||||
for(;;)
|
||||
{
|
||||
atom_fn = filelist_get_next(&fl);
|
||||
if(!atom_fn)
|
||||
break;
|
||||
|
||||
ArchiveEntry ent; void* file_contents; FileIOBuf buf;
|
||||
if(read_and_compress_file(atom_fn, ctx, ent, file_contents, buf) == ERR_OK)
|
||||
{
|
||||
(void)zip_archive_add_file(za, &ent, file_contents);
|
||||
(void)file_buf_free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
filelist_free(&fl);
|
||||
|
||||
// note: this is currently known to fail if there are no files in the list
|
||||
// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
|
||||
// safe to ignore.
|
||||
comp_free(ctx);
|
||||
(void)zip_archive_finish(za);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
@ -45,6 +45,8 @@ extern void trace_get(Trace* t);
|
||||
extern LibError trace_write_to_file(const char* trace_filename);
|
||||
extern LibError trace_read_from_file(const char* trace_filename, Trace* t);
|
||||
|
||||
extern LibError build_optimized_archive(const char* trace_filename, const char* zip_filename);
|
||||
extern LibError vfs_opt_rebuild_main_archive(const char* P_dst_path);
|
||||
|
||||
extern void vfs_opt_notify_loose_file(const char* atom_fn);
|
||||
|
||||
#endif // #ifndef VFS_OPTIMIZER_H__
|
||||
|
@ -105,7 +105,7 @@ static void ParseCommandLineArgs(int argc, char* argv[])
|
||||
// note: VFS init is sure to have been completed by now
|
||||
// (since CONFIG_Init reads from file); therefore,
|
||||
// it is safe to call this from here directly.
|
||||
build_optimized_archive("../logs/trace.txt", "test.zip");
|
||||
vfs_opt_rebuild_main_archive("mods/official");
|
||||
break;
|
||||
case 'c':
|
||||
if(strcmp(name, "conf") == 0)
|
||||
|
Loading…
Reference in New Issue
Block a user