adts: remove_least_valuable safely handles empty cache

lib: add u32_hi/lo
archive: move archive builder logic here from vfs_optimizer
file_cache: add flush() - used when simulating the cache
vfs_optimizer: WIP, not yet functional: file gatherer, 50% of TSP tour generator

This was SVN commit r3486.
janwas 2006-02-09 05:59:33 +00:00
parent 56d8d31cd0
commit 21f8f8555e
12 changed files with 625 additions and 183 deletions

View File

@ -294,8 +294,12 @@ public:
// remove the least valuable item and optionally indicate
// how big it was (useful for statistics).
// returns 0 if cache is empty.
T remove_least_valuable(size_t* psize = 0)
{
if(map.empty())
return 0;
// one iteration ought to suffice to evict someone due to
// definition of min_density, but we provide for repeating
// in case of floating-point imprecision.
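
For illustration only (not part of the diff): with this change, remove_least_valuable() returns 0 once the cache is empty, so callers can drain it in a plain loop; the optional size out-parameter is handy for statistics. A rough sketch, reusing the file_cache and cache_allocator names that appear later in this commit:

// minimal sketch (names borrowed from file_cache.cpp below): drain the cache,
// using the optional size out-parameter to tally how much memory is reclaimed.
static size_t drain_file_cache()
{
	size_t total_evicted = 0;
	for(;;)
	{
		size_t size;
		FileIOBuf discarded = file_cache.remove_least_valuable(&size);
		if(!discarded)		// 0 => cache is now empty
			break;
		cache_allocator.free((u8*)discarded, size);
		total_evicted += size;
	}
	return total_evicted;
}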

View File

@ -315,9 +315,21 @@ u32 u64_hi(u64 x)
u32 u64_lo(u64 x)
{
return (u32)(x & 0xFFFFFFFF);
}
u16 u32_hi(u32 x)
{
return (u16)(x >> 16);
}
u16 u32_lo(u32 x)
{
return (u16)(x & 0xFFFF);
}
u64 u64_from_u32(u32 hi, u32 lo)
{
u64 x = (u64)hi;

View File

@ -283,6 +283,9 @@ extern uintptr_t round_down(uintptr_t n, uintptr_t multiple);
// less than the bit width of the type).
extern u32 u64_hi(u64 x);
extern u32 u64_lo(u64 x);
extern u16 u32_hi(u32 x);
extern u16 u32_lo(u32 x);
extern u64 u64_from_u32(u32 hi, u32 lo);
extern u32 u32_from_u16(u16 hi, u16 lo);
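
For illustration (not part of the diff): the new 16-bit helpers mirror the existing 64-bit pair and round-trip through u32_from_u16; the ConnectionId packing in vfs_optimizer below relies on exactly this. A minimal sketch:

// illustrative only: pack two 16-bit ids into one 32-bit key and split it again
// (this is what vfs_optimizer's ConnectionId below does with FileIds).
static void u32_hi_lo_example()
{
	const u16 hi = 0x1234, lo = 0xABCD;
	const u32 key = u32_from_u16(hi, lo);	// 0x1234ABCD
	debug_assert(u32_hi(key) == hi);	// (u16)(key >> 16)
	debug_assert(u32_lo(key) == lo);	// (u16)(key & 0xFFFF)
}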

View File

@ -604,3 +604,158 @@ LibError afile_unmap(AFile* af)
H_DEREF(af->ha, Archive, a);
return file_unmap(&a->f);
}
//-----------------------------------------------------------------------------
// archive builder
//-----------------------------------------------------------------------------
static inline bool file_type_is_uncompressible(const char* fn)
{
const char* ext = strrchr(fn, '.');
// no extension? bail; assume compressible.
if(!ext)
return false;
// this is a selection of file types that are certainly not
// further compressible. we need not include every type under the sun -
// this is only a slight optimization that avoids wasting time
// compressing files. the real decision as to cmethod is made based
// on attained compression ratio.
static const char* uncompressible_exts[] =
{
"zip", "rar",
"jpg", "jpeg", "png",
"ogg", "mp3"
};
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
{
if(!stricmp(ext+1, uncompressible_exts[i]))
return true;
}
return false;
}
struct CompressParams
{
bool attempt_compress;
uintptr_t ctx;
u32 crc;
};
#include <zlib.h>
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
{
CompressParams* p = (CompressParams*)cb_ctx;
// comp_feed already makes note of total #bytes fed, and we need
// vfs_io to return the uc size (to check if all data was read).
*bytes_processed = size;
// update checksum
p->crc = crc32(p->crc, (const Bytef*)block, (uInt)size);
if(p->attempt_compress)
(void)comp_feed(p->ctx, block, size);
return INFO_CB_CONTINUE;
}
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf) // out
{
struct stat s;
RETURN_ERR(vfs_stat(atom_fn, &s));
const size_t ucsize = s.st_size;
const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
if(attempt_compress)
{
RETURN_ERR(comp_reset(ctx));
RETURN_ERR(comp_alloc_output(ctx, ucsize));
}
// read file into newly allocated buffer. if attempt_compress, also
// compress the file into another buffer while waiting for IOs.
Handle hf = vfs_open(atom_fn, 0);
RETURN_ERR(hf);
buf = FILE_BUF_ALLOC;
CompressParams params = { attempt_compress, ctx, 0 };
ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)&params);
debug_assert(ucsize_read == (ssize_t)ucsize);
(void)vfs_close(hf);
// if we compressed the file trial-wise, check results and
// decide whether to store as such or not (based on compression ratio)
bool store_compressed = false;
void* cdata = 0; size_t csize = 0;
if(attempt_compress)
{
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
const float ratio = (float)ucsize / csize;
const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
if(ratio > 1.05f && bytes_saved > 200)
store_compressed = true;
}
// store file info
ent.ucsize = (off_t)ucsize;
ent.mtime = s.st_mtime;
// .. ent.ofs is set by zip_archive_add_file
ent.flags = 0;
ent.atom_fn = atom_fn;
ent.crc32 = params.crc;
if(store_compressed)
{
ent.method = CM_DEFLATE;
ent.csize = (off_t)csize;
file_contents = cdata;
}
else
{
ent.method = CM_NONE;
ent.csize = (off_t)ucsize;
file_contents = (void*)buf;
}
// note: no need to free cdata - it is owned by the
// compression context and can be reused.
return ERR_OK;
}
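
// (illustrative aside, not part of the diff: the heuristic above stores a file
// compressed only if it shrinks by more than ~5% AND by more than 200 bytes.
// a hypothetical helper isolating that decision:)
static bool is_worth_storing_compressed(size_t ucsize, size_t csize)
{
	const float ratio = (float)ucsize / csize;
	const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
	// e.g. 10000 -> 9000 bytes: ratio 1.11, 1000 saved => compress;
	//        300 ->  280 bytes: ratio 1.07, only 20 saved => store raw.
	return ratio > 1.05f && bytes_saved > 200;
}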
LibError archive_build(const char* P_archive_filename, Filenames V_fl)
{
ZipArchive* za;
RETURN_ERR(zip_archive_create(P_archive_filename, &za));
uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
const char* fn; // declare outside loop for easier debugging
for(size_t i = 0; ; i++)
{
fn = V_fl[i];
if(!fn)
break;
ArchiveEntry ent; void* file_contents; FileIOBuf buf;
if(read_and_compress_file(fn, ctx, ent, file_contents, buf) == ERR_OK)
{
(void)zip_archive_add_file(za, &ent, file_contents);
(void)file_buf_free(buf);
}
}
// note: this is currently known to fail if there are no files in the list
// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
// safe to ignore.
comp_free(ctx);
(void)zip_archive_finish(za);
return ERR_OK;
}
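
A quick usage sketch (illustrative only; the paths and archive name are made up): archive_build expects a NULL-terminated list of VFS filenames, as the loop above shows.

static LibError build_example_archive()
{
	// hypothetical VFS paths; the list must end with a NULL entry.
	static const char* files_to_pack[] =
	{
		"art/textures/ui/example.png",
		"audio/voice/example.ogg",
		0
	};
	return archive_build("mods/official/example.zip", files_to_pack);
}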

View File

@ -159,6 +159,18 @@ extern LibError afile_map(AFile* af, void*& p, size_t& size);
extern LibError afile_unmap(AFile* af);
//
// archive builder
//
// array of pointers to VFS filenames (including path), terminated by a
// NULL entry.
typedef const char** Filenames;
extern LibError archive_build(const char* P_archive_filename, Filenames V_fl);
enum ArchiveFileFlags
{

View File

@ -313,7 +313,7 @@ typedef const u8* FileIOBuf;
FileIOBuf* const FILE_BUF_TEMP = (FileIOBuf*)1;
const FileIOBuf FILE_BUF_ALLOC = (FileIOBuf)2;
extern FileIOBuf file_buf_alloc(size_t size, const char* atom_fn, bool long_lived);
extern LibError file_buf_free(FileIOBuf buf);

View File

@ -786,13 +786,17 @@ LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn)
// decide (based on flags) if buf is to be cached; set cost
uint cost = 1;
if(buf)
cache_allocator.make_read_only((u8*)buf, size);
file_cache.add(atom_fn, buf, size, cost);
return ERR_OK;
}
// called by trace simulator to retrieve buffer address, given atom_fn.
// must not change any cache state (e.g. notify stats or add ref).
FileIOBuf file_cache_find(const char* atom_fn, size_t* size)
@ -851,9 +855,6 @@ file_buf_free and there are only a few active at a time ( < 10)
// remove all blocks loaded from the file <fn>. used when reloading the file.
LibError file_cache_invalidate(const char* P_fn)
{
@ -877,6 +878,23 @@ LibError file_cache_invalidate(const char* P_fn)
}
void file_cache_flush()
{
for(;;)
{
size_t size;
FileIOBuf discarded_buf = file_cache.remove_least_valuable(&size);
// cache is now empty - done
if(!discarded_buf)
return;
#include "nommgr.h"
cache_allocator.free((u8*)discarded_buf, size);
#include "mmgr.h"
}
}
void file_cache_init()
{
}
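
The commit message notes that flush() exists for simulating the cache; its intended call pattern looks roughly like this (a simplified mirror of FileGatherer::extract_accesses_from_run in vfs_optimizer further down; it only handles the load path):

// rough sketch: replay one run of trace entries against the cache,
// flushing before and after so the simulation leaves no state behind.
static void simulate_run(const TraceEntry* ents, uint count)
{
	file_cache_flush();
	for(uint i = 0; i < count; i++)
	{
		size_t size = ents[i].size;
		FileIOBuf buf = file_cache_retrieve(ents[i].atom_fn, &size);
		if(!buf)	// miss: this access would have caused a real IO
		{
			buf = file_buf_alloc(size, ents[i].atom_fn, false);
			(void)file_cache_add(buf, size, ents[i].atom_fn);
		}
	}
	file_cache_flush();
}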

View File

@ -30,6 +30,9 @@ extern FileIOBuf file_cache_find(const char* atom_fn, size_t* size);
extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size);
extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn);
extern LibError file_cache_invalidate(const char* fn);
extern void file_cache_flush();
extern void file_cache_init();
extern void file_cache_shutdown();

View File

@ -353,6 +353,7 @@ static LibError add_ent(TDir* td, DirEnt* ent, const char* P_parent_path, const
// prepend parent path to get complete pathname.
char P_path[PATH_MAX];
CHECK_ERR(vfs_path_append(P_path, P_parent_path, name));
vfs_opt_notify_loose_file(P_path);
// it's a regular data file; add it to the directory.
return tree_add_file(td, name, m, ent->size, ent->mtime, 0);

View File

@ -104,9 +104,7 @@ LibError trace_write_to_file(const char* trace_filename)
LibError trace_read_from_file(const char* trace_filename, Trace* t)
{
// we use trace_add, which is the same mechanism called by trace_notify*;
// therefore, tracing needs to be enabled.
trace_enabled = true;
trace_clear();
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
@ -114,9 +112,12 @@ LibError trace_read_from_file(const char* trace_filename, Trace* t)
if(!f)
WARN_RETURN(ERR_FILE_NOT_FOUND);
// we use trace_add, which is the same mechanism called by trace_notify*;
// therefore, tracing needs to be enabled.
trace_enabled = true;
// parse lines and stuff them in trace_pool
// (as if they had been trace_add-ed; replaces any existing data)
trace_clear();
// .. bake PATH_MAX limit into string.
char fmt[30];
snprintf(fmt, ARRAY_SIZE(fmt), "%%lf: %%c \"%%%d[^\"]\" %%d %%04x\n", PATH_MAX);
@ -204,213 +205,444 @@ LibError trace_simulate(const char* trace_filename, uint flags)
//-----------------------------------------------------------------------------
struct FileList
// enough for 64K unique files - ought to suffice.
typedef u16 FileId;
static const FileId NULL_ID = 0;
class IdMgr
{
const char** atom_fns;
size_t num_files;
size_t i;
FileId cur;
typedef std::map<const char*, FileId> Map;
Map map;
public:
FileId get(const char* atom_fn)
{
Map::iterator it = map.find(atom_fn);
if(it != map.end())
return it->second;
FileId id = cur++;
map[atom_fn] = id;
return id;
}
void reset() { cur = NULL_ID+1; }
IdMgr() { reset(); }
};
static IdMgr id_mgr;
struct FileAccess
{
const char* atom_fn;
FileId id;
FileId prev;
FileId next;
bool visited;
FileAccess(const char* atom_fn_)
{
atom_fn = atom_fn_;
prev = next = NULL_ID;
id = id_mgr.get(atom_fn);
}
};
typedef std::vector<FileAccess> FileAccesses;
class FileGatherer
{
// put all entries in one trace file: easier to handle; obviates FS enum code
// rationale: don't go through file in order; instead, process most recent
// run first, to give more weight to it (TSP code should go with first entry
// when #occurrences are equal)
struct Run
{
const TraceEntry* first;
uint count;
// note: up to caller to initialize count (that's done when
// starting the next run).
Run(const TraceEntry* first_) : first(first_) {}
};
FileAccesses& file_accesses;
// improvement: postprocess the trace and remove all IOs that would be
// satisfied by our cache. often repeated IOs would otherwise potentially
// be arranged badly.
void extract_accesses_from_run(const Run& run)
{
file_cache_flush();
const TraceEntry* ent = run.first;
for(uint i = 0; i < run.count; i++, ent++)
{
// simulate carrying out the entry's TraceOp to determine
// whether this IO would be satisfied by the file_buf cache.
FileIOBuf buf;
size_t size = ent->size;
const char* atom_fn = ent->atom_fn;
switch(ent->op)
{
case TO_LOAD:
buf = file_cache_retrieve(atom_fn, &size);
// would not be in cache: add to list of real IOs
if(!buf)
{
bool long_lived = (ent->flags & FILE_LONG_LIVED) != 0;
buf = file_buf_alloc(size, atom_fn, long_lived);
(void)file_cache_add(buf, size, atom_fn);
file_accesses.push_back(atom_fn);
}
break;
case TO_FREE:
buf = file_cache_find(atom_fn, &size);
(void)file_buf_free(buf);
break;
default:
debug_warn("unknown TraceOp");
}
} // foreach entry
file_cache_flush();
}
// note: passing i and comparing timestamp with previous timestamp
// avoids having to keep an extra local cur_time variable.
bool is_start_of_run(uint i, const TraceEntry* ent)
{
// first item is always start of a run (protects [-1] below)
if(i == 0)
return true;
// timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
if(ent->timestamp < ent[-1].timestamp)
return true;
return false;
}
typedef std::vector<Run> Runs;
Runs runs;
void split_trace_into_runs(const Trace* t)
{
uint cur_run_length = 0;
const TraceEntry* cur_entry = t->ents;
for(uint i = 0; i < t->num_ents; i++)
{
cur_run_length++;
if(is_start_of_run(i, cur_entry))
{
if(!runs.empty())
runs.back().count = cur_run_length;
cur_run_length = 0;
runs.push_back(Run(cur_entry));
}
cur_entry++;
}
// set the last run's length
if(!runs.empty())
runs.back().count = cur_run_length;
}
public:
FileGatherer(const char* trace_filename, Filenames required_fns,
FileAccesses& file_accesses_)
: file_accesses(file_accesses_)
{
Trace t;
if(trace_read_from_file(trace_filename, &t) == 0)
{
split_trace_into_runs(&t);
// extract accesses from each run (starting with most recent
// first. this isn't critical, but may help a bit since
// files that are equally strongly 'connected' are ordered
// according to position in file_accesses. that means files from
// more recent traces tend to go first, which is good.)
for(Runs::iterator it = runs.begin(); it != runs.end(); ++it)
extract_accesses_from_run(*it);
}
// add all remaining files that are to be put in archive
for(uint i = 0; required_fns[i] != 0; i++)
file_accesses.push_back(required_fns[i]);
}
// should never be copied; this also squelches warning
private:
FileGatherer(const FileGatherer& rhs);
FileGatherer& operator=(const FileGatherer& rhs);
};
static LibError filelist_build(Trace* t, FileList* fl)
class TourBuilder
{
// count # files
fl->num_files = 0;
for(size_t i = 0; i < t->num_ents; i++)
if(t->ents[i].op == TO_LOAD)
fl->num_files++;
typedef u32 ConnectionId;
cassert(sizeof(FileId)*2 <= sizeof(ConnectionId));
ConnectionId cid_make(FileId prev, FileId next)
{
return u32_from_u16(prev, next);
}
FileId cid_prev(ConnectionId id)
{
return u32_hi(id);
}
FileId cid_next(ConnectionId id)
{
return u32_lo(id);
}
if(!fl->num_files)
struct Connection
{
ConnectionId id;
// repeated edges ("connections") are reflected in
// the 'occurrences' count; we optimize the ordering so that
// files with frequent connections are nearby.
uint occurrences;
Connection(ConnectionId id_)
: id(id_), occurrences(1) {}
};
struct decreasing_occurrences: public std::binary_function<const Connection&, const Connection&, bool>
{
bool operator()(const Connection& c1, const Connection& c2) const
{
return (c1.occurrences > c2.occurrences);
}
};
typedef std::vector<Connection> Connections;
Connections connections;
// not const because we change the graph-related members
FileAccesses& file_accesses;
void build_connections()
{
// reserve memory for worst-case amount of connections (happens if
// all accesses are unique). this is necessary because we store
// pointers to Connection in the map, which would be invalidated if
// connections[] ever expands.
connections.reserve(file_accesses.size()-1);
// we need to check before inserting a new connection if it has
// come up before (to increment occurrences). this map speeds
// things up from n*n to n*log(n) (n = # files).
typedef std::map<ConnectionId, Connection*> Map;
Map map;
// for each file pair (i-1, i): set up a Connection
for(uint i = 1; i < file_accesses.size(); i++)
{
const ConnectionId c_id = cid_make(file_accesses[i-1].id, file_accesses[i].id);
Map::iterator it = map.find(c_id);
if(it != map.end())
it->second->occurrences++;
else
{
connections.push_back(Connection(c_id));
map[c_id] = &connections.back();
}
}
}
bool has_cycle;
void detect_cycleR(FileId node)
{
FileAccess* pnode = &file_accesses[node];
pnode->visited = true;
FileId next = pnode->next;
if(next != NULL_ID)
{
FileAccess* pnext = &file_accesses[next];
if(pnext->visited)
has_cycle = true;
else
detect_cycleR(next);
}
}
bool is_cycle_at(FileId node)
{
has_cycle = false;
for(FileAccesses::iterator it = file_accesses.begin(); it != file_accesses.end(); ++it)
it->visited = 0;
detect_cycleR(node);
return has_cycle;
}
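
// note: add_edge (below) is a greedy-edge heuristic. connections are offered
// in order of decreasing occurrence count; an edge is kept only if its
// endpoints still have the corresponding slot free (each file gets at most
// one successor and one predecessor) and keeping it would not close a cycle.
// the surviving edges thus form disjoint simple paths, which the (still
// missing) tour walk can later concatenate into the final file ordering.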
void add_edge(const Connection& c)
{
FileId prev_id = cid_prev(c.id);
FileId next_id = cid_next(c.id);
FileAccess& prev = file_accesses[prev_id];
FileAccess& next = file_accesses[next_id];
if(prev.next != NULL_ID || next.prev != NULL_ID)
return;
prev.next = next_id;
next.prev = prev_id;
bool introduced_cycle = is_cycle_at(next_id);
debug_assert(introduced_cycle == is_cycle_at(prev_id));
if(introduced_cycle)
{
// undo
prev.next = next.prev = NULL_ID;
return;
}
}
public:
TourBuilder(FileAccesses& file_accesses_)
: file_accesses(file_accesses_)
{
build_connections();
std::sort(connections.begin(), connections.end(), decreasing_occurrences());
for(Connections::iterator it = connections.begin(); it != connections.end(); ++it)
add_edge(*it);
// walk tour; make sure all nodes are covered
// add each one to FileList
}
// should never be copied; this also squelches warning
private:
TourBuilder(const TourBuilder& rhs);
TourBuilder& operator=(const TourBuilder& rhs);
};
/*
static LibError determine_optimal_ordering(const char* trace_filename, Filenames* pfns)
{
*pfl = 0;
// count # files
uint num_files = 0;
for(size_t i = 0; i < t.num_ents; i++)
if(t.ents[i].op == TO_LOAD)
num_files++;
if(!num_files)
return ERR_DIR_END;
fl->atom_fns = new const char*[fl->num_files];
Filenames fns = (Filenames)malloc((num_files+1)*sizeof(const char*));
if(!fns)
return ERR_NO_MEM;
size_t ti = 0;
for(size_t i = 0; i < fl->num_files; i++)
for(size_t i = 0; i < num_files; i++)
{
// find next trace entry that is a load (must exist)
while(t->ents[ti].op != TO_LOAD)
while(t.ents[ti].op != TO_LOAD)
ti++;
fl->atom_fns[i] = t->ents[ti].atom_fn;
fns[i] = t.ents[ti].atom_fn;
ti++;
}
fl->i = 0;
trace_clear();
*pfl = fl;
return ERR_OK;
}
static const char* filelist_get_next(FileList* fl)
{
if(fl->i == fl->num_files)
return 0;
return fl->atom_fns[fl->i++];
}
static void filelist_free(FileList* fl)
{
delete[] fl->atom_fns;
fl->atom_fns = 0;
}
*/
//-----------------------------------------------------------------------------
static inline bool file_type_is_uncompressible(const char* fn)
typedef std::vector<const char*> FnVector;
static FnVector loose_files;
void vfs_opt_notify_loose_file(const char* atom_fn)
{
const char* ext = strrchr(fn, '.');
// no extension? bail; assume compressible
if(!ext)
// we could stop adding to loose_files if it's already got more than
// REBUILD_MAIN_ARCHIVE_THRESHOLD entries, but don't bother
// (it's ok to waste a bit of mem - this is rare)
loose_files.push_back(atom_fn);
}
LibError vfs_opt_rebuild_main_archive(const char* P_archive_dst_dir)
{
debug_warn("currently non-functional");
// for each mount point (with VFS_MOUNT_ARCHIVE flag set):
// get list of all files
Filenames required_fns = 0;
FileAccesses file_accesses;
FileGatherer gatherer("../logs/trace.txt", required_fns, file_accesses);
TourBuilder builder(file_accesses);
// builder.store_list(pfns);
Filenames fns = 0;
// (Filenames)malloc((num_files+1)*sizeof(const char*));
// if(!fns)
// return ERR_NO_MEM;
char P_path[VFS_MAX_PATH];
RETURN_ERR(vfs_path_append(P_path, P_archive_dst_dir, "main.zip"));
LibError ret = archive_build("main.zip", fns);
// delete all loose files in list
free(fns);
// delete all archives in P_dst_path
return ret;
}
//
// autobuild logic: decides when to (re)build an archive.
//
static const size_t REBUILD_MAIN_ARCHIVE_THRESHOLD = 100;
static const size_t BUILD_MINI_ARCHIVE_THRESHOLD = 30;
static bool should_rebuild_main_archive()
{
if(loose_files.size() >= REBUILD_MAIN_ARCHIVE_THRESHOLD)
return true;
// this is a selection of file types that are certainly not
// further compressible. we need not include every type under the sun -
// this is only a slight optimization that avoids wasting time
// compressing files. the real decision as to cmethod is made based
// on attained compression ratio.
static const char* uncompressible_exts[] =
{
"zip", "rar",
"jpg", "jpeg", "png",
"ogg", "mp3"
};
// more than 3 mini archives
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
{
if(!stricmp(ext+1, uncompressible_exts[i]))
return true;
}
// development build only: archive is more than 2 weeks old
#ifndef FINAL
#endif
return false;
}
struct CompressParams
static bool should_build_mini_archive()
{
bool attempt_compress;
uintptr_t ctx;
u32 crc;
};
#include <zlib.h>
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
{
CompressParams* p = (CompressParams*)cb_ctx;
// comp_feed already makes note of total #bytes fed, and we need
// vfs_io to return the uc size (to check if all data was read).
*bytes_processed = size;
// update checksum
p->crc = crc32(p->crc, (const Bytef*)block, (uInt)size);
if(p->attempt_compress)
(void)comp_feed(p->ctx, block, size);
return INFO_CB_CONTINUE;
if(loose_files.size() >= BUILD_MINI_ARCHIVE_THRESHOLD)
return true;
return false;
}
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf) // out
LibError vfs_opt_auto_build_archive(const char* P_dst_path)
{
struct stat s;
RETURN_ERR(vfs_stat(atom_fn, &s));
const size_t ucsize = s.st_size;
const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
if(attempt_compress)
if(should_rebuild_main_archive())
return vfs_opt_rebuild_main_archive(P_dst_path);
else if(should_build_mini_archive())
{
RETURN_ERR(comp_reset(ctx));
RETURN_ERR(comp_alloc_output(ctx, ucsize));
loose_files.push_back(0);
// get new unused mini archive name
const char* archive_filename = 0;
RETURN_ERR(archive_build(archive_filename, &loose_files[0]));
// delete all newly added loose files
}
// read file into newly allocated buffer. if attempt_compress, also
// compress the file into another buffer while waiting for IOs.
Handle hf = vfs_open(atom_fn, 0);
RETURN_ERR(hf);
buf = FILE_BUF_ALLOC;
CompressParams params = { attempt_compress, ctx, 0 };
ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)&params);
debug_assert(ucsize_read == (ssize_t)ucsize);
(void)vfs_close(hf);
// if we compressed the file trial-wise, check results and
// decide whether to store as such or not (based on compression ratio)
bool store_compressed = false;
void* cdata = 0; size_t csize = 0;
if(attempt_compress)
{
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
const float ratio = (float)ucsize / csize;
const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
if(ratio > 1.05f && bytes_saved > 200)
store_compressed = true;
}
// store file info
ent.ucsize = (off_t)ucsize;
ent.mtime = s.st_mtime;
// .. ent.ofs is set by zip_archive_add_file
ent.flags = 0;
ent.atom_fn = atom_fn;
ent.crc32 = params.crc;
if(store_compressed)
{
ent.method = CM_DEFLATE;
ent.csize = (off_t)csize;
file_contents = cdata;
}
else
{
ent.method = CM_NONE;
ent.csize = (off_t)ucsize;
file_contents = (void*)buf;
}
// note: no need to free cdata - it is owned by the
// compression context and can be reused.
return ERR_OK;
}
LibError build_optimized_archive(const char* trace_filename, const char* zip_filename)
{
FileList fl;
{
Trace t;
RETURN_ERR(trace_read_from_file(trace_filename, &t));
RETURN_ERR(filelist_build(&t, &fl));
trace_clear();
}
ZipArchive* za;
RETURN_ERR(zip_archive_create(zip_filename, &za));
uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
const char* atom_fn; // declare outside loop for easier debugging
for(;;)
{
atom_fn = filelist_get_next(&fl);
if(!atom_fn)
break;
ArchiveEntry ent; void* file_contents; FileIOBuf buf;
if(read_and_compress_file(atom_fn, ctx, ent, file_contents, buf) == ERR_OK)
{
(void)zip_archive_add_file(za, &ent, file_contents);
(void)file_buf_free(buf);
}
}
filelist_free(&fl);
// note: this is currently known to fail if there are no files in the list
// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
// safe to ignore.
comp_free(ctx);
(void)zip_archive_finish(za);
return ERR_OK;
}
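
The tour walk that TourBuilder still lacks ("walk tour; make sure all nodes are covered") might look roughly like the sketch below. It is purely hypothetical: it reuses the FileAccesses/FnVector types from above and, like detect_cycleR, assumes a FileId can be used to index file_accesses directly; duplicate accesses of the same file would also still need to be filtered out.

// hypothetical sketch of the missing tour walk: emit each path by starting
// at a node without predecessor and following the next links; files that
// never got connected fall out naturally as paths of length 1.
static void store_tour(const FileAccesses& file_accesses, FnVector& ordered_fns)
{
	for(FileAccesses::const_iterator it = file_accesses.begin(); it != file_accesses.end(); ++it)
	{
		if(it->prev != NULL_ID)	// not the head of a path
			continue;
		for(FileId id = it->id; id != NULL_ID; id = file_accesses[id].next)
			ordered_fns.push_back(file_accesses[id].atom_fn);
	}
}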

View File

@ -45,6 +45,8 @@ extern void trace_get(Trace* t);
extern LibError trace_write_to_file(const char* trace_filename);
extern LibError trace_read_from_file(const char* trace_filename, Trace* t);
extern LibError build_optimized_archive(const char* trace_filename, const char* zip_filename);
extern LibError vfs_opt_rebuild_main_archive(const char* P_dst_path);
extern void vfs_opt_notify_loose_file(const char* atom_fn);
#endif // #ifndef VFS_OPTIMIZER_H__

View File

@ -105,7 +105,7 @@ static void ParseCommandLineArgs(int argc, char* argv[])
// note: VFS init is sure to have been completed by now
// (since CONFIG_Init reads from file); therefore,
// it is safe to call this from here directly.
build_optimized_archive("../logs/trace.txt", "test.zip");
vfs_opt_rebuild_main_archive("mods/official");
break;
case 'c':
if(strcmp(name, "conf") == 0)