janwas bc07b9ff0a file code v2 is now feature complete and works.
- adts: add basic benchmark to self-test; use fminf instead of MIN; fix
overly pessimistic invalidation in Landlord_Cached
- allocators: no longer complain if DynArray is full
- lib: warn if rand() min=max
- archive: bugfixes: plug leak, correctly handle 0-length files, carry
over flags when reading uncompressed file from archive
- file: expose file_get_sorted_dirents and implement file_enum in terms
of that
- file_cache: many bugfixes (especially related to CacheAllocator)
- vfs_optimizer: finish implementation of archive builder
- zip: correctly handle 0-length files
- tex: add tex_is_known_extension
- sysdep: add efficient fminf implementation (assumes P6+)

- config: update archive builder call site
- gamesetup: add call for delayed self tests
- entity: return JSVAL_VOID as per philip's suggestion

This was SVN commit r3553.
2006-02-23 07:55:32 +00:00

668 lines
16 KiB

#include "precompiled.h"
#include "lib/allocators.h"
#include "lib/timer.h"
#include "file_internal.h"
# include "ps/VFSUtil.h"
// state of the trace module.
// trace_initialized is only changed via CAS in this file:
// 0 -> 1 in trace_init, 1 -> 2 in trace_shutdown.
static uintptr_t trace_initialized; // set via CAS
// pool that trace_add allocates TraceEntry records from; trace_get
// exposes its contents as an array of TraceEntry.
static Pool trace_pool;
// call before using trace_pool. no-op if called more than once:
// pool_create is only reached when the CAS of trace_initialized from
// 0 to 1 evaluates true.
// pool_create is passed 4*MiB and sizeof(TraceEntry); its return value is
// deliberately discarded.
static inline void trace_init()
if(CAS(&trace_initialized, 0, 1))
(void)pool_create(&trace_pool, 4*MiB, sizeof(TraceEntry));
// counterpart of trace_init: attempts to move trace_initialized from
// 1 to 2 via CAS.
// NOTE(review): the statement guarded by this condition is not visible
// here - presumably it releases trace_pool; confirm.
void trace_shutdown()
if(CAS(&trace_initialized, 1, 2))
// module-wide on/off switch for tracing. also written directly by
// trace_read_from_file and trace_simulate.
static bool trace_enabled;
// sets trace_enabled to <want_enabled>.
void trace_enable(bool want_enabled)
trace_enabled = want_enabled;
// append one entry to the trace.
// op: the operation being recorded (TO_LOAD or TO_FREE).
// P_fn: filename; stored as the result of file_make_unique_fn_copy(P_fn).
// size, flags: copied verbatim into the entry.
// timestamp: if left at 0.0, the current get_time() value is used instead;
//   trace_read_from_file passes the value parsed from the file.
static void trace_add(TraceOp op, const char* P_fn, size_t size, uint flags = 0, double timestamp = 0.0)
// 0.0 acts as the "no timestamp supplied" sentinel.
if(timestamp == 0.0)
timestamp = get_time();
// the new entry comes from trace_pool.
TraceEntry* t = (TraceEntry*)pool_alloc(&trace_pool, 0);
t->timestamp = timestamp;
t->atom_fn = file_make_unique_fn_copy(P_fn);
t->size = size;
t->op = op;
t->flags = flags;
// record a TO_LOAD entry for <P_fn> with the given size and flags
// (timestamp is left at trace_add's default).
void trace_notify_load(const char* P_fn, size_t size, uint flags)
trace_add(TO_LOAD, P_fn, size, flags);
// record a TO_FREE entry for <P_fn> with the given size
// (flags and timestamp are left at trace_add's defaults).
void trace_notify_free(const char* P_fn, size_t size)
trace_add(TO_FREE, P_fn, size);
// fill <t> with a view of the entries currently held in trace_pool.
// t->ents points directly at the pool's base address (no copy is made);
// t->num_ents is the pool's current position divided by the entry size.
void trace_get(Trace* t)
t->ents = (const TraceEntry*)trace_pool.da.base;
t->num_ents = (uint)(trace_pool.da.pos / sizeof(TraceEntry));
// NOTE(review): only the signature is visible here; presumably this
// discards all recorded trace entries - confirm against the body.
void trace_clear()
// write trace entries to the text file <trace_filename>, one line each:
//   <timestamp>: <L|F> "<filename>" <size> <flags in hex>
// this is the format that trace_read_from_file parses.
// the filename is first converted via file_make_full_native_path;
// RETURN_ERR presumably propagates a failure of that call to the caller.
// returns ERR_OK at the end of the function.
LibError trace_write_to_file(const char* trace_filename)
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
// "wt": open for writing in text mode.
FILE* f = fopen(N_fn, "wt");
Trace t;
const TraceEntry* ent = t.ents;
for(size_t i = 0; i < t.num_ents; i++, ent++)
// '?' is what ends up in the file if the op is neither TO_LOAD nor TO_FREE.
char opcode = '?';
case TO_LOAD: opcode = 'L'; break;
case TO_FREE: opcode = 'F'; break;
default: debug_warn("invalid TraceOp");
debug_assert(ent->op == TO_LOAD || ent->op == TO_FREE);
// NOTE(review): the size field is printed with %d; trace_add stores a
// size_t there. if TraceEntry::size is wider than int this specifier is
// mismatched - confirm the field type.
fprintf(f, "%#010f: %c \"%s\" %d %04x\n", ent->timestamp, opcode, ent->atom_fn, ent->size, ent->flags);
return ERR_OK;
// read a trace text file (format as produced by trace_write_to_file) and
// feed each parsed line to trace_add, so the entries end up in trace_pool.
// the filename is first converted via file_make_full_native_path;
// RETURN_ERR presumably propagates a failure of that call to the caller.
// side effect: trace_enabled is set to true while reading and is
// left at false afterwards.
// returns ERR_OK at the end of the function.
LibError trace_read_from_file(const char* trace_filename, Trace* t)
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
// "rt": open for reading in text mode.
FILE* f = fopen(N_fn, "rt");
// we use trace_add, which is the same mechanism called by trace_notify*;
// therefore, tracing needs to be enabled.
trace_enabled = true;
// parse lines and stuff them in trace_pool
// (as if they had been trace_add-ed; replaces any existing data)
// .. bake PATH_MAX limit into string.
// the resulting format is: %lf: %c "%<PATH_MAX>[^"]" %d %04x\n
char fmt[30];
snprintf(fmt, ARRAY_SIZE(fmt), "%%lf: %%c \"%%%d[^\"]\" %%d %%04x\n", PATH_MAX);
// NOTE(review): a scanf field width does not include the terminating 0,
// so a width of PATH_MAX can store PATH_MAX+1 bytes into P_path[PATH_MAX].
// NOTE(review): <size> is a size_t but is scanned with %d, which expects
// an int*; this is mismatched wherever size_t is wider than int.
double timestamp; char opcode; char P_path[PATH_MAX]; size_t size; uint flags;
int ret = fscanf(f, fmt, &timestamp, &opcode, P_path, &size, &flags);
if(ret == EOF)
// all 5 fields must have been converted.
debug_assert(ret == 5);
TraceOp op = TO_LOAD; // default in case file is garbled
case 'L': op = TO_LOAD; break;
case 'F': op = TO_FREE; break;
default: debug_warn("invalid TraceOp");
// pass the parsed timestamp so that trace_add does not substitute the
// current time.
trace_add(op, P_path, size, flags, timestamp);
// all previous trace entries were hereby lost (overwritten),
// so there's no sense in continuing.
trace_enabled = false;
return ERR_OK;
// NOTE(review): enumerators are not visible here; presumably these are the
// values accepted by trace_simulate's <flags> parameter - confirm.
enum SimulateFlags
// replay the trace stored in <trace_filename>: for each entry, a TO_LOAD
// results in a vfs_load of the file and a TO_FREE in a file_cache_find.
// entries can be replayed at their original relative times (busy-wait).
// an error from trace_read_from_file is passed on via RETURN_ERR;
// otherwise ERR_OK is returned at the end of the function.
LibError trace_simulate(const char* trace_filename, uint flags)
Trace t;
RETURN_ERR(trace_read_from_file(trace_filename, &t));
// prevent the actions we carry out below from generating
// trace_add-s.
trace_enabled = false;
const double start_time = get_time();
// timestamps are replayed relative to the first entry.
// NOTE(review): t.ents[0] is read before the loop, i.e. also when
// t.num_ents is 0 - confirm that this is safe for an empty trace.
const double first_timestamp = t.ents[0].timestamp;
const TraceEntry* ent = t.ents;
for(uint i = 0; i < t.num_ents; i++, ent++)
// wait until time for next entry if caller requested this
while(get_time()-start_time < ent->timestamp-first_timestamp)
// busy-wait (don't sleep - can skew results)
// carry out this entry's operation
FileIOBuf buf; size_t size;
case TO_LOAD:
// return value is deliberately ignored.
(void)vfs_load(ent->atom_fn, buf, size, ent->flags);
case TO_FREE:
buf = file_cache_find(ent->atom_fn, &size);
debug_warn("unknown TraceOp");
return ERR_OK;
// numeric identifier of a file.
// enough for 64K unique files - ought to suffice.
typedef u16 FileId;
// reserved "no file" value: FileAccess initializes prev/next to it and
// IdMgr hands out ids starting at NULL_ID+1.
static const FileId NULL_ID = 0;
// hands out a FileId per filename: the first get() for a given atom_fn
// assigns the next free id, later calls return the same id.
class IdMgr
// next id to be handed out.
FileId cur;
// note: the key is the pointer itself, so lookups compare addresses and
// not string contents.
// NOTE(review): presumably relies on atom_fn being a unique ("atom")
// copy of each filename - confirm.
typedef std::map<const char*, FileId> Map;
Map map;
// return the id associated with <atom_fn>, assigning a new one if this
// pointer has not been seen before.
FileId get(const char* atom_fn)
Map::iterator it = map.find(atom_fn);
if(it != map.end())
return it->second;
// NOTE(review): cur is a u16 that is incremented with no overflow check in
// the lines visible here; it would wrap to NULL_ID after 65535 ids.
FileId id = cur++;
map[atom_fn] = id;
return id;
// restart numbering at NULL_ID+1. note: does not touch <map>.
void reset() { cur = NULL_ID+1; }
IdMgr() { reset(); }
// the single instance used by FileAccess's constructor.
static IdMgr id_mgr;
// one file in the access list; doubles as a node of the chains that
// TourBuilder links together via prev/next.
struct FileAccess
const char* atom_fn;
// id assigned by id_mgr for atom_fn.
FileId id;
// ids of the neighboring files in the chain; NULL_ID if none.
FileId prev;
FileId next;
// scratch flag used by TourBuilder (cycle detection and chain output).
// NOTE(review): not set by the constructor lines visible here.
bool visited;
// starts out unlinked (prev = next = NULL_ID).
FileAccess(const char* atom_fn_)
atom_fn = atom_fn_;
prev = next = NULL_ID;
id = id_mgr.get(atom_fn);
typedef std::vector<FileAccess> FileAccesses;
// builds the list of file accesses (<file_accesses>) from a trace file
// plus a list of required filenames. all work happens in the constructor.
class FileAccessGatherer
// put all entries in one trace file: easier to handle; obviates FS enum code
// rationale: don't go through trace in order; instead, process most recent
// run first, to give more weight to it (TSP code should go with first entry
// when #occurrences are equal)
// a run is a contiguous range of <count> trace entries starting at <first>.
struct Run
const TraceEntry* first;
uint count;
// note: up to caller to initialize count (that's done when
// starting the next run)
Run(const TraceEntry* first_) : first(first_) {}
// output list, owned by the caller (held by reference).
FileAccesses& file_accesses;
// improvement: postprocess the trace and remove all IOs that would be
// satisfied by our cache. often repeated IOs would otherwise potentially
// be arranged badly.
// walk the entries of <run> and replay them against the file cache.
void extract_accesses_from_run(const Run& run)
const TraceEntry* ent = run.first;
for(uint i = 0; i < run.count; i++, ent++)
// simulate carrying out the entry's TraceOp to determine
// whether this IO would be satisfied by the file_buf cache.
FileIOBuf buf;
size_t size = ent->size;
const char* atom_fn = ent->atom_fn;
case TO_LOAD:
buf = file_cache_retrieve(atom_fn, &size);
// would not be in cache: add to list of real IOs
bool long_lived = (ent->flags & FILE_LONG_LIVED) != 0;
// allocate a buffer and add it to the cache, so that later entries for
// the same file are checked against a cache that contains it.
buf = file_buf_alloc(size, atom_fn, long_lived);
(void)file_cache_add(buf, size, atom_fn);
case TO_FREE:
buf = file_cache_find(atom_fn, &size);
debug_warn("unknown TraceOp");
} // foreach entry
// note: passing i and comparing timestamp with previous timestamp
// avoids having to keep an extra local cur_time variable.
// <ent> must point at entry number <i> of the trace array, because
// ent[-1] is read whenever i != 0.
bool is_start_of_run(uint i, const TraceEntry* ent)
// first item is always start of a run (protects [-1] below)
if(i == 0)
return true;
// timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
if(ent->timestamp < ent[-1].timestamp)
return true;
return false;
typedef std::vector<Run> Runs;
Runs runs;
// partition the entries of <t> into <runs>, using is_start_of_run to
// find the boundaries.
void split_trace_into_runs(const Trace* t)
uint cur_run_length = 0;
const TraceEntry* cur_entry = t->ents;
for(uint i = 0; i < t->num_ents; i++)
if(is_start_of_run(i, cur_entry))
// a new run begins: store the length of the one before it.
runs.back().count = cur_run_length;
cur_run_length = 0;
// set the last run's length
runs.back().count = cur_run_length;
// trace_filename: trace file to read via trace_read_from_file.
// required_fns: 0-terminated array of filenames (the loop below stops
//   at the first 0 entry).
// file_accesses_: receives the result; must outlive this object since
//   only a reference is kept.
FileAccessGatherer(const char* trace_filename, Filenames required_fns,
FileAccesses& file_accesses_)
: file_accesses(file_accesses_)
Trace t;
// a return value of 0 is treated as success here.
if(trace_read_from_file(trace_filename, &t) == 0)
// extract accesses from each run (starting with most recent
// first. this isn't critical, but may help a bit since
// files that are equally strongly 'connected' are ordered
// according to position in file_accesses. that means files from
// more recent traces tend to go first, which is good.)
// NOTE(review): this loop walks <runs> from begin() to end(); whether
// that is "most recent first" depends on the order in which
// split_trace_into_runs stores runs - confirm.
for(Runs::iterator it = runs.begin(); it != runs.end(); ++it)
// add all remaining files that are to be put in archive
for(uint i = 0; required_fns[i] != 0; i++)
// should never be copied; this also squelches warning
FileAccessGatherer(const FileAccessGatherer& rhs);
FileAccessGatherer& operator=(const FileAccessGatherer& rhs);
// orders files for the archive: counts how often one file is accessed
// directly after another ("connection"), links files into chains via
// FileAccess::prev/next starting with the most frequent connections, and
// outputs the chains as a 0-terminated filename array.
// all work happens in the constructor.
class TourBuilder
// a connection id packs two FileIds into one u32.
typedef u32 ConnectionId;
cassert(sizeof(FileId)*2 <= sizeof(ConnectionId));
// combine the two FileIds into one id via u32_from_u16.
ConnectionId cid_make(FileId prev, FileId next)
return u32_from_u16(prev, next);
// NOTE(review): cid_first/cid_second presumably recover cid_make's
// <prev>/<next>; that depends on u32_from_u16's argument order - confirm.
FileId cid_first(ConnectionId id)
return u32_hi(id);
FileId cid_second(ConnectionId id)
return u32_lo(id);
struct Connection
ConnectionId id;
// repeated edges ("connections") are reflected in
// the 'occurrences' count; we optimize the ordering so that
// files with frequent connections are nearby.
uint occurrences;
// a newly created connection has been seen once.
Connection(ConnectionId id_)
: id(id_), occurrences(1) {}
// sort by decreasing occurrence
struct Occurrence_greater: public std::binary_function<const Connection&, const Connection&, bool>
bool operator()(const Connection& c1, const Connection& c2) const
return (c1.occurrences > c2.occurrences);
typedef std::vector<Connection> Connections;
Connections connections;
// not const because we change the graph-related members
FileAccesses& file_accesses;
// fill <connections> from consecutive pairs in file_accesses.
void build_connections()
// reserve memory for worst-case amount of connections (happens if
// all accesses are unique). this is necessary because we store
// pointers to Connection in the map, which would be invalidated if
// connections[] ever expands.
// we need to check before inserting a new connection if it has
// come up before (to increment occurrences). this map speeds
// things up from n*n to n*log(n) (n = # files).
typedef std::map<ConnectionId, Connection*> Map;
Map map;
// for each file pair (i-1, i): set up a Connection
for(uint i = 1; i < file_accesses.size(); i++)
const ConnectionId c_id = cid_make(file_accesses[i-1].id, file_accesses[i].id);
Map::iterator it = map.find(c_id);
if(it != map.end())
// remember where the new connection lives so that it can be found again.
map[c_id] = &connections.back();
// result of the most recent cycle check; written by detect_cycleR and
// is_cycle_at.
bool has_cycle;
// helper for is_cycle_at: marks <node> as visited and follows its
// next link.
// NOTE(review): FileIds are used directly as indices into file_accesses
// here and in try_add_edge/output_chain, while IdMgr numbers ids from
// NULL_ID+1 - confirm that ids and vector positions really coincide.
void detect_cycleR(FileId node)
FileAccess* pnode = &file_accesses[node];
pnode->visited = true;
FileId next = pnode->next;
if(next != NULL_ID)
FileAccess* pnext = &file_accesses[next];
has_cycle = true;
// returns has_cycle after clearing it and all visited flags.
bool is_cycle_at(FileId node)
has_cycle = false;
for(FileAccesses::iterator it = file_accesses.begin(); it != file_accesses.end(); ++it)
it->visited = 0;
return has_cycle;
// try to link the two files of <c> (first.next = second,
// second.prev = first); the link is undone if it introduced a cycle.
void try_add_edge(const Connection& c)
FileId first_id = cid_first(c.id);
FileId second_id = cid_second(c.id);
FileAccess& first = file_accesses[first_id];
FileAccess& second = file_accesses[second_id];
// one of the two link slots is already occupied.
if(first.next != NULL_ID || second.prev != NULL_ID)
first.next = second_id;
second.prev = first_id;
bool introduced_cycle = is_cycle_at(second_id);
// sanity check: the result must be the same when starting from the
// other end of the new edge.
debug_assert(introduced_cycle == is_cycle_at(first_id));
debug_printf("try: undo (due tot cycle)\n");
// undo
first.next = second.prev = NULL_ID;
// pointer to this is returned by TourBuilder()!
// i.e. the <fns> output of the constructor points into this vector and is
// only valid while this TourBuilder object exists.
std::vector<const char*> fn_vector;
// emit the whole chain that <c>'s first file belongs to.
void output_chain(const Connection& c)
FileAccess* start = &file_accesses[cid_first(c.id)];
// early out: if this access was already visited, so must the entire
// chain of which it is a part. bail to save lots of time.
// follow prev links starting with c until no more are left;
// start ends up the beginning of the chain including <c>.
while(start->prev != NULL_ID)
start = &file_accesses[start->prev];
// iterate over the chain - add to Filenames list and mark as visited
FileAccess* cur = start;
cur->visited = true;
cur = &file_accesses[cur->next];
while(cur->next != NULL_ID);
// file_accesses_: list to be ordered; its prev/next/visited members
//   are modified.
// fns: receives a pointer to the 0-terminated array of filenames
//   (storage is fn_vector, see above).
TourBuilder(FileAccesses& file_accesses_, Filenames& fns)
: file_accesses(file_accesses_)
// most frequent connections first.
std::sort(connections.begin(), connections.end(), Occurrence_greater());
for(Connections::iterator it = connections.begin(); it != connections.end(); ++it)
for(Connections::iterator it = connections.begin(); it != connections.end(); ++it)
fn_vector.push_back(0); // 0-terminate for use as Filenames array
fns = &fn_vector[0];
// should never be copied; this also squelches warning
TourBuilder(const TourBuilder& rhs);
TourBuilder& operator=(const TourBuilder& rhs);
typedef std::vector<const char*> FnVector;
// list of loose files; its size drives the rebuild/build decisions in
// should_rebuild_main_archive and should_build_mini_archive, and its
// contents are passed to archive_build by vfs_opt_auto_build_archive.
static FnVector loose_files;
// NOTE(review): no statement of the body is visible here; presumably
// this appends <atom_fn> to loose_files - confirm.
void vfs_opt_notify_loose_file(const char* atom_fn)
// we could stop adding to loose_files if it's already got more than
// REBUILD_MAIN_ARCHIVE_THRESHOLD entries, but don't bother
// (it's ok to waste a bit of mem - this is rare)
// context passed to EntCb via VFSUtil::EnumDirEnts; <files> is
// afterwards used as the list of required filenames by
// vfs_opt_rebuild_main_archive.
struct EntCbParams
std::vector<const char*> files;
// directory entry callback for VFSUtil::EnumDirEnts.
// context: points to an EntCbParams.
// NOTE(review): the rest of the body is not visible here; presumably
// it adds the entry to params->files - confirm.
static void EntCb(const char* path, const DirEnt* ent, void* context)
EntCbParams* params = (EntCbParams*)context;
// (re)build the archive at <P_archive_path>: enumerate all files,
// order them with the help of the trace in <trace_filename>
// (FileAccessGatherer followed by TourBuilder) and hand the resulting
// filename list to archive_build.
// returns archive_build's result; an error from the directory
// enumeration is passed on via RETURN_ERR.
LibError vfs_opt_rebuild_main_archive(const char* P_archive_path, const char* trace_filename)
// get list of all files
// TODO: for each mount point (with VFS_MOUNT_ARCHIVE flag set):
EntCbParams params;
// recursive enumeration starting at "".
RETURN_ERR(VFSUtil::EnumDirEnts("", VFSUtil::RECURSIVE, 0, EntCb, &params));
// NOTE(review): &params.files[0] is taken even if no files were found,
// and FileAccessGatherer scans this array until it hits a 0 entry -
// confirm the vector is non-empty and 0-terminated at this point.
Filenames required_fns = &params.files[0];
FileAccesses file_accesses;
FileAccessGatherer gatherer(trace_filename, required_fns, file_accesses);
// fns points into storage owned by <builder>, so builder must stay
// alive until archive_build has returned.
Filenames fns;
TourBuilder builder(file_accesses, fns);
LibError ret = archive_build(P_archive_path, fns);
// do NOT delete source files or archives! some apps might want to
// keep them (e.g. for source control), or name them differently.
// rebuild is required to make sure the new archive is used. this is
// already taken care of by VFS dir watch, unless it's disabled..
return ret;
// autobuild logic: decides when to (re)build an archive.
// both thresholds are compared against the number of entries in
// loose_files.
// at or above this many loose files, the main archive is rebuilt.
static const size_t REBUILD_MAIN_ARCHIVE_THRESHOLD = 100;
// at or above this many loose files, a mini archive is built.
static const size_t BUILD_MINI_ARCHIVE_THRESHOLD = 30;
// returns true if the main archive should be rebuilt.
// the only criterion with visible code is the number of loose files
// reaching REBUILD_MAIN_ARCHIVE_THRESHOLD; the other two criteria are
// named in the comments below.
static bool should_rebuild_main_archive()
if(loose_files.size() >= REBUILD_MAIN_ARCHIVE_THRESHOLD)
return true;
// more than 3 mini archives
// development build only: archive is more than 2 weeks old
#ifndef FINAL
return false;
// returns true if the number of loose files has reached
// BUILD_MINI_ARCHIVE_THRESHOLD, false otherwise.
static bool should_build_mini_archive()
if(loose_files.size() >= BUILD_MINI_ARCHIVE_THRESHOLD)
return true;
return false;
// entry point of the autobuild logic: either rebuilds the main archive
// (via vfs_opt_rebuild_main_archive), builds a mini archive from
// loose_files (if should_build_mini_archive() is true), or does nothing.
// P_dst_path: directory in which archives are created.
// main_archive_name: appended to P_dst_path to form the main archive's path.
// trace_filename: passed on to vfs_opt_rebuild_main_archive.
// returns the result of the rebuild, or ERR_OK at the end of the
// function; errors from vfs_path_append and archive_build are
// passed on via RETURN_ERR.
LibError vfs_opt_auto_build_archive(const char* P_dst_path,
const char* main_archive_name, const char* trace_filename)
char P_archive_path[PATH_MAX];
RETURN_ERR(vfs_path_append(P_archive_path, P_dst_path, main_archive_name));
return vfs_opt_rebuild_main_archive(P_archive_path, trace_filename);
else if(should_build_mini_archive())
// get new unused mini archive name at P_dst_path
// NOTE(review): archive_build is handed the loose_files storage directly;
// vfs_opt_rebuild_main_archive passes it a 0-terminated list instead -
// confirm loose_files is 0-terminated here.
RETURN_ERR(archive_build(P_archive_path, &loose_files[0]));
// delete all newly added loose files
return ERR_OK;