adts: remove_least_valuable safely handles empty cache

lib: add u32_hi/lo
archive: move archive builder logic here from vfs_optimizer
file_cache: add flush() - used when simulating the cache
vfs_optimizer: WIP, not yet functional: file gatherer, 50% of TSP tour generator

This was SVN commit r3486.
janwas 2006-02-09 05:59:33 +00:00
parent 56d8d31cd0
commit 21f8f8555e
12 changed files with 625 additions and 183 deletions

View File

@ -294,8 +294,12 @@ public:
// remove the least valuable item and optionally indicate
// how big it was (useful for statistics).
// returns 0 if cache is empty.
T remove_least_valuable(size_t* psize = 0)
{
if(map.empty())
return 0;
// one iteration ought to suffice to evict someone due to
// definition of min_density, but we provide for repeating
// in case of floating-point imprecision.
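
For illustration only (not part of the diff): with this change, remove_least_valuable() returns 0 once the cache is empty, so callers can drain it in a plain loop; the optional size out-parameter is handy for statistics. A rough sketch, reusing the file_cache and cache_allocator names that appear later in this commit:

// minimal sketch (names borrowed from file_cache.cpp below): drain the cache,
// using the optional size out-parameter to tally how much memory is reclaimed.
static size_t drain_file_cache()
{
	size_t total_evicted = 0;
	for(;;)
	{
		size_t size;
		FileIOBuf discarded = file_cache.remove_least_valuable(&size);
		if(!discarded)		// 0 => cache is now empty
			break;
		cache_allocator.free((u8*)discarded, size);
		total_evicted += size;
	}
	return total_evicted;
}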

View File

@ -315,9 +315,21 @@ u32 u64_hi(u64 x)
u32 u64_lo(u64 x)
{
return (u32)(x & 0xFFFFFFFF);
}
u16 u32_hi(u32 x)
{
return (u16)(x >> 16);
}
u16 u32_lo(u32 x)
{
return (u16)(x & 0xFFFF);
}
u64 u64_from_u32(u32 hi, u32 lo)
{
u64 x = (u64)hi;

View File

@ -283,6 +283,9 @@ extern uintptr_t round_down(uintptr_t n, uintptr_t multiple);
// less than the bit width of the type).
extern u32 u64_hi(u64 x);
extern u32 u64_lo(u64 x);
extern u16 u32_hi(u32 x);
extern u16 u32_lo(u32 x);
extern u64 u64_from_u32(u32 hi, u32 lo);
extern u32 u32_from_u16(u16 hi, u16 lo);
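
For illustration (not part of the diff): the new 16-bit helpers mirror the existing 64-bit pair and round-trip through u32_from_u16; the ConnectionId packing in vfs_optimizer below relies on exactly this. A minimal sketch:

// illustrative only: pack two 16-bit ids into one 32-bit key and split it again
// (this is what vfs_optimizer's ConnectionId below does with FileIds).
static void u32_hi_lo_example()
{
	const u16 hi = 0x1234, lo = 0xABCD;
	const u32 key = u32_from_u16(hi, lo);	// 0x1234ABCD
	debug_assert(u32_hi(key) == hi);	// (u16)(key >> 16)
	debug_assert(u32_lo(key) == lo);	// (u16)(key & 0xFFFF)
}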

View File

@ -604,3 +604,158 @@ LibError afile_unmap(AFile* af)
H_DEREF(af->ha, Archive, a);
return file_unmap(&a->f);
}
//-----------------------------------------------------------------------------
// archive builder
//-----------------------------------------------------------------------------
static inline bool file_type_is_uncompressible(const char* fn)
{
const char* ext = strrchr(fn, '.');
// no extension? bail; assume compressible.
if(!ext)
return false;
// this is a selection of file types that are certainly not
// further compressible. we need not include every type under the sun -
// this is only a slight optimization that avoids wasting time
// compressing files. the real decision as to cmethod is made based
// on attained compression ratio.
static const char* uncompressible_exts[] =
{
"zip", "rar",
"jpg", "jpeg", "png",
"ogg", "mp3"
};
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
{
if(!stricmp(ext+1, uncompressible_exts[i]))
return true;
}
return false;
}
struct CompressParams
{
bool attempt_compress;
uintptr_t ctx;
u32 crc;
};
#include <zlib.h>
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
{
CompressParams* p = (CompressParams*)cb_ctx;
// comp_feed already makes note of total #bytes fed, and we need
// vfs_io to return the uc size (to check if all data was read).
*bytes_processed = size;
// update checksum
p->crc = crc32(p->crc, (const Bytef*)block, (uInt)size);
if(p->attempt_compress)
(void)comp_feed(p->ctx, block, size);
return INFO_CB_CONTINUE;
}
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf) // out
{
struct stat s;
RETURN_ERR(vfs_stat(atom_fn, &s));
const size_t ucsize = s.st_size;
const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
if(attempt_compress)
{
RETURN_ERR(comp_reset(ctx));
RETURN_ERR(comp_alloc_output(ctx, ucsize));
}
// read file into newly allocated buffer. if attempt_compress, also
// compress the file into another buffer while waiting for IOs.
Handle hf = vfs_open(atom_fn, 0);
RETURN_ERR(hf);
buf = FILE_BUF_ALLOC;
CompressParams params = { attempt_compress, ctx, 0 };
ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)&params);
debug_assert(ucsize_read == (ssize_t)ucsize);
(void)vfs_close(hf);
// if we compressed the file trial-wise, check results and
// decide whether to store as such or not (based on compression ratio)
bool store_compressed = false;
void* cdata = 0; size_t csize = 0;
if(attempt_compress)
{
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
const float ratio = (float)ucsize / csize;
const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
if(ratio > 1.05f && bytes_saved > 200)
store_compressed = true;
}
// store file info
ent.ucsize = (off_t)ucsize;
ent.mtime = s.st_mtime;
// .. ent.ofs is set by zip_archive_add_file
ent.flags = 0;
ent.atom_fn = atom_fn;
ent.crc32 = params.crc;
if(store_compressed)
{
ent.method = CM_DEFLATE;
ent.csize = (off_t)csize;
file_contents = cdata;
}
else
{
ent.method = CM_NONE;
ent.csize = (off_t)ucsize;
file_contents = (void*)buf;
}
// note: no need to free cdata - it is owned by the
// compression context and can be reused.
return ERR_OK;
}
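
// (illustrative aside, not part of the diff: the heuristic above stores a file
// compressed only if it shrinks by more than ~5% AND by more than 200 bytes.
// a hypothetical helper isolating that decision:)
static bool is_worth_storing_compressed(size_t ucsize, size_t csize)
{
	const float ratio = (float)ucsize / csize;
	const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
	// e.g. 10000 -> 9000 bytes: ratio 1.11, 1000 saved => compress;
	//        300 ->  280 bytes: ratio 1.07, only 20 saved => store raw.
	return ratio > 1.05f && bytes_saved > 200;
}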
LibError archive_build(const char* P_archive_filename, Filenames V_fl)
{
ZipArchive* za;
RETURN_ERR(zip_archive_create(P_archive_filename, &za));
uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
const char* fn; // declare outside loop for easier debugging
for(size_t i = 0; ; i++)
{
fn = V_fl[i];
if(!fn)
break;
ArchiveEntry ent; void* file_contents; FileIOBuf buf;
if(read_and_compress_file(fn, ctx, ent, file_contents, buf) == ERR_OK)
{
(void)zip_archive_add_file(za, &ent, file_contents);
(void)file_buf_free(buf);
}
}
// note: this is currently known to fail if there are no files in the list
// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
// safe to ignore.
comp_free(ctx);
(void)zip_archive_finish(za);
return ERR_OK;
}
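
A quick usage sketch (illustrative only; the paths and archive name are made up): archive_build expects a NULL-terminated list of VFS filenames, as the loop above shows.

static LibError build_example_archive()
{
	// hypothetical VFS paths; the list must end with a NULL entry.
	static const char* files_to_pack[] =
	{
		"art/textures/ui/example.png",
		"audio/voice/example.ogg",
		0
	};
	return archive_build("mods/official/example.zip", files_to_pack);
}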

View File

@ -159,6 +159,18 @@ extern LibError afile_map(AFile* af, void*& p, size_t& size);
extern LibError afile_unmap(AFile* af);
//
// archive builder
//
// array of pointers to VFS filenames (including path), terminated by a
// NULL entry.
typedef const char** Filenames;
extern LibError archive_build(const char* P_archive_filename, Filenames V_fl);
enum ArchiveFileFlags
{

View File

@ -313,7 +313,7 @@ typedef const u8* FileIOBuf;
FileIOBuf* const FILE_BUF_TEMP = (FileIOBuf*)1;
const FileIOBuf FILE_BUF_ALLOC = (FileIOBuf)2;
extern FileIOBuf file_buf_alloc(size_t size, const char* atom_fn, bool long_lived);
extern LibError file_buf_free(FileIOBuf buf);

View File

@ -786,13 +786,17 @@ LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn)
// decide (based on flags) if buf is to be cached; set cost
uint cost = 1;
if(buf)
cache_allocator.make_read_only((u8*)buf, size);
file_cache.add(atom_fn, buf, size, cost);
return ERR_OK;
}
// called by trace simulator to retrieve buffer address, given atom_fn.
// must not change any cache state (e.g. notify stats or add ref).
FileIOBuf file_cache_find(const char* atom_fn, size_t* size)
@ -851,9 +855,6 @@ file_buf_free and there are only a few active at a time ( < 10)
// remove all blocks loaded from the file <fn>. used when reloading the file.
LibError file_cache_invalidate(const char* P_fn)
{
@ -877,6 +878,23 @@ LibError file_cache_invalidate(const char* P_fn)
}
void file_cache_flush()
{
for(;;)
{
size_t size;
FileIOBuf discarded_buf = file_cache.remove_least_valuable(&size);
// cache is now empty - done
if(!discarded_buf)
return;
#include "nommgr.h"
cache_allocator.free((u8*)discarded_buf, size);
#include "mmgr.h"
}
}
void file_cache_init()
{
}
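
The commit message notes that flush() exists for simulating the cache; its intended call pattern looks roughly like this (a simplified mirror of FileGatherer::extract_accesses_from_run in vfs_optimizer further down; it only handles the load path):

// rough sketch: replay one run of trace entries against the cache,
// flushing before and after so the simulation leaves no state behind.
static void simulate_run(const TraceEntry* ents, uint count)
{
	file_cache_flush();
	for(uint i = 0; i < count; i++)
	{
		size_t size = ents[i].size;
		FileIOBuf buf = file_cache_retrieve(ents[i].atom_fn, &size);
		if(!buf)	// miss: this access would have caused a real IO
		{
			buf = file_buf_alloc(size, ents[i].atom_fn, false);
			(void)file_cache_add(buf, size, ents[i].atom_fn);
		}
	}
	file_cache_flush();
}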

View File

@ -30,6 +30,9 @@ extern FileIOBuf file_cache_find(const char* atom_fn, size_t* size);
extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size);
extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn);
extern LibError file_cache_invalidate(const char* fn);
extern void file_cache_flush();
extern void file_cache_init();
extern void file_cache_shutdown();

View File

@ -353,6 +353,7 @@ static LibError add_ent(TDir* td, DirEnt* ent, const char* P_parent_path, const
// prepend parent path to get complete pathname.
char P_path[PATH_MAX];
CHECK_ERR(vfs_path_append(P_path, P_parent_path, name));
vfs_opt_notify_loose_file(P_path);
// it's a regular data file; add it to the directory.
return tree_add_file(td, name, m, ent->size, ent->mtime, 0);

View File

@ -104,9 +104,7 @@ LibError trace_write_to_file(const char* trace_filename)
LibError trace_read_from_file(const char* trace_filename, Trace* t)
{
// we use trace_add, which is the same mechanism called by trace_notify*;
// therefore, tracing needs to be enabled.
trace_enabled = true;
trace_clear();
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
@ -114,9 +112,12 @@ LibError trace_read_from_file(const char* trace_filename, Trace* t)
if(!f)
WARN_RETURN(ERR_FILE_NOT_FOUND);
// we use trace_add, which is the same mechanism called by trace_notify*;
// therefore, tracing needs to be enabled.
trace_enabled = true;
// parse lines and stuff them in trace_pool
// (as if they had been trace_add-ed; replaces any existing data)
trace_clear();
// .. bake PATH_MAX limit into string.
char fmt[30];
snprintf(fmt, ARRAY_SIZE(fmt), "%%lf: %%c \"%%%d[^\"]\" %%d %%04x\n", PATH_MAX);
@ -204,213 +205,444 @@ LibError trace_simulate(const char* trace_filename, uint flags)
//-----------------------------------------------------------------------------
struct FileList
// enough for 64K unique files - ought to suffice.
typedef u16 FileId;
static const FileId NULL_ID = 0;
class IdMgr
{
const char** atom_fns;
size_t num_files;
size_t i;
FileId cur;
typedef std::map<const char*, FileId> Map;
Map map;
public:
FileId get(const char* atom_fn)
{
Map::iterator it = map.find(atom_fn);
if(it != map.end())
return it->second;
FileId id = cur++;
map[atom_fn] = id;
return id;
}
void reset() { cur = NULL_ID+1; }
IdMgr() { reset(); }
};
static IdMgr id_mgr;
struct FileAccess
{
const char* atom_fn;
FileId id;
FileId prev;
FileId next;
bool visited;
FileAccess(const char* atom_fn_)
{
atom_fn = atom_fn_;
prev = next = NULL_ID;
id = id_mgr.get(atom_fn);
}
};
typedef std::vector<FileAccess> FileAccesses;
class FileGatherer
{
// put all entries in one trace file: easier to handle; obviates FS enum code
// rationale: don't go through file in order; instead, process most recent
// run first, to give more weight to it (TSP code should go with first entry
// when #occurrences are equal)
struct Run
{
const TraceEntry* first;
uint count;
// note: up to caller to initialize count (that's done when
// starting the next run).
Run(const TraceEntry* first_) : first(first_) {}
};
FileAccesses& file_accesses;
// improvement: postprocess the trace and remove all IOs that would be
// satisfied by our cache. often repeated IOs would otherwise potentially
// be arranged badly.
void extract_accesses_from_run(const Run& run)
{
file_cache_flush();
const TraceEntry* ent = run.first;
for(uint i = 0; i < run.count; i++, ent++)
{
// simulate carrying out the entry's TraceOp to determine
// whether this IO would be satisfied by the file_buf cache.
FileIOBuf buf;
size_t size = ent->size;
const char* atom_fn = ent->atom_fn;
switch(ent->op)
{
case TO_LOAD:
buf = file_cache_retrieve(atom_fn, &size);
// would not be in cache: add to list of real IOs
if(!buf)
{
bool long_lived = (ent->flags & FILE_LONG_LIVED) != 0;
buf = file_buf_alloc(size, atom_fn, long_lived);
(void)file_cache_add(buf, size, atom_fn);
file_accesses.push_back(atom_fn);
}
break;
case TO_FREE:
buf = file_cache_find(atom_fn, &size);
(void)file_buf_free(buf);
break;
default:
debug_warn("unknown TraceOp");
}
} // foreach entry
file_cache_flush();
}
// note: passing i and comparing timestamp with previous timestamp
// avoids having to keep an extra local cur_time variable.
bool is_start_of_run(uint i, const TraceEntry* ent)
{
// first item is always start of a run (protects [-1] below)
if(i == 0)
return true;
// timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
if(ent->timestamp < ent[-1].timestamp)
return true;
return false;
}
typedef std::vector<Run> Runs;
Runs runs;
void split_trace_into_runs(const Trace* t)
{
uint cur_run_length = 0;
const TraceEntry* cur_entry = t->ents;
for(uint i = 0; i < t->num_ents; i++)
{
cur_run_length++;
if(is_start_of_run(i, cur_entry))
{
if(!runs.empty())
runs.back().count = cur_run_length;
cur_run_length = 0;
runs.push_back(Run(cur_entry));
}
cur_entry++;
}
// set the last run's length
if(!runs.empty())
runs.back().count = cur_run_length;
}
public:
FileGatherer(const char* trace_filename, Filenames required_fns,
FileAccesses& file_accesses_)
: file_accesses(file_accesses_)
{
Trace t;
if(trace_read_from_file(trace_filename, &t) == 0)
{
split_trace_into_runs(&t);
// extract accesses from each run (starting with most recent
// first. this isn't critical, but may help a bit since
// files that are equally strongly 'connected' are ordered
// according to position in file_accesses. that means files from
// more recent traces tend to go first, which is good.)
for(Runs::iterator it = runs.begin(); it != runs.end(); ++it)
extract_accesses_from_run(*it);
}
// add all remaining files that are to be put in archive
for(uint i = 0; required_fns[i] != 0; i++)
file_accesses.push_back(required_fns[i]);
}
// should never be copied; this also squelches warning
private:
FileGatherer(const FileGatherer& rhs);
FileGatherer& operator=(const FileGatherer& rhs);
};
static LibError filelist_build(Trace* t, FileList* fl)
class TourBuilder
{
// count # files
fl->num_files = 0;
for(size_t i = 0; i < t->num_ents; i++)
if(t->ents[i].op == TO_LOAD)
fl->num_files++;
typedef u32 ConnectionId;
cassert(sizeof(FileId)*2 <= sizeof(ConnectionId));
ConnectionId cid_make(FileId prev, FileId next)
{
return u32_from_u16(prev, next);
}
FileId cid_prev(ConnectionId id)
{
return u32_hi(id);
}
FileId cid_next(ConnectionId id)
{
return u32_lo(id);
}
if(!fl->num_files)
struct Connection
{
ConnectionId id;
// repeated edges ("connections") are reflected in
// the 'occurrences' count; we optimize the ordering so that
// files with frequent connections are nearby.
uint occurrences;
Connection(ConnectionId id_)
: id(id_), occurrences(1) {}
};
struct decreasing_occurrences: public std::binary_function<const Connection&, const Connection&, bool>
{
bool operator()(const Connection& c1, const Connection& c2) const
{
return (c1.occurrences > c2.occurrences);
}
};
typedef std::vector<Connection> Connections;
Connections connections;
// not const because we change the graph-related members
FileAccesses& file_accesses;
void build_connections()
{
// reserve memory for worst-case amount of connections (happens if
// all accesses are unique). this is necessary because we store
// pointers to Connection in the map, which would be invalidated if
// connections[] ever expands.
connections.reserve(file_accesses.size()-1);
// we need to check before inserting a new connection if it has
// come up before (to increment occurrences). this map speeds
// things up from n*n to n*log(n) (n = # files).
typedef std::map<ConnectionId, Connection*> Map;
Map map;
// for each file pair (i-1, i): set up a Connection
for(uint i = 1; i < file_accesses.size(); i++)
{
const ConnectionId c_id = cid_make(file_accesses[i-1].id, file_accesses[i].id);
Map::iterator it = map.find(c_id);
if(it != map.end())
it->second->occurrences++;
else
{
connections.push_back(Connection(c_id));
map[c_id] = &connections.back();
}
}
}
bool has_cycle;
void detect_cycleR(FileId node)
{
FileAccess* pnode = &file_accesses[node];
pnode->visited = true;
FileId next = pnode->next;
if(next != NULL_ID)
{
FileAccess* pnext = &file_accesses[next];
if(pnext->visited)
has_cycle = true;
else
detect_cycleR(next);
}
}
bool is_cycle_at(FileId node)
{
has_cycle = false;
for(FileAccesses::iterator it = file_accesses.begin(); it != file_accesses.end(); ++it)
it->visited = 0;
detect_cycleR(node);
return has_cycle;
}
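
// note: add_edge (below) is a greedy-edge heuristic. connections are offered
// in order of decreasing occurrence count; an edge is kept only if its
// endpoints still have the corresponding slot free (each file gets at most
// one successor and one predecessor) and keeping it would not close a cycle.
// the surviving edges thus form disjoint simple paths, which the (still
// missing) tour walk can later concatenate into the final file ordering.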
void add_edge(const Connection& c)
{
FileId prev_id = cid_prev(c.id);
FileId next_id = cid_next(c.id);
FileAccess& prev = file_accesses[prev_id];
FileAccess& next = file_accesses[next_id];
if(prev.next != NULL_ID || next.prev != NULL_ID)
return;
prev.next = next_id;
next.prev = prev_id;
bool introduced_cycle = is_cycle_at(next_id);
debug_assert(introduced_cycle == is_cycle_at(prev_id));
if(introduced_cycle)
{
// undo
prev.next = next.prev = NULL_ID;
return;
}
}
public:
TourBuilder(FileAccesses& file_accesses_)
: file_accesses(file_accesses_)
{
build_connections();
std::sort(connections.begin(), connections.end(), decreasing_occurrences());
for(Connections::iterator it = connections.begin(); it != connections.end(); ++it)
add_edge(*it);
// walk tour; make sure all nodes are covered
// add each one to FileList
}
// should never be copied; this also squelches warning
private:
TourBuilder(const TourBuilder& rhs);
TourBuilder& operator=(const TourBuilder& rhs);
};
/*
static LibError determine_optimal_ordering(const char* trace_filename, Filenames* pfns)
{
*pfl = 0;
// count # files
uint num_files = 0;
for(size_t i = 0; i < t.num_ents; i++)
if(t.ents[i].op == TO_LOAD)
num_files++;
if(!num_files)
return ERR_DIR_END;
fl->atom_fns = new const char*[fl->num_files];
Filenames fns = (Filenames)malloc((num_files+1)*sizeof(const char*));
if(!fns)
return ERR_NO_MEM;
size_t ti = 0;
for(size_t i = 0; i < fl->num_files; i++)
for(size_t i = 0; i < num_files; i++)
{
// find next trace entry that is a load (must exist)
while(t->ents[ti].op != TO_LOAD)
while(t.ents[ti].op != TO_LOAD)
ti++;
fl->atom_fns[i] = t->ents[ti].atom_fn;
fns[i] = t.ents[ti].atom_fn;
ti++;
}
fl->i = 0;
trace_clear();
*pfl = fl;
return ERR_OK;
}
static const char* filelist_get_next(FileList* fl)
{
if(fl->i == fl->num_files)
return 0;
return fl->atom_fns[fl->i++];
}
static void filelist_free(FileList* fl)
{
delete[] fl->atom_fns;
fl->atom_fns = 0;
}
*/
//-----------------------------------------------------------------------------
static inline bool file_type_is_uncompressible(const char* fn)
typedef std::vector<const char*> FnVector;
static FnVector loose_files;
void vfs_opt_notify_loose_file(const char* atom_fn)
{
const char* ext = strrchr(fn, '.');
// no extension? bail; assume compressible
if(!ext)
// we could stop adding to loose_files if it's already got more than
// REBUILD_MAIN_ARCHIVE_THRESHOLD entries, but don't bother
// (it's ok to waste a bit of mem - this is rare)
loose_files.push_back(atom_fn);
}
LibError vfs_opt_rebuild_main_archive(const char* P_archive_dst_dir)
{
debug_warn("currently non-functional");
// for each mount point (with VFS_MOUNT_ARCHIVE flag set):
// get list of all files
Filenames required_fns = 0;
FileAccesses file_accesses;
FileGatherer gatherer("../logs/trace.txt", required_fns, file_accesses);
TourBuilder builder(file_accesses);
// builder.store_list(pfns);
Filenames fns = 0;
// (Filenames)malloc((num_files+1)*sizeof(const char*));
// if(!fns)
// return ERR_NO_MEM;
char P_path[VFS_MAX_PATH];
RETURN_ERR(vfs_path_append(P_path, P_archive_dst_dir, "main.zip"));
LibError ret = archive_build("main.zip", fns);
// delete all loose files in list
free(fns);
// delete all archives in P_dst_path
return ret;
}
//
// autobuild logic: decides when to (re)build an archive.
//
static const size_t REBUILD_MAIN_ARCHIVE_THRESHOLD = 100;
static const size_t BUILD_MINI_ARCHIVE_THRESHOLD = 30;
static bool should_rebuild_main_archive()
{
if(loose_files.size() >= REBUILD_MAIN_ARCHIVE_THRESHOLD)
return true;
// this is a selection of file types that are certainly not
// further compressible. we need not include every type under the sun -
// this is only a slight optimization that avoids wasting time
// compressing files. the real decision as to cmethod is made based
// on attained compression ratio.
static const char* uncompressible_exts[] =
{
"zip", "rar",
"jpg", "jpeg", "png",
"ogg", "mp3"
};
// more than 3 mini archives
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
{
if(!stricmp(ext+1, uncompressible_exts[i]))
return true;
}
// development build only: archive is more than 2 weeks old
#ifndef FINAL
#endif
return false;
}
struct CompressParams
static bool should_build_mini_archive()
{
bool attempt_compress;
uintptr_t ctx;
u32 crc;
};
#include <zlib.h>
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
{
CompressParams* p = (CompressParams*)cb_ctx;
// comp_feed already makes note of total #bytes fed, and we need
// vfs_io to return the uc size (to check if all data was read).
*bytes_processed = size;
// update checksum
p->crc = crc32(p->crc, (const Bytef*)block, (uInt)size);
if(p->attempt_compress)
(void)comp_feed(p->ctx, block, size);
return INFO_CB_CONTINUE;
if(loose_files.size() >= BUILD_MINI_ARCHIVE_THRESHOLD)
return true;
return false;
}
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf) // out
LibError vfs_opt_auto_build_archive(const char* P_dst_path)
{
struct stat s;
RETURN_ERR(vfs_stat(atom_fn, &s));
const size_t ucsize = s.st_size;
const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
if(attempt_compress)
if(should_rebuild_main_archive())
return vfs_opt_rebuild_main_archive(P_dst_path);
else if(should_build_mini_archive())
{
RETURN_ERR(comp_reset(ctx));
RETURN_ERR(comp_alloc_output(ctx, ucsize));
loose_files.push_back(0);
// get new unused mini archive name
const char* archive_filename = 0;
RETURN_ERR(archive_build(archive_filename, &loose_files[0]));
// delete all newly added loose files
}
// read file into newly allocated buffer. if attempt_compress, also
// compress the file into another buffer while waiting for IOs.
Handle hf = vfs_open(atom_fn, 0);
RETURN_ERR(hf);
buf = FILE_BUF_ALLOC;
CompressParams params = { attempt_compress, ctx, 0 };
ssize_t ucsize_read = vfs_io(hf, ucsize, &buf, compress_cb, (uintptr_t)&params);
debug_assert(ucsize_read == (ssize_t)ucsize);
(void)vfs_close(hf);
// if we compressed the file trial-wise, check results and
// decide whether to store as such or not (based on compression ratio)
bool store_compressed = false;
void* cdata = 0; size_t csize = 0;
if(attempt_compress)
{
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
const float ratio = (float)ucsize / csize;
const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
if(ratio > 1.05f && bytes_saved > 200)
store_compressed = true;
}
// store file info
ent.ucsize = (off_t)ucsize;
ent.mtime = s.st_mtime;
// .. ent.ofs is set by zip_archive_add_file
ent.flags = 0;
ent.atom_fn = atom_fn;
ent.crc32 = params.crc;
if(store_compressed)
{
ent.method = CM_DEFLATE;
ent.csize = (off_t)csize;
file_contents = cdata;
}
else
{
ent.method = CM_NONE;
ent.csize = (off_t)ucsize;
file_contents = (void*)buf;
}
// note: no need to free cdata - it is owned by the
// compression context and can be reused.
return ERR_OK;
}
LibError build_optimized_archive(const char* trace_filename, const char* zip_filename)
{
FileList fl;
{
Trace t;
RETURN_ERR(trace_read_from_file(trace_filename, &t));
RETURN_ERR(filelist_build(&t, &fl));
trace_clear();
}
ZipArchive* za;
RETURN_ERR(zip_archive_create(zip_filename, &za));
uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
const char* atom_fn; // declare outside loop for easier debugging
for(;;)
{
atom_fn = filelist_get_next(&fl);
if(!atom_fn)
break;
ArchiveEntry ent; void* file_contents; FileIOBuf buf;
if(read_and_compress_file(atom_fn, ctx, ent, file_contents, buf) == ERR_OK)
{
(void)zip_archive_add_file(za, &ent, file_contents);
(void)file_buf_free(buf);
}
}
filelist_free(&fl);
// note: this is currently known to fail if there are no files in the list
// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
// safe to ignore.
comp_free(ctx);
(void)zip_archive_finish(za);
return ERR_OK;
}
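
The tour walk that TourBuilder still lacks ("walk tour; make sure all nodes are covered") might look roughly like the sketch below. It is purely hypothetical: it reuses the FileAccesses/FnVector types from above and, like detect_cycleR, assumes a FileId can be used to index file_accesses directly; duplicate accesses of the same file would also still need to be filtered out.

// hypothetical sketch of the missing tour walk: emit each path by starting
// at a node without predecessor and following the next links; files that
// never got connected fall out naturally as paths of length 1.
static void store_tour(const FileAccesses& file_accesses, FnVector& ordered_fns)
{
	for(FileAccesses::const_iterator it = file_accesses.begin(); it != file_accesses.end(); ++it)
	{
		if(it->prev != NULL_ID)	// not the head of a path
			continue;
		for(FileId id = it->id; id != NULL_ID; id = file_accesses[id].next)
			ordered_fns.push_back(file_accesses[id].atom_fn);
	}
}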

View File

@ -45,6 +45,8 @@ extern void trace_get(Trace* t);
extern LibError trace_write_to_file(const char* trace_filename);
extern LibError trace_read_from_file(const char* trace_filename, Trace* t);
extern LibError build_optimized_archive(const char* trace_filename, const char* zip_filename);
extern LibError vfs_opt_rebuild_main_archive(const char* P_dst_path);
extern void vfs_opt_notify_loose_file(const char* atom_fn);
#endif // #ifndef VFS_OPTIMIZER_H__

View File

@ -105,7 +105,7 @@ static void ParseCommandLineArgs(int argc, char* argv[])
// note: VFS init is sure to have been completed by now
// (since CONFIG_Init reads from file); therefore,
// it is safe to call this from here directly.
build_optimized_archive("../logs/trace.txt", "test.zip");
vfs_opt_rebuild_main_archive("mods/official");
break;
case 'c':
if(strcmp(name, "conf") == 0)