forked from 0ad/0ad

# bugfixes: prevent incorrect FPU results due to 0-length files, and incorrect file caching due to timestamps delayed by long sojourn in the debugger.

file_cache: avoid caching 0-length files (prevents div by 0)
adts: bit more defensiveness against size=0
trace: now use "dividers" in the trace file instead of relying on
the monotonicity of the get_time source. this avoids incorrectly
splitting the trace into runs when time is in fact not monotonic
(which would cause lots of warnings), e.g. after a long pause in
the debugger.
in the process, changed the interface to return runs (most recent
first) instead of only the raw unsorted entries.

This was SVN commit r3832.
janwas 2006-04-30 21:45:32 +00:00
parent b512733787
commit 37663d86fb
5 changed files with 201 additions and 119 deletions
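Before the per-file diffs, a minimal standalone sketch of the "dividers" idea from the message above (hypothetical, simplified types, not the engine's actual TraceEntry): runs are bounded by an explicit marker entry, so a debugger pause that distorts timestamps can no longer split a run.

```cpp
#include <cstddef>
#include <string>
#include <vector>

// hypothetical, simplified stand-in for the engine's TraceEntry
struct Entry
{
    double timestamp;
    std::string path;
};

// assumed divider marker (the real one is a long dashed string; see trace.cpp below)
static const std::string DIVIDER = "----";

// return the index of the first entry of each run. runs are bounded by
// divider entries; timestamps are never consulted, so non-monotonic time
// (e.g. after sitting in the debugger) cannot split a run incorrectly.
std::vector<size_t> run_starts(const std::vector<Entry>& entries)
{
    std::vector<size_t> starts;
    bool at_run_start = true;          // first non-divider entry begins a run
    for (size_t i = 0; i < entries.size(); i++)
    {
        if (entries[i].path == DIVIDER)
        {
            at_run_start = true;       // next real entry starts a new run
            continue;                  // dividers belong to no run
        }
        if (at_run_start)
        {
            starts.push_back(i);
            at_run_start = false;
        }
    }
    return starts;
}
```

The old approach (visible in the removed split_trace_into_runs near the end of this commit) instead compared each timestamp with its predecessor, which misfires whenever get_time is not monotonic.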

View File

@@ -324,7 +324,10 @@ template<class Entries> float ll_calc_min_credit_density(const Entries& entries)
{
float min_credit_density = FLT_MAX;
for(typename Entries::const_iterator it = entries.begin(); it != entries.end(); ++it)
min_credit_density = fminf(min_credit_density, Entries::entry_from_it(it).credit_density());
{
const float credit_density = Entries::entry_from_it(it).credit_density();
min_credit_density = fminf(min_credit_density, credit_density);
}
return min_credit_density;
}
@@ -345,7 +348,8 @@ public:
void notify_increased_or_removed(const Entry&) const {}
float operator()(const Entries& entries) const
{
return ll_calc_min_credit_density(entries);
const float mcd = ll_calc_min_credit_density(entries);
return mcd;
}
};
@@ -481,6 +485,7 @@ again:
// different evictions than Landlord_Lazy, which is unacceptable.
// nor is doing so necessary: if mcd is tiny, so is credit.
const float min_credit_density = mcd_calc(map);
debug_assert(min_credit_density > 0.0f);
for(MapIt it = map.begin(); it != map.end();) // no ++it
{
@@ -919,6 +924,9 @@ private:
size = size_;
cost = cost_;
credit = cost;
// else the division in credit_density would fail
debug_assert(size != 0);
}
float credit_density() const
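The hunk above ends mid-declaration at credit_density; its body is not shown, but per the commit message it divides by size. A hedged sketch of the invariant being protected (names are taken from the hunk; the division itself is inferred from "prevents div by 0", not shown in the diff):

```cpp
#include <cstddef>

// assumed, simplified shape of the cache entry's accounting:
struct EntrySketch
{
    size_t size;    // bytes occupied in the cache
    float  cost;    // cost to re-fetch this item
    float  credit;  // remaining credit (landlord algorithm)

    float credit_density() const
    {
        // the division that motivates debug_assert(size != 0) above:
        // size == 0 would yield a meaningless (infinite) density —
        // the "incorrect FPU results" named in the commit message.
        return credit / (float)size;
    }
};
```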

View File

@@ -1156,10 +1156,13 @@ LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn,
if(file_flags & FILE_CACHED_AT_HIGHER_LEVEL)
return INFO_SKIPPED;
// refuse to cache 0-length files (it would have no benefit and
// causes problems due to divide-by-0).
if(size == 0)
return INFO_SKIPPED;
// assign cost
uint cost = 1;
if(!size)
cost = 0;
ExactBufOracle::BufAndSize bas = exact_buf_oracle.get(buf, size);
FileIOBuf exact_buf = bas.first; size_t exact_size = bas.second;
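Observable effect of the new guard at a call site, sketched below (the parameter list after atom_fn is truncated in the hunk header, so the flags argument is an assumption):

```cpp
// hypothetical call site: a 0-length file is now simply not cached.
LibError err = file_cache_add(buf, /*size*/ 0, atom_fn, file_flags);
debug_assert(err == INFO_SKIPPED);  // skipped up front; no entry is created,
                                    // so credit_density can never divide by 0
```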

View File

@@ -64,7 +64,8 @@ void trace_enable(bool want_enabled)
}
static void trace_add(TraceOp op, const char* P_fn, size_t size, uint flags = 0, double timestamp = 0.0)
static void trace_add(TraceOp op, const char* P_fn, size_t size,
uint flags = 0, double timestamp = 0.0)
{
trace_init();
if(!trace_enabled)
@@ -83,6 +84,12 @@ static void trace_add(TraceOp op, const char* P_fn, size_t size, uint flags = 0,
t->flags = flags;
}
static void trace_get_raw_ents(const TraceEntry*& ents, size_t& num_ents)
{
ents = (const TraceEntry*)trace_pool.da.base;
num_ents = (uint)(trace_pool.da.pos / sizeof(TraceEntry));
}
void trace_notify_io(const char* P_fn, size_t size, uint flags)
{
@@ -95,18 +102,132 @@ void trace_notify_free(const char* P_fn, size_t size)
}
//-----------------------------------------------------------------------------
// put all entries in one trace file: easier to handle; obviates FS enum code
// rationale: don't go through trace in order; instead, process most recent
// run first, to give more weight to it (TSP code should go with first entry
// when #occurrences are equal)
static const TraceEntry delimiter_entry =
{
0.0f, // timestamp
"------------------------------------------------------------",
0, // size
TO_IO, // TraceOp (never seen by user; value doesn't matter)
0 // flags
};
// storage for Trace.runs.
static const uint MAX_RUNS = 100;
static TraceRun runs[MAX_RUNS];
// note: the last index may be one past the number of actual entries.
static std::vector<size_t> run_start_indices;
class DelimiterAdder
{
public:
enum Consequence
{
SKIP_ADD,
CONTINUE
};
Consequence operator()(size_t i, double timestamp, const char* P_path)
{
// this entry is a delimiter
if(!strcmp(P_path, delimiter_entry.atom_fn))
{
run_start_indices.push_back(i+1); // skip this entry
// note: its timestamp is invalid, so don't set cur_timestamp!
return SKIP_ADD;
}
const double last_timestamp = cur_timestamp;
cur_timestamp = timestamp;
// first item is always start of a run
if((i == 0) ||
// timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
(timestamp < last_timestamp))
run_start_indices.push_back(i);
return CONTINUE;
}
private:
double cur_timestamp;
};
//-----------------------------------------------------------------------------
void trace_get(Trace* t)
{
t->ents = (const TraceEntry*)trace_pool.da.base;
t->num_ents = (uint)(trace_pool.da.pos / sizeof(TraceEntry));
const TraceEntry* ents; size_t num_ents;
trace_get_raw_ents(ents, num_ents);
// nobody has split the entries into runs; just create one big 'run'.
if(run_start_indices.empty())
run_start_indices.push_back(0);
t->runs = runs;
t->num_runs = 0; // counted up
t->total_ents = num_ents;
size_t last_start_idx = num_ents;
std::vector<size_t>::reverse_iterator it;
for(it = run_start_indices.rbegin(); it != run_start_indices.rend(); ++it)
{
const size_t start_idx = *it;
// run_start_indices.back() may be = num_ents (could happen if
// a zero-length run gets written out); skip that to avoid
// zero-length run here.
if(last_start_idx == start_idx)
continue;
TraceRun& run = runs[t->num_runs++];
run.num_ents = last_start_idx - start_idx;
run.ents = &ents[start_idx];
last_start_idx = start_idx;
if(t->num_runs == MAX_RUNS)
break;
}
debug_assert(t->num_runs != 0);
}
void trace_clear()
{
pool_free_all(&trace_pool);
run_start_indices.clear();
memset(runs, 0, sizeof(runs)); // for safety
}
//-----------------------------------------------------------------------------
static void write_entry(FILE* f, const TraceEntry* ent)
{
char opcode = '?';
switch(ent->op)
{
case TO_IO: opcode = 'L'; break;
case TO_FREE: opcode = 'F'; break;
default: debug_warn("invalid TraceOp");
}
debug_assert(ent->op == TO_IO || ent->op == TO_FREE);
fprintf(f, "%#010f: %c \"%s\" %d %04x\n", ent->timestamp, opcode,
ent->atom_fn, ent->size, ent->flags);
}
// *appends* entire current trace contents to file (with delimiter first)
LibError trace_write_to_file(const char* trace_filename)
{
if(!trace_enabled)
@@ -121,22 +242,14 @@ LibError trace_write_to_file(const char* trace_filename)
if(!f)
WARN_RETURN(ERR_FILE_ACCESS);
Trace t;
trace_get(&t);
const TraceEntry* ent = t.ents;
for(size_t i = 0; i < t.num_ents; i++, ent++)
{
char opcode = '?';
switch(ent->op)
{
case TO_IO: opcode = 'L'; break;
case TO_FREE: opcode = 'F'; break;
default: debug_warn("invalid TraceOp");
}
write_entry(f, &delimiter_entry);
debug_assert(ent->op == TO_IO || ent->op == TO_FREE);
fprintf(f, "%#010f: %c \"%s\" %d %04x\n", ent->timestamp, opcode, ent->atom_fn, ent->size, ent->flags);
}
// somewhat of a hack: write all entries in original order, not the
// reverse order returned by trace_get.
const TraceEntry* ent; size_t num_ents;
trace_get_raw_ents(ent, num_ents);
for(size_t i = 0; i < num_ents; i++, ent++)
write_entry(f, ent);
(void)fclose(f);
return ERR_OK;
@@ -157,12 +270,14 @@ LibError trace_read_from_file(const char* trace_filename, Trace* t)
// therefore, tracing needs to be enabled.
trace_enabled = true;
DelimiterAdder delim_adder;
// parse lines and stuff them in trace_pool
// (as if they had been trace_add-ed; replaces any existing data)
// .. bake PATH_MAX limit into string.
char fmt[30];
snprintf(fmt, ARRAY_SIZE(fmt), "%%lf: %%c \"%%%d[^\"]\" %%d %%04x\n", PATH_MAX);
for(;;)
for(size_t i = 0; ; i++)
{
double timestamp; char opcode; char P_path[PATH_MAX]; size_t size; uint flags;
int ret = fscanf(f, fmt, &timestamp, &opcode, P_path, &size, &flags);
@@ -178,7 +293,8 @@ LibError trace_read_from_file(const char* trace_filename, Trace* t)
default: debug_warn("invalid TraceOp");
}
trace_add(op, P_path, size, flags, timestamp);
if(delim_adder(i, timestamp, P_path) != DelimiterAdder::SKIP_ADD)
trace_add(op, P_path, size, flags, timestamp);
}
fclose(f);
@@ -189,6 +305,9 @@ LibError trace_read_from_file(const char* trace_filename, Trace* t)
// so there's no sense in continuing.
trace_enabled = false;
if(t->total_ents == 0)
WARN_RETURN(ERR_TRACE_EMPTY);
return ERR_OK;
}
@@ -282,36 +401,40 @@ LibError trace_run(const char* trace_filename, uint flags)
trace_enabled = false;
const double start_time = get_time();
const double first_timestamp = t.ents[0].timestamp;
const double first_timestamp = t.runs[t.num_runs-1].ents[0].timestamp;
const TraceEntry* ent = t.ents;
for(uint i = 0; i < t.num_ents; i++, ent++)
for(uint r = 0; r < t.num_runs; r++)
{
// wait until time for next entry if caller requested this
if(flags & TRF_SYNC_TO_TIMESTAMP)
const TraceRun& run = t.runs[r];
const TraceEntry* ent = run.ents;
for(uint i = 0; i < run.num_ents; i++, ent++)
{
while(get_time()-start_time < ent->timestamp-first_timestamp)
// wait until time for next entry if caller requested this
if(flags & TRF_SYNC_TO_TIMESTAMP)
{
// busy-wait (don't sleep - can skew results)
while(get_time()-start_time < ent->timestamp-first_timestamp)
{
// busy-wait (don't sleep - can skew results)
}
}
}
// carry out this entry's operation
FileIOBuf buf; size_t size;
switch(ent->op)
{
case TO_IO:
// do not 'run' writes - we'd destroy the existing data.
if(ent->flags & FILE_WRITE)
continue;
(void)vfs_load(ent->atom_fn, buf, size, ent->flags);
break;
case TO_FREE:
buf = file_cache_retrieve(ent->atom_fn, &size, FB_NO_STATS|FB_NO_ACCOUNTING);
(void)file_buf_free(buf);
break;
default:
debug_warn("unknown TraceOp");
// carry out this entry's operation
FileIOBuf buf; size_t size;
switch(ent->op)
{
case TO_IO:
// do not 'run' writes - we'd destroy the existing data.
if(ent->flags & FILE_WRITE)
continue;
(void)vfs_load(ent->atom_fn, buf, size, ent->flags);
break;
case TO_FREE:
buf = file_cache_retrieve(ent->atom_fn, &size, FB_NO_STATS|FB_NO_ACCOUNTING);
(void)file_buf_free(buf);
break;
default:
debug_warn("unknown TraceOp");
}
}
}
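The reversed iteration inside trace_get above is the subtle part: run start indices are recorded in file order, but runs are handed out newest-first, each run ending where the previously emitted (later) run began. A standalone sketch of just that index arithmetic, with made-up data:

```cpp
#include <cstdio>
#include <vector>

int main()
{
    // suppose 10 entries, with runs starting at indices 0, 4 and 7
    const size_t num_ents = 10;
    std::vector<size_t> run_start_indices = { 0, 4, 7 };

    // walk the start indices backwards; each run extends up to the
    // previous run boundary (initially num_ents).
    size_t last_start_idx = num_ents;
    for (auto it = run_start_indices.rbegin();
         it != run_start_indices.rend(); ++it)
    {
        const size_t start_idx = *it;
        if (start_idx == last_start_idx)
            continue;  // skip zero-length runs
        printf("run: entries [%zu, %zu)\n", start_idx, last_start_idx);
        last_start_idx = start_idx;
    }
    // output: [7,10), [4,7), [0,4) — most recent run first
    return 0;
}
```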

View File

@@ -34,11 +34,11 @@ extern void trace_notify_free(const char* P_fn, size_t size);
// note: rather than only a list of accessed files, we also need to
// know the application's behavior WRT caching (e.g. when it releases
// cached buffers). this is necessary so that our simulation can
// yield the same results.
// yield the same behavior.
enum TraceOp
{
TO_IO,
TO_FREE
TO_FREE,
};
// stores one event that is relevant for file IO / caching.
@@ -58,12 +58,21 @@ struct TraceEntry
uint flags : 24; // misc, e.g. file_io flags.
};
struct Trace
struct TraceRun
{
const TraceEntry* ents;
size_t num_ents;
};
struct Trace
{
// most recent first! (see rationale in source)
const TraceRun* runs;
size_t num_runs;
size_t total_ents;
};
extern void trace_get(Trace* t);
extern LibError trace_write_to_file(const char* trace_filename);
extern LibError trace_read_from_file(const char* trace_filename, Trace* t);
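A sketch of a consumer of the new interface (assumes only the declarations in this header; the formatting and the TO_IO/TO_FREE letters mirror trace.cpp's write_entry):

```cpp
#include <cstddef>
#include <cstdio>

static void print_trace()
{
    Trace t;
    trace_get(&t);
    for (size_t r = 0; r < t.num_runs; r++)      // r == 0 is the newest run
    {
        const TraceRun& run = t.runs[r];
        for (size_t i = 0; i < run.num_ents; i++)
        {
            const TraceEntry& ent = run.ents[i];
            printf("%c %s (%u bytes)\n",
                   ent.op == TO_IO ? 'L' : 'F',
                   ent.atom_fn, (unsigned)ent.size);
        }
    }
}
```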

View File

@@ -213,61 +213,6 @@ typedef std::vector<Connection> Connections;
// the main culprit is simulating file_cache to see if an IO would result.
class ConnectionBuilder
{
// put all entries in one trace file: easier to handle; obviates FS enum code
// rationale: don't go through trace in order; instead, process most recent
// run first, to give more weight to it (TSP code should go with first entry
// when #occurrences are equal)
struct Run
{
const TraceEntry* first;
uint count;
// note: up to caller to initialize count (that's done when
// starting the next run)
Run(const TraceEntry* first_) : first(first_) {}
};
// note: passing i and comparing timestamp with previous timestamp
// avoids having to keep an extra local cur_time variable.
bool is_start_of_run(uint i, const TraceEntry* ent) const
{
// first item is always start of a run (protects [-1] below)
if(i == 0)
return true;
// timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
if(ent->timestamp < ent[-1].timestamp)
return true;
return false;
}
typedef std::vector<Run> Runs;
void split_trace_into_runs(const Trace* t, Runs& runs)
{
uint cur_run_length = 0;
const TraceEntry* cur_entry = t->ents;
for(uint i = 0; i < t->num_ents; i++)
{
cur_run_length++;
if(is_start_of_run(i, cur_entry))
{
// not first time: mark previous run as complete
if(!runs.empty())
runs.back().count = cur_run_length;
cur_run_length = 0;
runs.push_back(Run(cur_entry));
}
cur_entry++;
}
// set the last run's length
if(!runs.empty())
runs.back().count = cur_run_length;
}
// functor: on every call except the first, adds a connection between
// the previous file (remembered here) and the current file.
// if the connection already exists, its occurrence count is incremented.
@@ -318,7 +263,7 @@ class ConnectionBuilder
}
};
void add_connections_from_runs(const Runs& runs, Connections& connections)
void add_connections_from_runs(const Trace& t, Connections& connections)
{
file_cache_reset();
@@ -332,12 +277,12 @@ class ConnectionBuilder
// files that are equally strongly 'connected' are ordered
// according to position in file_nodes. that means files from
// more recent traces tend to go first, which is good.)
for(Runs::const_reverse_iterator it = runs.rbegin(); it != runs.rend(); ++it)
for(size_t r = 0; r < t.num_runs; r++)
{
const Run& run = *it;
for(uint i = 0; i < run.count; i++)
const TraceRun& run = t.runs[r];
for(uint i = 0; i < run.num_ents; i++)
{
const TraceEntry* te = run.first + i;
const TraceEntry* te = &run.ents[i];
// improvement: postprocess the trace and remove all IOs that would be
// satisfied by our cache. often repeated IOs would otherwise potentially
// be arranged badly.
@@ -365,21 +310,15 @@ public:
Trace t;
RETURN_ERR(trace_read_from_file(trace_filename, &t));
if(!t.num_ents)
WARN_RETURN(ERR_TRACE_EMPTY);
// reserve memory for worst-case amount of connections (happens if
// all accesses are unique). this is necessary because we store
// pointers to Connection in the map, which would be invalidated if
// connections[] ever expands.
// may waste up to ~3x the memory (about 1mb) for a short time,
// which is ok.
connections.reserve(t.num_ents-1);
connections.reserve(t.total_ents-1);
Runs runs;
split_trace_into_runs(&t, runs);
add_connections_from_runs(runs, connections);
add_connections_from_runs(t, connections);
return ERR_OK;
}
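One detail worth noting in the hunk above: the reserve call is load-bearing, because (per the comment) the map stores raw pointers into connections, and any reallocation would invalidate them. A generic standalone demonstration of that guarantee (plain C++, not engine code):

```cpp
#include <vector>

struct Connection { int occurrences; };

int main()
{
    std::vector<Connection> connections;
    connections.reserve(1000);            // worst case, as in the commit
    connections.push_back(Connection{1});
    Connection* p = &connections[0];      // pointer stored elsewhere (the map)
    for (int i = 0; i < 999; i++)
        connections.push_back(Connection{1});
    // p is still valid ONLY because reserve() guaranteed enough capacity;
    // without it, a reallocation would have invalidated p.
    p->occurrences++;
    return 0;
}
```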