fixed incorrect handling of extra fields

(zip archives with extended file attributes failed to load) improved z_extract_cdfh/z_enum_files. This was SVN commit r1527.
2004-12-18 14:45:04 +00:00 · 2004-12-18 14:45:04 +00:00 · bb2de67c31
commit bb2de67c31
parent b291fafadb
1 changed files with 128 additions and 149 deletions
--- a/source/lib/res/zip.cpp
+++ b/source/lib/res/zip.cpp
@ -95,6 +95,7 @@ struct ZLoc
 static const char cdfh_id[] = "PK\1\2";
 static const char lfh_id[]  = "PK\3\4";
 static const char ecdr_id[] = "PK\5\6";
+// lengths include the id field!
 const size_t CDFH_SIZE = 46;
 const size_t LFH_SIZE  = 30;
 const size_t ECDR_SIZE = 22;
@ -102,7 +103,7 @@ const size_t ECDR_SIZE = 22;

 // return -1 if file is obviously not a valid Zip archive,
 // otherwise 0. used as early-out test in lookup_init (see call site).
-static inline int z_validate(const void* file, size_t size)
+static inline int z_validate(const u8* file, size_t size)
 {
 	// make sure it's big enough to check the header and for
 	// z_find_ecdr to succeed (if smaller, it's definitely bogus).
@ -114,39 +115,59 @@ static inline int z_validate(const void* file, size_t size)
 }


+// scan for and return a pointer to a Zip record, or 0 if not found.
+// <start> is the expected position; we scan from there until EOF for
+// the given ID (fourcc). <record_size> (includes ID field) bytes must
+// remain before EOF - this makes sure the record is completely in the file.
+// used by z_find_ecdr and z_extract_cdfh.
+static const u8* z_find_id(const u8* file, size_t size, const u8* start, const char id[5], size_t record_size)
+{
+	ssize_t bytes_left = (ssize_t)((file+size) - start - record_size);
+
+	const u8* p = start;
+		// don't increment function argument directly,
+		// so we can warn the user if we had to scan.
+
+	while(bytes_left-- >= 0)
+	{
+		// found it
+		if(*(u32*)p == *(u32*)id)
+		{
+#ifndef NDEBUG
+			if(p != start)
+				debug_warn("z_find_id: archive damaged, but still found next record.");
+#endif
+			return p;
+		}
+
+		p++;
+			// be careful not to increment before comparison;
+			// id may already be at <start>.
+	}
+
+	// passed EOF, didn't find it.
+	debug_warn("z_find_id: archive corrupted, next record not found.");
+	return 0;
+}
+
+
 // find "End of Central Dir Record" in file.
 // z_validate has made sure size >= ECDR_SIZE.
 // return -1 on failure (output param invalid), otherwise 0.
-static int z_find_ecdr(const void* file, size_t size, const u8*& ecdr_)
+static int z_find_ecdr(const u8* file, size_t size, const u8*& ecdr_)
 {
 	// early out: check expected case (ECDR at EOF; no file comment)
-	const u8* ecdr = (const u8*)file + size - ECDR_SIZE;
+	const u8* ecdr = file + size - ECDR_SIZE;
 	if(*(u32*)ecdr == *(u32*)&ecdr_id)
 		goto found_ecdr;

-	{
-
 	// scan the last 66000 bytes of file for ecdr_id signature
 	// (the Zip archive comment field, up to 64k, may follow ECDR).
 	// if the zip file is < 66000 bytes, scan the whole file.
-
-	size_t bytes_left = MIN(66000, size);
-	ecdr = (const u8*)file + size - bytes_left;
-
-	while(bytes_left >= 4)
-	{
-		if(*(u32*)ecdr == *(u32*)&ecdr_id)
-			goto found_ecdr;
-
-		// check next 4 bytes (unaligned!!)
-		ecdr++;
-		bytes_left--;
-	}
-
-	// reached EOF and still haven't found the ECDR identifier.
-	return ERR_CORRUPTED;
-
-	}
+	const u8* start = file + size - MIN(66000, size);
+	ecdr = z_find_id(file, size, start, ecdr_id, ECDR_SIZE);
+	if(!ecdr)
+		return ERR_CORRUPTED;

 found_ecdr:
 	ecdr_ = ecdr;
@ -154,37 +175,6 @@ found_ecdr:
 }


-#ifdef PARANOIA
-
-// make sure the LFH fields match those passed (from the CDFH).
-// only used in PARANOIA builds - costs time when opening archives.
-// return -1 on error or mismatch, otherwise 0.
-static int z_verify_lfh(const void* file, const off_t lfh_ofs, const off_t file_ofs)
-{
-	assert(lfh_ofs < file_ofs);	// header comes before file
-
-	const u8* lfh = (const u8*)file + lfh_ofs;
-
-	// LFH signature doesn't match
-	if(*(u32*)lfh != *(u32*)lfh_id)
-		return ERR_CORRUPTED;
-
-	const u16 lfh_fn_len = read_le16(lfh+26);
-	const u16 lfh_e_len  = read_le16(lfh+28);
-
-	const off_t lfh_file_ofs = lfh_ofs + LFH_SIZE + lfh_fn_len + lfh_e_len;
-
-	// CDFH and LFH are inconsistent =>
-	// normal builds would return incorrect offsets.
-	if(file_ofs != lfh_file_ofs)
-		return  ERR_CORRUPTED;
-
-	return 0;
-}
-
-#endif	// #ifdef PARANOIA
-
-
 //
 // date conversion from DOS to Unix
 //
@ -203,7 +193,7 @@ static uint bits(uint num, uint lo_idx, uint hi_idx)

 static time_t convert_dos_date(u16 fatdate, u16 fattime)
 {
-	struct tm t;
+	struct tm t;							// struct tm format:
 	t.tm_sec   = bits(fattime, 0,4) * 2;	// [0,59]
 	t.tm_min   = bits(fattime, 5,10);		// [0,59]
 	t.tm_hour  = bits(fattime, 11,15);		// [0,23]
@ -212,6 +202,9 @@ static time_t convert_dos_date(u16 fatdate, u16 fattime)
 	t.tm_year  = bits(fatdate, 9,15) + 80;	// since 1900
 	t.tm_isdst = -1;	// unknown - let libc determine

+	assert(t.tm_year < 138);
+		// otherwise: totally bogus, and at the limit of 32-bit time_t
+
 	time_t ret = mktime(&t);
 	if(ret == (time_t)-1)
 		debug_warn("convert_dos_date: mktime failed");
@ -222,32 +215,31 @@ static time_t convert_dos_date(u16 fatdate, u16 fattime)
 ///////////////////////////////////////////////////////////////////////////////


-// if cdfh is valid and describes a file, extract its name, offset and size
-// for use in z_enum_files (passes it to lookup).
-// return -1 on error (output params invalid), or 0 on success.
-static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, size_t& fn_len, ZLoc* loc)
+enum z_extract_cdfh_ret
 {
-	// sanity check: did we even read the CDFH?
-	if(bytes_left < CDFH_SIZE)
-	{
-		debug_warn("z_extract_cdfh: CDFH not in buffer!");
-		return -1;
-	}
+	Z_CDFH_ABORT   = -1,	// next CDFH not found; abort.
+	Z_CDFH_FILE_OK =  0,	// valid file; add to lookup.
+	Z_CDFH_SKIPPED =  1		// not valid file, but have next CDFH; continue.
+};

-	// this is checked when advancing,
-	// but we need this for the first central dir entry.
-	if(*(u32*)cdfh != *(u32*)cdfh_id)
-	{
-		debug_warn("z_extract_cdfh: CDFH signature not found");
-		return -1;
-	}
+// read the current CDFH. if a valid file, return its filename and ZLoc.
+// finally, advance to next CDFH.
+// return -1 on error (output params invalid), or 0 on success.
+// called by z_enum_files, which passes the output to lookup.
+static int z_extract_cdfh(const u8* file, size_t size,              // in
+	const u8*& cdfh, const char*& fn, size_t& fn_len, ZLoc* loc)    // out
+{
+	// scan for next CDFH (at or beyond current cdfh position)
+	cdfh = z_find_id(file, size, cdfh, cdfh_id, CDFH_SIZE);
+	if(!cdfh)					// no (further) CDFH found:
+		return Z_CDFH_ABORT;	// caller will abort.

 	// extract fields from CDFH
-	const u8 method = cdfh[10];
+	const u16 method  = read_le16(cdfh+10);
 	const u16 fattime = read_le16(cdfh+12);
 	const u16 fatdate = read_le16(cdfh+14);
-	const u32 csize_  = read_le32(cdfh+20);
-	const u32 ucsize_ = read_le32(cdfh+24);
+	const u32 csize   = read_le32(cdfh+20);
+	const u32 ucsize  = read_le32(cdfh+24);
 	const u16 fn_len_ = read_le16(cdfh+28);
 	const u16 e_len   = read_le16(cdfh+30);
 	const u16 c_len   = read_le16(cdfh+32);
@ -255,40 +247,43 @@ static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, s
 	const char* fn_ = (const char*)cdfh+CDFH_SIZE;
 		// not 0-terminated!

-	//
-	// check if valid and data should actually be returned
-	//
+	// find corresponding LFH, needed to calculate file offset
+	// (its extra field may not match that reported by CDFH!).
+	// TODO: this is slow, due to seeking backwards.
+	// optimization: calculate only on demand (i.e. open, not mount)?
+	const u8* lfh = z_find_id(file, size, (u8*)file+lfh_ofs, lfh_id, LFH_SIZE);

-	// .. compression method is unknown (neither deflated nor stored)
-	if(method & ~8)
-	{
-		debug_warn("z_extract_cdfh: unknown compression method");
-		return ERR_NOT_SUPPORTED;
-	}
-	// .. this is a directory entry; we only want files.
-	if(!csize_ && !ucsize_)
-        return -1;
-#ifdef PARANOIA
-	// .. CDFH's file ofs doesn't match that reported by LFH.
-	// don't check this in normal builds - seeking between LFHs and
-	// central dir is slow. this happens if the headers differ for some
-	// reason; we'd notice anyway, because inflate will fail
-	// (since file offset is incorrect).
-	if(z_verify_lfh(file, lfh_ofs, file_ofs) != 0)
-		return -1;
-#endif
+	// advance CDFH; we now know where the next CDFH entry should be,
+	// but will still scan ahead for its id on next call.
+	cdfh += CDFH_SIZE + fn_len_ + e_len + c_len;

+	// is this entry not a valid file?
+	if(
+		// compression method is unknown (neither deflated nor stored)
+		(method & ~8) ||
+		// it's a directory entry (we only want files).
+		(!csize && !ucsize) ||
+		// LFH signature not found
+		(!lfh)
+	)
+		return Z_CDFH_SKIPPED;
+
+	// get actual file ofs (see above)
+	const u16 lfh_fn_len = read_le16(lfh+26);
+	const u16 lfh_e_len  = read_le16(lfh+28);
+	const off_t file_ofs = lfh_ofs + LFH_SIZE + lfh_fn_len + lfh_e_len;
+		// LFH doesn't have a comment field!

 	// write out entry data
 	fn     = fn_;
 	fn_len = fn_len_;
-	loc->ofs    = (off_t)(lfh_ofs + LFH_SIZE + fn_len_ + e_len);
-	loc->csize  = (off_t)(method? csize_ : 0);
+	loc->ofs    = file_ofs;
+	loc->csize  = (off_t)(method? csize : 0);
 		// if not compressed, csize = 0 (see zfile_compressed)
-	loc->ucsize = (off_t)ucsize_;
+	loc->ucsize = (off_t)ucsize;
 	loc->mtime  = convert_dos_date(fatdate, fattime);

-	return 0;
+	return Z_CDFH_FILE_OK;
 }


@ -307,7 +302,7 @@ static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, s
 // loc is only valid during the callback! must be copied or saved.
 typedef int(*CDFH_CB)(uintptr_t user, i32 idx, const char* fn, size_t fn_len, const ZLoc* loc);

-static int z_enum_files(const void* file, const size_t size, const CDFH_CB cb, const uintptr_t user)
+static int z_enum_files(const u8* file, const size_t size, const CDFH_CB cb, const uintptr_t user)
 {
 	// find "End of Central Directory Record"
 	const u8* ecdr;
@ -330,44 +325,25 @@ static int z_enum_files(const void* file, const size_t size, const CDFH_CB cb, c
 	i32 idx = 0;
 		// only incremented when valid, so we don't leave holes
 		// in lookup's arrays (bad locality).
-	i32 entries_left = num_entries;
-	for(;;)
-	{
-		entries_left--;
-		ssize_t bytes_left = (ssize_t)size - ( cdfh - (u8*)file );

+	for(i32 i = 0; i < num_entries; i++)
+	{
 		const char* fn;
 		size_t fn_len;
 		ZLoc loc;
-		// CDFH is valid and of a file
-		if(z_extract_cdfh(cdfh, bytes_left, fn, fn_len, &loc) == 0)
+		int ret = z_extract_cdfh(file, size, cdfh, fn, fn_len, &loc);
+		// valid file
+		if(ret == Z_CDFH_FILE_OK)
 		{
 			cb(user, idx, fn, fn_len, &loc);
 			idx++;	// see rationale above
-
-			// advance to next cdfh (the easy way - we have a valid fn_len
-			// and assume there's no extra data stored after the header).
-			cdfh += CDFH_SIZE + fn_len;
-			if(*(u32*)cdfh == *(u32*)cdfh_id)
-				goto found_next_cdfh;
-
-			// not found; scan for it below (as if the CDFH were invalid).
-			// note: don't restore the previous cdfh pointer - fn_len is
-			// correct and there are additional fields before the next CDFH.
 		}
-
-		if(!entries_left)
-			break;
-
-		// scan for the next CDFH (its signature)
-		for(ssize_t i = 0; i < bytes_left - (ssize_t)CDFH_SIZE; i++)
-			if(*(u32*)(++cdfh) == *(u32*)cdfh_id)
-				goto found_next_cdfh;
-
-		debug_warn("z_enum_files: next CDFH not found");
-		return -1;
-
-found_next_cdfh:;
+		// next CDFH not found (Zip archive corrupted)
+		else if(ret == Z_CDFH_ABORT)
+			return -1;
+		// skipping this CDFH (e.g. if directory)
+		else
+			;
 	}

 	return 0;
@ -436,28 +412,28 @@ struct LookupInfo
 };


-// support for case-insensitive filenames: the hash of each
-// filename string is saved in lookup_add_file_cb and searched for by
-// lookup_get_file_info. in both cases, we convert a temporary to
-// lowercase before hashing it.
-static void strcpy_lower(char* dst, const char* src)
+// write a lower-case copy of <src> to <dst>, which holds <buf_size> bytes.
+// up to buf_size-1 chars are written; we always 0-terminate the output!
+//
+// this routine is used to convert OS and user-specified filenames
+// to lowercase before hashing them and then comparing.
+static void copy_lower_case(char* dst, const char* src, size_t buf_size)
 {
+	assert(buf_size > 0);	// otherwise, no room for trailing '\0'
+
 	int c;
 	do
 	{
 		c = *src++;
+		// this is the last remaining byte in the buffer.
+		// loop will exit below after writing 0-terminator.
+		if(--buf_size == 0)
+			c = '\0';
 		*dst++ = tolower(c);
 	}
 	while(c != '\0');
 }

-static void strncpy_lower(char* dst, const char* src, size_t count)
-{
-	int n = (int)count;
-	while (--n >= 0)
-		*dst++ = tolower(*src++);
-}
-

 // add file <fn> to the lookup data structure.
 // called from z_enum_files in order (0 <= idx < num_entries).
@ -499,8 +475,11 @@ static int lookup_add_file_cb(uintptr_t user, i32 idx,

 	// hash (lower case!) filename
 	char lc_fn[PATH_MAX];
-	strncpy_lower(lc_fn, fn, fn_len);
-	FnHash fn_hash = fnv_hash(lc_fn, fn_len);
+	size_t max_size = fn_len+1;	// fn not 0-terminated
+	if(max_size > PATH_MAX)		// (this avoids stupid min() type warning)
+		max_size = PATH_MAX;	// clamp to actual buffer size
+	copy_lower_case(lc_fn, fn, max_size);
+	FnHash fn_hash = fnv_hash(lc_fn);

 	// fill ZEnt
 	ZEnt* ent = li->ents + idx;
@ -524,7 +503,7 @@ static int lookup_add_file_cb(uintptr_t user, i32 idx,

 // initialize lookup data structure for the given Zip archive:
 // adds all files to the index.
-static int lookup_init(LookupInfo* li, const void* file, const size_t size)
+static int lookup_init(LookupInfo* li, const u8* file, const size_t size)
 {
 	int err;

@ -570,12 +549,12 @@ static int lookup_free(LookupInfo* li)
 }


-// look up ZLoc, given filename.
+// look up ZLoc, given filename (untrusted!).
 static int lookup_get_file_info(LookupInfo* li, const char* fn, ZLoc* loc)
 {
 	// hash (lower case!) filename
 	char lc_fn[PATH_MAX];
-	strcpy_lower(lc_fn, fn);
+	copy_lower_case(lc_fn, fn, sizeof(lc_fn));
 	const FnHash fn_hash = fnv_hash(lc_fn);

 	const FnHash* fn_hashes = li->fn_hashes;
@ -696,7 +675,7 @@ static int ZArchive_reload(ZArchive* za, const char* fn, Handle)
 	if(err < 0)
 		goto exit_close;

-	err = lookup_init(&za->li, file, size);
+	err = lookup_init(&za->li, (u8*)file, size);
 	if(err < 0)
 		goto exit_unmap_close;