1
0
forked from 0ad/0ad

fixed incorrect handling of extra fields

(zip archives with extended file attributes failed to load)

improved z_extract_cdfh/z_enum_files.

This was SVN commit r1527.
This commit is contained in:
janwas 2004-12-18 14:45:04 +00:00
parent b291fafadb
commit bb2de67c31

View File

@ -95,6 +95,7 @@ struct ZLoc
// 4-byte signatures that introduce the Zip records we look for.
// they are compared against the raw archive contents when scanning.
static const char cdfh_id[] = "PK\1\2";	// central directory file header
static const char lfh_id[] = "PK\3\4";	// local file header
static const char ecdr_id[] = "PK\5\6";	// end of central directory record
// lengths include the id field!
// these cover only the fixed-length part of each record; the
// variable-length fields (filename, extra field, comment) follow it.
const size_t CDFH_SIZE = 46;
const size_t LFH_SIZE = 30;
const size_t ECDR_SIZE = 22;
@ -102,7 +103,7 @@ const size_t ECDR_SIZE = 22;
// return -1 if file is obviously not a valid Zip archive,
// otherwise 0. used as early-out test in lookup_init (see call site).
static inline int z_validate(const void* file, size_t size)
static inline int z_validate(const u8* file, size_t size)
{
// make sure it's big enough to check the header and for
// z_find_ecdr to succeed (if smaller, it's definitely bogus).
@ -114,39 +115,59 @@ static inline int z_validate(const void* file, size_t size)
}
// scan for a Zip record and return a pointer to it, or 0 if not found.
//
// file, size:  the entire archive contents.
// start:       position at which the record is expected; we scan forward
//              from there, one byte at a time, until EOF.
// id:          4-byte record signature (fourcc) to look for.
// record_size: fixed size of the record, including the id field. a match
//              is only accepted if this many bytes remain before EOF,
//              which makes sure the record is completely in the file.
//
// warns (debug builds) if the record was found, but not at <start>;
// warns and returns 0 if it was not found at all.
// used by z_find_ecdr and z_extract_cdfh.
static const u8* z_find_id(const u8* file, size_t size, const u8* start, const char id[5], size_t record_size)
{
	// number of positions beyond <start> at which a complete record would
	// still fit; negative if it doesn't even fit at <start>.
	// computed in signed arithmetic so that the "doesn't fit" case
	// doesn't depend on unsigned wraparound.
	const ssize_t bytes_left = (ssize_t)((file+size) - start) - (ssize_t)record_size;

	// don't increment function argument directly,
	// so we can warn the user if we had to scan.
	const u8* p = start;

	// note: the comparison comes before the increment -
	// id may already be at <start>.
	for(ssize_t i = 0; i <= bytes_left; i++, p++)
	{
		// compare the id byte by byte: p may have any alignment, so
		// reading it through a u32 pointer is not portable.
		if(p[0] != (u8)id[0] || p[1] != (u8)id[1] ||
		   p[2] != (u8)id[2] || p[3] != (u8)id[3])
			continue;

		// found it
#ifndef NDEBUG
		if(p != start)
			debug_warn("z_find_id: archive damaged, but still found next record.");
#endif
		return p;
	}

	// passed EOF, didn't find it.
	debug_warn("z_find_id: archive corrupted, next record not found.");
	return 0;
}
// find "End of Central Dir Record" in file.
// z_validate has made sure size >= ECDR_SIZE.
// return -1 on failure (output param invalid), otherwise 0.
static int z_find_ecdr(const void* file, size_t size, const u8*& ecdr_)
static int z_find_ecdr(const u8* file, size_t size, const u8*& ecdr_)
{
// early out: check expected case (ECDR at EOF; no file comment)
const u8* ecdr = (const u8*)file + size - ECDR_SIZE;
const u8* ecdr = file + size - ECDR_SIZE;
if(*(u32*)ecdr == *(u32*)&ecdr_id)
goto found_ecdr;
{
// scan the last 66000 bytes of file for ecdr_id signature
// (the Zip archive comment field, up to 64k, may follow ECDR).
// if the zip file is < 66000 bytes, scan the whole file.
size_t bytes_left = MIN(66000, size);
ecdr = (const u8*)file + size - bytes_left;
while(bytes_left >= 4)
{
if(*(u32*)ecdr == *(u32*)&ecdr_id)
goto found_ecdr;
// check next 4 bytes (unaligned!!)
ecdr++;
bytes_left--;
}
// reached EOF and still haven't found the ECDR identifier.
return ERR_CORRUPTED;
}
const u8* start = file + size - MIN(66000, size);
ecdr = z_find_id(file, size, start, ecdr_id, ECDR_SIZE);
if(!ecdr)
return ERR_CORRUPTED;
found_ecdr:
ecdr_ = ecdr;
@ -154,37 +175,6 @@ found_ecdr:
}
#ifdef PARANOIA
// make sure the LFH fields match those passed (from the CDFH).
// only used in PARANOIA builds - costs time when opening archives.
// return -1 on error or mismatch, otherwise 0.
static int z_verify_lfh(const void* file, const off_t lfh_ofs, const off_t file_ofs)
{
assert(lfh_ofs < file_ofs); // header comes before file
const u8* lfh = (const u8*)file + lfh_ofs;
// LFH signature doesn't match
if(*(u32*)lfh != *(u32*)lfh_id)
return ERR_CORRUPTED;
const u16 lfh_fn_len = read_le16(lfh+26);
const u16 lfh_e_len = read_le16(lfh+28);
const off_t lfh_file_ofs = lfh_ofs + LFH_SIZE + lfh_fn_len + lfh_e_len;
// CDFH and LFH are inconsistent =>
// normal builds would return incorrect offsets.
if(file_ofs != lfh_file_ofs)
return ERR_CORRUPTED;
return 0;
}
#endif // #ifdef PARANOIA
//
// date conversion from DOS to Unix
//
@ -203,7 +193,7 @@ static uint bits(uint num, uint lo_idx, uint hi_idx)
static time_t convert_dos_date(u16 fatdate, u16 fattime)
{
struct tm t;
struct tm t; // struct tm format:
t.tm_sec = bits(fattime, 0,4) * 2; // [0,59]
t.tm_min = bits(fattime, 5,10); // [0,59]
t.tm_hour = bits(fattime, 11,15); // [0,23]
@ -212,6 +202,9 @@ static time_t convert_dos_date(u16 fatdate, u16 fattime)
t.tm_year = bits(fatdate, 9,15) + 80; // since 1900
t.tm_isdst = -1; // unknown - let libc determine
assert(t.tm_year < 138);
// otherwise: totally bogus, and at the limit of 32-bit time_t
time_t ret = mktime(&t);
if(ret == (time_t)-1)
debug_warn("convert_dos_date: mktime failed");
@ -222,32 +215,31 @@ static time_t convert_dos_date(u16 fatdate, u16 fattime)
///////////////////////////////////////////////////////////////////////////////
// if cdfh is valid and describes a file, extract its name, offset and size
// for use in z_enum_files (passes it to lookup).
// return -1 on error (output params invalid), or 0 on success.
static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, size_t& fn_len, ZLoc* loc)
enum z_extract_cdfh_ret
{
// sanity check: did we even read the CDFH?
if(bytes_left < CDFH_SIZE)
{
debug_warn("z_extract_cdfh: CDFH not in buffer!");
return -1;
}
Z_CDFH_ABORT = -1, // next CDFH not found; abort.
Z_CDFH_FILE_OK = 0, // valid file; add to lookup.
Z_CDFH_SKIPPED = 1 // not valid file, but have next CDFH; continue.
};
// this is checked when advancing,
// but we need this for the first central dir entry.
if(*(u32*)cdfh != *(u32*)cdfh_id)
{
debug_warn("z_extract_cdfh: CDFH signature not found");
return -1;
}
// read the current CDFH. if a valid file, return its filename and ZLoc.
// finally, advance to next CDFH.
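// returns a z_extract_cdfh_ret value: Z_CDFH_FILE_OK if the output params
// describe a valid file; Z_CDFH_SKIPPED if this entry is not a usable file
// (fn, fn_len and loc are not written); Z_CDFH_ABORT if no further CDFH
// was found.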
// called by z_enum_files, which passes the output to lookup.
static int z_extract_cdfh(const u8* file, size_t size, // in
const u8*& cdfh, const char*& fn, size_t& fn_len, ZLoc* loc) // out
{
// scan for next CDFH (at or beyond current cdfh position)
cdfh = z_find_id(file, size, cdfh, cdfh_id, CDFH_SIZE);
if(!cdfh) // no (further) CDFH found:
return Z_CDFH_ABORT; // caller will abort.
// extract fields from CDFH
const u8 method = cdfh[10];
const u16 method = read_le16(cdfh+10);
const u16 fattime = read_le16(cdfh+12);
const u16 fatdate = read_le16(cdfh+14);
const u32 csize_ = read_le32(cdfh+20);
const u32 ucsize_ = read_le32(cdfh+24);
const u32 csize = read_le32(cdfh+20);
const u32 ucsize = read_le32(cdfh+24);
const u16 fn_len_ = read_le16(cdfh+28);
const u16 e_len = read_le16(cdfh+30);
const u16 c_len = read_le16(cdfh+32);
@ -255,40 +247,43 @@ static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, s
const char* fn_ = (const char*)cdfh+CDFH_SIZE;
// not 0-terminated!
//
// check if valid and data should actually be returned
//
// find corresponding LFH, needed to calculate file offset
// (its extra field may not match that reported by CDFH!).
// TODO: this is slow, due to seeking backwards.
// optimization: calculate only on demand (i.e. open, not mount)?
const u8* lfh = z_find_id(file, size, (u8*)file+lfh_ofs, lfh_id, LFH_SIZE);
// .. compression method is unknown (neither deflated nor stored)
if(method & ~8)
{
debug_warn("z_extract_cdfh: unknown compression method");
return ERR_NOT_SUPPORTED;
}
// .. this is a directory entry; we only want files.
if(!csize_ && !ucsize_)
return -1;
#ifdef PARANOIA
// .. CDFH's file ofs doesn't match that reported by LFH.
// don't check this in normal builds - seeking between LFHs and
// central dir is slow. this happens if the headers differ for some
// reason; we'd notice anyway, because inflate will fail
// (since file offset is incorrect).
if(z_verify_lfh(file, lfh_ofs, file_ofs) != 0)
return -1;
#endif
// advance CDFH; we now know where the next CDFH entry should be,
// but will still scan ahead for its id on next call.
cdfh += CDFH_SIZE + fn_len_ + e_len + c_len;
// is this entry not a valid file?
if(
// compression method is unknown (neither deflated nor stored)
(method & ~8) ||
// it's a directory entry (we only want files).
(!csize && !ucsize) ||
// LFH signature not found
(!lfh)
)
return Z_CDFH_SKIPPED;
// get actual file ofs (see above)
const u16 lfh_fn_len = read_le16(lfh+26);
const u16 lfh_e_len = read_le16(lfh+28);
const off_t file_ofs = lfh_ofs + LFH_SIZE + lfh_fn_len + lfh_e_len;
// LFH doesn't have a comment field!
// write out entry data
fn = fn_;
fn_len = fn_len_;
loc->ofs = (off_t)(lfh_ofs + LFH_SIZE + fn_len_ + e_len);
loc->csize = (off_t)(method? csize_ : 0);
loc->ofs = file_ofs;
loc->csize = (off_t)(method? csize : 0);
// if not compressed, csize = 0 (see zfile_compressed)
loc->ucsize = (off_t)ucsize_;
loc->ucsize = (off_t)ucsize;
loc->mtime = convert_dos_date(fatdate, fattime);
return 0;
return Z_CDFH_FILE_OK;
}
@ -307,7 +302,7 @@ static int z_extract_cdfh(const u8* cdfh, ssize_t bytes_left, const char*& fn, s
// loc is only valid during the callback! must be copied or saved.
typedef int(*CDFH_CB)(uintptr_t user, i32 idx, const char* fn, size_t fn_len, const ZLoc* loc);
static int z_enum_files(const void* file, const size_t size, const CDFH_CB cb, const uintptr_t user)
static int z_enum_files(const u8* file, const size_t size, const CDFH_CB cb, const uintptr_t user)
{
// find "End of Central Directory Record"
const u8* ecdr;
@ -330,44 +325,25 @@ static int z_enum_files(const void* file, const size_t size, const CDFH_CB cb, c
i32 idx = 0;
// only incremented when valid, so we don't leave holes
// in lookup's arrays (bad locality).
i32 entries_left = num_entries;
for(;;)
{
entries_left--;
ssize_t bytes_left = (ssize_t)size - ( cdfh - (u8*)file );
for(i32 i = 0; i < num_entries; i++)
{
const char* fn;
size_t fn_len;
ZLoc loc;
// CDFH is valid and of a file
if(z_extract_cdfh(cdfh, bytes_left, fn, fn_len, &loc) == 0)
int ret = z_extract_cdfh(file, size, cdfh, fn, fn_len, &loc);
// valid file
if(ret == Z_CDFH_FILE_OK)
{
cb(user, idx, fn, fn_len, &loc);
idx++; // see rationale above
// advance to next cdfh (the easy way - we have a valid fn_len
// and assume there's no extra data stored after the header).
cdfh += CDFH_SIZE + fn_len;
if(*(u32*)cdfh == *(u32*)cdfh_id)
goto found_next_cdfh;
// not found; scan for it below (as if the CDFH were invalid).
// note: don't restore the previous cdfh pointer - fn_len is
// correct and there are additional fields before the next CDFH.
}
if(!entries_left)
break;
// scan for the next CDFH (its signature)
for(ssize_t i = 0; i < bytes_left - (ssize_t)CDFH_SIZE; i++)
if(*(u32*)(++cdfh) == *(u32*)cdfh_id)
goto found_next_cdfh;
debug_warn("z_enum_files: next CDFH not found");
return -1;
found_next_cdfh:;
// next CDFH not found (Zip archive corrupted)
else if(ret == Z_CDFH_ABORT)
return -1;
// skipping this CDFH (e.g. if directory)
else
;
}
return 0;
@ -436,28 +412,28 @@ struct LookupInfo
};
// support for case-insensitive filenames: the hash of each
// filename string is saved in lookup_add_file_cb and searched for by
// lookup_get_file_info. in both cases, we convert a temporary to
// lowercase before hashing it.
static void strcpy_lower(char* dst, const char* src)
// write a lower-case copy of <src> to <dst>, which holds <buf_size> bytes.
// up to buf_size-1 chars are written; we always 0-terminate the output!
//
// copying stops at the 0-terminator of <src> or once the buffer is full,
// whichever comes first. at most buf_size-1 bytes of <src> are read, so
// <src> need not be 0-terminated if it holds at least that many chars.
//
// this routine is used to convert OS and user-specified filenames
// to lowercase before hashing them and then comparing.
static void copy_lower_case(char* dst, const char* src, size_t buf_size)
{
	assert(buf_size > 0);	// otherwise, no room for trailing '\0'
	// builds without assert: bail instead of letting the counter wrap.
	if(buf_size == 0)
		return;

	// each iteration reserves room for one character plus the terminator;
	// <src> is only dereferenced if that character will actually be stored.
	while(--buf_size != 0)
	{
		// tolower requires a value representable as unsigned char
		// (plain char may be negative for non-ASCII bytes).
		const unsigned char c = (unsigned char)*src++;
		if(c == '\0')
			break;
		*dst++ = (char)tolower(c);
	}

	*dst = '\0';
}
// write lower-case copies of the first <count> chars of <src> to <dst>.
// exactly <count> chars are read and written; <src> is not checked for
// a 0-terminator and the output is NOT 0-terminated.
// both buffers must hold at least <count> chars.
static void strncpy_lower(char* dst, const char* src, size_t count)
{
	// index with size_t: converting count to int would truncate large values.
	for(size_t i = 0; i < count; i++)
	{
		// tolower requires a value representable as unsigned char
		// (plain char may be negative for non-ASCII bytes).
		dst[i] = (char)tolower((unsigned char)src[i]);
	}
}
// add file <fn> to the lookup data structure.
// called from z_enum_files in order (0 <= idx < num_entries).
@ -499,8 +475,11 @@ static int lookup_add_file_cb(uintptr_t user, i32 idx,
// hash (lower case!) filename
char lc_fn[PATH_MAX];
strncpy_lower(lc_fn, fn, fn_len);
FnHash fn_hash = fnv_hash(lc_fn, fn_len);
size_t max_size = fn_len+1; // fn not 0-terminated
if(max_size > PATH_MAX) // (this avoids stupid min() type warning)
max_size = PATH_MAX; // clamp to actual buffer size
copy_lower_case(lc_fn, fn, max_size);
FnHash fn_hash = fnv_hash(lc_fn);
// fill ZEnt
ZEnt* ent = li->ents + idx;
@ -524,7 +503,7 @@ static int lookup_add_file_cb(uintptr_t user, i32 idx,
// initialize lookup data structure for the given Zip archive:
// adds all files to the index.
static int lookup_init(LookupInfo* li, const void* file, const size_t size)
static int lookup_init(LookupInfo* li, const u8* file, const size_t size)
{
int err;
@ -570,12 +549,12 @@ static int lookup_free(LookupInfo* li)
}
// look up ZLoc, given filename.
// look up ZLoc, given filename (untrusted!).
static int lookup_get_file_info(LookupInfo* li, const char* fn, ZLoc* loc)
{
// hash (lower case!) filename
char lc_fn[PATH_MAX];
strcpy_lower(lc_fn, fn);
copy_lower_case(lc_fn, fn, sizeof(lc_fn));
const FnHash fn_hash = fnv_hash(lc_fn);
const FnHash* fn_hashes = li->fn_hashes;
@ -696,7 +675,7 @@ static int ZArchive_reload(ZArchive* za, const char* fn, Handle)
if(err < 0)
goto exit_close;
err = lookup_init(&za->li, file, size);
err = lookup_init(&za->li, (u8*)file, size);
if(err < 0)
goto exit_unmap_close;