From 59231be9a7444777117713e68f732a9db8a59b22 Mon Sep 17 00:00:00 2001 From: janwas Date: Tue, 10 Aug 2004 15:56:04 +0000 Subject: [PATCH] massive cleanup, commenting, hardening, bugfixes => mostly works :) This was SVN commit r955. --- source/lib/res/zip.cpp | 278 +++++++++++++++++++---------------------- 1 file changed, 132 insertions(+), 146 deletions(-) diff --git a/source/lib/res/zip.cpp b/source/lib/res/zip.cpp index 7f83039aa7..5e9985dfb9 100755 --- a/source/lib/res/zip.cpp +++ b/source/lib/res/zip.cpp @@ -31,6 +31,7 @@ #ifndef NO_ZLIB # define ZLIB_WINAPI # include + # ifdef _MSC_VER # ifdef NDEBUG # pragma comment(lib, "zlib1.lib") @@ -45,14 +46,14 @@ /////////////////////////////////////////////////////////////////////////////// // -// Zip-specific code -// passes list of files in archive to lookup +// z_*: Zip-specific code +// passes the list of files in an archive to lookup. // /////////////////////////////////////////////////////////////////////////////// // convenience container for location / size of file in archive. -struct ZFileLoc +struct ZLoc { off_t ofs; off_t csize; // = 0 if not compressed @@ -64,38 +65,39 @@ struct ZFileLoc // but not critical, since Zip files are compressed individually. // (if we read too much, it's ignored by inflate). // - // we also need a way to check if file compressed (e.g. to fail mmap - // requests if the file is compressed). packing a bit in ofs or + // we also need a way to check if a file is compressed (e.g. to fail + // mmap requests if the file is compressed). packing a bit in ofs or // ucsize is error prone and ugly (1 bit less won't hurt though). // any other way will mess up the nice 2^n byte size anyway, so // might as well store csize. }; -static inline int zip_validate(const void* const file, const size_t size) +// Zip file data structures and signatures +static const char cdfh_id[] = "PK\1\2"; +static const char lfh_id[] = "PK\3\4"; +static const char ecdr_id[] = "PK\5\6"; +const size_t CDFH_SIZE = 46; +const size_t LFH_SIZE = 30; +const size_t ECDR_SIZE = 22; + + +static inline int z_validate(const void* const file, const size_t size) { - if(size < 2) + // make sure it's big enough to check the header and for + // z_find_ecdr to succeed (if smaller, it's obviously bogus). + if(size < 22) return -1; - const u8* p = (const u8*)file; - if(p[0] != 'P' || p[1] != 'K') - return -1; - - return 0; + // check "header" (first CDFH) signature + return (*(u32*)file == *(u32*)&cdfh_id)? 0 : -1; } // find end of central dir record in file (loaded or mapped). -static int zip_find_ecdr(const void* const file, const size_t size, const u8*& ecdr_) +// z_validate ensures size >= 22. +static int z_find_ecdr(const void* const file, const size_t size, const u8*& ecdr_) { - const char ecdr_id[] = "PK\5\6"; // signature - const size_t ECDR_SIZE = 22; - - if(size < ECDR_SIZE) - { - debug_warn("zip_find_ecdr: size is way too small"); - return -1; - } const u8* ecdr = (const u8*)file + size - ECDR_SIZE; // early out: check expected case (ECDR at EOF; no file comment) @@ -137,11 +139,8 @@ found_ecdr: // make sure the LFH fields match those passed (from the CDFH). // only used in PARANOIA builds - costs time when opening archives. -static int zip_verify_lfh(const void* const file, const off_t lfh_ofs, const off_t file_ofs) +static int z_verify_lfh(const void* const file, const off_t lfh_ofs, const off_t file_ofs) { - const char lfh_id[] = "PK\3\4"; // signature - const size_t LFH_SIZE = 30; - assert(lfh_ofs < file_ofs); // header comes before file const u8* lfh = (const u8*)file + lfh_ofs; @@ -173,12 +172,8 @@ static int zip_verify_lfh(const void* const file, const off_t lfh_ofs, const off // extract information from the current Central Directory File Header; // advance cdfh to point to the next; return -1 on unrecoverable error, // 0 on success (<==> output fields are valid), > 0 if file is invalid. -static int zip_read_cdfh(const u8*& cdfh, const char*& fn, size_t& fn_len, ZFileLoc* const loc) +static int z_read_cdfh(const u8*& cdfh, const char*& fn, size_t& fn_len, ZLoc* const loc) { - const char cdfh_id[] = "PK\1\2"; // signature - const size_t CDFH_SIZE = 46; - const size_t LFH_SIZE = 30; - if(*(u32*)cdfh != *(u32*)cdfh_id) { debug_warn("CDFH corrupt! (signature doesn't match)"); @@ -204,8 +199,8 @@ static int zip_read_cdfh(const u8*& cdfh, const char*& fn, size_t& fn_len, ZFile debug_warn("warning: unknown compression method"); goto skip_file; } - // tell is_compressed that the file is stored by - // setting csize_ to 0. + // tell is_compressed that the file is uncompressed, + // by setting csize_ to 0. if(method == 0) csize_ = 0; @@ -222,7 +217,7 @@ static int zip_read_cdfh(const u8*& cdfh, const char*& fn, size_t& fn_len, ZFile // we don't bother checking for this in normal builds: if they were // to be different, we'd notice: headers of files would end up corrupted. #ifdef PARANOIA - if(!zip_verify_lfh(file, lfh_ofs, file_ofs)) + if(!z_verify_lfh(file, lfh_ofs, file_ofs)) goto skip_file; #endif @@ -252,7 +247,7 @@ found_next_cdfh: // fn (filename) is not necessarily 0-terminated! // loc is only valid during the callback! must be copied or saved. -typedef int(*ZipCdfhCB)(const uintptr_t user, const i32 idx, const char* const fn, const size_t fn_len, const ZFileLoc* const loc); +typedef int(*CDFH_CB)(const uintptr_t user, const i32 idx, const char* const fn, const size_t fn_len, const ZLoc* const loc); // go through central directory of the Zip file (loaded or mapped into memory); // call back for each file. @@ -262,16 +257,11 @@ typedef int(*ZipCdfhCB)(const uintptr_t user, const i32 idx, const char* const f // allocate memory. having lookup_init call zip_get_num_files and then // zip_enum_files would require passing around a ZipInfo struct, // or searching for the ECDR twice - both ways aren't nice. -static int zip_enum_files(const void* const file, const size_t size, const ZipCdfhCB cb, const uintptr_t user) +static int z_enum_files(const void* const file, const size_t size, const CDFH_CB cb, const uintptr_t user) { - int err; - ZFileLoc loc; - // find End of Central Directory Record const u8* ecdr; - err = zip_find_ecdr(file, size, ecdr); - if(err < 0) - return err; + CHECK_ERR(z_find_ecdr(file, size, ecdr)); // call back with number of files in archive const i32 num_files = read_le16(ecdr+10); @@ -279,19 +269,18 @@ static int zip_enum_files(const void* const file, const size_t size, const ZipCd // if it's 0, the callback would treat it as an index => crash. if(!num_files) return -1; - err = cb(user, -num_files, 0, 0, 0); - if(err < 0) - return err; + CHECK_ERR(cb(user, -num_files, 0, 0, 0)); + // call back for each (valid) CDFH entry const u32 cd_ofs = read_le32(ecdr+16); const u8* cdfh = (const u8*)file + cd_ofs; // pointer is advanced in zip_read_cdfh - for(i32 idx = 0; idx < num_files; idx++) { const char* fn; size_t fn_len; - err = zip_read_cdfh(cdfh, fn, fn_len, &loc); + ZLoc loc; + int err = z_read_cdfh(cdfh, fn, fn_len, &loc); // .. non-recoverable error reading dir if(err < 0) return err; @@ -311,33 +300,35 @@ static int zip_enum_files(const void* const file, const size_t size, const ZipCd /////////////////////////////////////////////////////////////////////////////// // -// file lookup -// per archive: find file information (e.g. location, size), given filename. +// lookup_*: file lookup +// per archive: retrieve file information (e.g. offset, size), given filename. // /////////////////////////////////////////////////////////////////////////////// -// file lookup: store array of files in archive (ZEnt); lookup via linear -// search for hash of filename. -// optimization: store index of last file opened; check the one after -// that first, and only then search the whole array. this is a big -// win if files are opened sequentially (they should be so ordered -// in the archive anyway, to reduce seeks) +// current file-lookup implementation: +// store each file's ZEnt in an array. check the next entry first; if that's +// not what we're looking for, find its index via map. // +// rationale: +// - we don't export a "key" (currently array index) that would allow faster +// file lookup. this would only be useful if higher-level code were to +// store the key and use it more than once. also, lookup is currently fast +// enough. finally, this would also make our file enumerate callback +// incompatible with the others (due to the extra key param). +// +// - we don't bother with a directory tree to speed up lookup. the above +// is currently fast enough, and will be O(1) if the files are arranged +// in order of access (which would also reduce seeks). +// this could easily be added though, if need be; Zip files include a CDFH +// entry for each dir. -// rationale: added index because exposing index is bad (say we change lookup data struct) -// much cleaner to only export handle - -// don't bother making a tree structure: first, it's a bit of work -// (Zip files store paths as part of the file name - there's no extra -// directory information); second, the VFS file location DB stores -// handle and file index per file, making its lookup constant-time. struct ZEnt { const char* fn; // currently allocated individually - ZFileLoc loc; + ZLoc loc; }; @@ -361,24 +352,27 @@ struct LookupInfo // we store index of next file instead of the last one opened // to avoid trouble on first call (don't want last == -1). - // don't know size of std::map, and this is used in a control block. - // allocate dynamically to save size. + // don't know size of std::map, and this struct is + // included in a control block (ZArchive). + // allocate dynamically to be safe. LookupIdx* idx; }; // add file to the lookup data structure. -// callback for zip_enum_files, in order (0 <= idx < num_files). +// called from z_enum_files in order (0 <= idx < num_files). +// the first call notifies us of # files, so we can allocate memory. // -// fn (filename) is not necessarily 0-terminated! -// loc is only valid during the callback! must be copied or saved. -static int lookup_add_file_cb(const uintptr_t user, const i32 idx, const char* const fn, const size_t fn_len, const ZFileLoc* const loc) +// notes: +// - fn (filename) is not necessarily 0-terminated! +// - loc is only valid during the callback! must be copied or saved. +static int lookup_add_file_cb(const uintptr_t user, const i32 idx, const char* const fn, const size_t fn_len, const ZLoc* const loc) { LookupInfo* li = (LookupInfo*)user; // HACK: on first call, idx is negative and tells us how many // files are in the archive (so we can allocate memory). - // see zip_enum_files for why it's done this way. + // see z_enum_files for why it's done this way. if(idx < 0) { const i32 num_files = -idx; @@ -431,12 +425,21 @@ static int lookup_add_file_cb(const uintptr_t user, const i32 idx, const char* c // initialize lookup data structure for Zip archive static int lookup_init(LookupInfo* const li, const void* const file, const size_t size) { + int err; + + // check if it's even a Zip file. + // the VFS blindly opens files when mounting; it needs to open + // all archives, but doesn't know their extension (e.g. ".pk3"). + err = z_validate(file, size); + if(err < 0) // don't CHECK_ERR - this can happen often. + return err; + // all other fields are initialized in lookup_add_file_cb li->next_file = 0; li->idx = new LookupIdx; - int err = zip_enum_files(file, size, lookup_add_file_cb, (uintptr_t)li); + err = z_enum_files(file, size, lookup_add_file_cb, (uintptr_t)li); if(err < 0) { delete li->idx; @@ -447,9 +450,11 @@ static int lookup_init(LookupInfo* const li, const void* const file, const size_ } -// free lookup data structure. no use-after-free checking. +// free lookup data structure. +// (no use-after-free checking - that's handled by the VFS) static int lookup_free(LookupInfo* const li) { + // free memory allocated for filenames for(i32 i = 0; i < li->num_files; i++) { free((void*)li->ents[i].fn); @@ -460,12 +465,13 @@ static int lookup_free(LookupInfo* const li) delete li->idx; + // frees both ents and fn_hashes! (they share an allocation) return mem_free(li->ents); } -// return key of file for use in lookup_get_file_info. -static int lookup_file(LookupInfo* const li, const char* const fn, i32& idx) +// return file information of file . +static int lookup_get_file_info(LookupInfo* const li, const char* fn, ZLoc* const loc) { const FnHash fn_hash = fnv_hash(fn); const FnHash* fn_hashes = li->fn_hashes; @@ -483,24 +489,12 @@ static int lookup_file(LookupInfo* const li, const char* const fn, i32& idx) return ERR_FILE_NOT_FOUND; i = it->second; + assert(0 <= i && i < li->num_files); } - + li->next_file = i+1; - idx = i; - return 0; -} - -// return file information, given file key (from lookup_file). -static int lookup_get_file_info(LookupInfo* const li, const i32 idx, const char*& fn, ZFileLoc* const loc) -{ - if(idx < 0 || idx > li->num_files-1) - { - debug_warn("lookup_get_file_info: index out of bounds"); - return -1; - } - - const ZEnt* const ent = &li->ents[idx]; + const ZEnt* const ent = &li->ents[i]; fn = ent->fn; *loc = ent->loc; return 0; @@ -511,17 +505,15 @@ typedef ZipFileCB LookupFileCB; static int lookup_enum_files(LookupInfo* const li, LookupFileCB cb, uintptr_t user) { - int err; - const ZEnt* ent = li->ents; for(i32 i = 0; i < li->num_files; i++, ent++) { - int flags = LOC_ZIP; + // is this entry a directory? + int flags = 0; if(ent->loc.csize == 0 && ent->loc.ucsize == 0) flags |= LOC_DIR; - err = cb(ent->fn, flags, (ssize_t)ent->loc.ucsize, user); - if(err < 0) - return 0; + + CHECK_ERR(cb(ent->fn, flags, (ssize_t)ent->loc.ucsize, user)); } return 0; @@ -577,22 +569,16 @@ static int ZArchive_reload(ZArchive* za, const char* fn, Handle h) int err; err = file_open(fn, 0, &za->f); - if(err < 0) + if(err < 0) // don't CHECK_ERR; this happens often. return err; + // map void* file; size_t size; err = file_map(&za->f, file, size); if(err < 0) goto exit_close; - // early out: check if it's even a Zip file. - // (VFS checks if a file is an archive for mounting by attempting to - // open it with zip_archive_open) - err = zip_validate(file, size); - if(err < 0) - goto exit_unmap_close; - err = lookup_init(&za->li, file, size); if(err < 0) goto exit_unmap_close; @@ -626,8 +612,7 @@ int zip_archive_close(Handle& ha) } -// would be nice to pass along a key (allowing for O(1) lookup in archive), -// but then the callback is no longer compatible to file / vfs enum files. +// see lookup rationale for not passing along a key. int zip_enum(const Handle ha, const ZipFileCB cb, const uintptr_t user) { H_DEREF(ha, ZArchive, za); @@ -661,6 +646,7 @@ uintptr_t inf_init_ctx() } +// we will later provide data that is to be unzipped into . int inf_start_read(uintptr_t ctx, void* out, size_t out_size) { #ifdef NO_ZLIB @@ -682,6 +668,8 @@ int inf_start_read(uintptr_t ctx, void* out, size_t out_size) } +// unzip into output buffer. returns bytes written +// (may be 0, if not enough data is passed in), or < 0 on error. ssize_t inf_inflate(uintptr_t ctx, void* in, size_t in_size) { #ifdef NO_ZLIB @@ -699,6 +687,10 @@ ssize_t inf_inflate(uintptr_t ctx, void* in, size_t in_size) int err = inflate(stream, Z_SYNC_FLUSH); // check+return how much actual data was read + // + // note: zlib may not always output data, e.g. if passed very little + // data in one block (due to misalignment). return 0 ("no data output"), + // which doesn't abort the read. size_t avail_out = stream->avail_out; assert(avail_out <= prev_avail_out); // make sure output buffer size didn't magically increase @@ -706,13 +698,14 @@ ssize_t inf_inflate(uintptr_t ctx, void* in, size_t in_size) if(!nread) return (err < 0)? err : 0; // try to pass along the ZLib error code, but make sure - // it isn't treated as 'bytes read', i.e. > 0. + // it isn't treated as 'bytes output', i.e. > 0. return nread; #endif } +// unzip complete; all input and output data should have been consumed. int inf_finish_read(uintptr_t ctx) { #ifdef NO_ZLIB @@ -818,8 +811,12 @@ do\ while(0); -static int zip_open_idx(const Handle ha, const i32 idx, ZFile* zf) +int zip_open(const Handle ha, const char* fn, ZFile* zf) { + H_DEREF(ha, ZArchive, za); + LookupInfo* li = (LookupInfo*)&za->li; + + // zero output param in case we fail below. memset(zf, 0, sizeof(ZFile)); if(!zf) @@ -830,22 +827,19 @@ static int zip_open_idx(const Handle ha, const i32 idx, ZFile* zf) H_DEREF(ha, ZArchive, za); LookupInfo* li = (LookupInfo*)&za->li; - const char* fn; - ZFileLoc loc; - // don't want ZFile to contain a ZFileLoc struct - + ZLoc loc; + // don't want ZFile to contain a ZEnt struct - // its ucsize member must be 'loose' for compatibility with File. - // => need to copy ZFileLoc fields into ZFile. - int err = lookup_get_file_info(li, idx, fn, &loc); - if(err < 0) - return err; + // => need to copy ZEnt fields into ZFile. + CHECK_ERR(lookup_get_file_info(li, fn, &loc)); #ifdef PARANOIA zf->magic = ZFILE_MAGIC; #endif - zf->ucsize = loc.ucsize; - zf->ofs = loc.ofs; - zf->csize = loc.csize; + zf->ucsize = loc.ucsize; + zf->ofs = loc.ofs; + zf->csize = loc.csize; zf->ha = ha; zf->read_ctx = inf_init_ctx(); @@ -858,20 +852,6 @@ invalid_zf: } -int zip_open(const Handle ha, const char* fn, ZFile* zf) -{ - H_DEREF(ha, ZArchive, za); - LookupInfo* li = (LookupInfo*)&za->li; - - i32 idx; - int err = lookup_file(li, fn, idx); - if(err < 0) - return err; - - return zip_open_idx(ha, idx, zf); -} - - int zip_close(ZFile* zf) { CHECK_ZFILE(zf); @@ -884,18 +864,14 @@ int zip_close(ZFile* zf) // return file information for in archive int zip_stat(Handle ha, const char* fn, struct stat* s) { + // zero output param in case we fail below. + memset(s, 0, sizeof(struct stat)); + H_DEREF(ha, ZArchive, za); LookupInfo* li = &za->li; - i32 idx; - int err = lookup_file(li, fn, idx); - if(err < 0) - return err; - - const char* fn2; // unused - ZFileLoc loc; - lookup_get_file_info(li, idx, fn2, &loc); - // can't fail - returned valid index above + ZLoc loc; + CHECK_ERR(lookup_get_file_info(li, fn, &loc)); s->st_size = loc.ucsize; return 0; @@ -953,22 +929,32 @@ ssize_t zip_read(ZFile* zf, off_t raw_ofs, size_t size, void** p) return -1; } - void* our_buf = 0; // buffer we allocated (if necessary) - if(!*p) + void* buf; + bool free_buf = true; + + // user-specified buf + if(*p) { - *p = our_buf = mem_alloc(size); - if(!*p) + buf = *p; + free_buf = false; + } + // we're going to allocate + else + { + buf = mem_alloc(size, 4096); + if(!buf) return ERR_NO_MEM; + *p = buf; } - err = (ssize_t)inf_start_read(zf->read_ctx, *p, size); + err = (ssize_t)inf_start_read(zf->read_ctx, buf, size); if(err < 0) { fail: // we allocated it, so free it now - if(our_buf) + if(free_buf) { - mem_free(our_buf); + mem_free(buf); *p = 0; } return err;