several bugfixes for loading from Zip (currently broken)

This was SVN commit r950.
janwas 2004-08-09 16:46:57 +00:00
parent 687aea271e
commit 0b3361b6f5
3 changed files with 220 additions and 129 deletions

View File

@ -361,16 +361,15 @@ int file_stat(const char* const path, struct stat* const s)
// VFile handles in the VFS.
// - we want the VFS open logic to be triggered on file invalidate
// (if the dev. file is deleted, we should use what's in the archives).
// we don't want to make this module depend on VFS, so we can't
// call up into vfs_foreach_path from reload here =>
// VFS needs to allocate the handle.
// we don't want to make this module depend on VFS, so we don't
// have access to the file location DB; VFS needs to allocate the handle.
// - no problem exposing our internals via File struct -
// we're only used by the VFS and Zip modules. don't bother making
// an opaque struct - that'd have to be kept in sync with the real thing.
// - when Zip opens its archives via file_open, a handle isn't needed -
// the Zip module hides its File struct (required to close the file),
// and the Handle approach doesn't guard against some idiot calling
// close(our_fd) directly, either.
// close(our_fd_value) directly, either.
// marker for File struct, to make sure it's valid
@ -486,7 +485,8 @@ int file_close(File* const f)
// regardless of how many references remain.
if(f->map_refs > 1)
f->map_refs = 1;
file_unmap(f);
if(f->mapping) // only free if necessary (unmap complains if not mapped)
file_unmap(f);
// (check fd to avoid BoundsChecker warning about invalid close() param)
if(f->fd != -1)
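For context, a minimal sketch of the close-time cleanup this hunk arrives at, using stand-in types (File and file_unmap here are illustrative reconstructions, not the module's exact definitions):

#include <cstdio>

// stand-ins for the module's types; mapping bookkeeping only, no real I/O
struct File { int map_refs; void* mapping; };

static int file_unmap(File* f)
{
    if(f->map_refs == 0)
    {
        fprintf(stderr, "file_unmap: not currently mapped\n");
        return -1;
    }
    if(--f->map_refs > 0)   // other references remain - done
        return 0;
    f->mapping = 0;         // last reference: release (munmap elided)
    return 0;
}

// close-time cleanup as in the hunk above: collapse the refcount so a
// single unmap suffices, and skip unmap entirely if nothing is mapped -
// exactly the complaint the new f->mapping guard avoids.
static void close_cleanup(File* f)
{
    if(f->map_refs > 1)
        f->map_refs = 1;
    if(f->mapping)
        file_unmap(f);
}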
@ -559,9 +559,9 @@ static int IO_reload(IO* io, const char*, Handle)
// pads the request up to BLOCK_SIZE, and stores the original parameters in IO.
// transfers of more than 1 block (including padding) are allowed, but do not
// go through the cache. don't see any case where that's necessary, though.
Handle file_start_io(File* const f, const off_t user_ofs, size_t user_size, void* const user_p)
{
int err;
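The comment above is the crux of file_start_io; here is a self-contained sketch of the padding arithmetic it describes, assuming BLOCK_SIZE is a power of two (the constant's value and the helper names are illustrative, not the module's actual symbols):

#include <cstdio>
#include <cstddef>

static const size_t BLOCK_SIZE = 64*1024;   // assumption: 64 KiB blocks

// pad a request out to whole blocks: round the offset down to a block
// boundary and the size up, so the transfer can be cached per-block.
// the original ofs/size would be stored in IO to trim the result later.
static void pad_to_block(long user_ofs, size_t user_size,
                         long* padded_ofs, size_t* padded_size)
{
    const size_t misalign = (size_t)(user_ofs % (long)BLOCK_SIZE);
    *padded_ofs  = user_ofs - (long)misalign;
    *padded_size = (misalign + user_size + BLOCK_SIZE-1)
                   / BLOCK_SIZE * BLOCK_SIZE;
}

int main()
{
    long ofs; size_t size;
    pad_to_block(70000, 1000, &ofs, &size);
    printf("%ld %zu\n", ofs, size);   // 65536 65536
    return 0;
}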
@ -720,111 +720,135 @@ int file_discard_io(Handle& hio)
//
// return (positive) number of raw bytes transferred if successful;
// otherwise, an error code.
ssize_t file_io(File* const f, const off_t raw_ofs, size_t raw_size, void** const p,
// the underlying aio implementation likes buffer and offset to be
// sector-aligned; if not, the transfer goes through an align buffer,
// and requires an extra memcpy.
//
// if the user specifies an unaligned buffer, there's not much we can
// do - we can't assume the buffer contains padding. therefore,
// callers should let us allocate the buffer if possible.
//
// if the offset's misalignment equals the buffer's, only the first and
// last blocks will need to be copied by aio, since we read up to the
// next block boundary.
// otherwise, everything will have to be copied; at least we split
// the read into blocks, so aio's buffer won't have to cover the
// whole file.
ssize_t file_io(File* const f, const off_t data_ofs, size_t data_size, void** const p,
const FILE_IO_CB cb, const uintptr_t ctx) // optional
{
#ifdef PARANOIA
debug_out("file_io fd=%d size=%d ofs=%d\n", f->fd, raw_size, raw_ofs);
debug_out("file_io fd=%d size=%d ofs=%d\n", f->fd, data_size, data_ofs);
#endif
CHECK_FILE(f);
const bool is_write = (f->flags & FILE_WRITE) != 0;
const bool is_write = !!(f->flags & FILE_WRITE);
const bool no_aio = !!(f->flags & FILE_NO_AIO);
// sanity checks
// .. for writes
if(is_write)
void* data_buf = 0; // I/O source or sink buffer
// when reading:
if(!is_write)
{
// temp buffer OR supposed to be allocated here: invalid
if(!p || !*p)
{
debug_warn("file_io: write to file from 0 buffer");
return ERR_INVALID_PARAM;
}
}
// .. for reads
else
{
// cut off at EOF
const ssize_t bytes_left = f->size - raw_ofs;
// cut data_size off at EOF
const ssize_t bytes_left = f->size - data_ofs;
if(bytes_left < 0)
return -1;
raw_size = MIN(raw_size, (size_t)bytes_left);
data_size = MIN(data_size, (size_t)bytes_left);
}
//
// set buffer options
//
bool do_align = true; // => alloc_buf OR (NOT use_buf)
bool alloc_buf = false; // <==> (use_buf AND do_align)
bool use_buf = true;
// .. temp buffer: do_align
if(!p)
use_buf = false;
// .. user-specified buffer: use_buf
else if(*p)
{
data_buf = *p;
do_align = false;
}
// .. we allocate the buffer: do_align, alloc_buf, use_buf
else
{
alloc_buf = true;
// data_buf will be set from padded_buf
}
// writes require use_buf AND (NOT alloc_buf); otherwise, p is invalid.
if(is_write && (!use_buf || alloc_buf))
{
debug_warn("file_io: write to file from 0 buffer");
return ERR_INVALID_PARAM;
}
//
// calculate aligned transfer size (no change if !do_align)
//
off_t actual_ofs = data_ofs;
size_t actual_size = data_size;
void* actual_buf = data_buf;
// note: we go to the trouble of aligning the first block (instead of
// just reading up to the next block and letting aio realign it),
// so that it can be taken from the cache.
// this is not possible if !do_align, since we have to allocate
// extra buffer space for the padding.
const size_t ofs_misalign = data_ofs % BLOCK_SIZE;
const size_t lead_padding = do_align? ofs_misalign : 0;
// for convenience; used below.
if(do_align)
{
actual_ofs -= (off_t)ofs_misalign;
actual_size = round_up(ofs_misalign + data_size, BLOCK_SIZE);
}
if(alloc_buf)
{
actual_buf = mem_alloc(actual_size, BLOCK_SIZE);
if(!actual_buf)
return ERR_NO_MEM;
data_buf = (char*)actual_buf + lead_padding;
}
// warn in debug build if buffer and offset don't match
// (=> aio would have to realign every block).
#ifndef NDEBUG
size_t buf_misalign = ((uintptr_t)actual_buf) % BLOCK_SIZE;
if(actual_buf && actual_ofs % BLOCK_SIZE != buf_misalign)
debug_out("file_io: warning: buffer %p and offset %x are misaligned", actual_buf, data_ofs);
#endif
// FIXME: currently doesn't handle caller requesting we alloc buffer
if(f->flags & FILE_NO_AIO)
if(no_aio)
{
lseek(f->fd, raw_ofs, SEEK_SET);
lseek(f->fd, data_ofs, SEEK_SET);
return is_write? write(f->fd, *p, raw_size) : read(f->fd, *p, raw_size);
return is_write? write(f->fd, *p, data_size) : read(f->fd, *p, data_size);
}
//
// transfer parameters
//
const size_t misalign = raw_ofs % BLOCK_SIZE;
// actual transfer start offset
// not aligned! aio takes care of initial unalignment;
// next read will be aligned, because we read up to the next block.
const off_t start_ofs = raw_ofs;
void* buf = 0; // I/O source or sink; assume temp buffer
void* our_buf = 0; // buffer we allocate, if necessary
// check buffer param
// .. temp buffer requested
if(!p)
; // nothing to do - buf already initialized to 0
// .. user specified, or requesting we allocate the buffer
else
{
// the underlying aio implementation likes buffer and offset to be
// sector-aligned; if not, the transfer goes through an align buffer,
// and requires an extra memcpy.
//
// if the user specifies an unaligned buffer, there's not much we can
// do - we can't assume the buffer contains padding. therefore,
// callers should let us allocate the buffer if possible.
//
// if ofs misalign = buffer, only the first and last blocks will need
// to be copied by aio, since we read up to the next block boundary.
// otherwise, everything will have to be copied; at least we split
// the read into blocks, so aio's buffer won't have to cover the
// whole file.
// user specified buffer
if(*p)
{
buf = *p;
// warn in debug build if buffer not aligned
#ifndef NDEBUG
size_t buf_misalign = ((uintptr_t)buf) % BLOCK_SIZE;
if(misalign != buf_misalign)
debug_out("file_io: warning: buffer %p and offset %x are misaligned", buf, raw_ofs);
#endif
}
// requesting we allocate the buffer
else
{
size_t buf_size = round_up(misalign + raw_size, BLOCK_SIZE);
our_buf = mem_alloc(buf_size, BLOCK_SIZE);
if(!our_buf)
return ERR_NO_MEM;
buf = our_buf;
*p = (char*)buf + misalign;
}
}
// buf is now the source or sink, regardless of who allocated it.
// we need to keep our_buf (memory we allocated), so we can free
// it if we fail; it's 0 if the caller passed in a buffer.
//
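To summarize the three buffer cases woven through this hunk, here is the decision table as standalone code (names mirror the diff; the is_write validation and the actual allocation are left out):

// p == 0       temp buffer: no caller-visible result, free to align
// p && *p      caller's buffer: use as-is, no padding space available
// p && !*p     we allocate an aligned buffer and return it via *p
struct BufOptions
{
    bool do_align;    // widen the transfer to block boundaries
    bool alloc_buf;   // we own the (aligned) buffer
    bool use_buf;     // transfer targets a caller-visible buffer
};

static BufOptions choose_buf_options(void** p)
{
    BufOptions o = { true, false, true };
    if(!p)
        o.use_buf = false;     // temp buffer
    else if(*p)
        o.do_align = false;    // must honor the caller's buffer
    else
        o.alloc_buf = true;    // caller gets our aligned buffer back
    return o;
}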
@ -860,17 +884,17 @@ debug_out("file_io fd=%d size=%d ofs=%d\n", f->fd, raw_size, raw_ofs);
{
// calculate issue_size:
// at most, transfer up to the next block boundary.
off_t issue_ofs = (off_t)(start_ofs + issue_cnt);
const size_t left_in_block = BLOCK_SIZE - (issue_ofs % BLOCK_SIZE);
const size_t total_left = raw_size - issue_cnt;
size_t issue_size = MIN(left_in_block, total_left);
// assume temp buffer allocated by file_start_io
void* data = 0;
// if transferring to/from normal file, use buf instead
if(buf)
data = (void*)((uintptr_t)buf + issue_cnt);
off_t issue_ofs = (off_t)(actual_ofs + issue_cnt);
size_t issue_size = BLOCK_SIZE;
if(!do_align)
{
const size_t left_in_block = BLOCK_SIZE - (issue_ofs % BLOCK_SIZE);
const size_t total_left = data_size - issue_cnt;
issue_size = MIN(left_in_block, total_left);
}
// if using buffer, set position in it; otherwise, 0 (temp)
void* data = use_buf? (char*)actual_buf + issue_cnt : 0;
Handle hio = file_start_io(f, issue_ofs, issue_size, data);
if(hio <= 0)
err = (ssize_t)hio;
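The issue_size logic in isolation: aligned transfers go out in whole blocks, while unaligned ones stop at each block boundary so only the first and last straddle one (BLOCK_SIZE's value and the MIN stand-in are assumptions):

#include <cstddef>

static const size_t BLOCK_SIZE = 64*1024;   // assumption
static size_t min_size(size_t a, size_t b) { return a < b? a : b; }

static size_t next_issue_size(long issue_ofs, size_t data_size,
                              size_t issue_cnt, bool do_align)
{
    if(do_align)                // start is block-aligned: full blocks
        return BLOCK_SIZE;
    const size_t left_in_block =
        BLOCK_SIZE - (size_t)(issue_ofs % (long)BLOCK_SIZE);
    const size_t total_left = data_size - issue_cnt;
    return min_size(left_in_block, total_left);
}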
@ -878,7 +902,7 @@ debug_out("file_io fd=%d size=%d ofs=%d\n", f->fd, raw_size, raw_ofs);
// waiting for all pending transfers to complete.
issue_cnt += issue_size;
if(issue_cnt >= raw_size)
if(issue_cnt >= data_size)
all_issued = true;
// store IO in ring buffer
@ -929,17 +953,20 @@ debug_out("file_io fd=%d size=%d ofs=%d\n", f->fd, raw_size, raw_ofs);
if(err < 0)
{
// user didn't specify output buffer - free what we allocated,
// and clear 'out', which points to the freed buffer.
if(our_buf)
// and clear p (value-return param)
if(alloc_buf)
{
mem_free(our_buf);
mem_free(actual_buf);
*p = 0;
// we only allocate if p && !*p, but had set *p above.
// alloc_buf => p != 0
}
return err;
}
assert(issue_cnt == raw_transferred_cnt && raw_transferred_cnt == raw_size);
if(p)
*p = data_buf;
assert(/*issue_cnt == raw_transferred_cnt &&*/ raw_transferred_cnt == data_size);
return (ssize_t)actual_transferred_cnt;
}
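The error path above illustrates the value-return parameter convention; a sketch of just that epilogue, with free() standing in for mem_free and the counters abbreviated:

#include <cstdlib>

// on failure, free only what we allocated and null the caller's
// pointer so it cannot see the freed buffer; on success, hand the
// source/sink buffer back through the value-return parameter p.
static long finish_io(long err, bool alloc_buf,
                      void* actual_buf, void* data_buf, void** p)
{
    if(err < 0)
    {
        if(alloc_buf)
        {
            free(actual_buf);   // stand-in for mem_free
            *p = 0;             // alloc_buf implies p != 0
        }
        return err;
    }
    if(p)
        *p = data_buf;          // success: report where the data lives
    return err;                 // bytes transferred
}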
@ -990,7 +1017,9 @@ int file_map(File* const f, void*& p, size_t& size)
}
// don't allow mapping zero-length files (doesn't make sense,
// and BoundsChecker complains about wposix mmap failing)
// and BoundsChecker warns about wposix mmap failing).
// then again, don't complain, because this might happen when mounting
// a dir containing empty files; each is opened as a Zip file.
if(f->size == 0)
return -1;
@ -1019,7 +1048,10 @@ int file_unmap(File* const f)
// file is not currently mapped
if(f->map_refs == 0)
{
debug_warn("file_unmap: not currently mapped");
return -1;
}
// still more than one reference remaining - done.
if(--f->map_refs > 0)

View File

@ -157,12 +157,11 @@ static int path_validate(const uint line, const char* const path)
// disallow:
// - ".." (prevent going above the VFS root dir)
// - "/." and "./" (security whole when mounting,
// and not supported on Windows).
// - "//" (makes no sense)
if((c == '.' || c == '/') && (last_c == '.' || last_c == '/'))
// - "./" (security hole when mounting and not supported on Windows).
// allow "/.", because CVS backup files include it.
if(last_c == '.' && (c == '.' || c == '/'))
{
msg = "contains '..', '/.', './', or '//'";
msg = "contains '..' or './'";
goto fail;
}
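The relaxed rule in isolation: reject ".." and "./" but allow "/." (and thus CVS backup names). A standalone sketch of just this check, not the full path_validate:

static bool path_chars_ok(const char* path)
{
    char last_c = '\0';
    for(const char* s = path; *s; ++s)
    {
        const char c = *s;
        // ".." would escape the VFS root; "./" is a mount-time hole
        // and unsupported on Windows. "/." passes (CVS backup files).
        if(last_c == '.' && (c == '.' || c == '/'))
            return false;
        last_c = c;
    }
    return true;
}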
@ -245,6 +244,7 @@ struct Loc
//
// add_* aborts if a subdir or file of the same name already exists.
typedef std::pair<const std::string, const Loc*> FileVal;
typedef std::map<const std::string, const Loc*> Files;
typedef Files::iterator FileIt;
// notes:
@ -290,6 +290,13 @@ struct Dir
{
mounts = 0;
}
void dump()
{
FileIt it = files.begin();
for(; it != files.end(); ++it)
debug_out("%s\n", it->first.c_str());
}
};
@ -305,6 +312,9 @@ int Dir::add_subdir(const char* const fn)
subdirs[fn_s];
// side effect: maps <fn> to a newly constructed Dir()
// non-const => cannot be optimized away.
assert(find_subdir(fn));
return 0;
}
@ -333,10 +343,30 @@ int Dir::add_file(const char* const fn, const Loc* const loc)
// for absolute clarity; the container holds const Loc* objects.
// operator[] returns a reference to that.
// need this typedef to work around a GCC bug?
Data& old_loc = files[fn_s];
// Data& old_loc = files[fn_s];
// default pointer ctor sets it to 0 =>
// if fn wasn't already in the container, old_loc is 0.
FileVal val = std::make_pair(fn_s, loc);
std::pair<FileIt, bool> ret = files.insert(val);
FileIt it = ret.first;
assert(!strcmp(it->first.c_str(), fn));
assert(find_file(fn));
const Loc*& old_loc = it->second;
if(old_loc->pri > loc->pri)
return 1;
old_loc = loc;
return 0;
/*
if(ret.first) // insertion made
{
}
// old loc exists and is higher priority - keep it.
if(old_loc && old_loc->pri > loc->pri)
return 1;
@ -346,6 +376,7 @@ int Dir::add_file(const char* const fn, const Loc* const loc)
// patch archives; the one with the "largest" filename trumps the others.
old_loc = loc;
return 0;
*/
}
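The new add_file centers on std::map::insert returning the existing element when the key is already present, which lets the priority comparison run without a separate find(). The pattern on its own, with Loc reduced to its priority field:

#include <map>
#include <string>

struct Loc { int pri; };

// insert-or-override: if fn is new, the slot already holds loc and the
// comparison below is a no-op; if fn exists, the higher-priority mount
// wins, with later mounts trumping on equal priority (patch archives).
static int add_file_sketch(std::map<std::string, const Loc*>& files,
                           const std::string& fn, const Loc* loc)
{
    std::pair<std::map<std::string, const Loc*>::iterator, bool> ret =
        files.insert(std::make_pair(fn, loc));
    const Loc*& old_loc = ret.first->second;
    if(old_loc->pri > loc->pri)
        return 1;     // keep existing entry
    old_loc = loc;
    return 0;
}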
@ -392,7 +423,11 @@ enum
LF_CREATE_MISSING_FILE = 2,
LF_LAST = 2
// only valid with LF_CREATE_MISSING_FILE.
// *loc specifies the new file's loc
LF_HAVE_LOC = 4,
LF_LAST = 8
};
@ -402,10 +437,11 @@ static int tree_lookup(const char* path, const Loc** const loc = 0, Dir** const
{
CHECK_PATH(path);
assert(loc != 0 || dir != 0);
assert(flags <= LF_LAST);
assert(flags < LF_LAST);
const bool create_missing_components = !!(flags & LF_CREATE_MISSING_DIRS);
const bool create_missing_files = !!(flags & LF_CREATE_MISSING_FILE);
const bool have_loc = !!(flags & LF_HAVE_LOC);
// copy into (writeable) buffer so we can 'tokenize' path components
// by replacing '/' with '\0'. length check done by CHECK_PATH.
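Since the LF_* values are single bits, LF_LAST is set to the first unused bit, and the tightened assert(flags < LF_LAST) admits exactly the OR-combinations of defined flags. Restated as a standalone sketch:

enum
{
    LF_CREATE_MISSING_DIRS = 1,
    LF_CREATE_MISSING_FILE = 2,
    LF_HAVE_LOC            = 4,
    LF_LAST                = 8    // first unused bit, not a flag
};

// any OR of the three defined flags lies in [0, 7], so
// assert(flags < LF_LAST) accepts all of them and nothing else.
static void decode_flags(unsigned flags, bool* create_dirs,
                         bool* create_files, bool* have_loc)
{
    *create_dirs  = !!(flags & LF_CREATE_MISSING_DIRS);
    *create_files = !!(flags & LF_CREATE_MISSING_FILE);
    *have_loc     = !!(flags & LF_HAVE_LOC);
}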
@ -467,12 +503,21 @@ static int tree_lookup(const char* path, const Loc** const loc = 0, Dir** const
if(create_missing_files)
{
// dir wasn't populated via tree_add_dirR => don't know
// the dir's Loc => cannot add this file.
if(cur_dir->mounts != 1)
return -1;
const Loc* new_loc;
CHECK_ERR(cur_dir->add_file(fn, cur_dir->loc));
if(have_loc)
new_loc = *loc;
else
{
// dir wasn't populated via tree_add_dirR => don't know
// the dir's Loc => cannot add this file.
if(cur_dir->mounts != 1)
return -1;
new_loc = cur_dir->loc;
}
CHECK_ERR(cur_dir->add_file(fn, new_loc));
}
*loc = cur_dir->find_file(fn);
@ -534,12 +579,22 @@ static int add_dirent_cb(const char* const path, const uint flags, const ssize_t
if(flags & LOC_ZIP)
{
Dir* dir;
if(tree_lookup(path, 0, &dir, LF_CREATE_MISSING_DIRS) >= 0)
if(!size)
{
const char* fn = strrchr(path, '/');
if(fn)
err = dir->add_file(fn+1, cur_loc);
if(!strchr(path, '.'))
debug_out("empty %s\n", path);
}
else
{
const Loc* loc = cur_loc;
Dir* dir;
if(tree_lookup(path, &loc, &dir, LF_CREATE_MISSING_DIRS|LF_CREATE_MISSING_FILE|LF_HAVE_LOC) < 0)
debug_out("failed to add %s\n", path);
else
{
const Loc* loc;
assert(tree_lookup(path, &loc) >= 0);
}
}
}
// directory

View File

@ -188,7 +188,7 @@ static int zip_read_cdfh(const u8*& cdfh, const char*& fn, size_t& fn_len, ZFile
{
const u8 method = cdfh[10];
const u32 csize_ = read_le32(cdfh+20);
u32 csize_ = read_le32(cdfh+20);
const u32 ucsize_ = read_le32(cdfh+24);
const u16 fn_len_ = read_le16(cdfh+28);
const u16 e_len = read_le16(cdfh+30);
@ -204,6 +204,10 @@ static int zip_read_cdfh(const u8*& cdfh, const char*& fn, size_t& fn_len, ZFile
debug_warn("warning: unknown compression method");
goto skip_file;
}
// tell is_compressed that the file is stored by
// setting csize_ to 0.
if(method == 0)
csize_ = 0;
fn = fn_;
fn_len = fn_len_;
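A hedged restatement of the stored-file convention above: csize_ doubles as the is-compressed flag, so stored entries (method 0) are normalized to 0. is_compressed_sketch is an assumption about how the real is_compressed tests it; method 8 is deflate:

typedef unsigned char u8;
typedef unsigned int u32;

// convention from the diff: csize == 0 marks a stored (uncompressed)
// entry, so downstream code needs no separate method field.
static bool is_compressed_sketch(u32 csize)
{
    return csize != 0;
}

static u32 normalize_csize(u8 method, u32 csize_from_cdfh)
{
    if(method == 0)   // stored: raw data, size equals ucsize
        return 0;
    return csize_from_cdfh;   // deflate (method 8): keep real csize
}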
@ -744,7 +748,7 @@ int inf_free_ctx(uintptr_t ctx)
assert(stream->next_out == 0);
inflateEnd(stream);
mem_free(stream);
free(stream);
return 0;
#endif
}
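The mem_free -> free change only makes sense if the context was allocated with malloc/calloc rather than the pool allocator; a sketch of the matched pair, assuming zlib and a calloc'd z_stream (the init side is reconstructed, not shown in the diff):

#include <cstdlib>
#include <cstdint>
#include <cassert>
#include <zlib.h>

// calloc zeroes the z_stream, so zalloc/zfree/opaque start out as
// Z_NULL, which inflateInit interprets as "use default allocators".
static uintptr_t inf_init_ctx_sketch()
{
    z_stream* stream = (z_stream*)calloc(1, sizeof(z_stream));
    if(!stream)
        return 0;
    if(inflateInit(stream) != Z_OK)
    {
        free(stream);
        return 0;
    }
    return (uintptr_t)stream;
}

static int inf_free_ctx_sketch(uintptr_t ctx)
{
    z_stream* stream = (z_stream*)ctx;
    assert(stream->next_out == 0);   // all output consumed
    inflateEnd(stream);
    free(stream);   // must match calloc above - not the pool's mem_free
    return 0;
}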