
v2 of file/res code for thesis:

previous zip.* split up into archive, zip and compression. add O(1)
lookup via memento. add support for writing Zip archives. simplify IO by
letting the compression module queue input buffers.

file: split up into file, file_io, file_cache. add atom_fn, which enables
string comparison via pointer compare (stores all filenames/paths in
one place). add statistics gathering.
file_cache: landlord algorithm for caching files as well as LRU block
cache.
file_io: aio code that splits transfers into blocks revamped and made
into class.

vfs: filelist/trace code moved into new vfs_optimizer. add caching logic
to vfs_load.
vfs_optimizer: will generate the zip archive (to replace the current archive
builder script); not yet done.
vfs_tree: major cleanup - now has a nice base class rather than union
hackery.

This was SVN commit r3420.
janwas 2006-01-23 20:05:09 +00:00
parent 72cc581c2e
commit 36cbf23c8e
21 changed files with 3928 additions and 2967 deletions

View File

@ -0,0 +1,607 @@
#include "precompiled.h"
#include "lib/res/res.h"
#include "lib/timer.h"
#include "lib/allocators.h"
#include "file.h"
#include "file_cache.h"
#include "zip.h"
#include "compression.h"
#include "archive.h"
// components:
// - za_*: Zip archive handling
// passes the list of files in an archive to lookup.
// - lookup_*: file lookup
// per archive: return file info (e.g. offset, size), given filename.
// - Archive_*: Handle-based container for archive info
// owns archive file and its lookup mechanism.
// - inf_*: in-memory inflate routines (zlib wrapper)
// decompresses blocks from file_io callback.
// - afile_*: file from Zip archive
// uses lookup to get file information; holds inflate state.
// - sync and async I/O
// uses file_* and inf_*.
// - file mapping
///////////////////////////////////////////////////////////////////////////////
//
// lookup_*: file lookup
// per archive: return file info (e.g. offset, size), given filename.
//
///////////////////////////////////////////////////////////////////////////////
// rationale:
// - we don't export a "key" (currently array index) that would allow faster
// file lookup. this would only be useful if higher-level code were to
// store the key and use it more than once. also, lookup is currently fast
// enough. finally, this would also make our file enumerate callback
// incompatible with the others (due to the extra key param).
//
// - we don't bother with a directory tree to speed up lookup. the above
// is fast enough: O(1) if accessed sequentially, otherwise O(log(files)).
///////////////////////////////////////////////////////////////////////////////
//
// Archive_*: Handle-based container for archive info
// owns archive file and its lookup mechanism.
//
///////////////////////////////////////////////////////////////////////////////
struct Archive
{
File f;
ArchiveEntry* ents;
// number of valid entries in above array (see lookup_add_file_cb)
uint num_files;
// note: we need to keep track of what resources reload() allocated,
// so the dtor can free everything correctly.
uint is_open : 1;
uint is_loaded : 1;
};
H_TYPE_DEFINE(Archive);
static void Archive_init(Archive*, va_list)
{
}
static void Archive_dtor(Archive* a)
{
if(a->is_loaded)
{
(void)mem_free(a->ents);
a->is_loaded = 0;
}
if(a->is_open)
{
(void)file_close(&a->f);
a->is_open = 0;
}
}
static LibError Archive_reload(Archive* a, const char* fn, Handle)
{
// (note: don't warn on failure - this happens when
// vfs_mount blindly archive_open-s a dir)
RETURN_ERR(file_open(fn, FILE_CACHE_BLOCK, &a->f));
a->is_open = 1;
RETURN_ERR(zip_populate_archive(a, &a->f));
a->is_loaded = 1;
return ERR_OK;
}
static LibError Archive_validate(const Archive* a)
{
RETURN_ERR(file_validate(&a->f));
if(debug_is_pointer_bogus(a->ents))
return ERR_1;
return ERR_OK;
}
static LibError Archive_to_string(const Archive* a, char* buf)
{
snprintf(buf, H_STRING_LEN, "(%u files)", a->num_files);
return ERR_OK;
}
// open and return a handle to the archive indicated by <fn>.
// somewhat slow - each file is added to an internal index.
Handle archive_open(const char* fn)
{
TIMER("archive_open");
return h_alloc(H_Archive, fn);
}
// close the archive <ha> and set ha to 0
LibError archive_close(Handle& ha)
{
return h_free(ha, H_Archive);
}
// look up ArchiveEntry, given filename (untrusted!).
static LibError archive_get_file_info(Archive* a, const char* fn, uintptr_t memento, ArchiveEntry*& ent)
{
if(memento)
{
ent = (ArchiveEntry*)memento;
return ERR_OK;
}
else
{
const char* atom_fn = file_make_unique_fn_copy(fn, 0);
for(uint i = 0; i < a->num_files; i++)
if(a->ents[i].atom_fn == atom_fn)
{
ent = &a->ents[i];
return ERR_OK;
}
}
return ERR_FILE_NOT_FOUND;
}
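// Usage sketch (illustration only, not part of this commit): a caller that
// remembers the memento passed to its FileCB during archive_enum can hand it
// back to afile_open, which lets archive_get_file_info return the ArchiveEntry
// in O(1) instead of scanning a->ents. remember_cb, remembered_memento and
// open_via_memento are hypothetical names.
static uintptr_t remembered_memento;

static LibError remember_cb(const char* atom_fn, const struct stat* s,
    uintptr_t memento, uintptr_t user)
{
    // store the memento of the most recently enumerated file
    // (a real caller would filter by atom_fn).
    remembered_memento = memento;
    return INFO_CB_CONTINUE;
}

static void open_via_memento(Handle ha, const char* fn)
{
    AFile af;
    (void)archive_enum(ha, remember_cb, 0);
    (void)afile_open(ha, fn, remembered_memento, 0, &af);  // O(1) lookup
    (void)afile_close(&af);
}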
// successively call <cb> for each valid file in the archive <ha>,
// passing the complete path and <user>.
// if it returns a nonzero value, abort and return that, otherwise 0.
LibError archive_enum(const Handle ha, const FileCB cb, const uintptr_t user)
{
H_DEREF(ha, Archive, a);
struct stat s;
memset(&s, 0, sizeof(s));
for(uint i = 0; i < a->num_files; i++)
{
const ArchiveEntry* ent = &a->ents[i];
s.st_mode = S_IFREG;
s.st_size = (off_t)ent->ucsize;
s.st_mtime = ent->mtime;
const uintptr_t memento = (uintptr_t)ent;
LibError ret = cb(ent->atom_fn, &s, memento, user);
if(ret != INFO_CB_CONTINUE)
return ret;
}
return ERR_OK;
}
LibError archive_allocate_entries(Archive* a, size_t num_entries)
{
debug_assert(num_entries != 0); // =0 makes no sense but wouldn't be fatal
debug_assert(a->ents == 0); // must not have been allocated yet
a->ents = (ArchiveEntry*)mem_alloc(num_entries * sizeof(ArchiveEntry), 32);
if(!a->ents)
WARN_RETURN(ERR_NO_MEM);
return ERR_OK;
}
// add file <fn> to the lookup data structure.
// called from za_enum_files in order (0 <= idx < num_entries).
// the first call notifies us of # entries, so we can allocate memory.
//
// note: ent is only valid during the callback! must be copied or saved.
LibError archive_add_file(Archive* a, const ArchiveEntry* ent)
{
a->ents[a->num_files++] = *ent;
return ERR_OK;
}
///////////////////////////////////////////////////////////////////////////////
//
// afile_*: file from Zip archive
// uses lookup to get file information; holds inflate state.
//
///////////////////////////////////////////////////////////////////////////////
// convenience function, allows implementation change in AFile.
// note that size == ucsize isn't foolproof, and adding a flag to
// ofs or size is ugly and error-prone.
// no error checking - always called from functions that check af.
static inline bool is_compressed(AFile* af)
{
return af->method != CM_NONE;
}
// get file status (size, mtime). output param is zeroed on error.
LibError afile_stat(Handle ha, const char* fn, struct stat* s)
{
// zero output param in case we fail below.
memset(s, 0, sizeof(struct stat));
H_DEREF(ha, Archive, a);
ArchiveEntry* ent;
CHECK_ERR(archive_get_file_info(a, fn, 0, ent));
s->st_size = ent->ucsize;
s->st_mtime = ent->mtime;
return ERR_OK;
}
LibError afile_validate(const AFile* af)
{
if(!af)
return ERR_INVALID_PARAM;
// note: don't check af->ha - it may be freed at shutdown before
// its files. TODO: revisit once dependency support is added.
if(!af->fc.size)
return ERR_1;
// note: af->ctx is 0 if file is not compressed.
return ERR_OK;
}
#define CHECK_ZFILE(af) CHECK_ERR(afile_validate(af))
// open file, and fill *af with information about it.
// return < 0 on error (output param zeroed).
LibError afile_open(const Handle ha, const char* fn, uintptr_t memento, int flags, AFile* af)
{
// zero output param in case we fail below.
memset(af, 0, sizeof(*af));
H_DEREF(ha, Archive, a);
// this is needed for AFile below. optimization: archive_get_file_info
// wants the original filename, but by passing the unique copy
// we avoid work there (its file_make_unique_fn_copy returns immediately)
const char* atom_fn = file_make_unique_fn_copy(fn, 0);
ArchiveEntry* ent;
// don't want AFile to contain an ArchiveEntry struct -
// its ucsize member must be 'loose' for compatibility with File.
// => need to copy ArchiveEntry fields into AFile.
RETURN_ERR(archive_get_file_info(a, atom_fn, memento, ent));
if(ent->flags & ZIP_LFH_FIXUP_NEEDED)
{
zip_fixup_lfh(&a->f, ent);
ent->flags &= ~ZIP_LFH_FIXUP_NEEDED;
}
uintptr_t ctx = 0;
// slight optimization: do not allocate context if not compressed
if(ent->method != CM_NONE)
{
ctx = comp_alloc(CT_DECOMPRESSION, ent->method);
if(!ctx)
return ERR_NO_MEM;
}
af->fc.flags = flags;
af->fc.size = ent->ucsize;
af->fc.atom_fn = atom_fn;
af->ofs = ent->ofs;
af->csize = ent->csize;
af->method = ent->method;
af->ha = ha;
af->ctx = ctx;
af->is_mapped = 0;
CHECK_ZFILE(af);
return ERR_OK;
}
// close file.
LibError afile_close(AFile* af)
{
CHECK_ZFILE(af);
// other AFile fields don't need to be freed/cleared
comp_free(af->ctx);
af->ctx = 0;
return ERR_OK;
}
///////////////////////////////////////////////////////////////////////////////
//
// sync and async I/O
// uses file_* and inf_*.
//
///////////////////////////////////////////////////////////////////////////////
static const size_t CHUNK_SIZE = 16*KiB;
// begin transferring <size> bytes, starting at <ofs>. get result
// with afile_io_wait; when no longer needed, free via afile_io_discard.
LibError afile_io_issue(AFile* af, off_t user_ofs, size_t max_output_size, void* user_buf, AFileIo* io)
{
// zero output param in case we fail below.
memset(io, 0, sizeof(AFileIo));
CHECK_ZFILE(af);
H_DEREF(af->ha, Archive, a);
// not compressed; we'll just read directly from the archive file.
// no need to clamp to EOF - that's done already by the VFS.
if(!is_compressed(af))
{
// io->ctx is 0 (due to memset)
const off_t ofs = af->ofs+user_ofs;
return file_io_issue(&a->f, ofs, max_output_size, user_buf, &io->io);
}
io->ctx = af->ctx;
io->max_output_size = max_output_size;
io->user_buf = user_buf;
const off_t cofs = af->ofs + af->last_cofs; // needed to determine csize
// read up to next chunk (so that the next read is aligned -
// less work for aio) or up to EOF.
const ssize_t left_in_chunk = CHUNK_SIZE - (cofs % CHUNK_SIZE);
const ssize_t left_in_file = af->csize - cofs;
const size_t csize = MIN(left_in_chunk, left_in_file);
void* cbuf = mem_alloc(csize, 4*KiB);
if(!cbuf)
return ERR_NO_MEM;
CHECK_ERR(file_io_issue(&a->f, cofs, csize, cbuf, &io->io));
af->last_cofs += (off_t)csize;
return ERR_OK;
}
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
int afile_io_has_completed(AFileIo* io)
{
return file_io_has_completed(&io->io);
}
// wait until the transfer <io> completes, and return its buffer.
// output parameters are zeroed on error.
LibError afile_io_wait(AFileIo* io, void*& buf, size_t& size)
{
buf = 0;
size = 0;
void* raw_buf;
size_t raw_size;
CHECK_ERR(file_io_wait(&io->io, raw_buf, raw_size));
// file is compressed and we need to decompress
if(io->ctx)
{
comp_set_output(io->ctx, (void*)io->user_buf, io->max_output_size);
ssize_t ucbytes_output = comp_feed(io->ctx, raw_buf, raw_size);
free(raw_buf);
RETURN_ERR(ucbytes_output);
buf = io->user_buf;
size = ucbytes_output;
}
else
{
buf = raw_buf;
size = raw_size;
}
return ERR_OK;
}
// finished with transfer <io> - free its buffer (returned by afile_io_wait)
LibError afile_io_discard(AFileIo* io)
{
return file_io_discard(&io->io);
}
LibError afile_io_validate(const AFileIo* io)
{
if(debug_is_pointer_bogus(io->user_buf))
return ERR_1;
// <ctx> and <max_output_size> have no invariants we could check.
RETURN_ERR(file_io_validate(&io->io));
return ERR_OK;
}
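// Usage sketch (illustration only, not part of this commit): the intended
// asynchronous calling convention - issue, optionally poll, wait, then
// discard. example_async_read and its parameters are hypothetical.
static LibError example_async_read(AFile* af, off_t ofs, size_t max_size, void* user_buf)
{
    AFileIo io;
    RETURN_ERR(afile_io_issue(af, ofs, max_size, user_buf, &io));
    // ... do other work here; afile_io_has_completed(&io) may be polled ...
    void* buf; size_t size;
    LibError ret = afile_io_wait(&io, buf, size);
    // buf/size now describe the decompressed data (or the raw data,
    // if the file was stored uncompressed).
    (void)afile_io_discard(&io);
    return ret;
}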
///////////////////////////////////////////////////////////////////////////////
class Decompressor
{
public:
Decompressor(uintptr_t comp_ctx_, size_t ucsize_max, bool use_temp_buf_, FileIOCB cb, uintptr_t cb_ctx)
{
comp_ctx = comp_ctx_;
csize_total = 0;
ucsize_left = ucsize_max;
use_temp_buf = use_temp_buf_;
user_cb = cb;
user_cb_ctx = cb_ctx;
}
LibError feed(const void* cblock, size_t csize, size_t* bytes_processed)
{
if(use_temp_buf)
RETURN_ERR(comp_alloc_output(comp_ctx, csize));
void* ucblock = comp_get_output(comp_ctx);
const size_t ucsize = comp_feed(comp_ctx, cblock, csize);
*bytes_processed = ucsize;
debug_assert(ucsize <= ucsize_left);
ucsize_left -= ucsize;
LibError ret = INFO_CB_CONTINUE;
if(user_cb)
ret = user_cb(user_cb_ctx, ucblock, ucsize, bytes_processed);
if(ucsize_left == 0)
ret = ERR_OK;
return ret;
}
size_t total_csize_fed() const { return csize_total; }
private:
uintptr_t comp_ctx;
size_t csize_total;
size_t ucsize_left;
bool use_temp_buf;
// allow user-specified callbacks: "chain" them, because file_io's
// callback mechanism is already used to return blocks.
FileIOCB user_cb;
uintptr_t user_cb_ctx;
};
static LibError decompressor_feed_cb(uintptr_t cb_ctx,
const void* cblock, size_t csize, size_t* bytes_processed)
{
Decompressor* d = (Decompressor*)cb_ctx;
return d->feed(cblock, csize, bytes_processed);
}
// read from the (possibly compressed) file <af> as if it were a normal file.
// starting at the beginning of the logical (decompressed) file,
// skip <ofs> bytes of data; read the next <size> bytes into <*pbuf>.
//
// if non-NULL, <cb> is called for each block read, passing <ctx>.
// if it returns a negative error code,
// the read is aborted and that value is returned.
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return bytes read, or a negative error code.
ssize_t afile_read(AFile* af, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t cb_ctx)
{
CHECK_ZFILE(af);
H_DEREF(af->ha, Archive, a);
if(!is_compressed(af))
// no need to set last_cofs - only checked if compressed.
return file_io(&a->f, af->ofs+ofs, size, pbuf, cb, cb_ctx);
debug_assert(af->ctx != 0);
const bool use_temp_buf = (pbuf == FILE_BUF_TEMP);
if(!use_temp_buf)
comp_set_output(af->ctx, (void*)*pbuf, size);
const off_t cofs = af->ofs+af->last_cofs;
// remaining bytes in file. callback will cause IOs to stop when
// enough ucdata has been produced.
const size_t csize_max = af->csize - af->last_cofs;
Decompressor d(af->ctx, size, use_temp_buf, cb, cb_ctx);
ssize_t uc_transferred = file_io(&a->f, cofs, csize_max, FILE_BUF_TEMP, decompressor_feed_cb, (uintptr_t)&d);
af->last_cofs += (off_t)d.total_csize_fed();
return uc_transferred;
}
///////////////////////////////////////////////////////////////////////////////
//
// file mapping
//
///////////////////////////////////////////////////////////////////////////////
// map the entire file <af> into memory. mapping compressed files
// isn't allowed, since the compression algorithm is unspecified.
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
LibError afile_map(AFile* af, void*& p, size_t& size)
{
p = 0;
size = 0;
CHECK_ZFILE(af);
// mapping compressed files doesn't make sense because the
// compression algorithm is unspecified - disallow it.
if(is_compressed(af))
WARN_RETURN(ERR_IS_COMPRESSED);
// note: we mapped the archive in archive_open, but unmapped it
// in the meantime to save memory in case it wasn't going to be mapped.
// now we do so again; it's unmapped in afile_unmap (refcounted).
H_DEREF(af->ha, Archive, a);
void* archive_p;
size_t archive_size;
CHECK_ERR(file_map(&a->f, archive_p, archive_size));
p = (char*)archive_p + af->ofs;
size = af->fc.size;
af->is_mapped = 1;
return ERR_OK;
}
// remove the mapping of file <af>; fail if not mapped.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should be paired so that the archive mapping
// may be removed when no longer needed.
LibError afile_unmap(AFile* af)
{
CHECK_ZFILE(af);
// make sure archive mapping refcount remains balanced:
// don't allow multiple|"false" unmaps.
if(!af->is_mapped)
return ERR_FAIL;
af->is_mapped = 0;
H_DEREF(af->ha, Archive, a);
return file_unmap(&a->f);
}
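// Usage sketch (illustration only, not part of this commit): afile_map and
// afile_unmap must be paired; the returned pointer points directly into the
// archive's mapping, so this only works for uncompressed files.
// example_map_use is a hypothetical name.
static LibError example_map_use(AFile* af)
{
    void* p; size_t size;
    RETURN_ERR(afile_map(af, p, size));
    // ... read from p[0 .. size) ...
    return afile_unmap(af);
}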

View File

@ -0,0 +1,220 @@
// Zip archiving on top of ZLib.
//
// Copyright (c) 2003-2005 Jan Wassenberg
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// Contact info:
// Jan.Wassenberg@stud.uni-karlsruhe.de
// http://www.stud.uni-karlsruhe.de/~urkt/
#ifndef ARCHIVE_H__
#define ARCHIVE_H__
#include "../handle.h"
#include "file.h" // FileCB for afile_enum
#include "compression.h" // CompressionMethod
// note: filenames are case-insensitive.
//
// archive
//
// open and return a handle to the archive indicated by <fn>.
// somewhat slow - each file is added to an internal index.
extern Handle archive_open(const char* fn);
// close the archive <ha> and set ha to 0
extern LibError archive_close(Handle& ha);
// successively call <cb> for each valid file in the archive <ha>,
// passing the complete path and <user>.
// if it returns a nonzero value, abort and return that, otherwise 0.
extern LibError archive_enum(const Handle ha, const FileCB cb, const uintptr_t user);
//
// file
//
struct AFile
{
FileCommon fc;
off_t ofs; // in archive
off_t csize;
CompressionMethod method;
off_t last_cofs; // in compressed file
Handle ha;
uintptr_t ctx;
// this AFile has been successfully afile_map-ped, i.e. reference
// count of the archive's mapping has been increased.
// we need to undo that when closing it.
uint is_mapped : 1;
};
// get file status (size, mtime). output param is zeroed on error.
extern LibError afile_stat(Handle ha, const char* fn, struct stat* s);
// open file, and fill *zf with information about it.
// return < 0 on error (output param zeroed).
extern LibError afile_open(Handle ha, const char* fn, uintptr_t memento, int flags, AFile* af);
// close file.
extern LibError afile_close(AFile* af);
extern LibError afile_validate(const AFile* af);
//
// asynchronous read
//
struct AFileIo
{
FileIo io;
uintptr_t ctx;
size_t max_output_size;
void* user_buf;
};
// begin transferring <size> bytes, starting at <ofs>. get result
// with afile_io_wait; when no longer needed, free via afile_io_discard.
extern LibError afile_io_issue(AFile* af, off_t ofs, size_t size, void* buf, AFileIo* io);
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
extern int afile_io_has_completed(AFileIo* io);
// wait until the transfer <io> completes, and return its buffer.
// output parameters are zeroed on error.
extern LibError afile_io_wait(AFileIo* io, void*& p, size_t& size);
// finished with transfer <io> - free its buffer (returned by afile_io_wait)
extern LibError afile_io_discard(AFileIo* io);
extern LibError afile_io_validate(const AFileIo* io);
//
// synchronous read
//
// read from the (possibly compressed) file <zf> as if it were a normal file.
// starting at the beginning of the logical (decompressed) file,
// skip <ofs> bytes of data; read the next <size> bytes into <buf>.
//
// if non-NULL, <cb> is called for each block read, passing <ctx>.
// if it returns a negative error code,
// the read is aborted and that value is returned.
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return bytes read, or a negative error code.
extern ssize_t afile_read(AFile* af, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb = 0, uintptr_t ctx = 0);
//
// memory mapping
//
// useful for files that are too large to be loaded into memory,
// or if only (non-sequential) portions of a file are needed at a time.
//
// this is of course only possible for uncompressed files - compressed files
// would have to be inflated sequentially, which defeats the point of mapping.
// map the entire file <zf> into memory. mapping compressed files
// isn't allowed, since the compression algorithm is unspecified.
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should still be paired so that the archive mapping
// may be removed when no longer needed.
extern LibError afile_map(AFile* af, void*& p, size_t& size);
// remove the mapping of file <zf>; fail if not mapped.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should be paired so that the archive mapping
// may be removed when no longer needed.
extern LibError afile_unmap(AFile* af);
enum ArchiveFileFlags
{
ZIP_LFH_FIXUP_NEEDED = 1
};
// convenience container for location / size of file in archive.
// separate from AFile to minimize size of file table.
struct ArchiveEntry
{
// these are returned by afile_stat:
off_t ucsize;
time_t mtime;
// used in IO
off_t ofs; // bit 31 set if fixup needed
off_t csize;
CompressionMethod method;
uint flags;
const char* atom_fn;
// why csize?
// file I/O may be N-buffered, so it's good to know when the raw data
// stops, or else we potentially overshoot by N-1 blocks.
// if we do read too much though, nothing breaks - inflate would just
// ignore it, since Zip files are compressed individually.
//
// we also need a way to check if a file is compressed (e.g. to fail
// mmap requests if the file is compressed). packing a bit in ofs or
// ucsize is error prone and ugly (1 bit less won't hurt though).
// any other way will mess up the nice 2^n byte size anyway, so
// might as well store csize.
};
// successively called for each valid file in the archive,
// passing the complete path and <user>.
// return INFO_CB_CONTINUE to continue calling; anything else will cause
// the caller to abort and immediately return that value.
//
// HACK: call back with negative index the first time; its abs. value is
// the number of entries in the archive. lookup needs to know this so it can
// preallocate memory. having lookup_init call z_get_num_files and then
// za_enum_files would require passing around a ZipInfo struct, or searching
// for the ECDR twice - both ways aren't nice. nor is expanding on demand -
// we try to minimize allocations (faster, less fragmentation).
// fn (filename) is not necessarily 0-terminated!
// loc is only valid during the callback! must be copied or saved.
typedef LibError (*CDFH_CB)(uintptr_t user, i32 i, const ArchiveEntry* loc, size_t fn_len);
struct Archive;
extern LibError archive_allocate_entries(Archive* a, size_t num_entries);
extern LibError archive_add_file(Archive* a, const ArchiveEntry* ent);
#endif // #ifndef ARCHIVE_H__

View File

@ -0,0 +1,463 @@
// Compression/Decompression interface
// Copyright (c) 2005 Jan Wassenberg
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// Contact info:
// Jan.Wassenberg@stud.uni-karlsruhe.de
// http://www.stud.uni-karlsruhe.de/~urkt/
#include "precompiled.h"
#include "lib/res/mem.h"
#include "lib/allocators.h"
#include "lib/timer.h"
#include "compression.h"
// provision for removing all ZLib code (all inflate calls will fail).
// used for checking DLL dependency; might also simulate corrupt Zip files.
//#define NO_ZLIB
#ifndef NO_ZLIB
# define ZLIB_DLL
# include <zlib.h>
# if MSC_VERSION
# ifdef NDEBUG
# pragma comment(lib, "zlib1.lib")
# else
# pragma comment(lib, "zlib1d.lib")
# endif
# endif
#endif
TIMER_ADD_CLIENT(tc_zip_inflate);
TIMER_ADD_CLIENT(tc_zip_memcpy);
/*
what is the value added by the zlib layer?
it gets the zlib interface correct - e.g. checking return values.
code can then just use our simpler interface.
inf_*: in-memory inflate routines (zlib wrapper)
decompresses blocks from the file_io callback.
*/
static LibError LibError_from_zlib(int err)
{
switch(err)
{
case Z_OK:
return ERR_OK;
case Z_STREAM_END:
return ERR_EOF;
case Z_MEM_ERROR:
return ERR_NO_MEM;
case Z_DATA_ERROR:
return ERR_CORRUPTED;
case Z_STREAM_ERROR:
return ERR_INVALID_PARAM;
default:
return ERR_FAIL;
}
UNREACHABLE;
}
// must be dynamically allocated - need one for every open AFile,
// and z_stream is large.
class Compressor
{
public:
Compressor(ContextType type_)
{
type = type_;
}
virtual ~Compressor()
{
mem_free(out_mem);
// free all remaining input buffers that we copied (rare)
for(size_t i = 0; i < pending_bufs.size(); i++)
free(pending_bufs[i].mem_to_free);
}
virtual LibError init() = 0;
virtual LibError reset() = 0;
virtual LibError alloc_output(size_t in_size) = 0;
// consume as much of the given input buffer as possible. the data is
// decompressed/compressed into the previously established output buffer.
// reports how many bytes were consumed and produced; either or both
// can be 0 if input size is small or not enough room in output buffer.
// caller is responsible for saving any leftover input data,
// which is why we pass back in_consumed.
virtual LibError consume(const void* in, size_t in_size, size_t& in_consumed, size_t& out_produced) = 0;
virtual LibError finish(void** out, size_t* out_size) = 0;
virtual void release() = 0;
void set_output(void* out, size_t out_size)
{
next_out = out;
avail_out = out_size;
}
void* get_output()
{
return next_out;
}
ssize_t feed(const void* in, size_t in_size)
{
pending_bufs.push_back(Buf(in, in_size, 0));
size_t out_total = 0; // returned unless error occurs
LibError err;
// work off all queued input buffers until output buffer is filled.
do
{
Buf& buf = pending_bufs.front();
size_t in_consumed, out_consumed;
err = consume(buf.cdata, buf.csize, in_consumed, out_consumed);
if(err < 0)
return err;
out_total += out_consumed;
debug_assert(in_consumed <= buf.csize);
// all input consumed - dequeue input buffer
if(in_consumed == buf.csize)
{
free(buf.mem_to_free); // no-op unless we allocated it
pending_bufs.pop_front();
}
// limited by output space - mark input buffer as partially used
else
{
buf.cdata += in_consumed;
buf.csize -= in_consumed;
// buffer was allocated by caller and may be freed behind our
// backs after returning (which we must because output buffer
// is full). allocate a copy of the remaining input data.
if(!buf.mem_to_free)
{
void* cdata_copy = malloc(buf.csize);
if(!cdata_copy)
return ERR_NO_MEM;
memcpy2(cdata_copy, buf.cdata, buf.csize);
buf.cdata = (const u8*)cdata_copy;
}
return (ssize_t)out_total;
}
}
while(!pending_bufs.empty());
return (ssize_t)out_total;
}
protected:
ContextType type;
CompressionMethod method;
void* next_out;
size_t avail_out;
void* out_mem;
size_t out_mem_size;
// may be several IOs in flight => list needed
struct Buf
{
const u8* cdata;
size_t csize;
void* mem_to_free;
Buf(const void* cdata_, size_t csize_, void* mem_to_free_)
{
cdata = (const u8*)cdata_;
csize = csize_;
mem_to_free = mem_to_free_;
}
};
std::deque<Buf> pending_bufs;
LibError alloc_output_impl(size_t required_out_size)
{
size_t alloc_size = required_out_size;
// .. already had a buffer
if(out_mem)
{
// it was big enough - reuse
if(out_mem_size >= required_out_size)
return ERR_OK;
// free previous
// note: mem.cpp doesn't support realloc; don't use Pool etc. because
// maximum file size may be huge (more address space than we can afford)
mem_free(out_mem);
// TODO: make sure difference in required_out_size vs. out_mem_size
// is big enough - i.e. don't only increment in small chunks.
// set alloc_size...
// fall through..
}
// .. need to allocate anew
out_mem = mem_alloc(alloc_size, 32*KiB);
if(!out_mem)
WARN_RETURN(ERR_NO_MEM);
out_mem_size = alloc_size;
next_out = out_mem;
avail_out = out_mem_size;
return ERR_OK;
}
}; // class Compressor
#ifndef NO_ZLIB
class ZLibCompressor : public Compressor
{
z_stream zs;
public:
// default ctor cannot be generated
ZLibCompressor(ContextType type)
: Compressor(type)
{
}
virtual LibError init()
{
memset(&zs, 0, sizeof(zs));
int ret;
if(type == CT_COMPRESSION)
{
const int level = Z_BEST_COMPRESSION;
const int windowBits = -MAX_WBITS; // max window size; omit ZLib header
const int memLevel = 8; // default; total mem ~= 256KiB
const int strategy = Z_DEFAULT_STRATEGY; // normal data - not RLE
ret = deflateInit2(&zs, level, Z_DEFLATED, windowBits, memLevel, strategy);
}
else
{
const int windowBits = -MAX_WBITS; // max window size; omit ZLib header
ret = inflateInit2(&zs, windowBits);
}
CHECK_ERR(LibError_from_zlib(ret));
return ERR_OK;
}
virtual LibError reset()
{
int ret;
if(type == CT_COMPRESSION)
ret = deflateReset(&zs);
else
ret = inflateReset(&zs);
CHECK_ERR(LibError_from_zlib(ret));
return ERR_OK;
}
// out:
// compression ratios can be enormous (1000x), so we require
// callers to allocate the output buffer themselves
// (since they know the actual size).
// allocate buffer
// caller can't do it because they don't know what compression ratio
// will be achieved.
virtual LibError alloc_output(size_t in_size)
{
if(type == CT_COMPRESSION)
{
size_t required_size = (size_t)deflateBound(&zs, (uLong)in_size);
RETURN_ERR(alloc_output_impl(required_size));
return ERR_OK;
}
else
WARN_RETURN(ERR_LOGIC);
}
virtual LibError consume(const void* in, size_t in_size, size_t& in_consumed, size_t& out_consumed)
{
zs.avail_in = (uInt)in_size;
zs.next_in = (Byte*)in;
zs.next_out = (Byte*)next_out;
zs.avail_out = (uInt)avail_out;
const size_t prev_avail_in = zs.avail_in;
const size_t prev_avail_out = zs.avail_out;
int ret;
if(type == CT_COMPRESSION)
ret = deflate(&zs, 0);
else
ret = inflate(&zs, Z_SYNC_FLUSH);
debug_assert(prev_avail_in >= zs.avail_in && prev_avail_out >= zs.avail_out);
in_consumed = prev_avail_in - zs.avail_in;
out_consumed = prev_avail_out - zs.avail_out;
next_out = zs.next_out;
avail_out = zs.avail_out;
// sanity check: if ZLib reports end of stream, all input data
// must have been consumed.
if(ret == Z_STREAM_END)
{
debug_assert(zs.avail_in == 0);
ret = Z_OK;
}
CHECK_ERR(LibError_from_zlib(ret));
return ERR_OK;
}
virtual LibError finish(void** out, size_t* out_size)
{
if(type == CT_COMPRESSION)
{
// notify zlib that no more data is forthcoming and have it flush output.
// our output buffer has enough space due to use of deflateBound;
// therefore, deflate must return Z_STREAM_END.
int ret = deflate(&zs, Z_FINISH);
if(ret != Z_STREAM_END)
debug_warn("deflate: unexpected Z_FINISH behavior");
}
else
{
// nothing to do - decompression always flushes immediately
}
*out = zs.next_out - zs.total_out;
*out_size = zs.total_out;
return ERR_OK;
}
virtual void release()
{
// can have both input or output data remaining
// (if not all data in uncompressed stream was needed)
int ret;
if(type == CT_COMPRESSION)
ret = deflateEnd(&zs);
else
ret = inflateEnd(&zs);
WARN_ERR(LibError_from_zlib(ret));
}
};
#endif // #ifndef NO_ZLIB
//-----------------------------------------------------------------------------
// allocator
static const size_t MAX_COMPRESSOR_SIZE = sizeof(ZLibCompressor);
static SingleAllocator<u8[MAX_COMPRESSOR_SIZE]> compressor_allocator;
uintptr_t comp_alloc(ContextType type, CompressionMethod method)
{
void* c_mem = compressor_allocator.alloc();
if(!c_mem)
return 0;
Compressor* c;
switch(method)
{
#include "nommgr.h"
#ifndef NO_ZLIB
case CM_DEFLATE:
cassert(sizeof(ZLibCompressor) <= MAX_COMPRESSOR_SIZE);
c = new(c_mem) ZLibCompressor(type);
break;
#endif
#include "mmgr.h"
default:
debug_warn("unknown compression type");
compressor_allocator.free(c_mem);
return 0;
}
c->init();
return (uintptr_t)c;
}
LibError comp_reset(uintptr_t c_)
{
Compressor* c = (Compressor*)c_;
return c->reset();
}
// subsequent calls to comp_feed will unzip into <out>.
void comp_set_output(uintptr_t c_, void* out, size_t out_size)
{
Compressor* c = (Compressor*)c_;
c->set_output(out, out_size);
}
LibError comp_alloc_output(uintptr_t c_, size_t in_size)
{
Compressor* c = (Compressor*)c_;
return c->alloc_output(in_size);
}
void* comp_get_output(uintptr_t c_)
{
Compressor* c = (Compressor*)c_;
return c->get_output();
}
// unzip into output buffer. returns bytes written
// (may be 0, if not enough data is passed in), or < 0 on error.
ssize_t comp_feed(uintptr_t c_, const void* in, size_t in_size)
{
Compressor* c = (Compressor*)c_;
return c->feed(in, in_size);
}
LibError comp_finish(uintptr_t c_, void** out, size_t* out_size)
{
Compressor* c = (Compressor*)c_;
return c->finish(out, out_size);
}
void comp_free(uintptr_t c_)
{
// no-op if context is 0 (i.e. was never allocated)
if(!c_)
return;
Compressor* c = (Compressor*)c_;
c->release();
c->~Compressor();
compressor_allocator.free(c);
}
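// Usage sketch (illustration only, not part of this commit): the comp_*
// calling convention as the archive code uses it - allocate a context, point
// it at an output buffer, feed input blocks as they arrive (feed() queues any
// unconsumed input internally), then free. example_decompress, out_buf and
// ucsize are hypothetical names.
static ssize_t example_decompress(const void* cblock1, size_t csize1,
    const void* cblock2, size_t csize2, void* out_buf, size_t ucsize)
{
    uintptr_t ctx = comp_alloc(CT_DECOMPRESSION, CM_DEFLATE);
    if(!ctx)
        return ERR_NO_MEM;
    comp_set_output(ctx, out_buf, ucsize);
    ssize_t total = 0;
    ssize_t produced = comp_feed(ctx, cblock1, csize1); // may consume only part
    if(produced >= 0)
    {
        total += produced;
        produced = comp_feed(ctx, cblock2, csize2);      // leftover was queued
        if(produced >= 0)
            total += produced;
    }
    comp_free(ctx);
    return (produced < 0)? produced : total;
}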

View File

@ -0,0 +1,31 @@
#ifndef COMPRESSION_H__
#define COMPRESSION_H__
enum ContextType
{
CT_COMPRESSION,
CT_DECOMPRESSION
};
enum CompressionMethod
{
CM_NONE,
// zlib "deflate" - see RFC 1950, 1951.
CM_DEFLATE
};
extern uintptr_t comp_alloc(ContextType type, CompressionMethod method);
extern void comp_set_output(uintptr_t ctx, void* out, size_t out_size);
extern LibError comp_alloc_output(uintptr_t c_, size_t in_size);
extern void* comp_get_output(uintptr_t ctx_);
extern ssize_t comp_feed(uintptr_t ctx, const void* in, size_t in_size);
extern LibError comp_finish(uintptr_t ctx, void** out, size_t* out_size);
extern void comp_free(uintptr_t ctx);
#endif // #ifndef COMPRESSION_H__

View File

@ -21,28 +21,23 @@
#include "lib.h"
#include "../res.h"
#include "file.h"
#include "detect.h"
#include "adts.h"
#include "sysdep/sysdep.h"
#include "byte_order.h"
#include "lib/allocators.h"
#include "file.h"
#include "file_internal.h"
#include <vector>
#include <algorithm>
#include <string>
// block := power-of-two sized chunk of a file.
// all transfers are expanded to naturally aligned, whole blocks
// (this makes caching parts of files feasible; it is also much faster
// for some aio implementations, e.g. wposix).
const size_t BLOCK_SIZE_LOG2 = 16; // 2**16 = 64 KiB
const size_t BLOCK_SIZE = 1ul << BLOCK_SIZE_LOG2;
const size_t SECTOR_SIZE = 4096;
// reasonable guess. if too small, aio will do alignment.
// reasonable guess. if too small, aio will do alignment.
const size_t SECTOR_SIZE = 4*KiB;
FileStats stats;
// rationale for aio, instead of only using mmap:
@ -80,7 +75,7 @@ LibError pp_set_dir(PathPackage* pp, const char* dir)
const int len = snprintf(pp->path, ARRAY_SIZE(pp->path), "%s/", dir);
// (need len below and must return an error code, not -1)
if(len < 0)
CHECK_ERR(ERR_PATH_LENGTH);
WARN_RETURN(ERR_PATH_LENGTH);
pp->end = pp->path+len;
pp->chars_left = ARRAY_SIZE(pp->path)-len;
@ -159,7 +154,7 @@ static LibError convert_path(char* dst, const char* src, Conversion conv = TO_NA
{
len++;
if(len >= PATH_MAX)
CHECK_ERR(ERR_PATH_LENGTH);
WARN_RETURN(ERR_PATH_LENGTH);
char c = *s++;
@ -495,13 +490,14 @@ LibError file_enum(const char* P_path, const FileCB cb, const uintptr_t user)
{
struct stat s;
memset(&s, 0, sizeof(s));
const uintptr_t memento = 0; // there is nothing we could pass here (loose files have no archive entry)
for(DirEntCIt it = dirents.begin(); it != dirents.end(); ++it)
{
const DirEnt* ent = *it;
s.st_mode = (ent->size == -1)? S_IFDIR : S_IFREG;
s.st_size = ent->size;
s.st_mtime = ent->mtime;
LibError ret = cb(ent->name, &s, user);
LibError ret = cb(ent->name, &s, memento, user);
if(ret != INFO_CB_CONTINUE)
{
cb_err = ret; // first error (since we now abort)
@ -575,19 +571,79 @@ LibError file_validate(const File* f)
// mapped but refcount is invalid
else if((f->mapping != 0) ^ (f->map_refs != 0))
return ERR_2;
// fn_hash not set
// atom_fn not set
#ifndef NDEBUG
else if(!f->fn_hash)
else if(!f->fc.atom_fn)
return ERR_3;
#endif
return ERR_OK;
}
#define CHECK_FILE(f) CHECK_ERR(file_validate(f))
// rationale: we want a constant-time IsAtomFn(string pointer) lookup:
// this avoids any overhead of calling file_make_unique_fn_copy on
// already-atomized strings. that requires allocating from one contiguous
// arena, which is also more memory-efficient than the heap (no headers).
static Pool atom_pool;
// allocate a copy of P_fn in our string pool. strings are equal iff
// their addresses are equal, thus allowing fast comparison.
const char* file_make_unique_fn_copy(const char* P_fn, size_t fn_len)
{
/*
const char* slash = strrchr(P_fn, '/');
if(slash&&!stricmp(slash+1, "proptest.PMD"))
debug_break();
*/
// early out: if already an atom, return immediately.
if(pool_contains(&atom_pool, (void*)P_fn))
return P_fn;
// allow for Pascal-style strings (e.g. from Zip file header)
if(!fn_len)
fn_len = strlen(P_fn);
const char* unique_fn;
// check if already allocated; return existing copy if so.
//
// rationale: the entire storage could be done via container,
// rather than simply using it as a lookup mapping.
// however, DynHashTbl together with Pool (see above) is more efficient.
typedef DynHashTbl<const char*, const char*> AtomMap;
static AtomMap atom_map;
unique_fn = atom_map.find(P_fn);
if(unique_fn)
{
debug_assert(!strcmp(P_fn, unique_fn));
return unique_fn;
}
unique_fn = (const char*)pool_alloc(&atom_pool, fn_len+1);
if(!unique_fn)
return 0;
memcpy2((void*)unique_fn, P_fn, fn_len);
((char*)unique_fn)[fn_len] = '\0';
atom_map.insert(unique_fn, unique_fn);
FILE_STATS_NOTIFY_UNIQUE_FILE();
return unique_fn;
}
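// Usage sketch (illustration only, not part of this commit): once two names
// have been atomized via file_make_unique_fn_copy, equality is a simple
// pointer compare - this is what the archive and cache lookups rely on.
// example_same_file is a hypothetical name.
static bool example_same_file(const char* fn1, const char* fn2)
{
    const char* atom1 = file_make_unique_fn_copy(fn1, 0);
    const char* atom2 = file_make_unique_fn_copy(fn2, 0);
    return atom1 == atom2;  // no strcmp needed
}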
static inline void atom_init()
{
pool_create(&atom_pool, 8*MiB, POOL_VARIABLE_ALLOCS);
}
static inline void atom_shutdown()
{
(void)pool_destroy(&atom_pool);
}
LibError file_open(const char* p_fn, const uint flags, File* f)
LibError file_open(const char* P_fn, const uint flags, File* f)
{
// zero output param in case we fail below.
memset(f, 0, sizeof(*f));
@ -595,8 +651,8 @@ LibError file_open(const char* p_fn, const uint flags, File* f)
if(flags > FILE_FLAG_MAX)
return ERR_INVALID_PARAM;
char n_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(p_fn, n_fn));
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(P_fn, N_fn));
// don't stat if opening for writing - the file may not exist yet
off_t size = 0;
@ -609,7 +665,7 @@ LibError file_open(const char* p_fn, const uint flags, File* f)
{
// get file size
struct stat s;
if(stat(n_fn, &s) < 0)
if(stat(N_fn, &s) < 0)
return ERR_FILE_NOT_FOUND;
size = s.st_size;
@ -622,7 +678,7 @@ LibError file_open(const char* p_fn, const uint flags, File* f)
//if(size <= 32*KiB)
// flags |= FILE_NO_AIO;
// make sure <n_fn> is a regular file
// make sure <N_fn> is a regular file
if(!S_ISREG(s.st_mode))
return ERR_NOT_FILE;
}
@ -633,23 +689,23 @@ LibError file_open(const char* p_fn, const uint flags, File* f)
else
oflag |= O_BINARY_NP;
// if AIO is disabled (at user's behest or because the file is small),
// so inform wposix.
// if AIO is disabled at the user's behest, inform wposix.
if(flags & FILE_NO_AIO)
oflag |= O_NO_AIO_NP;
#endif
int fd = open(n_fn, oflag, S_IRWXO|S_IRWXU|S_IRWXG);
int fd = open(N_fn, oflag, S_IRWXO|S_IRWXU|S_IRWXG);
if(fd < 0)
return ERR_FILE_ACCESS;
f->flags = flags;
f->size = size;
f->fn_hash = fnv_hash(n_fn); // copy filename instead?
f->fc.flags = flags;
f->fc.size = size;
f->fc.atom_fn = file_make_unique_fn_copy(P_fn, 0);
f->mapping = 0;
f->map_refs = 0;
f->fd = fd;
CHECK_FILE(f);
return ERR_OK;
}
@ -668,7 +724,7 @@ LibError file_close(File* f)
// return final file size (required by VFS after writing files).
// this is much easier than updating when writing, because we'd have
// to add accounting code to both (sync and async) paths.
f->size = lseek(f->fd, 0, SEEK_END);
f->fc.size = lseek(f->fd, 0, SEEK_END);
// (check fd to avoid BoundsChecker warning about invalid close() param)
if(f->fd != -1)
@ -677,601 +733,15 @@ LibError file_close(File* f)
f->fd = -1;
}
return ERR_OK;
}
///////////////////////////////////////////////////////////////////////////////
//
// async I/O
//
///////////////////////////////////////////////////////////////////////////////
// rationale:
// asynchronous IO routines don't cache; they're just a thin AIO wrapper.
// it's taken care of by file_io, which splits transfers into blocks
// and keeps temp buffers in memory (not user-allocated, because they
// might pull the rug out from under us at any time).
//
// doing so here would be more complicated: would have to handle "forwarding",
// i.e. recognizing that the desired block has been issued, but isn't yet
// complete. file_io also knows more about whether a block should be cached.
//
// disadvantages:
// - streamed data will always be read from disk. no problem, because
// such data (e.g. music, long speech) is unlikely to be used again soon.
// - prefetching (issuing the next few blocks from an archive during idle
// time, so that future out-of-order reads don't need to seek) isn't
// possible in the background (unless via thread, but that's discouraged).
// the utility is questionable, though: how to prefetch so as not to delay
// real IOs? can't determine "idle time" without completion notification,
// which is hard.
// we could get the same effect by bridging small gaps in file_io,
// and rearranging files in the archive in order of access.
static Pool aiocb_pool;
static inline void aiocb_pool_init()
{
(void)pool_create(&aiocb_pool, 32*sizeof(aiocb), sizeof(aiocb));
}
static inline void aiocb_pool_shutdown()
{
(void)pool_destroy(&aiocb_pool);
}
static inline aiocb* aiocb_pool_alloc()
{
ONCE(aiocb_pool_init());
return (aiocb*)pool_alloc(&aiocb_pool, 0);
}
static inline void aiocb_pool_free(void* cb)
{
pool_free(&aiocb_pool, cb);
}
// starts transferring to/from the given buffer.
// no attempt is made at aligning or padding the transfer.
LibError file_io_issue(File* f, off_t ofs, size_t size, void* p, FileIo* io)
{
// zero output param in case we fail below.
memset(io, 0, sizeof(FileIo));
debug_printf("FILE| issue ofs=%d size=%d\n", ofs, size);
//
// check params
//
CHECK_FILE(f);
if(!size || !p || !io)
return ERR_INVALID_PARAM;
const bool is_write = (f->flags & FILE_WRITE) != 0;
// cut off at EOF.
if(!is_write)
{
// avoid min() due to type conversion warnings.
const off_t bytes_left = f->size - ofs;
if(bytes_left < 0)
{
debug_warn("EOF");
return ERR_EOF;
}
if((off_t)size > bytes_left)
size = (size_t)bytes_left;
// guaranteed to fit, since size was > bytes_left
}
// (we can't store the whole aiocb directly - glibc's version is
// 144 bytes large)
aiocb* cb = aiocb_pool_alloc();
io->cb = cb;
if(!cb)
return ERR_NO_MEM;
memset(cb, 0, sizeof(aiocb));
// send off async read/write request
cb->aio_lio_opcode = is_write? LIO_WRITE : LIO_READ;
cb->aio_buf = p;
cb->aio_fildes = f->fd;
cb->aio_offset = ofs;
cb->aio_nbytes = size;
debug_printf("FILE| issue2 io=%p nbytes=%d\n", io, cb->aio_nbytes);
int err = lio_listio(LIO_NOWAIT, &cb, 1, (struct sigevent*)0);
if(err < 0)
{
debug_printf("lio_listio: %d, %d[%s]\n", err, errno, strerror(errno));
file_io_discard(io);
return LibError_from_errno();
}
// wipe out any cached blocks. this is necessary to cover the (rare) case
// of file cache contents predating the file write.
if(f->fc.flags & FILE_WRITE)
file_cache_invalidate(f->fc.atom_fn);
return ERR_OK;
}
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
int file_io_has_completed(FileIo* io)
{
aiocb* cb = (aiocb*)io->cb;
int ret = aio_error(cb);
if(ret == EINPROGRESS)
return 0;
if(ret == 0)
return 1;
debug_warn("unexpected aio_error return");
return -1;
}
LibError file_io_wait(FileIo* io, void*& p, size_t& size)
{
debug_printf("FILE| wait io=%p\n", io);
// zero output params in case something (e.g. H_DEREF) fails.
p = 0;
size = 0;
aiocb* cb = (aiocb*)io->cb;
// wait for transfer to complete.
const aiocb** cbs = (const aiocb**)&cb; // pass in an "array"
while(aio_error(cb) == EINPROGRESS)
aio_suspend(cbs, 1, (timespec*)0); // wait indefinitely
// query number of bytes transferred (-1 if the transfer failed)
const ssize_t bytes_transferred = aio_return(cb);
debug_printf("FILE| bytes_transferred=%d aio_nbytes=%d\n", bytes_transferred, cb->aio_nbytes);
// (size was clipped to EOF in file_io => this is an actual IO error)
if(bytes_transferred < (ssize_t)cb->aio_nbytes)
return ERR_IO;
p = (void*)cb->aio_buf; // cast from volatile void*
size = bytes_transferred;
return ERR_OK;
}
LibError file_io_discard(FileIo* io)
{
memset(io->cb, 0, sizeof(aiocb));
// discourage further use.
aiocb_pool_free(io->cb);
io->cb = 0;
return ERR_OK;
}
LibError file_io_validate(const FileIo* io)
{
const aiocb* cb = (const aiocb*)io->cb;
// >= 0x100 is not necessarily bogus, but suspicious.
// this also catches negative values.
if((uint)cb->aio_fildes >= 0x100)
return ERR_1;
if(debug_is_pointer_bogus((void*)cb->aio_buf))
return ERR_2;
if(cb->aio_lio_opcode != LIO_WRITE && cb->aio_lio_opcode != LIO_READ && cb->aio_lio_opcode != LIO_NOP)
return ERR_3;
// all other aiocb fields have no invariants we could check.
return ERR_OK;
}
///////////////////////////////////////////////////////////////////////////////
ssize_t lowio(int fd, bool is_write, off_t ofs, size_t size, void* buf)
{
lseek(fd, ofs, SEEK_SET);
if(is_write)
return write(fd, buf, size);
else
return read (fd, buf, size);
}
// L3 cache: intended to cache raw compressed data, since files aren't aligned
// in the archive; alignment code would force a read of the whole block,
// which would be a slowdown unless we keep them in memory.
//
// keep out of async code (although extra work for sync: must not issue/wait
// if was cached) to simplify things. disadvantage: problems if same block
// is issued twice, before the first call completes (via wait_io).
// that won't happen though unless we have threaded file_ios =>
// rare enough not to worry about performance.
//
// since sync code allocates the (temp) buffer, it's guaranteed
// to remain valid.
//
// create an id for use with the Cache that uniquely identifies
// the block from the file <fn_hash> starting at <ofs> (aligned).
static u64 block_make_id(const u32 fn_hash, const off_t ofs)
{
{
// id format: filename hash | block number
// 63 32 31 0
//
// we assume the hash (currently: FNV) is unique for all filenames.
// chance of a collision is tiny, and a build tool will ensure
// filenames in the VFS archives are safe.
//
// block_num will always fit in 32 bits (assuming maximum file size
// = 2^32 * BLOCK_SIZE = 2^48 -- plenty); we check this, but don't
// include a workaround. we could return 0, and the caller would have
// to allocate their own buffer, but don't bother.
// make sure block_num fits in 32 bits
const size_t block_num = ofs / BLOCK_SIZE;
debug_assert(block_num <= 0xffffffff);
u64 id = fn_hash; // careful, don't shift a u32 32 bits left
id <<= 32;
id |= block_num;
return id;
}
typedef std::pair<u64, void*> BlockCacheEntry;
typedef std::map<u64, void*> BlockCache;
typedef BlockCache::iterator BlockIt;
static BlockCache block_cache;
struct IOSlot
{
FileIo io;
void* temp_buf;
u64 block_id;
// needed so that we can add the block to the cache when
// its IO is complete. if we add it when issuing, we'd no longer be
// thread-safe: someone else might find it in the cache before its
// transfer has completed. don't want to add an "is_complete" flag,
// because that'd be hard to update (on every wait_io).
void* cached_block;
// != 0 <==> data coming from cache and no IO issued.
// given buffer
// given buffer, will copy from cache
// temp buffer allocated here
// temp buffer taken from cache
};
// don't just use operator[], so that block_cache isn't cluttered
// with IDs associated with 0 (blocks that wouldn't be cached anyway).
static void* block_find(u64 block_id)
{
BlockIt it = block_cache.find(block_id);
if(it == block_cache.end())
return 0;
return it->second;
}
static void block_add(u64 block_id, void* block)
{
if(block_find(block_id))
debug_warn("already in cache");
else
block_cache[block_id] = block;
}
static ssize_t block_issue(File* f, IOSlot* slot, const off_t issue_ofs, void* buf)
{
{
memset(slot, 0, sizeof(IOSlot));
ssize_t issue_size = BLOCK_SIZE;
// check if in cache
slot->block_id = block_make_id(f->fn_hash, issue_ofs);
slot->cached_block = block_find(slot->block_id);
if(slot->cached_block)
goto skip_issue;
//debug_printf("%x miss\n", issue_ofs);
// allocate temp buffer
if(!buf)
buf = slot->temp_buf = mem_alloc(BLOCK_SIZE, BLOCK_SIZE);
// if using buffer, set position in it; otherwise, use temp buffer
CHECK_ERR(file_io_issue(f, issue_ofs, BLOCK_SIZE, buf, &slot->io));
skip_issue:
return issue_size;
}
static void block_shutdown()
{
for(BlockIt it = block_cache.begin(); it != block_cache.end(); ++it)
mem_free(it->second);
}
// remove all blocks loaded from the file <fn>. used when reloading the file.
LibError file_invalidate_cache(const char* fn)
{
// convert to native path to match fn_hash set by file_open
char n_fn[PATH_MAX];
file_make_full_native_path(fn, n_fn);
const u32 fn_hash = fnv_hash(fn);
// notes:
// - don't use remove_if, because std::pair doesn't have operator=.
// - erasing elements during loop is ok because map iterators aren't
// invalidated.
for(BlockIt it = block_cache.begin(); it != block_cache.end(); ++it)
if((it->first >> 32) == fn_hash)
block_cache.erase(it);
return ERR_OK;
}
// the underlying aio implementation likes buffer and offset to be
// sector-aligned; if not, the transfer goes through an align buffer,
// and requires an extra memcpy2.
//
// if the user specifies an unaligned buffer, there's not much we can
// do - we can't assume the buffer contains padding. therefore,
// callers should let us allocate the buffer if possible.
//
// if ofs misalign = buffer, only the first and last blocks will need
// to be copied by aio, since we read up to the next block boundary.
// otherwise, everything will have to be copied; at least we split
// the read into blocks, so aio's buffer won't have to cover the
// whole file.
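// Worked example (illustration only): with BLOCK_SIZE = 64 KiB, a temp-buffer
// read of 100000 bytes at ofs 70000 gives ofs_misalign = 70000 % 65536 = 4464,
// so the transfer is widened to actual_ofs = 65536 and
// actual_size = round_up(4464 + 100000, BLOCK_SIZE) = 131072 - i.e. two whole
// blocks, each of which can be cached and reused.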
// transfer <size> bytes, starting at <ofs>, to/from the given file.
// (read or write access was chosen at file-open time).
//
// if non-NULL, <cb> is called for each block transferred, passing <ctx>.
// it returns how much data was actually transferred, or a negative error
// code (in which case we abort the transfer and return that value).
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return number of bytes transferred (see above), or a negative error code.
ssize_t file_io(File* f, off_t data_ofs, size_t data_size, void* data_buf,
FileIOCB cb, uintptr_t ctx) // optional
{
debug_printf("FILE| io: fd=%d size=%d ofs=%d\n", f->fd, data_size, data_ofs);
CHECK_FILE(f);
const bool is_write = !!(f->flags & FILE_WRITE);
const bool no_aio = !!(f->flags & FILE_NO_AIO);
// when reading:
if(!is_write)
{
// cut data_size off at EOF
const ssize_t bytes_left = f->size - data_ofs;
if(bytes_left < 0)
return ERR_EOF;
data_size = MIN(data_size, (size_t)bytes_left);
}
bool temp = (data_buf == 0);
// sanity checks:
// .. temp blocks requested AND
// (not reading OR using lowio OR no callback)
if(temp && (is_write || no_aio || !cb))
{
debug_warn("invalid parameter");
return ERR_INVALID_PARAM;
}
// only align if we allocate the buffer and in AIO mode
const bool do_align = temp;
//
// calculate aligned transfer size (no change if !do_align)
//
off_t actual_ofs = data_ofs;
size_t actual_size = data_size;
void* actual_buf = data_buf;
// note: we go to the trouble of aligning the first block (instead of
// just reading up to the next block and letting aio realign it),
// so that it can be taken from the cache.
// this is not possible if !do_align, since we have to allocate
// extra buffer space for the padding.
const size_t ofs_misalign = data_ofs % BLOCK_SIZE;
const size_t lead_padding = do_align? ofs_misalign : 0;
// for convenience; used below.
actual_ofs -= (off_t)lead_padding;
actual_size = round_up(lead_padding + data_size, BLOCK_SIZE);
// skip aio code, use lowio
if(no_aio)
return lowio(f->fd, is_write, data_ofs, data_size, data_buf);
//
// now we read the file in 64 KiB chunks, N-buffered.
// if reading from Zip, inflate while reading the next block.
//
const int MAX_IOS = 4;
IOSlot ios[MAX_IOS] = { {0} };
int head = 0;
int tail = 0;
int pending_ios = 0;
bool all_issued = false;
// (useful, raw data: possibly compressed, but doesn't count padding)
size_t raw_transferred_cnt = 0;
size_t issue_cnt = 0;
// if callback, what it reports; otherwise, = raw_transferred_cnt
// this is what we'll return
size_t actual_transferred_cnt = 0;
ssize_t err = +1; // loop terminates if <= 0
for(;;)
{
// queue not full, data remaining to transfer, and no error:
// start transferring next block.
if(pending_ios < MAX_IOS && !all_issued && err > 0)
{
// get next free IO slot in ring buffer
IOSlot* slot = &ios[head];
memset(slot, 0, sizeof(IOSlot));
head = (head + 1) % MAX_IOS;
pending_ios++;
off_t issue_ofs = (off_t)(actual_ofs + issue_cnt);
void* buf = (temp)? 0 : (char*)actual_buf + issue_cnt;
ssize_t issued = block_issue(f, slot, issue_ofs, buf);
debug_printf("FILE| io2: block_issue: %d\n", issued);
if(issued < 0)
err = issued;
// transfer failed - loop will now terminate after
// waiting for all pending transfers to complete.
issue_cnt += issued;
if(issue_cnt >= actual_size)
all_issued = true;
}
// IO pending: wait for it to complete, and process it.
else if(pending_ios)
{
IOSlot* slot = &ios[tail];
tail = (tail + 1) % MAX_IOS;
pending_ios--;
void* block = slot->cached_block;
size_t size = BLOCK_SIZE;
// wasn't in cache; it was issued, so wait for it
bool from_cache;
if(block)
from_cache = true;
else
{
from_cache = false;
int ret = file_io_wait(&slot->io, block, size);
if(ret < 0)
err = (ssize_t)ret;
}
// first time; skip past padding
void* data = block;
if(raw_transferred_cnt == 0)
{
(char*&)data += lead_padding;
size -= lead_padding;
}
// don't include trailing padding
if(raw_transferred_cnt + size > data_size)
size = data_size - raw_transferred_cnt;
// we have useable data from a previous temp buffer,
// but it needs to be copied into the user's buffer
if(from_cache && !temp)
memcpy2((char*)data_buf+raw_transferred_cnt, data, size);
//// if size comes out short, we must be at EOF
raw_transferred_cnt += size;
if(cb && !(err <= 0))
{
ssize_t ret = cb(ctx, data, size);
// if negative: processing failed.
// loop will now terminate after waiting for all
// pending transfers to complete.
// note: don't abort if = 0: zip callback may not actually
// output anything if passed very little data.
if(ret < 0)
err = ret;
else
actual_transferred_cnt += ret;
}
// no callback to process data: raw = actual
else
actual_transferred_cnt += size;
if(!from_cache)
file_io_discard(&slot->io);
if(temp)
{
// adding is allowed and we didn't take this from the cache already: add
if(!slot->cached_block)
block_add(slot->block_id, slot->temp_buf);
}
}
// (all issued OR error) AND no pending transfers - done.
else
break;
}
debug_printf("FILE| err=%d, actual_transferred_cnt=%d\n", err, actual_transferred_cnt);
// failed (0 means callback reports it's finished)
if(err < 0)
return err;
debug_assert(issue_cnt >= raw_transferred_cnt && raw_transferred_cnt >= data_size);
return (ssize_t)actual_transferred_cnt;
}
///////////////////////////////////////////////////////////////////////////////
//
// memory mapping
@ -1301,14 +771,14 @@ LibError file_map(File* f, void*& p, size_t& size)
CHECK_FILE(f);
const int prot = (f->flags & FILE_WRITE)? PROT_WRITE : PROT_READ;
const int prot = (f->fc.flags & FILE_WRITE)? PROT_WRITE : PROT_READ;
// already mapped - increase refcount and return previous mapping.
if(f->mapping)
{
// prevent overflow; if we have this many refs, should find out why.
if(f->map_refs >= MAX_MAP_REFS)
CHECK_ERR(ERR_LIMIT);
WARN_RETURN(ERR_LIMIT);
f->map_refs++;
goto have_mapping;
}
@ -1317,11 +787,12 @@ LibError file_map(File* f, void*& p, size_t& size)
// and BoundsChecker warns about wposix mmap failing).
// then again, don't complain, because this might happen when mounting
// a dir containing empty files; each is opened as a Zip file.
if(f->size == 0)
if(f->fc.size == 0)
return ERR_FAIL;
errno = 0;
f->mapping = mmap((void*)0, f->size, prot, MAP_PRIVATE, f->fd, (off_t)0);
void* start = 0; // system picks start address
f->mapping = mmap(start, f->fc.size, prot, MAP_PRIVATE, f->fd, (off_t)0);
if(f->mapping == MAP_FAILED)
return LibError_from_errno();
@ -1329,7 +800,7 @@ LibError file_map(File* f, void*& p, size_t& size)
have_mapping:
p = f->mapping;
size = f->size;
size = f->fc.size;
return ERR_OK;
}
@ -1358,16 +829,24 @@ LibError file_unmap(File* f)
// no more references: remove the mapping
void* p = f->mapping;
f->mapping = 0;
// don't clear f->size - the file is still open.
// don't clear f->fc.size - the file is still open.
errno = 0;
return LibError_from_posix(munmap(p, f->size));
return LibError_from_posix(munmap(p, f->fc.size));
}
LibError file_init()
{
atom_init();
file_cache_init();
return ERR_OK;
}
LibError file_shutdown()
{
aiocb_pool_shutdown();
block_shutdown();
FILE_STATS_DUMP();
atom_shutdown();
file_io_shutdown();
return ERR_OK;
}

View File

@ -22,6 +22,8 @@
#include "posix.h" // struct stat
extern LibError file_init();
// convenience "class" that simplifies successively appending a filename to
// its parent directory. this avoids the need to allocate memory or to
// call strlen/strcat. used by wdetect and dir_next_ent.
@ -86,6 +88,13 @@ extern LibError file_make_full_portable_path(const char* n_full_path, char* path
extern LibError file_set_root_dir(const char* argv0, const char* rel_path);
// allocate a copy of P_fn in our string pool. strings are equal iff
// their addresses are equal, thus allowing fast comparison.
// fn_len can be 0 (the normal case, indicating P_fn is a null-terminated
// C string) or the string length in characters.
extern const char* file_make_unique_fn_copy(const char* P_fn, size_t fn_len);
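// usage sketch: because all copies live in one pool, equal strings yield
// the same pointer, so callers may compare atom_fns with == instead of
// strcmp. (the path shown here is only an example.)
//   const char* a1 = file_make_unique_fn_copy("art/terrain/grass.dds", 0);
//   const char* a2 = file_make_unique_fn_copy("art/terrain/grass.dds", 0);
//   debug_assert(a1 == a2);   // same pooled copy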
//
// dir_next_ent
//
@ -150,7 +159,7 @@ extern LibError dir_close(DirIterator* d);
// name doesn't include path!
// return INFO_CB_CONTINUE to continue calling; anything else will cause
// file_enum to abort and immediately return that value.
typedef LibError (*FileCB)(const char* name, const struct stat* s, const uintptr_t user);
typedef LibError (*FileCB)(const char* name, const struct stat* s, uintptr_t memento, const uintptr_t user);
// call <cb> for each file and subdirectory in <dir> (alphabetical order),
// passing the entry name (not full path!), stat info, and <user>.
@ -162,17 +171,22 @@ extern LibError file_enum(const char* dir, FileCB cb, uintptr_t user);
struct File
struct FileCommon
{
// keep offset of flags and size members in sync with struct ZFile!
// it is accessed by VFS and must be the same for both (union).
// dirty, but necessary because VFile is pushing the HDATA size limit.
// keep offset of flags and size members in sync with struct AFile!
// it is accessed by VFS and must be the same for both (union).
// dirty, but necessary because VFile is pushing the HDATA size limit.
uint flags;
off_t size;
// used together with offset to uniquely identify cached blocks.
u32 fn_hash;
// copy of the filename that was passed to file_open;
// its address uniquely identifies it. used as key for file cache.
const char* atom_fn;
};
struct File
{
FileCommon fc;
int fd;
@ -184,7 +198,10 @@ struct File
enum
{
// write-only access; otherwise, read only
// write-only access; otherwise, read only.
//
// note: only allowing either reads or writes simplifies file cache
// coherency (need only invalidate when closing a FILE_WRITE file).
FILE_WRITE = 0x01,
// translate newlines: convert from/to native representation when
@ -226,28 +243,36 @@ extern LibError file_validate(const File* f);
// remove all blocks loaded from the file <fn>. used when reloading the file.
extern LibError file_invalidate_cache(const char* fn);
extern LibError file_cache_invalidate(const char* fn);
//
// asynchronous IO
//
// this is a thin wrapper on top of the system AIO calls.
// IOs are carried out exactly as requested - there is no caching or
// alignment done here. rationale: see source.
struct FileIo
{
void* cb;
};
// queue the IO; it begins after the previous ones (if any) complete.
//
// rationale: this interface is more convenient than implicitly advancing a
// file pointer because zip.cpp often accesses random offsets.
// file pointer because archive.cpp often accesses random offsets.
extern LibError file_io_issue(File* f, off_t ofs, size_t size, void* buf, FileIo* io);
// indicates if the given IO has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
extern int file_io_has_completed(FileIo* io);
extern LibError file_io_wait(FileIo* io, void*& p, size_t& size);
// wait for the given IO to complete. passes back its buffer and size.
extern LibError file_io_wait(FileIo* io, const void*& p, size_t& size);
// indicates the IO's buffer is no longer needed and frees that memory.
extern LibError file_io_discard(FileIo* io);
extern LibError file_io_validate(const FileIo* io);
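// usage sketch, assuming <f> was opened for reading and <buf> can hold
// <size> bytes (error handling abbreviated):
//   FileIo io;
//   RETURN_ERR(file_io_issue(f, ofs, size, buf, &io));
//   // .. do other work, or poll file_io_has_completed(&io) ..
//   const void* p; size_t bytes;
//   RETURN_ERR(file_io_wait(&io, p, bytes));
//   // .. use p[0..bytes) ..
//   (void)file_io_discard(&io);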
@ -257,14 +282,34 @@ extern LibError file_io_validate(const FileIo* io);
// synchronous IO
//
// user-specified offsets and transfer lengths must be multiples of this!
// (simplifies file_io)
const size_t FILE_BLOCK_SIZE = 64*KiB;
// block := power-of-two sized chunk of a file.
// all transfers are expanded to naturally aligned, whole blocks
// (this makes caching parts of files feasible; it is also much faster
// for some aio implementations, e.g. wposix).
const size_t FILE_BLOCK_SIZE = 16*KiB;
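// e.g. (sketch): a 5 KiB read at offset 30 KiB is expanded to the two
// blocks [16..32) and [32..48) KiB; the excess at either end is
// discarded after the transfer (see file_io).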
// return value:
// < 0: failed; abort transfer.
// >= 0: bytes output; continue.
typedef ssize_t (*FileIOCB)(uintptr_t ctx, void* p, size_t size);
typedef const u8* FileIOBuf;
FileIOBuf* const FILE_BUF_TEMP = (FileIOBuf*)1;
const FileIOBuf FILE_BUF_ALLOC = (FileIOBuf)2;
extern FileIOBuf file_buf_alloc(size_t size, const char* atom_fn);
extern LibError file_buf_free(FileIOBuf buf);
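// usage sketch for a read where file_io allocates the buffer
// (cf. vfs_load; error handling abbreviated):
//   FileIOBuf buf = FILE_BUF_ALLOC;
//   ssize_t nread = file_io(f, ofs, size, &buf);
//   if(nread >= 0)
//   {
//       // .. use buf[0..nread) ..
//       (void)file_buf_free(buf);
//   }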
// called by file_io after a block IO has completed.
// *bytes_processed must be set; file_io will return the sum of these values.
// example: when reading compressed data and decompressing in the callback,
// indicate #bytes decompressed.
// return value: INFO_CB_CONTINUE to continue calling; anything else:
// abort immediately and return that.
// note: in situations where the entire IO is not split into blocks
// (e.g. when reading from cache or not using AIO), this is still called but
// for the entire IO. we do not split into fake blocks because it is
// advantageous (e.g. for decompressors) to have all data at once, if available
// anyway.
typedef LibError (*FileIOCB)(uintptr_t ctx, const void* block, size_t size, size_t* bytes_processed);
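// example callback (sketch): merely counts the bytes it is given;
// <ctx> is assumed to point at a size_t running total. a decompressor
// would instead report the number of bytes it actually emitted.
//   static LibError count_cb(uintptr_t ctx, const void* block, size_t size,
//       size_t* bytes_processed)
//   {
//       (void)block;                  // contents not needed here
//       *(size_t*)ctx += size;
//       *bytes_processed = size;      // we "output" everything we received
//       return INFO_CB_CONTINUE;      // request further blocks
//   }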
// transfer <size> bytes, starting at <ofs>, to/from the given file.
// (read or write access was chosen at file-open time).
@ -277,8 +322,10 @@ typedef ssize_t (*FileIOCB)(uintptr_t ctx, void* p, size_t size);
// (quasi-parallel, without the complexity of threads).
//
// return number of bytes transferred (see above), or a negative error code.
extern ssize_t file_io(File* f, off_t ofs, size_t size, void* buf, FileIOCB cb = 0, uintptr_t ctx = 0);
extern ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb = 0, uintptr_t ctx = 0);
extern ssize_t file_read_from_cache(const char* atom_fn, off_t ofs, size_t size,
FileIOBuf* pbuf, FileIOCB cb, uintptr_t ctx);
//
// memory mapping

View File

@ -0,0 +1,628 @@
#include "precompiled.h"
#include <map>
#include "lib/allocators.h"
#include "lib/byte_order.h"
#include "lib/res/res.h"
#include "lib/adts.h"
#include "file.h"
#include "file_cache.h"
#include "file_internal.h"
static const size_t AIO_SECTOR_SIZE = 512;
// strategy:
// policy:
// - allocation: use all available mem first, then look at freelist
// - freelist: good fit, address-ordered, always split
// - free: immediately coalesce
// mechanism:
// - coalesce: boundary tags in freed memory
// - freelist: 2**n segregated doubly-linked, address-ordered
class CacheAllocator
{
static const size_t MAX_CACHE_SIZE = 64*MiB;
public:
void init()
{
// note: do not call from ctor; pool_create currently (2006-01-20)
// breaks if called at NLSO init time.
(void)pool_create(&pool, MAX_CACHE_SIZE, 0);
}
void shutdown()
{
(void)pool_destroy(&pool);
}
void* alloc(size_t size)
{
const size_t size_pa = round_up(size, AIO_SECTOR_SIZE);
// use all available space first
void* p = pool_alloc(&pool, size_pa);
if(p)
return p;
// try to reuse a freed entry
const uint size_class = size_class_of(size_pa);
p = alloc_from_class(size_class, size_pa);
if(p)
return p;
p = alloc_from_larger_class(size_class, size_pa);
if(p)
return p;
// failed - can no longer expand and nothing big enough was
// found in freelists.
// file cache will decide which elements are least valuable,
// free() those and call us again.
return 0;
}
#include "nommgr.h"
void free(u8* p, size_t size)
#include "mmgr.h"
{
if(!pool_contains(&pool, p))
{
debug_warn("not in arena");
return;
}
size_t size_pa = round_up(size, AIO_SECTOR_SIZE);
coalesce(p, size_pa);
freelist_add(p, size_pa);
}
private:
Pool pool;
uint size_class_of(size_t size_pa)
{
return log2((uint)size_pa);
}
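// e.g. (sketch, assuming log2 rounds down): size_pa = 3*AIO_SECTOR_SIZE
// = 1536 bytes falls into size class 10, since 2^10 = 1024 <= 1536 < 2048.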
//-------------------------------------------------------------------------
// boundary tags for coalescing
static const u32 MAGIC1 = FOURCC('C','M','E','M');
static const u32 MAGIC2 = FOURCC('\x00','\xFF','\x55','\xAA');
struct FreePage
{
FreePage* prev;
FreePage* next;
size_t size_pa;
u32 magic1;
u32 magic2;
};
// must be enough room to stash header+footer in the freed page.
cassert(AIO_SECTOR_SIZE >= 2*sizeof(FreePage));
FreePage* freed_page_at(u8* p, size_t ofs)
{
if(!ofs)
p -= sizeof(FreePage);
else
p += ofs;
FreePage* page = (FreePage*)p;
if(page->magic1 != MAGIC1 || page->magic2 != MAGIC2)
return 0;
debug_assert(page->size_pa % AIO_SECTOR_SIZE == 0);
return page;
}
void coalesce(u8*& p, size_t& size_pa)
{
// the tag just below <p> is the previous free region's footer;
// its live freelist node is the header at that region's start.
FreePage* prev_footer = freed_page_at(p, 0);
if(prev_footer)
{
FreePage* prev = (FreePage*)(p - prev_footer->size_pa);
freelist_remove(prev);
p -= prev->size_pa;
size_pa += prev->size_pa;
}
// the tag at <p>+size_pa is the next region's header, i.e. the
// live freelist node itself.
FreePage* next = freed_page_at(p, size_pa);
if(next)
{
freelist_remove(next);
size_pa += next->size_pa;
}
}
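// e.g. (sketch): if the 2 KiB region ending at <p> and the 1 KiB region
// starting at <p>+size_pa are both free, their boundary tags are found
// above and the result is a single free region of size_pa+3 KiB,
// starting 2 KiB below the original <p>.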
//-------------------------------------------------------------------------
// freelist
uintptr_t bitmap;
FreePage* freelists[sizeof(uintptr_t)*CHAR_BIT];
void freelist_add(u8* p, size_t size_pa)
{
const uint size_class = size_class_of(size_pa);
// write header and footer into the freed mem
// (its prev and next link fields will be set below)
FreePage* header = (FreePage*)p;
header->prev = header->next = 0;
header->size_pa = size_pa;
header->magic1 = MAGIC1; header->magic2 = MAGIC2;
FreePage* footer = (FreePage*)(p+size_pa-sizeof(FreePage));
*footer = *header;
// insert the header into freelist
// .. list was empty: link to head
if(!freelists[size_class])
{
freelists[size_class] = header;
bitmap |= BIT(size_class);
}
// .. not empty: insert in address order
else
{
// find the first node at a higher address; insert before it.
FreePage* cur = freelists[size_class];
FreePage* prev = 0;
while(cur && cur < header)
{
prev = cur;
cur = cur->next;
}
header->next = cur;
header->prev = prev;
if(cur)
cur->prev = header;
if(prev)
prev->next = header;
else
freelists[size_class] = header;
}
}
void freelist_remove(FreePage* page)
{
const uint size_class = size_class_of(page->size_pa);
// in middle of list: unlink from prev node
if(page->prev)
page->prev->next = page->next;
// was at front of list: unlink from head
else
{
freelists[size_class] = page->next;
// freelist is now empty - update bitmap.
if(!page->next)
bitmap &= ~BIT(size_class);
}
// not at end of list: unlink from next node
if(page->next)
page->next->prev = page->prev;
}
void* alloc_from_class(uint size_class, size_t size_pa)
{
// return first suitable entry in (address-ordered) list
FreePage* cur = freelists[size_class];
while(cur)
{
if(cur->size_pa >= size_pa)
{
u8* p = (u8*)cur;
const size_t remnant_pa = cur->size_pa - size_pa;
freelist_remove(cur);
if(remnant_pa)
freelist_add(p+remnant_pa, remnant_pa);
return p;
}
cur = cur->next;
}
return 0;
}
void* alloc_from_larger_class(uint start_size_class, size_t size_pa)
{
uint classes_left = bitmap;
// .. strip off all smaller classes
classes_left &= (~0u << start_size_class);
while(classes_left)
{
#define LS1(x) (x & -(int)x) // value of LSB 1-bit
const uint lsb = LS1(classes_left);
const uint size_class = log2(lsb); // convert the bit's value to its index
classes_left &= ~lsb; // remove from classes_left
void* p = alloc_from_class(size_class, size_pa);
if(p)
return p;
}
// apparently all classes above start_size_class are empty,
// or the above would have succeeded.
debug_assert(bitmap < BIT(start_size_class+1));
return 0;
}
}; // CacheAllocator
static CacheAllocator cache_allocator;
//-----------------------------------------------------------------------------
// list of FileIOBufs currently held by the application.
class ExtantBufMgr
{
struct ExtantBuf
{
FileIOBuf buf;
// this would also be available via TFile, but we want users
// to be able to allocate file buffers (and they don't know tf).
// therefore, we store this separately.
size_t size;
// which file was this buffer taken from?
// we search for given atom_fn as part of file_cache_retrieve
// (since we are responsible for already extant bufs).
// also useful for tracking down buf 'leaks' (i.e. someone
// forgetting to call file_buf_free).
const char* atom_fn;
ExtantBuf(FileIOBuf buf_, size_t size_, const char* atom_fn_)
: buf(buf_), size(size_), atom_fn(atom_fn_) {}
};
std::vector<ExtantBuf> extant_bufs;
public:
void add(FileIOBuf buf, size_t size, const char* atom_fn)
{
debug_assert(buf != 0);
// look for holes in array and reuse those
for(size_t i = 0; i < extant_bufs.size(); i++)
{
ExtantBuf& eb = extant_bufs[i];
if(!eb.buf)
{
eb.buf = buf;
eb.size = size;
eb.atom_fn = atom_fn;
return;
}
}
// add another entry
extant_bufs.push_back(ExtantBuf(buf, size, atom_fn));
}
bool includes(FileIOBuf buf)
{
debug_assert(buf != 0);
for(size_t i = 0; i < extant_bufs.size(); i++)
{
ExtantBuf& eb = extant_bufs[i];
if(matches(eb, buf))
return true;
}
return false;
}
void find_and_remove(FileIOBuf buf, size_t* size)
{
debug_assert(buf != 0);
for(size_t i = 0; i < extant_bufs.size(); i++)
{
ExtantBuf& eb = extant_bufs[i];
if(matches(eb, buf))
{
*size = eb.size;
eb.buf = 0;
eb.size = 0;
eb.atom_fn = 0;
return;
}
}
debug_warn("buf is not on extant list! double free?");
}
void display_all_remaining()
{
debug_printf("Leaked FileIOBufs:\n");
for(size_t i = 0; i < extant_bufs.size(); i++)
{
ExtantBuf& eb = extant_bufs[i];
if(eb.buf)
debug_printf(" %p (0x%08x) %s\n", eb.buf, eb.size, eb.atom_fn);
}
debug_printf("--------\n");
}
private:
bool matches(ExtantBuf& eb, FileIOBuf buf)
{
return (eb.buf <= buf && buf < (u8*)eb.buf+eb.size);
}
}; // ExtantBufMgr
static ExtantBufMgr extant_bufs;
//-----------------------------------------------------------------------------
static Cache<const char*, FileIOBuf> file_cache;
FileIOBuf file_buf_alloc(size_t size, const char* atom_fn)
{
FileIOBuf buf;
uint attempts = 0;
for(;;)
{
if(attempts++ > 50)
debug_warn("possible infinite loop: failed to make room in cache");
buf = (FileIOBuf)cache_allocator.alloc(size);
if(buf)
break;
// evict the least valuable cached file and return its memory to
// the allocator, then retry the allocation.
size_t discarded_size;
FileIOBuf discarded_buf = file_cache.remove_least_valuable(&discarded_size);
if(discarded_buf)
cache_allocator.free((u8*)discarded_buf, discarded_size);
}
extant_bufs.add(buf, size, atom_fn);
FILE_STATS_NOTIFY_BUF_ALLOC();
return buf;
}
LibError file_buf_get(FileIOBuf* pbuf, size_t size,
const char* atom_fn, bool is_write, FileIOCB cb)
{
// decode *pbuf - exactly one of these is true
const bool temp = (pbuf == FILE_BUF_TEMP);
const bool alloc = !temp && (*pbuf == FILE_BUF_ALLOC);
const bool user = !temp && !alloc;
// reading into temp buffers (data is processed via the callback) - ok.
if(!is_write && temp && cb != 0)
return ERR_OK;
// reading and want buffer allocated.
if(!is_write && alloc)
{
*pbuf = file_buf_alloc(size, atom_fn);
if(!*pbuf)
return ERR_NO_MEM;
return ERR_OK;
}
// writing from given buffer - ok.
if(is_write && user)
return ERR_OK;
return ERR_INVALID_PARAM;
}
LibError file_buf_free(FileIOBuf buf)
{
FILE_STATS_NOTIFY_BUF_FREE();
size_t size;
extant_bufs.find_and_remove(buf, &size);
return ERR_OK;
}
LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn)
{
// decide (based on flags) if buf is to be cached; set cost
uint cost = 1;
file_cache.add(atom_fn, buf, size, cost);
return ERR_OK;
}
FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size)
{
// note: do not query extant_bufs - reusing that doesn't make sense
// (why would someone issue a second IO for the entire file while
// still referencing the previous instance?)
FileIOBuf buf = file_cache.retrieve(atom_fn, size);
if(!buf)
{
FILE_STATS_NOTIFY_CACHE(CR_MISS, 1); // IOTODO: hack - cannot get miss size since not in cache
return 0;
}
FILE_STATS_NOTIFY_CACHE(CR_HIT, *size);
return buf;
}
/*
a) FileIOBuf is opaque type with getter
FileIOBuf buf; <--------------------- how to initialize??
file_io(.., &buf);
data = file_buf_contents(&buf);
file_buf_free(&buf);
would obviate lookup struct but at expense of additional getter and
trouble with init - need to set FileIOBuf to wrap user's buffer, or
only allow us to return buffer address (which is ok)
b) FileIOBuf is pointer to the buf, and secondary map associates that with BufInfo
FileIOBuf buf;
file_io(.., &buf);
file_buf_free(&buf);
secondary map covers all currently open IO buffers. it is accessed upon
file_buf_free and there are only a few active at a time ( < 10)
*/
//-----------------------------------------------------------------------------
// block cache: intended to cache raw compressed data, since files aren't aligned
// in the archive; alignment code would force a read of the whole block,
// which would be a slowdown unless we keep them in memory.
//
// keep out of async code (although extra work for sync: must not issue/wait
// if was cached) to simplify things. disadvantage: problems if same block
// is issued twice, before the first call completes (via wait_io).
// that won't happen though unless we have threaded file_ios =>
// rare enough not to worry about performance.
//
// since sync code allocates the (temp) buffer, it's guaranteed
// to remain valid.
//
class BlockMgr
{
static const size_t MAX_BLOCKS = 32;
enum BlockStatus
{
BS_PENDING,
BS_COMPLETE,
BS_INVALID
};
struct Block
{
BlockId id;
void* mem;
BlockStatus status;
Block() {} // for RingBuf
Block(BlockId id_, void* mem_)
: id(id_), mem(mem_), status(BS_PENDING) {}
};
RingBuf<Block, MAX_BLOCKS> blocks;
typedef RingBuf<Block, MAX_BLOCKS>::iterator BlockIt;
// use Pool to allocate mem for all blocks because it guarantees
// page alignment (required for IO) and obviates manually aligning.
Pool pool;
public:
void init()
{
(void)pool_create(&pool, MAX_BLOCKS*FILE_BLOCK_SIZE, FILE_BLOCK_SIZE);
}
void shutdown()
{
(void)pool_destroy(&pool);
}
void* alloc(BlockId id)
{
if(blocks.size() == MAX_BLOCKS)
{
Block& b = blocks.front();
// if this block is still locked, big trouble..
// (someone forgot to free it and we can't reuse it)
debug_assert(b.status != BS_PENDING);
pool_free(&pool, b.mem);
blocks.pop_front();
}
void* mem = pool_alloc(&pool, FILE_BLOCK_SIZE); // can't fail
blocks.push_back(Block(id, mem));
debug_printf("alloc %p\n", mem);
return mem;
}
void mark_completed(BlockId id)
{
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
{
if(it->id == id)
it->status = BS_COMPLETE;
}
}
void* find(BlockId id)
{
// linear search is ok, since we only keep a few blocks.
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
{
if(it->status == BS_COMPLETE && it->id == id)
return it->mem;
}
return 0; // not found
}
void invalidate(const char* atom_fn)
{
for(BlockIt it = blocks.begin(); it != blocks.end(); ++it)
if((const char*)(it->id >> 32) == atom_fn)
it->status = BS_INVALID;
}
};
static BlockMgr block_mgr;
// create an id for use with the cache that uniquely identifies
// the block from the file <atom_fn> starting at <ofs> (aligned).
BlockId block_cache_make_id(const char* atom_fn, const off_t ofs)
{
cassert(sizeof(atom_fn) == 4);
// format: filename atom | block number
// 63 32 31 0
//
// <atom_fn> is guaranteed to be unique (see file_make_unique_fn_copy).
//
// block_num should always fit in 32 bits (assuming maximum file size
// = 2^32 * FILE_BLOCK_SIZE ~= 2^48 -- plenty). we don't bother
// checking this.
const size_t block_num = ofs / FILE_BLOCK_SIZE;
return u64_from_u32((u32)(uintptr_t)atom_fn, (u32)block_num);
}
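// e.g. (sketch; the filename is illustrative): with FILE_BLOCK_SIZE = 16 KiB,
// a read at ofs = 40000 from "a.zip" maps to block_num = 2, so its id pairs
// a.zip's atom_fn (upper 32 bits) with 2 (lower 32 bits); the next aligned
// block yields block_num = 3, a distinct id for the same file.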
void* block_cache_alloc(BlockId id)
{
return block_mgr.alloc(id);
}
void block_cache_mark_completed(BlockId id)
{
block_mgr.mark_completed(id);
}
void* block_cache_find(BlockId id)
{
return block_mgr.find(id);
}
//-----------------------------------------------------------------------------
// remove all blocks loaded from the file <fn>. used when reloading the file.
LibError file_cache_invalidate(const char* P_fn)
{
const char* atom_fn = file_make_unique_fn_copy(P_fn, 0);
block_mgr.invalidate(atom_fn);
return ERR_OK;
}
void file_cache_init()
{
block_mgr.init();
cache_allocator.init();
}
void file_cache_shutdown()
{
extant_bufs.display_all_remaining();
cache_allocator.shutdown();
block_mgr.shutdown();
}

View File

@ -0,0 +1,19 @@
extern LibError file_buf_get(FileIOBuf* pbuf, size_t size,
const char* atom_fn, bool is_write, FileIOCB cb);
extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* size);
extern LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn);
typedef u64 BlockId;
// create an id for use with the cache that uniquely identifies
// the block from the file <atom_fn> starting at <ofs> (aligned).
extern BlockId block_cache_make_id(const char* atom_fn, const off_t ofs);
extern void* block_cache_alloc(BlockId id);
extern void block_cache_mark_completed(BlockId id);
extern void* block_cache_find(BlockId id);

View File

@ -0,0 +1,184 @@
#include "lib/timer.h"
#include "vfs_optimizer.h"
#define CHECK_FILE(f) CHECK_ERR(file_validate(f))
extern void file_io_shutdown();
extern void file_cache_init();
extern void file_cache_shutdown();
//-----------------------------------------------------------------------------
enum FileIOImplentation { FI_LOWIO, FI_AIO, FI_MAX_IDX };
enum FileOp { FO_READ, FO_WRITE };
enum CacheRet { CR_HIT, CR_MISS };
#define FILE_STATS_ENABLED 1
#if FILE_STATS_ENABLED
class FileStats
{
public:
void notify_unique_file()
{
unique_files++;
}
void notify_open(const char* atom_fn, size_t file_size)
{
open_files_cur++;
open_files_max = MAX(open_files_max, open_files_cur);
typedef std::pair<std::set<const char*>::iterator, bool> PairIB;
PairIB ret = accessed_files.insert(atom_fn);
// newly inserted, i.e. this is the first time the file was opened
if(ret.second)
unique_file_size_total += file_size;
}
void notify_close()
{
debug_assert(open_files_cur > 0);
open_files_cur--;
}
void notify_buf_alloc()
{
extant_bufs_cur++;
extant_bufs_max = MAX(extant_bufs_max, extant_bufs_cur);
extant_bufs_total++;
}
void notify_buf_free()
{
debug_assert(extant_bufs_cur > 0);
extant_bufs_cur--;
}
void notify_io(FileIOImplentation fi, FileOp fo, size_t user_size, size_t actual_size, double start_time)
{
io_count++;
debug_assert(io_actual_size_total >= io_user_size_total);
io_user_size_total += user_size;
io_actual_size_total += actual_size;
const double end_time = get_time();
const double throughput = actual_size / (end_time - start_time);
debug_assert(fi < FI_MAX_IDX);
double& avg = (fo == FO_READ)? read_throughput_avg[fi] : write_throughput_avg[fi];
const float gain = (avg == 0.0)? 1.0f : 0.5f;
avg = gain*throughput + (1.0f-gain)*avg;
}
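// e.g. the average is an exponential moving average with gain 0.5 after
// the first sample: throughput samples of 40, 60, 50 MB/s give
// avg = 40, then 0.5*60+0.5*40 = 50, then 0.5*50+0.5*50 = 50 MB/s.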
void notify_cache(CacheRet cr, size_t size)
{
debug_assert(cr == CR_HIT || cr == CR_MISS);
cache_count[cr]++;
cache_size_total[cr] += size;
}
void dump()
{
// note: writes count toward io_actual_size_total but not cache.
debug_assert(io_actual_size_total >= cache_size_total[CR_MISS]);
// not necessarily true, since not all IO clients use the cache.
// debug_assert(io_count >= cache_count[CR_MISS]);
const size_t unique_files_accessed = accessed_files.size();
// guesstimate miss rate due to cache capacity
// (indicates effectiveness of caching algorithm)
const u64 working_set_est = (u64)(unique_file_size_total * ((double)unique_files_accessed/
unique_files));
// misses beyond the estimated working set are attributed to limited
// cache capacity; expressed as a percentage of all cached bytes.
const double capacity_misses = (cache_size_total[CR_MISS] > working_set_est)?
(double)(cache_size_total[CR_MISS]-working_set_est) : 0.0;
const uint cache_capacity_miss_rate_est = (uint)(100.0*capacity_misses/
(cache_size_total[CR_HIT]+cache_size_total[CR_MISS]));
const double KB = 1000.0; const double MB = 1000000.0;
debug_printf(
"File statistics\n"
"--------------------------------------------------------------------------------\n"
"Total files seen: %u; total files accessed: %u.\n"
" unused files: %d%%.\n"
"Max. open files: %u; leaked files: %u.\n"
"Total buffers (re)used: %u; max. extant buffers: %u; leaked buffers: %u.\n"
"Total # user IOs: %u; cumulative size: %.3g MB; average size: %.2g KB.\n"
" unused data: %d%%.\n"
"IO thoughput [MB/s; 0=never happened]:\n"
" lowio: R=%.3g, W=%.3g\n"
" aio: R=%.3g, W=%.3g\n"
"File cache totals: hits: %.3g MB; misses: %.3g MB.\n"
" ratio: %d%%; capacity miss rate: ~%d%%.\n"
"--------------------------------------------------------------------------------\n"
,
unique_files, unique_files_accessed,
100-(int)(100.0f*unique_files_accessed/unique_files),
open_files_max, open_files_cur,
extant_bufs_total, extant_bufs_max, extant_bufs_cur,
io_count, io_user_size_total/MB, ((double)io_user_size_total)/io_count/KB,
100-(int)(100.0f*io_user_size_total/unique_file_size_total),
read_throughput_avg[FI_LOWIO]/MB, write_throughput_avg[FI_LOWIO]/MB,
read_throughput_avg[FI_AIO ]/MB, write_throughput_avg[FI_AIO ]/MB,
cache_size_total[CR_HIT]/MB, cache_size_total[CR_MISS]/MB,
(int)(100.0f*cache_size_total[CR_HIT]/(cache_size_total[CR_HIT]+cache_size_total[CR_MISS])), cache_capacity_miss_rate_est
);
}
FileStats()
: accessed_files(), read_throughput_avg(), write_throughput_avg(),
cache_count(), cache_size_total()
{
unique_files = 0;
unique_file_size_total = 0;
open_files_cur = open_files_max = 0;
extant_bufs_cur = extant_bufs_max = 0;
io_count = 0;
io_user_size_total = io_actual_size_total = 0;
}
private:
uint unique_files;
std::set<const char*> accessed_files;
u64 unique_file_size_total;
uint open_files_cur, open_files_max; // total = accessed_files.size()
uint extant_bufs_cur, extant_bufs_max, extant_bufs_total;
uint io_count;
u64 io_user_size_total;
u64 io_actual_size_total;
double read_throughput_avg[FI_MAX_IDX];
double write_throughput_avg[FI_MAX_IDX];
// file cache only (hit and miss; indexed via CacheRet)
uint cache_count[2];
u64 cache_size_total[2];
};
extern FileStats stats;
#define FILE_STATS_NOTIFY_UNIQUE_FILE() stats.notify_unique_file()
#define FILE_STATS_NOTIFY_OPEN(atom_fn, file_size) stats.notify_open(atom_fn, file_size)
#define FILE_STATS_NOTIFY_CLOSE() stats.notify_close()
#define FILE_STATS_NOTIFY_BUF_ALLOC() stats.notify_buf_alloc()
#define FILE_STATS_NOTIFY_BUF_FREE() stats.notify_buf_free()
#define FILE_STATS_NOTIFY_IO(fi, fo, user_size, actual_size, start_time) stats.notify_io(fi, fo, user_size, actual_size, start_time)
#define FILE_STATS_NOTIFY_CACHE(cr, size) stats.notify_cache(cr, size)
#define FILE_STATS_DUMP() stats.dump()
#else // !FILE_STATS_ENABLED
#define FILE_STATS_NOTIFY_UNIQUE_FILE() 0
#define FILE_STATS_NOTIFY_OPEN(atom_fn, file_size) 0
#define FILE_STATS_NOTIFY_CLOSE() 0
#define FILE_STATS_NOTIFY_BUF_ALLOC() 0
#define FILE_STATS_NOTIFY_BUF_FREE() 0
#define FILE_STATS_NOTIFY_IO(fi, fo, user_size, actual_size, start_time) 0
#define FILE_STATS_NOTIFY_CACHE(cr, size) 0
#define FILE_STATS_DUMP() 0
#endif

View File

@ -0,0 +1,570 @@
#include "precompiled.h"
#include "lib.h"
#include "lib/posix.h"
#include "lib/allocators.h"
#include "lib/adts.h"
#include "lib/res/res.h"
#include "file.h"
#include "file_cache.h"
#include "file_internal.h"
//-----------------------------------------------------------------------------
// async I/O
//-----------------------------------------------------------------------------
// rationale:
// asynchronous IO routines don't cache; they're just a thin AIO wrapper.
// caching is taken care of by file_io, which splits transfers into blocks
// and keeps temp buffers in memory (not user-allocated, because the user
// could pull the rug out from under us by freeing them at any time).
//
// caching here would be more complicated: would have to handle "forwarding",
// i.e. recognizing that the desired block has been issued, but isn't yet
// complete. file_io also knows more about whether a block should be cached.
//
// disadvantages:
// - streamed data will always be read from disk. no problem, because
// such data (e.g. music, long speech) is unlikely to be used again soon.
// - prefetching (issuing the next few blocks from an archive during idle
// time, so that future out-of-order reads don't need to seek) isn't
// possible in the background (unless via thread, but that's discouraged).
// the utility is questionable, though: how to prefetch so as not to delay
// real IOs? can't determine "idle time" without completion notification,
// which is hard.
// we could get the same effect by bridging small gaps in file_io,
// and rearranging files in the archive in order of access.
static Pool aiocb_pool;
static inline void aiocb_pool_init()
{
(void)pool_create(&aiocb_pool, 32*sizeof(aiocb), sizeof(aiocb));
}
static inline void aiocb_pool_shutdown()
{
(void)pool_destroy(&aiocb_pool);
}
static inline aiocb* aiocb_pool_alloc()
{
ONCE(aiocb_pool_init());
return (aiocb*)pool_alloc(&aiocb_pool, 0);
}
static inline void aiocb_pool_free(void* cb)
{
pool_free(&aiocb_pool, cb);
}
// starts transferring to/from the given buffer.
// no attempt is made at aligning or padding the transfer.
LibError file_io_issue(File* f, off_t ofs, size_t size, void* p, FileIo* io)
{
// zero output param in case we fail below.
memset(io, 0, sizeof(FileIo));
debug_printf("FILE| issue ofs=%d size=%d\n", ofs, size);
//
// check params
//
CHECK_FILE(f);
if(!size || !p || !io)
WARN_RETURN(ERR_INVALID_PARAM);
const bool is_write = (f->fc.flags & FILE_WRITE) != 0;
// cut off at EOF.
if(!is_write)
{
// avoid min() due to type conversion warnings.
const off_t bytes_left = f->fc.size - ofs;
if(bytes_left < 0)
WARN_RETURN(ERR_EOF);
if((off_t)size > bytes_left)
size = (size_t)bytes_left;
// guaranteed to fit, since size was > bytes_left
}
// (we can't store the whole aiocb directly - glibc's version is
// 144 bytes large)
aiocb* cb = aiocb_pool_alloc();
io->cb = cb;
if(!cb)
return ERR_NO_MEM;
memset(cb, 0, sizeof(*cb));
// send off async read/write request
cb->aio_lio_opcode = is_write? LIO_WRITE : LIO_READ;
cb->aio_buf = (volatile void*)p;
cb->aio_fildes = f->fd;
cb->aio_offset = ofs;
cb->aio_nbytes = size;
debug_printf("FILE| issue2 io=%p nbytes=%u\n", io, cb->aio_nbytes);
int err = lio_listio(LIO_NOWAIT, &cb, 1, (struct sigevent*)0);
if(err < 0)
{
debug_printf("lio_listio: %d, %d[%s]\n", err, errno, strerror(errno));
(void)file_io_discard(io);
WARN_RETURN(LibError_from_errno());
}
return ERR_OK;
}
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
int file_io_has_completed(FileIo* io)
{
aiocb* cb = (aiocb*)io->cb;
int ret = aio_error(cb);
if(ret == EINPROGRESS)
return 0;
if(ret == 0)
return 1;
WARN_RETURN(ERR_FAIL);
}
LibError file_io_wait(FileIo* io, const void*& p, size_t& size)
{
debug_printf("FILE| wait io=%p\n", io);
// zero output params in case something (e.g. H_DEREF) fails.
p = 0;
size = 0;
aiocb* cb = (aiocb*)io->cb;
// wait for transfer to complete.
const aiocb** cbs = (const aiocb**)&cb; // pass in an "array"
while(aio_error(cb) == EINPROGRESS)
aio_suspend(cbs, 1, (timespec*)0); // wait indefinitely
// query number of bytes transferred (-1 if the transfer failed)
const ssize_t bytes_transferred = aio_return(cb);
debug_printf("FILE| bytes_transferred=%d aio_nbytes=%u\n", bytes_transferred, cb->aio_nbytes);
// (size was already clipped to EOF in file_io_issue => this is an actual IO error)
if(bytes_transferred < (ssize_t)cb->aio_nbytes)
return ERR_IO;
p = (void*)cb->aio_buf; // cast from volatile void*
size = bytes_transferred;
return ERR_OK;
}
LibError file_io_discard(FileIo* io)
{
memset(io->cb, 0, sizeof(aiocb)); // prevent further use.
aiocb_pool_free(io->cb);
io->cb = 0;
return ERR_OK;
}
LibError file_io_validate(const FileIo* io)
{
const aiocb* cb = (const aiocb*)io->cb;
// >= 0x100 is not necessarily bogus, but suspicious.
// this also catches negative values.
if((uint)cb->aio_fildes >= 0x100)
return ERR_1;
if(debug_is_pointer_bogus((void*)cb->aio_buf))
return ERR_2;
if(cb->aio_lio_opcode != LIO_WRITE && cb->aio_lio_opcode != LIO_READ && cb->aio_lio_opcode != LIO_NOP)
return ERR_3;
// all other aiocb fields have no invariants we could check.
return ERR_OK;
}
//-----------------------------------------------------------------------------
// sync I/O
//-----------------------------------------------------------------------------
// the underlying aio implementation likes buffer and offset to be
// sector-aligned; if not, the transfer goes through an align buffer,
// and requires an extra memcpy2.
//
// if the user specifies an unaligned buffer, there's not much we can
// do - we can't assume the buffer contains padding. therefore,
// callers should let us allocate the buffer if possible.
//
// if the offset and buffer misalignments are equal, only the first and
// last blocks will need to be copied by aio, since we read up to the
// next block boundary. otherwise, everything will have to be copied;
// at least we split the read into blocks, so aio's align buffer won't
// have to cover the whole file.
class IOManager
{
File* f;
bool is_write;
bool no_aio;
FileIOCB cb;
uintptr_t cb_ctx;
off_t start_ofs;
FileIOBuf* pbuf;
size_t user_size;
size_t ofs_misalign;
size_t size;
// (useful, raw data: possibly compressed, but doesn't count padding)
size_t total_issued;
size_t total_transferred;
// if callback, sum of what it reports; otherwise, = total_transferred
// this is what we'll return.
size_t total_processed;
struct IOSlot
{
FileIo io;
const void* cached_block;
u64 block_id;
// needed so that we can add the block to the cache when
// its IO is complete. if we add it when issuing, we'd no longer be
// thread-safe: someone else might find it in the cache before its
// transfer has completed. don't want to add an "is_complete" flag,
// because that'd be hard to update (on every wait_io).
void* temp_buf;
IOSlot()
{
reset();
}
void reset()
{
memset(&io, 0, sizeof(io));
temp_buf = 0;
block_id = 0;
cached_block = 0;
}
};
static const uint MAX_PENDING_IOS = 4;
RingBuf<IOSlot, MAX_PENDING_IOS> queue;
// stop issuing and processing as soon as this changes
LibError err;
// bytes_processed is 0 if return value != { ERR_OK, INFO_CB_CONTINUE }
// note: don't abort if = 0: zip callback may not actually
// output anything if passed very little data.
static LibError call_back(const void* block, size_t size,
FileIOCB cb, uintptr_t ctx, size_t& bytes_processed)
{
if(cb)
{
LibError ret = cb(ctx, block, size, &bytes_processed);
// failed - reset byte count in case callback didn't
if(ret != ERR_OK && ret != INFO_CB_CONTINUE)
bytes_processed = 0;
return ret;
}
// no callback to process data: raw = actual
else
{
bytes_processed = size;
return INFO_CB_CONTINUE;
}
}
ssize_t lowio()
{
const int fd = f->fd;
lseek(fd, start_ofs, SEEK_SET);
// emulate temp buffers - we take care of allocating and freeing.
void* dst;
void* dst_mem = 0;
if(pbuf == FILE_BUF_TEMP)
{
dst_mem = malloc(size);
if(!dst_mem)
return ERR_NO_MEM;
dst = dst_mem;
}
else
dst = (void*)*pbuf;
ssize_t total_transferred;
if(is_write)
total_transferred = write(fd, dst, size);
else
total_transferred = read (fd, dst, size);
if(total_transferred < 0)
{
free(dst_mem);
WARN_RETURN(LibError_from_errno());
}
size_t total_processed;
LibError ret = call_back(dst, total_transferred, cb, cb_ctx, total_processed);
free(dst_mem);
CHECK_ERR(ret);
return (ssize_t)total_processed;
}
// align and pad the IO to FILE_BLOCK_SIZE
// (reduces work for AIO implementation).
LibError prepare()
{
ofs_misalign = 0;
size = user_size;
if(!is_write && !no_aio)
{
// note: we go to the trouble of aligning the first block (instead of
// just reading up to the next block and letting aio realign it),
// so that it can be taken from the cache.
// this is not possible if we don't allocate the buffer because
// extra space must be added for the padding.
ofs_misalign = start_ofs % FILE_BLOCK_SIZE;
start_ofs -= (off_t)ofs_misalign;
size = round_up(ofs_misalign + user_size, FILE_BLOCK_SIZE);
}
RETURN_ERR(file_buf_get(pbuf, size, f->fc.atom_fn, is_write, cb));
return ERR_OK;
}
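// worked example (sketch): user_size = 5 KiB at start_ofs = 30 KiB gives
// ofs_misalign = 14 KiB, start_ofs = 16 KiB and size = 32 KiB (2 blocks);
// run() later skips the leading 14 KiB so the caller sees only its data.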
void issue(IOSlot& slot)
{
const off_t ofs = start_ofs+(off_t)total_issued;
size_t issue_size;
// write: must not issue beyond end of data.
if(is_write)
issue_size = MIN(FILE_BLOCK_SIZE, size - total_issued);
// read: always grab whole blocks so we can put them in the cache.
// any excess data (can only be within first or last block) is
// discarded in wait().
else
issue_size = FILE_BLOCK_SIZE;
// check if in cache
slot.block_id = block_cache_make_id(f->fc.atom_fn, ofs);
slot.cached_block = block_cache_find(slot.block_id);
if(slot.cached_block)
goto skip_issue;
// if using buffer, set position in it; otherwise, use temp buffer
void* buf;
if(pbuf == FILE_BUF_TEMP)
buf = slot.temp_buf = block_cache_alloc(slot.block_id);
else
buf = (char*)*pbuf + total_issued;
LibError ret = file_io_issue(f, ofs, issue_size, buf, &slot.io);
// transfer failed - loop will now terminate after
// waiting for all pending transfers to complete.
if(ret != ERR_OK)
{
err = ret;
mem_free(slot.temp_buf);
}
skip_issue:
total_issued += issue_size;
}
void wait(IOSlot& slot, void*& block, size_t& block_size)
{
if(slot.cached_block)
{
block = (u8*)slot.cached_block;
block_size = FILE_BLOCK_SIZE;
}
// wasn't in cache; it was issued, so wait for it
else
{
LibError ret = file_io_wait(&slot.io, block, block_size);
if(ret < 0)
err = ret;
}
// first time; skip past padding
if(total_transferred == 0)
{
block = (u8*)block + ofs_misalign;
block_size -= ofs_misalign;
}
// last time: don't include trailing padding
if(total_transferred + block_size > user_size)
block_size = user_size - total_transferred;
// we have useable data from a previous temp buffer,
// but it needs to be copied into the user's buffer
if(slot.cached_block && pbuf != FILE_BUF_TEMP)
memcpy2((char*)*pbuf+ofs_misalign+total_transferred, block, block_size);
total_transferred += block_size;
}
void process(IOSlot& slot, void* block, size_t block_size, FileIOCB cb, uintptr_t ctx)
{
if(err == INFO_CB_CONTINUE)
{
size_t bytes_processed;
err = call_back(block, block_size, cb, ctx, bytes_processed);
if(err == INFO_CB_CONTINUE || err == ERR_OK)
total_processed += bytes_processed;
// else: processing failed.
// loop will now terminate after waiting for all
// pending transfers to complete.
}
if(!slot.cached_block)
file_io_discard(&slot.io);
if(!slot.cached_block && pbuf == FILE_BUF_TEMP)
block_cache_mark_completed(slot.block_id);
}
ssize_t aio()
{
again:
{
// data remaining to transfer, and no error:
// start transferring next block.
if(total_issued < size && err == INFO_CB_CONTINUE && queue.size() < MAX_PENDING_IOS)
{
queue.push_back(IOSlot());
IOSlot& slot = queue.back();
issue(slot);
goto again;
}
// IO pending: wait for it to complete, and process it.
if(!queue.empty())
{
IOSlot& slot = queue.front();
void* block; size_t block_size;
wait(slot, block, block_size);
process(slot, block, block_size, cb, cb_ctx);
queue.pop_front();
goto again;
}
}
// (all issued OR error) AND no pending transfers - done.
debug_assert(total_issued >= total_transferred && total_transferred >= user_size);
return (ssize_t)total_processed;
}
public:
IOManager(File* f_, off_t ofs_, size_t size_, FileIOBuf* pbuf_,
FileIOCB cb_, uintptr_t cb_ctx_)
{
f = f_;
is_write = (f->fc.flags & FILE_WRITE ) != 0;
no_aio = (f->fc.flags & FILE_NO_AIO) != 0;
cb = cb_;
cb_ctx = cb_ctx_;
start_ofs = ofs_;
user_size = size_;
pbuf = pbuf_;
total_issued = 0;
total_transferred = 0;
total_processed = 0;
err = INFO_CB_CONTINUE;
}
// now we read the file in FILE_BLOCK_SIZE chunks, N-buffered.
// if reading from Zip, inflate while reading the next block.
ssize_t run()
{
RETURN_ERR(prepare());
const double start_time = get_time();
FileIOImplentation fi;
ssize_t ret;
if(no_aio)
{
fi = FI_LOWIO;
ret = lowio();
}
else
{
fi = FI_AIO;
ret = aio();
}
FILE_STATS_NOTIFY_IO(fi, is_write? FO_WRITE : FO_READ, user_size, total_issued, start_time);
debug_printf("FILE| err=%d, total_processed=%u\n", err, total_processed);
// we allocated the memory: skip any leading padding
if(pbuf != FILE_BUF_TEMP && !is_write)
*pbuf = (u8*)*pbuf + ofs_misalign;
if(err != INFO_CB_CONTINUE && err != ERR_OK)
return (ssize_t)err;
return ret;
}
}; // IOManager
// transfer <size> bytes, starting at <ofs>, to/from the given file.
// (read or write access was chosen at file-open time).
//
// if non-NULL, <cb> is called for each block transferred, passing <ctx>.
// it returns how much data was actually transferred, or a negative error
// code (in which case we abort the transfer and return that value).
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return number of bytes transferred (see above), or a negative error code.
ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf,
FileIOCB cb, uintptr_t ctx) // optional
{
debug_printf("FILE| io: size=%u ofs=%u fn=%s\n", size, ofs, f->fc.atom_fn);
CHECK_FILE(f);
IOManager mgr(f, ofs, size, pbuf, cb, ctx);
return mgr.run();
}
void file_io_shutdown()
{
aiocb_pool_shutdown();
}

View File

@ -34,14 +34,17 @@
#include "lib.h"
#include "adts.h"
#include "timer.h"
#include "../res.h"
#include "zip.h"
#include "file.h"
#include "file_cache.h"
#include "file_internal.h"
#include "sysdep/dir_watch.h"
#include "vfs_path.h"
#include "vfs_tree.h"
#include "vfs_mount.h"
#include "timer.h"
#include "vfs_optimizer.h"
// not safe to call before main!
@ -250,91 +253,26 @@ LibError vfs_dir_next_ent(const Handle hd, DirEnt* ent, const char* filter)
///////////////////////////////////////////////////////////////////////////////
//
// logging
//
static int file_listing_enabled;
// tristate; -1 = already shut down
static FILE* file_list;
static void file_listing_shutdown()
{
if(file_listing_enabled == 1)
{
fclose(file_list);
file_listing_enabled = -1;
}
}
static void file_listing_add(const char* v_fn)
{
// we've already shut down - complain.
if(file_listing_enabled == -1)
{
debug_warn("called after file_listing_shutdown atexit");
return;
}
// listing disabled.
if(file_listing_enabled == 0)
return;
if(!file_list)
{
char N_path[PATH_MAX];
(void)file_make_full_native_path("../logs/filelist.txt", N_path);
file_list = fopen(N_path, "w");
if(!file_list)
return;
}
fputs(v_fn, file_list);
fputc('\n', file_list);
}
void vfs_enable_file_listing(bool want_enabled)
{
// already shut down - don't allow enabling
if(file_listing_enabled == -1 && want_enabled)
{
debug_warn("enabling after shutdown");
return;
}
file_listing_enabled = (int)want_enabled;
}
///////////////////////////////////////////////////////////////////////////////
// return actual path to the specified file:
// "<real_directory>/fn" or "<archive_name>/fn".
LibError vfs_realpath(const char* v_path, char* realpath)
{
TFile* tf;
char V_exact_path[VFS_MAX_PATH];
CHECK_ERR(tree_lookup(v_path, &tf, 0, V_exact_path));
CHECK_ERR(tree_lookup(v_path, &tf));
const Mount* m = tree_get_mount(tf);
return x_realpath(m, V_exact_path, realpath);
const Mount* m = tfile_get_mount(tf);
const char* V_path = tfile_get_atom_fn(tf);
return x_realpath(m, V_path, realpath);
}
// does the specified file exist? return false on error.
// useful because a "file not found" warning is not raised, unlike vfs_stat.
bool vfs_exists(const char* v_fn)
bool vfs_exists(const char* V_fn)
{
TFile* tf;
return (tree_lookup(v_fn, &tf) == 0);
return (tree_lookup(V_fn, &tf) == 0);
}
@ -356,19 +294,16 @@ LibError vfs_stat(const char* v_path, struct stat* s)
struct VFile
{
// cached contents of file from vfs_load
// (can't just use pointer - may be freed behind our back)
Handle hm;
XFile xf;
// current file pointer. this is necessary because file.cpp's interface
// requires passing an offset for every VIo; see file_io_issue.
off_t ofs;
XFile xf;
uint is_valid : 1;
// be aware when adding fields that this struct is quite large,
// and may require increasing the control block size limit.
// (especially in CONFIG_PARANOIA builds, which add a member!)
};
H_TYPE_DEFINE(VFile);
@ -385,7 +320,8 @@ static void VFile_dtor(VFile* vf)
// x_close and mem_free_h safely handle 0-initialized data.
WARN_ERR(x_close(&vf->xf));
(void)mem_free_h(vf->hm);
if(vf->is_valid)
FILE_STATS_NOTIFY_CLOSE();
}
static LibError VFile_reload(VFile* vf, const char* V_path, Handle)
@ -397,12 +333,11 @@ static LibError VFile_reload(VFile* vf, const char* V_path, Handle)
if(x_is_open(&vf->xf))
return ERR_OK;
file_listing_add(V_path);
trace_add(V_path);
TFile* tf;
char V_exact_path[VFS_MAX_PATH];
uint lf = (flags & FILE_WRITE)? LF_CREATE_MISSING : 0;
LibError err = tree_lookup(V_path, &tf, lf, V_exact_path);
LibError err = tree_lookup(V_path, &tf, lf);
if(err < 0)
{
// don't CHECK_ERR - this happens often and the dialog is annoying
@ -410,8 +345,14 @@ static LibError VFile_reload(VFile* vf, const char* V_path, Handle)
return err;
}
const Mount* m = tree_get_mount(tf);
return x_open(m, V_exact_path, flags, tf, &vf->xf);
const Mount* m = tfile_get_mount(tf);
RETURN_ERR(x_open(m, V_path, flags, tf, &vf->xf));
FileCommon& fc = vf->xf.u.fc;
FILE_STATS_NOTIFY_OPEN(fc.atom_fn, fc.size);
vf->is_valid = 1;
return ERR_OK;
}
static LibError VFile_validate(const VFile* vf)
@ -441,7 +382,7 @@ ssize_t vfs_size(Handle hf)
// file_flags: default 0
//
// on failure, a debug_warn is generated and a negative error code returned.
Handle vfs_open(const char* v_fn, uint file_flags)
Handle vfs_open(const char* V_fn, uint file_flags)
{
// keeping files open doesn't make sense in most cases (because the
// file is used to load resources, which are cached at a higher level).
@ -451,7 +392,7 @@ Handle vfs_open(const char* v_fn, uint file_flags)
// res_flags is for h_alloc and file_flags goes to VFile_init.
// h_alloc already complains on error.
return h_alloc(H_VFile, v_fn, res_flags, file_flags);
return h_alloc(H_VFile, V_fn, res_flags, file_flags);
}
@ -483,143 +424,73 @@ LibError vfs_close(Handle& hf)
// this is the preferred read method.
//
// return number of bytes transferred (see above), or a negative error code.
ssize_t vfs_io(const Handle hf, const size_t size, void** p, FileIOCB cb, uintptr_t ctx)
ssize_t vfs_io(const Handle hf, const size_t size, FileIOBuf* pbuf,
FileIOCB cb, uintptr_t cb_ctx)
{
debug_printf("VFS| io: size=%d\n", size);
H_DEREF(hf, VFile, vf);
XFile& xf = vf->xf;
off_t ofs = vf->ofs;
vf->ofs += (off_t)size;
void* buf = 0; // assume temp buffer (p == 0)
if(p)
{
// user-specified buffer
if(*p)
buf = *p;
// we allocate
else
{
buf = mem_alloc(round_up(size, 4096), FILE_BLOCK_SIZE);
if(!buf)
return ERR_NO_MEM;
*p = buf;
}
}
const bool is_write = (xf.u.fc.flags & FILE_WRITE) != 0;
RETURN_ERR(file_buf_get(pbuf, size, xf.u.fc.atom_fn, is_write, cb));
return x_io(&vf->xf, ofs, size, buf, cb, ctx);
}
#include "timer.h"
static double dt;
static double totaldata;
void dump()
{
debug_printf("TOTAL TIME IN VFS_IO: %f\nthroughput: %f MiB/s\n\n", dt, totaldata/dt/1e6);
}
static ssize_t vfs_timed_io(const Handle hf, const size_t size, void** p, FileIOCB cb = 0, uintptr_t ctx = 0)
{
ONCE(atexit(dump));
double t1=get_time();
totaldata += size;
ssize_t nread = vfs_io(hf, size, p, cb, ctx);
double t2=get_time();
if(t2-t1 < 1.0)
dt += t2-t1;
return nread;
return x_io(&vf->xf, ofs, size, pbuf, cb, cb_ctx);
}
// load the entire file <fn> into memory.
// returns a memory handle to the file's contents or a negative error code.
// p and size are filled with address/size of buffer (0 on failure).
// buf and size are filled with address/size of buffer (0 on failure).
// flags influences IO mode and is typically 0.
// in addition to the regular file cache, the entire buffer is
// kept in memory if flags & FILE_CACHE.
// when the file contents are no longer needed, you can mem_free_h the
// Handle, or mem_free(p).
//
// rationale: we need the Handle return value for Tex.hm - the data pointer
// must be protected against being accidentally free-d in that case.
Handle vfs_load(const char* v_fn, void*& p, size_t& size, uint flags /* default 0 */)
Handle vfs_load(const char* V_fn, FileIOBuf& buf, size_t& size, uint flags /* default 0 */)
{
debug_printf("VFS| load: v_fn=%s\n", v_fn);
debug_printf("VFS| load: V_fn=%s\n", V_fn);
p = 0; size = 0; // zeroed in case vfs_open or H_DEREF fails
const char* atom_fn = file_make_unique_fn_copy(V_fn, 0);
buf = file_cache_retrieve(atom_fn, &size);
if(buf)
return ERR_OK;
Handle hf = vfs_open(v_fn, flags);
buf = 0; size = 0; // initialize in case something below fails
Handle hf = vfs_open(atom_fn, flags);
RETURN_ERR(hf);
// necessary because if we skip this and have H_DEREF report the
// error, we get "invalid handle" instead of vfs_open's error code.
// don't CHECK_ERR because vfs_open already did.
H_DEREF(hf, VFile, vf);
Handle hm = 0; // return value - handle to memory or error code
size = x_size(&vf->xf);
// already read into mem - return existing mem handle
// TODO: what if mapped?
if(vf->hm > 0)
buf = FILE_BUF_ALLOC;
ssize_t nread = vfs_io(hf, size, &buf);
// IO failed
if(nread < 0)
hm = nread; // error code
else
{
p = mem_get_ptr(vf->hm, &size); // xxx remove the entire vf->hm - unused
if(p)
{
debug_assert(x_size(&vf->xf) == (off_t)size && "vfs_load: mismatch between File and Mem size");
hm = vf->hm;
goto ret;
}
else
debug_warn("invalid MEM attached to vfile (0 pointer)");
// happens if someone frees the pointer. not an error!
}
/*
// allocate memory. does expose implementation details of File
// (padding), but it greatly simplifies returning the Handle
// (if we allow File to alloc, have to make sure the Handle references
// the actual data address, not that of the padding).
{
const size_t BLOCK_SIZE = 64*KiB;
p = mem_alloc(size, BLOCK_SIZE, 0, &hm);
if(!p)
{
hm = ERR_NO_MEM;
goto ret;
}
}
*/
{
ssize_t nread = vfs_timed_io(hf, size, &p);
// failed
if(nread < 0)
{
mem_free(p);
hm = nread; // error code
}
else
{
hm = mem_wrap(p, size, 0, 0, 0, 0, 0, (void*)vfs_load);
if(flags & FILE_CACHE)
vf->hm = hm;
}
debug_assert(nread == (ssize_t)size);
(void)file_cache_add(buf, size, atom_fn);
hm = mem_wrap((void*)buf, size, 0, 0, 0, 0, 0, (void*)vfs_load);
}
ret:
WARN_ERR(vfs_close(hf));
// if FILE_CACHE, it's kept open
(void)vfs_close(hf);
// if we fail, make sure these are set to 0
// (they may have been assigned values above)
// IO or handle alloc failed
if(hm <= 0)
p = 0, size = 0;
{
file_buf_free(buf);
buf = 0, size = 0; // make sure they are zeroed
}
CHECK_ERR(hm);
return hm;
@ -629,15 +500,16 @@ ret:
// caveat: pads file to next max(4kb, sector_size) boundary
// (due to limitation of Win32 FILE_FLAG_NO_BUFFERING I/O).
// if that's a problem, specify FILE_NO_AIO when opening.
ssize_t vfs_store(const char* v_fn, void* p, const size_t size, uint flags /* default 0 */)
ssize_t vfs_store(const char* V_fn, const void* p, const size_t size, uint flags /* default 0 */)
{
Handle hf = vfs_open(v_fn, flags|FILE_WRITE);
Handle hf = vfs_open(V_fn, flags|FILE_WRITE);
RETURN_ERR(hf);
// necessary because if we skip this and have H_DEREF report the
// error, we get "invalid handle" instead of vfs_open's error code.
// don't CHECK_ERR because vfs_open already did.
H_DEREF(hf, VFile, vf);
const ssize_t ret = vfs_io(hf, size, &p);
FileIOBuf buf = (FileIOBuf)p;
const ssize_t ret = vfs_io(hf, size, &buf);
WARN_ERR(vfs_close(hf));
return ret;
}
@ -794,7 +666,7 @@ static LibError reload_without_rebuild(const char* fn)
{
// invalidate this file's cached blocks to make sure its contents are
// loaded anew.
RETURN_ERR(file_invalidate_cache(fn));
RETURN_ERR(file_cache_invalidate(fn));
RETURN_ERR(h_reload(fn));
@ -909,6 +781,7 @@ inline void vfs_display()
// splitting this into a separate function.
static void vfs_init_once(void)
{
tree_init();
mount_init();
}
@ -927,7 +800,7 @@ void vfs_init()
void vfs_shutdown()
{
file_listing_shutdown();
trace_shutdown();
mount_shutdown();
tree_shutdown();
}

View File

@ -197,6 +197,15 @@ Decompression is free because it is done in parallel with IOs.
#include "lib/posix.h" // struct stat
#include "file.h" // file open flags
// upper bound on number of files; used as size of TNode pool and
// enables an optimization in the cache if it fits in 16 bits
// (each block stores a 16-bit ID instead of pointer to TNode).
// -1 allows for an "invalid/free" value.
//
// must be #define instead of const because we check whether it
// fits in 16 bits via #if.
#define VFS_MAX_FILES ((1u << 16) - 1)
// make the VFS tree ready for use. must be called before all other
// functions below, barring explicit mentions to the contrary.
extern void vfs_init();
@ -378,7 +387,7 @@ extern LibError vfs_io_discard(Handle& hio);
// this is the preferred read method.
//
// return number of bytes transferred (see above), or a negative error code.
extern ssize_t vfs_io(Handle hf, size_t size, void** p, FileIOCB cb = 0, uintptr_t ctx = 0);
extern ssize_t vfs_io(Handle hf, size_t size, FileIOBuf* p, FileIOCB cb = 0, uintptr_t ctx = 0);
// convenience functions that replace vfs_open / vfs_io / vfs_close:
@ -391,9 +400,9 @@ extern ssize_t vfs_io(Handle hf, size_t size, void** p, FileIOCB cb = 0, uintptr
// kept in memory if flags & FILE_CACHE.
// when the file contents are no longer needed, you can mem_free_h the
// Handle, or mem_free(p).
extern Handle vfs_load(const char* fn, void*& p, size_t& size, uint flags = 0);
extern Handle vfs_load(const char* fn, FileIOBuf& p, size_t& size, uint flags = 0);
extern ssize_t vfs_store(const char* fn, void* p, size_t size, uint flags = 0);
extern ssize_t vfs_store(const char* fn, const void* p, size_t size, uint flags = 0);
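// usage sketch (read side; the path is only an example):
//   FileIOBuf buf; size_t size;
//   Handle hm = vfs_load("art/textures/grass.dds", buf, size);
//   if(hm < 0)
//       return hm;            // failed; buf and size are zeroed
//   // .. use buf[0..size) ..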
//

View File

@ -75,7 +75,7 @@ struct Mount
~Mount()
{
if(archive > 0) // avoid h_mgr warning
zip_archive_close(archive);
archive_close(archive);
}
Mount& operator=(const Mount& rhs)
@ -153,7 +153,7 @@ bool mount_should_replace(const Mount* m_old, const Mount* m_new, bool files_are
static const Mount& add_mount(const char* V_mount_point, const char* P_real_path, Handle archive,
uint flags, uint pri);
// passed through dirent_cb's zip_enum to zip_cb
// passed through dirent_cb's afile_enum to afile_cb
struct ZipCBParams
{
// tree directory into which we are adding the archive's files
@ -163,7 +163,7 @@ struct ZipCBParams
const Mount* const m;
// storage for directory lookup optimization (see below).
// held across one zip_enum's zip_cb calls.
// held across one afile_enum's afile_cb calls.
char last_path[VFS_MAX_PATH];
size_t last_path_len;
TDir* last_td;
@ -180,33 +180,27 @@ private:
ZipCBParams& operator=(const ZipCBParams&);
};
// called by add_ent's zip_enum for each file in the archive.
// called by add_ent's afile_enum for each file in the archive.
// we get the full path, since that's what is stored in Zip archives.
//
// [total time 21ms, with ~2000 files (includes add_file cost)]
static LibError zip_cb(const char* path, const struct stat* s, uintptr_t user)
static LibError afile_cb(const char* path, const struct stat* s, uintptr_t memento, uintptr_t user)
{
CHECK_PATH(path);
ZipCBParams* params = (ZipCBParams*)user;
TDir* td = params->td;
TDir* td = params->td;
const Mount* m = params->m;
char* last_path = params->last_path;
size_t& last_path_len = params->last_path_len;
TDir*& last_td = params->last_td;
// extract file name (needed for add_file)
const char* fn = path;
const char* slash = strrchr(path, '/');
if(slash)
fn = slash+1;
// else: there is no path - it's in the archive's root td.
TDir*& last_td = params->last_td;
// into which directory should the file be inserted?
// naive approach: tree_lookup_dir the path (slow!)
// optimization: store the last file's path; if it's the same,
// use the directory we looked up last time (much faster!)
const size_t path_len = fn-path;
const char* slash = strrchr(path, '/');
const size_t path_len = slash? (slash-path+1) : 0;
// .. same as last time
if(last_td && path_len == last_path_len &&
strnicmp(path, last_path, path_len) == 0)
@ -228,7 +222,7 @@ static LibError zip_cb(const char* path, const struct stat* s, uintptr_t user)
last_td = td;
}
WARN_ERR(tree_add_file(td, fn, m, s->st_size, s->st_mtime));
WARN_ERR(tree_add_file(td, path, m, s->st_size, s->st_mtime, memento));
return INFO_CB_CONTINUE;
}
@ -247,15 +241,13 @@ typedef Archives::const_iterator ArchiveCIt;
// was successfully added to the list. see comments below.
static LibError enqueue_archive(const char* name, const char* P_archive_dir, Archives* archives)
{
archives=0;// HACK HACK HACK: disables zip files (WIP)
// caller doesn't want us to check if this is a Zip file. this is the
// case in all subdirectories of the mount point, since checking for all
// mounted files would be slow. see mount_dir_tree.
if(!archives)
return INFO_SKIPPED;
// get complete path for zip_archive_open.
// get complete path for afile_archive_open.
// this doesn't (need to) work for subdirectories of the mounted td!
// we can't use mount_get_path because we don't have the VFS path.
char P_path[PATH_MAX];
@ -264,7 +256,7 @@ archives=0;// HACK HACK HACK: disables zip files (WIP)
// just open the Zip file and see if it's valid. we don't bother
// checking the extension because archives won't necessarily be
// called .zip (e.g. Quake III .pk3).
Handle archive = zip_archive_open(P_path);
Handle archive = archive_open(P_path);
RETURN_ERR(archive);
archives->push_back(archive);
@ -276,7 +268,7 @@ archives=0;// HACK HACK HACK: disables zip files (WIP)
static LibError mount_archive(TDir* td, const Mount& m)
{
ZipCBParams params(td, &m);
zip_enum(m.archive, zip_cb, (uintptr_t)&params);
archive_enum(m.archive, afile_cb, (uintptr_t)&params);
return ERR_OK;
}
@ -345,9 +337,9 @@ static LibError enqueue_dir(TDir* parent_td, const char* name,
// create subdirectory..
TDir* td;
CHECK_ERR(tree_add_dir(parent_td, name, &td));
CHECK_ERR(tree_add_dir(parent_td, P_path, &td));
// .. and add it to the list of directories to visit.
dir_queue->push_back(TDirAndPath(td, const_cast<const char*>(P_path)));
dir_queue->push_back(TDirAndPath(td, P_path));
return ERR_OK;
}
@ -385,8 +377,12 @@ static LibError add_ent(TDir* td, DirEnt* ent, const char* P_parent_path, const
if(enqueue_archive(name, m->P_name.c_str(), archives) == ERR_OK)
return ERR_OK;
// prepend parent path to get complete pathname.
char P_path[PATH_MAX];
CHECK_ERR(vfs_path_append(P_path, P_parent_path, name));
// it's a regular data file; add it to the directory.
return tree_add_file(td, name, m, ent->size, ent->mtime);
return tree_add_file(td, P_path, m, ent->size, ent->mtime, 0);
}
@ -591,14 +587,14 @@ LibError vfs_mount(const char* V_mount_point, const char* P_real_path, int flags
for(MountIt it = mounts.begin(); it != mounts.end(); ++it)
{
if(file_is_subpath(P_real_path, it->P_name.c_str()))
CHECK_ERR(ERR_ALREADY_MOUNTED);
WARN_RETURN(ERR_ALREADY_MOUNTED);
}
// disallow "." because "./" isn't supported on Windows.
// it would also create a loophole for the parent td check above.
// "./" and "/." are caught by CHECK_PATH.
if(!strcmp(P_real_path, "."))
CHECK_ERR(ERR_PATH_INVALID);
WARN_RETURN(ERR_PATH_INVALID);
const Mount& m = add_mount(V_mount_point, P_real_path, 0, flags, pri);
return remount(m);
@ -732,7 +728,7 @@ LibError mount_populate(TDir* td, RealDir* rd)
// rationale for not using virtual functions for file_open vs zip_open:
// rationale for not using virtual functions for file_open vs afile_open:
// it would spread out the implementation of each function and make
// keeping them in sync harder. we will very rarely add new sources and
// all these functions are in one spot anyway.
@ -740,7 +736,7 @@ LibError mount_populate(TDir* td, RealDir* rd)
// given a Mount, return the actual location (portable path) of
// <V_path>. used by vfs_realpath and VFile_reopen.
LibError x_realpath(const Mount* m, const char* V_exact_path, char* P_real_path)
LibError x_realpath(const Mount* m, const char* V_path, char* P_real_path)
{
const char* P_parent_path = 0;
@ -758,15 +754,16 @@ LibError x_realpath(const Mount* m, const char* V_exact_path, char* P_real_path)
const char* remove = m->V_mount_point.c_str();
const char* replace = P_parent_path;
return path_replace(P_real_path, V_exact_path, remove, replace);
return path_replace(P_real_path, V_path, remove, replace);
}
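// path_replace is defined in lib code outside this diff; a rough standalone
// sketch of the assumed semantics (fail unless <src> starts with <remove>,
// otherwise substitute <replace> for that prefix):
#include <cstdio>
#include <cstring>

static int path_replace_sketch(char* dst, size_t dst_size,
	const char* src, const char* remove, const char* replace)
{
	const size_t remove_len = strlen(remove);
	if(strncmp(src, remove, remove_len) != 0)
		return -1;                                  // src doesn't start with <remove>
	const int len = snprintf(dst, dst_size, "%s%s", replace, src+remove_len);
	if(len < 0 || (size_t)len >= dst_size)
		return -1;                                  // output would have been truncated
	return 0;
}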
LibError x_open(const Mount* m, const char* V_exact_path, int flags, TFile* tf, XFile* xf)
LibError x_open(const Mount* m, const char* V_path, int flags, TFile* tf, XFile* xf)
{
// declare variables used in the switch below to avoid needing {}.
char P_path[PATH_MAX];
uintptr_t memento = 0;
switch(m->type)
{
@ -776,10 +773,11 @@ LibError x_open(const Mount* m, const char* V_exact_path, int flags, TFile* tf,
debug_warn("requesting write access to file in archive");
return ERR_NOT_IMPLEMENTED;
}
RETURN_ERR(zip_open(m->archive, V_exact_path, flags, &xf->u.zf));
memento = tfile_get_memento(tf);
RETURN_ERR(afile_open(m->archive, V_path, memento, flags, &xf->u.zf));
break;
case MT_FILE:
CHECK_ERR(x_realpath(m, V_exact_path, P_path));
CHECK_ERR(x_realpath(m, V_path, P_path));
RETURN_ERR(file_open(P_path, flags, &xf->u.f));
break;
default:
@ -804,7 +802,7 @@ LibError x_close(XFile* xf)
return ERR_OK;
case MT_ARCHIVE:
(void)zip_close(&xf->u.zf);
(void)afile_close(&xf->u.zf);
break;
case MT_FILE:
(void)file_close(&xf->u.f);
@ -815,8 +813,8 @@ LibError x_close(XFile* xf)
// update file state in VFS tree
// (must be done after close, since that calculates the size)
if(xf->u.f.flags & FILE_WRITE) // xxx what about other types?
tree_update_file(xf->tf, xf->u.f.size, time(0)); // can't fail
if(xf->u.fc.flags & FILE_WRITE)
tree_update_file(xf->tf, xf->u.fc.size, time(0)); // can't fail
xf->type = MT_NONE;
return ERR_OK;
@ -843,7 +841,7 @@ LibError x_validate(const XFile* xf)
// cannot be modified), but it's not ATM.
if(xf->tf == 0)
return ERR_13;
return zip_validate(&xf->u.zf);
return afile_validate(&xf->u.zf);
default:
return ERR_INVALID_MOUNT_TYPE;
@ -858,76 +856,24 @@ bool x_is_open(const XFile* xf)
}
cassert(offsetof(struct File, size ) == offsetof(struct ZFile, ucsize));
cassert(offsetof(struct File, flags) == offsetof(struct ZFile, flags));
// VFile was exceeding HDATA_USER_SIZE. flags and size (required
// in File as well as VFile) are now moved into the union.
// use the functions below to insulate against change a bit.
off_t x_size(const XFile* xf)
{
return xf->u.f.size;
return xf->u.fc.size;
}
void x_set_flags(XFile* xf, uint flags)
{
xf->u.f.flags = flags;
xf->u.fc.flags = flags;
}
uint x_flags(const XFile* xf)
{
return xf->u.f.flags;
}
ssize_t x_io(XFile* xf, off_t ofs, size_t size, void* buf, FileIOCB cb, uintptr_t ctx)
{
switch(xf->type)
{
case MT_ARCHIVE:
// (vfs_open makes sure it's not opened for writing if zip)
return zip_read(&xf->u.zf, ofs, size, buf, cb, ctx);
case MT_FILE:
// normal file:
// let file_io alloc the buffer if the caller didn't (i.e. p = 0),
// because it knows about alignment / padding requirements
return file_io(&xf->u.f, ofs, size, buf, cb, ctx);
default:
WARN_RETURN(ERR_INVALID_MOUNT_TYPE);
}
}
LibError x_map(XFile* xf, void*& p, size_t& size)
{
switch(xf->type)
{
case MT_ARCHIVE:
return zip_map(&xf->u.zf, p, size);
case MT_FILE:
return file_map(&xf->u.f, p, size);
default:
WARN_RETURN(ERR_INVALID_MOUNT_TYPE);
}
}
LibError x_unmap(XFile* xf)
{
switch(xf->type)
{
case MT_ARCHIVE:
return zip_unmap(&xf->u.zf);
case MT_FILE:
return file_unmap(&xf->u.f);
default:
WARN_RETURN(ERR_INVALID_MOUNT_TYPE);
}
return xf->u.fc.flags;
}
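// The layout trick behind u.fc, as a reduced sketch: every union member begins
// with the same fields, so flags/size can be read via FileCommon regardless of
// which member is active. (The *Sketch names and fields are illustrative; the
// real FileCommon/File/AFile carry more state.)
struct FileCommonSketch  { unsigned flags; long size; };
struct PosixFileSketch   { unsigned flags; long size; int fd; };
struct ArchiveFileSketch { unsigned flags; long size; unsigned long inf_ctx; };

union XFileUnionSketch
{
	FileCommonSketch fc;
	PosixFileSketch f;
	ArchiveFileSketch zf;
};

static long x_size_sketch(const XFileUnionSketch* u)
{
	// reading through fc is valid because all members share the same initial
	// sequence (flags, size); this replaces the old offsetof cassert pairing
	// of File and ZFile.
	return u->fc.size;
}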
@ -937,7 +883,7 @@ LibError x_io_issue(XFile* xf, off_t ofs, size_t size, void* buf, XIo* xio)
switch(xio->type)
{
case MT_ARCHIVE:
return zip_io_issue(&xf->u.zf, ofs, size, buf, &xio->u.zio);
return afile_io_issue(&xf->u.zf, ofs, size, buf, &xio->u.zio);
case MT_FILE:
return file_io_issue(&xf->u.f, ofs, size, buf, &xio->u.fio);
default:
@ -951,7 +897,7 @@ int x_io_has_completed(XIo* xio)
switch(xio->type)
{
case MT_ARCHIVE:
return zip_io_has_completed(&xio->u.zio);
return afile_io_has_completed(&xio->u.zio);
case MT_FILE:
return file_io_has_completed(&xio->u.fio);
default:
@ -965,7 +911,7 @@ LibError x_io_wait(XIo* xio, void*& p, size_t& size)
switch(xio->type)
{
case MT_ARCHIVE:
return zip_io_wait(&xio->u.zio, p, size);
return afile_io_wait(&xio->u.zio, p, size);
case MT_FILE:
return file_io_wait(&xio->u.fio, p, size);
default:
@ -979,7 +925,7 @@ LibError x_io_discard(XIo* xio)
switch(xio->type)
{
case MT_ARCHIVE:
return zip_io_discard(&xio->u.zio);
return afile_io_discard(&xio->u.zio);
case MT_FILE:
return file_io_discard(&xio->u.fio);
default:
@ -993,11 +939,59 @@ LibError x_io_validate(const XIo* xio)
switch(xio->type)
{
case MT_ARCHIVE:
return zip_io_validate(&xio->u.zio);
return afile_io_validate(&xio->u.zio);
case MT_FILE:
return file_io_validate(&xio->u.fio);
default:
return ERR_INVALID_MOUNT_TYPE;
}
UNREACHABLE;
}
}
ssize_t x_io(XFile* xf, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t ctx)
{
switch(xf->type)
{
case MT_ARCHIVE:
// (vfs_open makes sure it's not opened for writing if zip)
return afile_read(&xf->u.zf, ofs, size, pbuf, cb, ctx);
case MT_FILE:
// normal file:
// let file_io alloc the buffer if the caller didn't (i.e. *pbuf == FILE_BUF_ALLOC),
// because it knows about alignment / padding requirements
return file_io(&xf->u.f, ofs, size, pbuf, cb, ctx);
default:
WARN_RETURN(ERR_INVALID_MOUNT_TYPE);
}
}
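// For reference, the FileIOBuf convention x_io forwards to, as an illustrative
// call sequence (same pattern as read_and_compress_file in vfs_optimizer;
// passing 0 for the callback is assumed to mean "no per-block callback", and
// the buffer-release call is not part of this diff, so it is omitted):
//
//	File f;
//	RETURN_ERR(file_open(P_path, 0, &f));
//	FileIOBuf buf = FILE_BUF_ALLOC;              // caller didn't allocate => file_io picks an aligned buffer
//	ssize_t bytes_read = file_io(&f, 0, f.size, &buf, 0, 0);
//	// .. use buf[0 .. bytes_read) ..
//	(void)file_close(&f);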
LibError x_map(XFile* xf, void*& p, size_t& size)
{
switch(xf->type)
{
case MT_ARCHIVE:
return afile_map(&xf->u.zf, p, size);
case MT_FILE:
return file_map(&xf->u.f, p, size);
default:
WARN_RETURN(ERR_INVALID_MOUNT_TYPE);
}
}
LibError x_unmap(XFile* xf)
{
switch(xf->type)
{
case MT_ARCHIVE:
return afile_unmap(&xf->u.zf);
case MT_FILE:
return file_unmap(&xf->u.f);
default:
WARN_RETURN(ERR_INVALID_MOUNT_TYPE);
}
}

View File

@ -1,14 +1,15 @@
#ifndef VFS_MOUNT_H__
#define VFS_MOUNT_H__
struct Mount; // must come before vfs_tree.h
#include "file.h"
#include "zip.h"
#include "vfs_tree.h"
extern void mount_init();
extern void mount_shutdown();
struct Mount;
// If it were possible to forward-declare enums in gcc, this one wouldn't be in
// the header. Don't use.
@ -30,14 +31,12 @@ struct XIo
union XIoUnion
{
FileIo fio;
ZipIo zio;
AFileIo zio;
}
u;
};
struct TFile;
struct XFile
{
enum MountType type; // internal use only
@ -48,8 +47,9 @@ struct XFile
union XFileUnion
{
FileCommon fc;
File f;
ZFile zf;
AFile zf;
}
u;
};
@ -57,9 +57,9 @@ struct XFile
// given a Mount, return the actual location (portable path) of
// <V_path>. used by vfs_realpath and VFile_reopen.
extern LibError x_realpath(const Mount* m, const char* V_exact_path, char* P_real_path);
extern LibError x_realpath(const Mount* m, const char* V_path, char* P_real_path);
extern LibError x_open(const Mount* m, const char* V_exact_path, int flags, TFile* tf, XFile* xf);
extern LibError x_open(const Mount* m, const char* V_path, int flags, TFile* tf, XFile* xf);
extern LibError x_close(XFile* xf);
extern LibError x_validate(const XFile* xf);
@ -69,17 +69,18 @@ extern off_t x_size(const XFile* xf);
extern uint x_flags(const XFile* xf);
extern void x_set_flags(XFile* xf, uint flags);
extern ssize_t x_io(XFile* xf, off_t ofs, size_t size, void* buf, FileIOCB cb, uintptr_t ctx);;
extern LibError x_map(XFile* xf, void*& p, size_t& size);
extern LibError x_unmap(XFile* xf);
extern LibError x_io_issue(XFile* xf, off_t ofs, size_t size, void* buf, XIo* xio);
extern int x_io_has_completed(XIo* xio);
extern LibError x_io_wait(XIo* xio, void*& p, size_t& size);
extern LibError x_io_discard(XIo* xio);
extern LibError x_io_validate(const XIo* xio);
extern ssize_t x_io(XFile* xf, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t ctx);
extern LibError x_map(XFile* xf, void*& p, size_t& size);
extern LibError x_unmap(XFile* xf);
@ -122,7 +123,6 @@ struct RealDir
extern LibError mount_attach_real_dir(RealDir* rd, const char* P_path, const Mount* m, int flags);
extern void mount_detach_real_dir(RealDir* rd);
struct TDir;
extern LibError mount_populate(TDir* td, RealDir* rd);

View File

@ -0,0 +1,245 @@
#include "precompiled.h"
#include "lib/res/res.h"
#include "lib/res/file/compression.h"
#include "lib/allocators.h"
#include "lib/timer.h"
#include "vfs_optimizer.h"
enum TraceState
{
TS_UNINITIALIZED,
TS_DISABLED,
TS_ENABLED,
TS_ERROR,
TS_SHUTDOWN
};
static uintptr_t trace_state = TS_UNINITIALIZED; // values from TraceState; type for use with CAS
static Pool trace_pool;
void trace_shutdown()
{
if(trace_state == TS_DISABLED || trace_state == TS_ENABLED)
{
(void)pool_destroy(&trace_pool);
trace_state = TS_SHUTDOWN;
}
}
void trace_enable(bool want_enabled)
{
if(trace_state == TS_SHUTDOWN || trace_state == TS_ERROR)
WARN_ERR_RETURN(ERR_LOGIC);
if(CAS(&trace_state, TS_UNINITIALIZED, TS_ERROR))
{
if(pool_create(&trace_pool, 4*MiB, sizeof(TraceEntry)) < 0)
return; // leave trace_state set to TS_ERROR
}
trace_state = want_enabled? TS_ENABLED : TS_DISABLED;
}
void trace_add(const char* P_fn)
{
if(trace_state == TS_DISABLED || trace_state == TS_UNINITIALIZED)
return;
if(trace_state != TS_ENABLED)
WARN_ERR_RETURN(ERR_LOGIC);
TraceEntry* t = (TraceEntry*)pool_alloc(&trace_pool, 0);
if(!t)
return;
t->timestamp = get_time();
t->atom_fn = file_make_unique_fn_copy(P_fn, 0);
}
LibError trace_write_to_file(const char* trace_filename)
{
if(trace_state == TS_UNINITIALIZED)
return ERR_OK;
if(trace_state != TS_ENABLED && trace_state != TS_DISABLED)
WARN_RETURN(ERR_LOGIC);
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
FILE* f = fopen(N_fn, "wt");
if(!f)
return ERR_FILE_ACCESS;
Trace t;
trace_get(&t);
for(size_t i = 0; i < t.num_ents; i++)
fprintf(f, "%#010f: %s\n", t.ents[i].timestamp, t.ents[i].atom_fn);
(void)fclose(f);
return ERR_OK;
}
LibError trace_load_from_file(const char* trace_filename)
{
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
FILE* f = fopen(N_fn, "rt");
if(!f)
return ERR_FILE_NOT_FOUND;
// parse lines and stuff them in trace_pool
// (as if they had been trace_add-ed; replaces any existing data)
pool_free_all(&trace_pool);
char fmt[20];
// note: scan into a double with %lf; leave one char for the '\0' terminator.
snprintf(fmt, ARRAY_SIZE(fmt), "%%lf: %%%ds\n", PATH_MAX-1);
for(;;)
{
double timestamp; char P_path[PATH_MAX];
int ret = fscanf(f, fmt, &timestamp, P_path);
if(ret == EOF)
break;
if(ret != 2)
debug_warn("invalid line in trace file");
TraceEntry* ent = (TraceEntry*)pool_alloc(&trace_pool, 0);
debug_assert(ent != 0); // the trace was written from a pool of this same size => all entries must fit
ent->timestamp = timestamp;
ent->atom_fn = file_make_unique_fn_copy(P_path, 0);
}
fclose(f);
return ERR_OK;
}
void trace_get(Trace* t)
{
t->ents = (const TraceEntry*)trace_pool.da.base;
t->num_ents = (uint)(trace_pool.da.pos / sizeof(TraceEntry));
}
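// Intended use of the trace API above, as a sketch; the call site and the
// trace filename are illustrative, only the function names are from this file.
static void trace_usage_sketch()
{
	trace_enable(true);                              // start recording
	trace_add("art/textures/ui/example.dds");        // presumably called by the VFS per file access
	(void)trace_write_to_file("trace.txt");          // dump for the archive optimizer
	trace_shutdown();
}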
///////////////////////////////////////////////////////////////////////////////
#if 0
struct FileList
{
const char* atom_fns;
size_t num_files;
};
static LibError filelist_build(Trace* t, FileList* fl)
{
}
static LibError filelist_get(FileList* fl, uint i, const char* path)
{
return ERR_DIR_END;
}
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
{
uintptr_t ctx = cb_ctx;
*bytes_processed = comp_feed(ctx, block, size);
return INFO_CB_CONTINUE;
}
static LibError read_and_compress_file(uintptr_t ctx, ZipEntry* ze)
{
const char* fn = ze->path;
struct stat s;
RETURN_ERR(file_stat(fn, &s));
const size_t ucsize = s.st_size;
RETURN_ERR(comp_reset(ctx));
RETURN_ERR(comp_alloc_output(ctx, ucsize));
File f;
RETURN_ERR(file_open(fn, 0, &f));
FileIOBuf buf = FILE_BUF_ALLOC;
uintptr_t cb_ctx = ctx;
ssize_t cbytes_output = file_io(&f, 0, ucsize, &buf, compress_cb, cb_ctx);
(void)file_close(&f);
void* cdata; size_t csize;
RETURN_ERR(comp_finish(ctx, &cdata, &csize));
debug_assert(cbytes_output <= csize);
RETURN_ERR(cbytes_output);
// decide if it was better compressed or not
ze->ucsize = ucsize;
ze->mtime = s.st_mtime;
ze->method = CM_DEFLATE;
ze->csize = csize;
ze->cdata = cdata;
zip_archive_add(&za, &ze);
return ERR_OK;
}
static void build_optimized_archive(const char* trace_file, const char* zip_filename)
{
FileList fl;
{
Trace t;
RETURN_ERR(trace_load_from_file(trace_filename, &t));
filelist_build(&t, &fl);
}
ZipArchive za;
zip_archive_create(zip_filename, &za);
uintptr_t ctx = comp_alloc();
uint trace_i = 0;
uint queued_files = 0, committed_files = 0;
for(;;)
{
/*
document: the zlib layer is ok to allocate from the heap; the caller shouldn't
do so from a pool. if the next file is to be loaded and decompressed while our
pool is full, we would have to wait for the archive write to finish before the
pool can be marked as reclaimed. the heap avoids that; besides, memory isn't
the bottleneck for the read-queue size.
*/
ZipEntry ze; // TODO: QUEUE
const int max_readqueue_depth = 1;
for(uint i = 0; i < max_readqueue_depth; i++)
{
LibError ret = trace_get_next_file(trace, trace_i, ze.path);
if(ret == ERR_DIR_END)
break;
WARN_ERR(read_and_compress_file(ctx, &ze));
queued_files++;
}
if(committed_files == queued_files)
break;
zip_archive_add(&za, &ze);
committed_files++;
}
comp_free(ctx);
zip_archive_finish(&za);
}
#endif

View File

@ -0,0 +1,25 @@
#ifndef VFS_OPTIMIZER_H__
#define VFS_OPTIMIZER_H__
extern void trace_enable(bool want_enabled);
extern void trace_add(const char* P_fn);
extern LibError trace_write_to_file(const char* trace_filename);
extern LibError trace_load_from_file(const char* trace_filename);
struct TraceEntry
{
double timestamp;
const char* atom_fn;
};
struct Trace
{
const TraceEntry* ents;
uint num_ents;
};
extern void trace_get(Trace* t);
extern void trace_shutdown();
#endif // #ifndef VFS_OPTIMIZER_H__

View File

@ -78,7 +78,7 @@ LibError path_validate(const uint line, const char* path)
}
fail:
debug_printf("%s called from line %d failed: %s\n", __func__, line, msg);
debug_printf("%s called from line %u failed: %s\n", __func__, line, msg);
debug_warn("failed");
return ERR_FAIL;

View File

@ -7,9 +7,12 @@
#include <vector>
#include <algorithm>
#include "lib/allocators.h"
#include "lib/adts.h"
#include "../res.h"
#include "vfs_path.h"
#include "vfs_tree.h"
#include "file_cache.h"
#include "lib/allocators.h"
@ -31,6 +34,7 @@
// TDir = container holding TFile-s representing a dir. in the tree.
static void* node_alloc();
//-----------------------------------------------------------------------------
// locking
@ -50,386 +54,285 @@ void tree_unlock()
}
// CONTAINER RATIONALE (see philip discussion)
//-----------------------------------------------------------------------------
struct Mount;
// these must be defined before TNode because it puts them in a union.
// some TDir member functions access TNode members, so we have to define
// those later.
struct TFile
enum TNodeType
{
NT_DIR,
NT_FILE
};
class TNode
{
public:
TNodeType type;
//OLD DOC: (for exact_name): used by callers needing the exact case,
// e.g. for case-sensitive syscalls; also key for lookup
// set by TChildren
const char* atom_fn;
// name component only (points within atom_fn).
// alternative is strrchr(atom_fn, '/') on every access - slow.
const char* name;
TNode(TNodeType type_, const char* atom_fn_, const char* name_)
{
type = type_;
atom_fn = atom_fn_;
name = name_;
}
};
class TFile : public TNode
{
public:
// required:
const Mount* m;
// allocated and owned by caller (mount code)
time_t mtime;
off_t size;
time_t mtime;
// note: this is basically the constructor (C++ can't call it directly
// since this object is stored in a union)
void init()
uintptr_t memento;
TFile(const char* atom_fn, const char* name, const Mount* m_)
: TNode(NT_FILE, atom_fn, name)
{
m = 0;
mtime = 0;
m = m_;
size = 0;
mtime = 0;
memento = 0;
}
};
struct TNode;
enum TNodeType
template<> class DHT_Traits<const char*, TNode*>
{
N_NONE,
N_DIR,
N_FILE
};
static Bucket node_buckets;
//////////////////////////////////////////////////////////////////////////////
//
//
//
//////////////////////////////////////////////////////////////////////////////
typedef TNode* T;
typedef const char* Key;
static const size_t n = 16;
static inline Key GetKey(const T t);
static inline bool Eq(const Key k1, const Key k2);
static inline u32 Hash(const Key key);
class DynHashTbl
{
T* tbl;
short num_entries;
short max_entries; // when initialized, = 2**n for faster modulo
bool expand_tbl()
{
// alloc a new table (but don't assign it to <tbl> unless successful)
T* old_tbl = tbl;
tbl = (T*)calloc(max_entries*2, sizeof(T));
if(!tbl)
{
tbl = old_tbl;
return false;
}
max_entries += max_entries;
// must be set before get_slot
// newly initialized, nothing to copy - done
if(!old_tbl)
return true;
// re-hash from old table into the new one
for(int i = 0; i < max_entries/2; i++)
{
T const t = old_tbl[i];
if(t)
*get_slot(GetKey(t)) = t;
}
free(old_tbl);
return true;
}
public:
void init()
static const size_t initial_entries = 16;
size_t hash(const char* key) const
{
tbl = 0;
num_entries = 0;
max_entries = n/2; // will be doubled in expand_tbl
expand_tbl();
return (size_t)fnv_lc_hash(key);
}
void clear()
bool equal(const char* k1, const char* k2) const
{
free(tbl);
tbl = 0;
num_entries = max_entries = 0;
// exact match
if(!strcmp(k1, k2))
return true;
#ifndef NDEBUG
// matched except for case: this can have 2 causes:
// - intentional. that would be legitimate but doesn't make much
// sense and isn't expected.
// - bug, e.g. discarding filename case in a filelist.
// this risks not being able to find the file (since VFS and
// possibly OS are case-sensitive) and wastes memory here.
// what we'll do is warn and treat as separate filename
// (least surprise).
if(!stricmp(k1, k2))
debug_warn("filenames differ only in case: bug?");
#endif
return false;
}
// note: add is only called once per file, so we can do the hash
// here without duplication
T* get_slot(Key key)
const char* get_key(TNode* t) const
{
u32 hash = Hash(key);
debug_assert(max_entries != 0); // otherwise, mask will be incorrect
const uint mask = max_entries-1;
T* p;
for(;;)
{
p = &tbl[hash & mask];
hash++;
const T t = *p;
if(!t)
break;
if(Eq(key, GetKey(t)))
break;
}
return p;
}
bool add(const Key key, const T t)
{
// expand before determining slot; this will invalidate previous pnodes.
if(num_entries*4 >= max_entries*3)
{
if(!expand_tbl())
return false;
}
// commit
*get_slot(key) = t;
num_entries++;
return true;
}
T find(Key key)
{
return *get_slot(key);
}
size_t size()
{
return num_entries;
}
class iterator
{
public:
typedef std::forward_iterator_tag iterator_category;
typedef ::T T;
typedef T value_type;
typedef ptrdiff_t difference_type;
typedef const T* pointer;
typedef const T& reference;
iterator()
{
}
iterator(T* pos_, T* end_) : pos(pos_), end(end_)
{
}
T& operator*() const
{
return *pos;
}
iterator& operator++() // pre
{
do
pos++;
while(pos != end && *pos == 0);
return (*this);
}
bool operator==(const iterator& rhs) const
{
return pos == rhs.pos;
}
bool operator<(const iterator& rhs) const
{
return (pos < rhs.pos);
}
// derived
const T* operator->() const
{
return &**this;
}
bool operator!=(const iterator& rhs) const
{
return !(*this == rhs);
}
iterator operator++(int) // post
{
iterator tmp = *this; ++*this; return tmp;
}
protected:
T* pos;
T* end;
// only used when incrementing (avoid going beyond end of table)
};
iterator begin() const
{
T* pos = tbl;
while(pos != tbl+max_entries && *pos == 0)
pos++;
return iterator(pos, tbl+max_entries);
}
iterator end() const
{
return iterator(tbl+max_entries, 0);
return t->name;
}
};
typedef DynHashTbl::iterator TChildIt;
typedef DynHashTbl<const char*, TNode*, DHT_Traits<const char*, TNode*> > TChildren;
typedef TChildren::iterator TChildrenIt;
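// fnv_lc_hash is defined elsewhere in lib; assuming it is the usual
// case-folding FNV-1a, a minimal equivalent looks like this (the constants are
// the standard 32-bit FNV offset basis and prime). Folding case in the hash is
// what lets equal() above be the only place that notices case mismatches.
#include <cctype>
#include <cstdint>

static uint32_t fnv_lc_hash_sketch(const char* s)
{
	uint32_t h = 2166136261u;                        // FNV offset basis
	for(; *s; s++)
	{
		h ^= (uint32_t)tolower((unsigned char)*s);
		h *= 16777619u;                              // FNV prime
	}
	return h;
}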
enum TDirFlags
{
TD_POPULATED = 1
};
// must be declared before TNode
struct TDir
class TDir : public TNode
{
int flags; // enum TDirFlags
RealDir rd;
DynHashTbl children;
TChildren children;
void init();
TNode* find(const char* name, TNodeType desired_type);
LibError add(const char* name, TNodeType new_type, TNode** pnode);
LibError attach_real_dir(const char* path, int flags, const Mount* new_m);
LibError lookup(const char* path, uint flags, TNode** pnode, char* exact_path);
void clearR();
void displayR(int indent_level);
};
// can't inherit, since exact_name must come at end of record
struct TNode
{
// must be at start of TNode to permit casting back and forth!
// (see TDir::lookup)
union TNodeUnion
public:
TDir(const char* atom_fn, const char* name)
: TNode(NT_DIR, atom_fn, name), children()
{
TDir dir;
TFile file;
} u;
flags = 0;
rd.m = 0;
rd.watch = 0;
}
TNodeType type;
TNode* find(const char* name) const { return children.find(name); }
TChildrenIt begin() const { return children.begin(); }
TChildrenIt end() const { return children.end(); }
//used by callers needing the exact case,
// e.g. for case-sensitive syscalls; also key for lookup
// set by DynHashTbl
char exact_name[1];
// non-const - caller may change e.g. rd.watch
RealDir& get_rd() { return rd; }
void populate()
{
// the caller may potentially access this directory.
// make sure it has been populated with loose files/directories.
if(!(flags & TD_POPULATED))
{
WARN_ERR(mount_populate(this, &rd));
flags |= TD_POPULATED;
}
}
LibError add(const char* P_path, TNodeType type, TNode** pnode)
{
const char* atom_fn = file_make_unique_fn_copy(P_path, 0);
const char* slash = strrchr(atom_fn, '/');
const char* name = slash? slash+1 : atom_fn;
if(!path_component_valid(name))
return ERR_PATH_INVALID;
TNode* node = children.find(name);
if(node)
{
if(node->type != type)
return (type == NT_FILE)? ERR_NOT_FILE : ERR_NOT_DIR;
*pnode = node;
return INFO_ALREADY_PRESENT;
}
// note: if anything below fails, this mem remains allocated in the
// pool, but that "can't happen" and is OK because pool is big enough.
void* mem = node_alloc();
if(!mem)
return ERR_NO_MEM;
#include "nommgr.h"
if(type == NT_FILE)
node = new(mem) TFile(atom_fn, name, rd.m);
else
node = new(mem) TDir(atom_fn, name);
#include "mmgr.h"
children.insert(name, node);
*pnode = node;
return ERR_OK;
}
// empty this directory and all subdirectories; used when rebuilding VFS.
void clearR()
{
// recurse for all subdirs
// (preorder traversal - need to do this before clearing the list)
for(TChildrenIt it = children.begin(); it != children.end(); ++it)
{
TNode* node = *it;
if(node->type == NT_DIR)
((TDir*)node)->clearR();
}
// wipe out this directory
children.clear();
// the watch is restored when this directory is repopulated; we must
// remove it in case the real directory backing this one was deleted.
mount_detach_real_dir(&rd);
}
};
static inline bool Eq(const Key k1, const Key k2)
static Pool node_pool;
static inline void node_init()
{
return strcmp(k1, k2) == 0;
const size_t el_size = MAX(sizeof(TDir), sizeof(TFile));
(void)pool_create(&node_pool, VFS_MAX_FILES*el_size, el_size);
}
static u32 Hash(const Key key)
static inline void node_shutdown()
{
return fnv_lc_hash(key);
(void)pool_destroy(&node_pool);
}
static inline Key GetKey(const T t)
static void* node_alloc()
{
return t->exact_name;
return pool_alloc(&node_pool, 0);
}
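// node_alloc hands out fixed-size slots big enough for either node type, and
// TDir::add placement-constructs into them. The same scheme as a self-contained
// sketch, with Pool replaced by a std::vector-backed stand-in:
#include <cstddef>
#include <new>
#include <vector>

class NodePoolSketch
{
	std::vector<unsigned char> storage;
	size_t el_size, pos;
public:
	NodePoolSketch(size_t el_size_, size_t max_nodes)
		: storage(el_size_*max_nodes), el_size(el_size_), pos(0) {}

	void* alloc()
	{
		if(pos + el_size > storage.size())
			return 0;                                // pool exhausted
		void* p = &storage[pos];
		pos += el_size;
		return p;
	}
};

// usage, mirroring node_init and TDir::add:
//	NodePoolSketch pool(MAX(sizeof(TDir), sizeof(TFile)), VFS_MAX_FILES);
//	void* mem = pool.alloc();
//	TFile* tf = new(mem) TFile(atom_fn, name, m);    // placement new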
//////////////////////////////////////////////////////////////////////////////
//
//
//
//////////////////////////////////////////////////////////////////////////////
void TDir::init()
static void displayR(TDir* td, int indent_level)
{
flags = 0;
rd.m = 0;
rd.watch = 0;
children.init();
}
const char indent[] = " ";
TNode* TDir::find(const char* name, TNodeType desired_type)
{
TNode* node = children.find(name);
if(node && node->type != desired_type)
return 0;
return node;
}
LibError TDir::add(const char* name, TNodeType new_type, TNode** pnode)
{
if(!path_component_valid(name))
return ERR_PATH_INVALID;
// this is legit - when looking up a directory, LF_CREATE_IF_MISSING
// calls this *instead of* find (as opposed to only if not found)
TNode* node = children.find(name);
if(node)
goto done;
TChildrenIt it;
// list all files in this dir
for(it = td->begin(); it != td->end(); ++it)
{
const size_t size = sizeof(TNode)+strnlen(name, VFS_MAX_PATH)+1;
node = (TNode*)bucket_alloc(&node_buckets, size);
if(!node)
return ERR_OK;
strcpy(node->exact_name, name); // safe
node->type = new_type;
TNode* node = (*it);
if(node->type != NT_FILE)
continue;
const char* name = node->name;
if(!children.add(name, node))
{
debug_warn("failed to expand table");
// node will be freed by node_free_all
return ERR_OK;
TFile& file = *((TFile*)node);
char file_location = mount_get_type(file.m);
char* timestamp = ctime(&file.mtime);
timestamp[24] = '\0'; // remove '\n'
const off_t size = file.size;
// build format string: tell it how long the filename may be,
// so that it takes up all space before file info column.
char fmt[25];
int chars = 80 - indent_level*(sizeof(indent)-1);
sprintf(fmt, "%%-%d.%ds (%%c; %%6d; %%s)\n", chars, chars);
for(int i = 0; i < indent_level; i++)
printf(indent);
printf(fmt, name, file_location, size, timestamp);
}
// note: this is called from lookup, which needs to create nodes.
// therefore, we need to initialize here.
if(new_type == N_FILE)
node->u.file.init();
else
node->u.dir.init();
}
// recurse over all subdirs
for(it = td->begin(); it != td->end(); ++it)
{
TNode* node = (*it);
if(node->type != NT_DIR)
continue;
const char* subdir_name = node->name;
done:
*pnode = node;
return ERR_OK;
// write subdir's name
// note: do it now, instead of in recursive call so that:
// - we don't have to pass dir_name parameter;
// - the VFS root node isn't displayed.
for(int i = 0; i < indent_level; i++)
printf(indent);
printf("[%s/]\n", subdir_name);
TDir* subdir = ((TDir*)node);
displayR(subdir, indent_level+1);
}
}
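// Aside: the runtime-built format string above can also be written with
// printf's '*' width/precision arguments; a sketch (the indent width is
// assumed here, sizeof(indent)-1 in the real code):
#include <cstdio>

static void print_file_line_sketch(const char* name, int indent_level)
{
	static const char indent[] = "    ";
	const int chars = 80 - indent_level*(int)(sizeof(indent)-1);
	for(int i = 0; i < indent_level; i++)
		printf("%s", indent);
	printf("%-*.*s\n", chars, chars, name);          // width/precision passed as arguments
}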
LibError TDir::lookup(const char* path, uint flags, TNode** pnode, char* exact_path)
{
// cleared on failure / if returning root dir node (= "")
if(exact_path)
exact_path[0] = '\0';
static LibError lookup(TDir* td, const char* path, uint flags, TNode** pnode)
{
// early out: "" => return this directory (usually VFS root)
if(path[0] == '\0')
{
*pnode = (TNode*)this; // HACK: TDir is at start of TNode
*pnode = (TNode*)td; // HACK: TDir is at start of TNode
return ERR_OK;
}
@ -441,12 +344,11 @@ LibError TDir::lookup(const char* path, uint flags, TNode** pnode, char* exact_p
// copy into (writeable) buffer so we can 'tokenize' path components
// by replacing '/' with '\0'.
char v_path[VFS_MAX_PATH];
strcpy_s(v_path, sizeof(v_path), path);
char* cur_component = v_path;
char V_path[VFS_MAX_PATH];
strcpy_s(V_path, sizeof(V_path), path);
char* cur_component = V_path;
TDir* td = this;
TNodeType type = N_DIR;
TNodeType type = NT_DIR;
// successively navigate to the next component in <path>.
TNode* node = 0;
@ -467,57 +369,38 @@ LibError TDir::lookup(const char* path, uint flags, TNode** pnode, char* exact_p
break;
// it's a filename
type = N_FILE;
type = NT_FILE;
}
// normal operation (cur_component is a directory)
else
{
// the caller may potentially access this directory.
// make sure it has been populated with loose files/directories.
if(!(td->flags & TD_POPULATED))
{
WARN_ERR(mount_populate(td, &td->rd));
td->flags |= TD_POPULATED;
}
td->populate();
*slash = '\0';
}
// create <cur_component> (no-op if it already exists)
if(create_missing)
{
RETURN_ERR(td->add(cur_component, type, &node));
// this is a hack, but I don't see a better way.
// tree_add_file does special "should override" checks and
// we are creating a TNode (not TFile or TDir) here,
// so we special-case its init.
if(type == N_FILE)
{
node->u.file.m = td->rd.m;
}
}
RETURN_ERR(td->add(V_path, type, &node));
else
{
node = td->find(cur_component, type);
node = td->find(cur_component);
if(!node)
return slash? ERR_PATH_NOT_FOUND : ERR_FILE_NOT_FOUND;
if(node->type != type)
return slash? ERR_NOT_DIR : ERR_NOT_FILE;
}
td = &node->u.dir;
if(exact_path)
exact_path += sprintf(exact_path, "%s/", node->exact_name);
// no length check needed: length is the same as path
// cur_component was a filename => we're done
if(!slash)
{
// strip trailing '/' that was added above
if(exact_path)
exact_path[-1] = '\0';
break;
}
// else: it was a directory; advance
// .. undo having replaced '/' with '\0' - this means V_path will
// store the complete path up to and including cur_component.
if(cur_component != V_path)
cur_component[-1] = '/';
cur_component = slash+1;
td = (TDir*)node;
}
// success.
@ -525,78 +408,6 @@ LibError TDir::lookup(const char* path, uint flags, TNode** pnode, char* exact_p
return ERR_OK;
}
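// The in-place tokenization used by lookup(), reduced to a standalone walk.
// This sketch only shows the replace-'/'-with-'\0' idea; it does not reproduce
// the exact order in which lookup() restores the separators.
#include <cstdio>
#include <cstring>

static void walk_components_sketch(const char* path)
{
	char buf[256];
	strncpy(buf, path, sizeof(buf)-1);
	buf[sizeof(buf)-1] = '\0';

	for(char* cur = buf;;)
	{
		char* slash = strchr(cur, '/');
		if(slash)
			*slash = '\0';                           // terminate the current component
		printf("component: %s\n", cur);              // the real code descends into / creates a TDir here
		if(!slash)
			break;                                   // no further '/': cur was the filename
		*slash = '/';                                // restore the separator
		cur = slash+1;
	}
}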
// empty this directory and all subdirectories; used when rebuilding VFS.
void TDir::clearR()
{
// recurse for all subdirs
// (preorder traversal - need to do this before clearing the list)
for(TChildIt it = children.begin(); it != children.end(); ++it)
{
TNode* node = *it;
if(node->type == N_DIR)
node->u.dir.clearR();
}
// wipe out this directory
children.clear();
// the watch is restored when this directory is repopulated; we must
// remove it in case the real directory backing this one was deleted.
mount_detach_real_dir(&rd);
}
void TDir::displayR(int indent_level)
{
const char indent[] = " ";
TChildIt it;
// list all files in this dir
for(it = children.begin(); it != children.end(); ++it)
{
TNode* node = (*it);
if(node->type != N_FILE)
continue;
TFile& file = node->u.file;
const char* name = node->exact_name;
char type = mount_get_type(file.m);
char* timestamp = ctime(&file.mtime);
timestamp[24] = '\0'; // remove '\n'
const off_t size = file.size;
for(int i = 0; i < indent_level; i++)
printf(indent);
char fmt[25];
int chars = 80 - indent_level*(sizeof(indent)-1);
sprintf(fmt, "%%-%d.%ds (%%c; %%6d; %%s)\n", chars, chars);
// build format string: tell it how long the filename may be,
// so that it takes up all space before file info column.
printf(fmt, name, type, size, timestamp);
}
// recurse over all subdirs
for(it = children.begin(); it != children.end(); ++it)
{
TNode* node = (*it);
if(node->type != N_DIR)
continue;
TDir& subdir = node->u.dir;
const char* subdir_name = node->exact_name;
// write subdir's name
// note: do it now, instead of in recursive call so that:
// - we don't have to pass dir_name parameter;
// - the VFS root node isn't displayed.
for(int i = 0; i < indent_level; i++)
printf(indent);
printf("[%s/]\n", subdir_name);
subdir.displayR(indent_level+1);
}
}
//////////////////////////////////////////////////////////////////////////////
//
@ -604,98 +415,96 @@ void TDir::displayR(int indent_level)
//
//////////////////////////////////////////////////////////////////////////////
static TNode tree_root;
// => exact_name = ""
static TDir* tree_root_dir = &tree_root.u.dir;
void tree_clear()
{
tree_root_dir->clearR();
}
static TDir tree_root(0, 0);
// rationale: can't do this in tree_shutdown - we'd leak at exit.
// calling from tree_add* is ugly as well, so require manual init.
void tree_init()
{
tree_root_dir->init();
node_init();
}
void tree_shutdown()
{
bucket_free_all(&node_buckets);
node_shutdown();
}
void tree_clear()
{
tree_root.clearR();
}
// write a representation of the VFS tree to stdout.
void tree_display()
{
tree_root_dir->displayR(0);
displayR(&tree_root, 0);
}
LibError tree_add_file(TDir* td, const char* name, const Mount* m,
off_t size, time_t mtime)
LibError tree_add_file(TDir* td, const char* P_path,
const Mount* m, off_t size, time_t mtime, uintptr_t memento)
{
TNode* node;
RETURN_ERR(td->add(name, N_FILE, &node));
TFile* tf = &node->u.file;
LibError ret = td->add(P_path, NT_FILE, &node);
RETURN_ERR(ret);
if(ret == INFO_ALREADY_PRESENT)
{
// assume they're the same if size and last-modified time match.
// note: FAT timestamp only has 2 second resolution
TFile* tf = (TFile*)node;
const bool is_same = (tf->size == size) &&
fabs(difftime(tf->mtime, mtime)) <= 2.0;
if(!mount_should_replace(tf->m, m, is_same))
return INFO_NO_REPLACE;
}
// assume they're the same if size and last-modified time match.
// note: FAT timestamp only has 2 second resolution
const bool is_same = (tf->size == size) &&
fabs(difftime(tf->mtime, mtime)) <= 2.0;
if(!mount_should_replace(tf->m, m, is_same))
return INFO_NO_REPLACE;
tf->m = m;
tf->mtime = mtime;
tf->size = size;
TFile* tf = (TFile*)node;
tf->m = m;
tf->mtime = mtime;
tf->size = size;
tf->memento = memento;
return ERR_OK;
}
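// The "same file" criterion used above, extracted for clarity; the 2 second
// tolerance accounts for FAT's 2-second mtime resolution noted in the comment.
#include <math.h>
#include <time.h>
#include <sys/types.h>

static bool is_same_file_sketch(off_t size1, time_t mtime1, off_t size2, time_t mtime2)
{
	return size1 == size2 && fabs(difftime(mtime1, mtime2)) <= 2.0;
}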
LibError tree_add_dir(TDir* td, const char* name, TDir** ptd)
LibError tree_add_dir(TDir* td, const char* P_path, TDir** ptd)
{
TNode* node;
RETURN_ERR(td->add(name, N_DIR, &node));
*ptd = &node->u.dir;
RETURN_ERR(td->add(P_path, NT_DIR, &node));
*ptd = (TDir*)node;
return ERR_OK;
}
LibError tree_lookup_dir(const char* path, TDir** ptd, uint flags, char* exact_path)
LibError tree_lookup_dir(const char* path, TDir** ptd, uint flags)
{
// path is not a directory; TDir::lookup might return a file node
if(path[0] != '\0' && path[strlen(path)-1] != '/')
return ERR_NOT_DIR;
TDir* td = (flags & LF_START_DIR)? *ptd : tree_root_dir;
TDir* td = (flags & LF_START_DIR)? *ptd : &tree_root;
TNode* node;
CHECK_ERR(td->lookup(path, flags, &node, exact_path));
CHECK_ERR(lookup(td, path, flags, &node));
// directories should exist, so warn if this fails
*ptd = &node->u.dir;
*ptd = (TDir*)node;
return ERR_OK;
}
LibError tree_lookup(const char* path, TFile** pfile, uint flags, char* exact_path)
LibError tree_lookup(const char* path, TFile** pfile, uint flags)
{
// path is not a file; TDir::lookup might return a directory node
if(path[0] == '\0' || path[strlen(path)-1] == '/')
return ERR_NOT_FILE;
TNode* node;
LibError ret = tree_root_dir->lookup(path, flags, &node, exact_path);
LibError ret = lookup(&tree_root, path, flags, &node);
RETURN_ERR(ret);
*pfile = &node->u.file;
*pfile = (TFile*)node;
return ERR_OK;
}
@ -708,10 +517,10 @@ LibError tree_lookup(const char* path, TFile** pfile, uint flags, char* exact_pa
// rationale: see DirIterator definition in file.h.
struct TreeDirIterator_
{
DynHashTbl::iterator it;
TChildren::iterator it;
// cache end() to avoid needless copies
DynHashTbl::iterator end;
TChildren::iterator end;
// the directory we're iterating over; this is used to lock/unlock it,
// i.e. prevent modifications that would invalidate the iterator.
@ -737,8 +546,8 @@ LibError tree_dir_open(const char* path_slash, TreeDirIterator* d_)
// more overhead (we have hundreds of directories) and is unnecessary.
tree_lock();
d->it = td->children.begin();
d->end = td->children.end();
d->it = td->begin();
d->end = td->end();
d->td = td;
return ERR_OK;
}
@ -752,19 +561,22 @@ LibError tree_dir_next_ent(TreeDirIterator* d_, DirEnt* ent)
return ERR_DIR_END;
const TNode* node = *(d->it++);
ent->name = node->exact_name;
ent->name = node->name;
// set size and mtime fields depending on node type:
switch(node->type)
{
case N_DIR:
case NT_DIR:
ent->size = -1;
ent->mtime = 0; // not currently supported for dirs
break;
case N_FILE:
ent->size = node->u.file.size;
ent->mtime = node->u.file.mtime;
case NT_FILE:
{
TFile* tf = (TFile*)node;
ent->size = tf->size;
ent->mtime = tf->mtime;
break;
}
default:
debug_warn("invalid TNode type");
}
@ -786,11 +598,22 @@ LibError tree_dir_close(TreeDirIterator* UNUSED(d))
//-----------------------------------------------------------------------------
// get/set
const Mount* tree_get_mount(const TFile* tf)
const Mount* tfile_get_mount(const TFile* tf)
{
return tf->m;
}
uintptr_t tfile_get_memento(const TFile* tf)
{
return tf->memento;
}
const char* tfile_get_atom_fn(const TFile* tf)
{
return ((TNode*)tf)->atom_fn;
}
void tree_update_file(TFile* tf, off_t size, time_t mtime)
{
@ -814,5 +637,5 @@ LibError tree_stat(const TFile* tf, struct stat* s)
RealDir* tree_get_real_dir(TDir* td)
{
return &td->rd;
return &td->get_rd();
}

View File

@ -19,11 +19,13 @@
#ifndef VFS_TREE_H__
#define VFS_TREE_H__
class TFile; // must come before vfs_mount.h
class TDir;
#include "file.h" // DirEnt
#include "vfs_mount.h" // Mount
struct TFile;
struct TDir;
extern void tree_init();
extern void tree_shutdown();
@ -42,9 +44,11 @@ extern void tree_clear();
// note: if "priority" is the same, replace!
// this makes sure mods/patches etc. actually replace files.
extern LibError tree_add_file(TDir* td, const char* fn, const Mount* m,
off_t size, time_t mtime);
off_t size, time_t mtime, uintptr_t memento);
extern LibError tree_add_dir(TDir* dir, const char* P_path, TDir** ptd);
extern LibError tree_add_dir(TDir* dir, const char* name, TDir** ptd);
enum TreeLookupFlags
{
@ -57,13 +61,10 @@ enum TreeLookupFlags
// if <flags> & LF_CREATE_MISSING, the file is added to VFS unless
// a higher-priority file of the same name already exists
// (used by VFile_reload when opening for writing).
// if <exact_path> != 0, it receives a copy of <path> with the exact
// case of each component as returned by the OS (useful for calling
// external case-sensitive code). must hold at least VFS_MAX_PATH chars.
//
// return 0 on success, or a negative error code
// (in which case output params are undefined).
extern LibError tree_lookup(const char* path, TFile** ptf, uint flags = 0, char* exact_path = 0);
extern LibError tree_lookup(const char* path, TFile** ptf, uint flags = 0);
// starting at VFS root, traverse <path> and pass back information
// for its last directory component.
@ -72,16 +73,13 @@ extern LibError tree_lookup(const char* path, TFile** ptf, uint flags = 0, char*
// added to the VFS.
// if <flags> & LF_START_DIR, traversal starts at *pdir
// (used when looking up paths relative to a mount point).
// if <exact_path> != 0, it receives a copy of <path> with the exact
// case of each component as returned by the OS (useful for calling
// external case-sensitive code). must hold at least VFS_MAX_PATH chars.
//
// <path> can be to a file or dir (in which case it must end in '/',
// to make sure the last component is treated as a directory).
//
// return 0 on success, or a negative error code
// (in which case output params are undefined).
extern LibError tree_lookup_dir(const char* path, TDir** ptd, uint flags = 0, char* exact_path = 0);
extern LibError tree_lookup_dir(const char* path, TDir** ptd, uint flags = 0);
// documentation and rationale: see file.h's dir_next_ent interface
@ -102,7 +100,9 @@ extern LibError tree_realpath(TFile* tf, const char* V_path, char* P_real_path);
extern LibError tree_stat(const TFile* tf, struct stat* s);
extern const Mount* tree_get_mount(const TFile* tf);
extern const Mount* tfile_get_mount(const TFile* tf);
extern uintptr_t tfile_get_memento(const TFile* tf);
extern const char* tfile_get_atom_fn(const TFile* tf);
extern void tree_update_file(TFile* tf, off_t size, time_t mtime);

File diff suppressed because it is too large

View File

@ -1,165 +1,11 @@
// Zip archiving on top of ZLib.
//
// Copyright (c) 2003 Jan Wassenberg
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// Contact info:
// Jan.Wassenberg@stud.uni-karlsruhe.de
// http://www.stud.uni-karlsruhe.de/~urkt/
#ifndef ZIP_H__
#define ZIP_H__
#include "../handle.h"
#include "file.h" // FileCB for zip_enum
#include "archive.h"
#include "file.h"
extern LibError zip_populate_archive(Archive* a, File* f);
// note: filenames are case-insensitive.
//
// archive
//
// open and return a handle to the zip archive indicated by <fn>.
// somewhat slow - each file is added to an internal index.
extern Handle zip_archive_open(const char* fn);
// close the archive <ha> and set ha to 0
extern LibError zip_archive_close(Handle& ha);
// successively call <cb> for each valid file in the archive <ha>,
// passing the complete path and <user>.
// if it returns a nonzero value, abort and return that, otherwise 0.
extern LibError zip_enum(const Handle ha, const FileCB cb, const uintptr_t user);
//
// file
//
struct ZFile
{
// keep offset of flags and size members in sync with struct File!
// it is accessed by VFS and must be the same for both (union).
// dirty, but necessary because VFile is pushing the HDATA size limit.
uint flags;
size_t ucsize; // uncompressed size
off_t ofs; // in archive
off_t csize;
off_t last_read_ofs; // in compressed file
Handle ha;
uintptr_t inf_ctx;
// this ZFile has been successfully zip_map-ped, i.e. reference
// count of the archive's mapping has been increased.
// we need to undo that when closing it.
uint is_mapped : 1;
};
// get file status (size, mtime). output param is zeroed on error.
extern LibError zip_stat(Handle ha, const char* fn, struct stat* s);
// open file, and fill *zf with information about it.
// return < 0 on error (output param zeroed).
extern LibError zip_open(Handle ha, const char* fn, int flags, ZFile* zf);
// close file.
extern LibError zip_close(ZFile* zf);
extern LibError zip_validate(const ZFile* zf);
//
// asynchronous read
//
struct ZipIo
{
FileIo io;
uintptr_t inf_ctx;
size_t max_output_size;
void* user_buf;
bool already_inflated;
};
// begin transferring <size> bytes, starting at <ofs>. get result
// with zip_io_wait; when no longer needed, free via zip_io_discard.
extern LibError zip_io_issue(ZFile* zf, off_t ofs, size_t size, void* buf, ZipIo* io);
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
extern int zip_io_has_completed(ZipIo* io);
// wait until the transfer <io> completes, and return its buffer.
// output parameters are zeroed on error.
extern LibError zip_io_wait(ZipIo* io, void*& p, size_t& size);
// finished with transfer <io> - free its buffer (returned by zip_io_wait)
extern LibError zip_io_discard(ZipIo* io);
extern LibError zip_io_validate(const ZipIo* io);
//
// synchronous read
//
// read from the (possibly compressed) file <zf> as if it were a normal file.
// starting at the beginning of the logical (decompressed) file,
// skip <ofs> bytes of data; read the next <size> bytes into <buf>.
//
// if non-NULL, <cb> is called for each block read, passing <ctx>.
// if it returns a negative error code,
// the read is aborted and that value is returned.
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return bytes read, or a negative error code.
extern ssize_t zip_read(ZFile* zf, off_t ofs, size_t size, void* buf, FileIOCB cb = 0, uintptr_t ctx = 0);
//
// memory mapping
//
// useful for files that are too large to be loaded into memory,
// or if only (non-sequential) portions of a file are needed at a time.
//
// this is of course only possible for uncompressed files - compressed files
// would have to be inflated sequentially, which defeats the point of mapping.
// map the entire file <zf> into memory. mapping compressed files
// isn't allowed, since the compression algorithm is unspecified.
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should still be paired so that the archive mapping
// may be removed when no longer needed.
extern LibError zip_map(ZFile* zf, void*& p, size_t& size);
// remove the mapping of file <zf>; fail if not mapped.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should be paired so that the archive mapping
// may be removed when no longer needed.
extern LibError zip_unmap(ZFile* zf);
extern void zip_fixup_lfh(File* f, ArchiveEntry* ent);
#endif // #ifndef ZIP_H__