
These files were mistakenly copied from lib/res/file to lib/file; that revision can't be reverted, so delete them.

This was SVN commit r5440.
janwas 2007-11-10 13:29:54 +00:00
parent 267eac33b5
commit e6dd9f1f1b
37 changed files with 0 additions and 12234 deletions


@@ -1,717 +0,0 @@
/**
* =========================================================================
* File : archive.cpp
* Project : 0 A.D.
* Description : provide access to archive "resources". allows
* : opening, reading from, and creating them.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "archive.h"
#include "lib/timer.h"
#include "lib/allocators.h"
#include "lib/res/res.h"
#include "../file_internal.h"
#include <boost/shared_ptr.hpp>
// components:
// - za_*: Zip archive handling
// passes the list of files in an archive to lookup.
// - lookup_*: file lookup
// per archive: return file info (e.g. offset, size), given filename.
// - Archive_*: Handle-based container for archive info
// owns archive file and its lookup mechanism.
// - inf_*: in-memory inflate routines (zlib wrapper)
// decompresses blocks from file_io callback.
// - afile_*: file from Zip archive
// uses lookup to get file information; holds inflate state.
// - sync and async I/O
// uses file_* and inf_*.
// - file mapping
ERROR_ASSOCIATE(ERR::IS_COMPRESSED, "Invalid operation for a compressed file", -1);
///////////////////////////////////////////////////////////////////////////////
//
// lookup_*: file lookup
// per archive: return file info (e.g. offset, size), given filename.
//
///////////////////////////////////////////////////////////////////////////////
// rationale:
// - we don't export a "key" (currently array index) that would allow faster
// file lookup. this would only be useful if higher-level code were to
// store the key and use it more than once. also, lookup is currently fast
// enough. finally, this would also make our file enumerate callback
// incompatible with the others (due to the extra key param).
//
// - we don't bother with a directory tree to speed up lookup. the above
// is fast enough: O(1) if accessed sequentially, otherwise O(log(files)).
///////////////////////////////////////////////////////////////////////////////
//
// Archive_*: Handle-based container for archive info
// owns archive file and its lookup mechanism.
//
///////////////////////////////////////////////////////////////////////////////
struct Archive
{
File f;
ArchiveEntry* ents;
// number of valid entries in above array (see lookup_add_file_cb)
uint num_files;
// note: we need to keep track of what resources reload() allocated,
// so the dtor can free everything correctly.
uint is_open : 1;
uint is_loaded : 1;
};
H_TYPE_DEFINE(Archive);
static void Archive_init(Archive*, va_list)
{
}
static void Archive_dtor(Archive* a)
{
if(a->is_loaded)
{
(void)mem_free(a->ents);
a->is_loaded = 0;
}
if(a->is_open)
{
(void)file_close(&a->f);
a->is_open = 0;
}
}
static LibError Archive_reload(Archive* a, const char* fn, Handle)
{
// must be enabled in archive files for efficiency (see decl).
// note that afile_read overrides archive file flags for
// uncompressed IOs, but this flag is re-added there.
const uint flags = FILE_CACHE_BLOCK;
// (note: don't warn on failure - this happens when
// vfs_mount blindly archive_open-s a dir)
RETURN_ERR(file_open(fn, flags, &a->f));
a->is_open = 1;
RETURN_ERR(zip_populate_archive(&a->f, a));
a->is_loaded = 1;
return INFO::OK;
}
static LibError Archive_validate(const Archive* a)
{
RETURN_ERR(file_validate(&a->f));
if(debug_is_pointer_bogus(a->ents))
WARN_RETURN(ERR::_1);
return INFO::OK;
}
static LibError Archive_to_string(const Archive* a, char* buf)
{
snprintf(buf, H_STRING_LEN, "(%u files)", a->num_files);
return INFO::OK;
}
// open and return a handle to the archive indicated by <fn>.
// somewhat slow - each file is added to an internal index.
Handle archive_open(const char* fn)
{
TIMER("archive_open");
// note: must not keep the archive open. the archive builder asks
// vfs_mount to back away from all archives and close them,
// which must happen immediately or else deleting archives will fail.
return h_alloc(H_Archive, fn, RES_NO_CACHE);
}
// close the archive <ha> and set ha to 0
LibError archive_close(Handle& ha)
{
return h_free(ha, H_Archive);
}
// look up ArchiveEntry, given filename (untrusted!).
static LibError archive_get_file_info(Archive* a, const char* fn, uintptr_t memento, ArchiveEntry*& ent)
{
if(memento)
{
ent = (ArchiveEntry*)memento;
return INFO::OK;
}
else
{
const char* atom_fn = file_make_unique_fn_copy(fn);
for(uint i = 0; i < a->num_files; i++)
if(a->ents[i].atom_fn == atom_fn)
{
ent = &a->ents[i];
return INFO::OK;
}
}
WARN_RETURN(ERR::TNODE_NOT_FOUND);
}
// successively call <cb> for each valid file in the archive <ha>,
// passing the complete path and <user>.
// if it returns a nonzero value, abort and return that, otherwise 0.
//
// FileCB's name parameter will be the full path and unique
// (i.e. returned by file_make_unique_fn_copy).
LibError archive_enum(const Handle ha, const FileCB cb, const uintptr_t user)
{
H_DEREF(ha, Archive, a);
struct stat s;
memset(&s, 0, sizeof(s));
for(uint i = 0; i < a->num_files; i++)
{
const ArchiveEntry* ent = &a->ents[i];
s.st_mode = S_IFREG;
s.st_size = (off_t)ent->usize;
s.st_mtime = ent->mtime;
const uintptr_t memento = (uintptr_t)ent;
LibError ret = cb(ent->atom_fn, &s, memento, user);
if(ret != INFO::CB_CONTINUE)
return ret;
}
return INFO::OK;
}
LibError archive_allocate_entries(Archive* a, size_t num_entries)
{
debug_assert(num_entries != 0); // =0 makes no sense but wouldn't be fatal
debug_assert(a->ents == 0); // must not have been allocated yet
a->ents = (ArchiveEntry*)mem_alloc(num_entries * sizeof(ArchiveEntry), 32);
if(!a->ents)
WARN_RETURN(ERR::NO_MEM);
return INFO::OK;
}
// add file <fn> to the lookup data structure.
// called from za_enum_files in order (0 <= idx < num_entries).
// the first call notifies us of # entries, so we can allocate memory.
//
// note: ent is only valid during the callback! must be copied or saved.
LibError archive_add_file(Archive* a, const ArchiveEntry* ent)
{
a->ents[a->num_files++] = *ent;
return INFO::OK;
}
///////////////////////////////////////////////////////////////////////////////
//
// afile_*: file from Zip archive
// uses lookup to get file information; holds inflate state.
//
///////////////////////////////////////////////////////////////////////////////
struct ArchiveFile
{
off_t ofs; // in archive
off_t csize;
CompressionMethod method;
u32 checksum;
off_t last_cofs; // in compressed file
Handle ha;
uintptr_t ctx;
// this File has been successfully afile_map-ped, i.e. reference
// count of the archive's mapping has been increased.
// we need to undo that when closing it.
uint is_mapped : 1;
};
cassert(sizeof(ArchiveFile) <= FILE_OPAQUE_SIZE);
// convenience function, allows implementation change in File.
// note that size == usize isn't foolproof, and adding a flag to
// ofs or size is ugly and error-prone.
// no error checking - always called from functions that check af.
static inline bool is_compressed(ArchiveFile* af)
{
return af->method != CM_NONE;
}
// get file status (size, mtime). output param is zeroed on error.
LibError afile_stat(Handle ha, const char* fn, struct stat* s)
{
// zero output param in case we fail below.
memset(s, 0, sizeof(struct stat));
H_DEREF(ha, Archive, a);
ArchiveEntry* ent;
RETURN_ERR(archive_get_file_info(a, fn, 0, ent));
s->st_size = ent->usize;
s->st_mtime = ent->mtime;
return INFO::OK;
}
LibError afile_validate(const File* f)
{
if(!f)
WARN_RETURN(ERR::INVALID_PARAM);
const ArchiveFile* af = (const ArchiveFile*)f->opaque;
UNUSED2(af);
// note: don't check af->ha - it may be freed at shutdown before
// its files. TODO: revisit once dependency support is added.
if(!f->size)
WARN_RETURN(ERR::_1);
// note: af->ctx is 0 if file is not compressed.
return INFO::OK;
}
#define CHECK_AFILE(f) RETURN_ERR(afile_validate(f))
// open file, and fill *af with information about it.
// return < 0 on error (output param zeroed).
LibError afile_open(const Handle ha, const char* fn, uintptr_t memento, uint flags, File* f)
{
// zero output param in case we fail below.
memset(f, 0, sizeof(*f));
if(flags & FILE_WRITE)
WARN_RETURN(ERR::IS_COMPRESSED);
H_DEREF(ha, Archive, a);
// this is needed for File below. optimization: archive_get_file_info
// wants the original filename, but by passing the unique copy
// we avoid work there (its file_make_unique_fn_copy returns immediately)
const char* atom_fn = file_make_unique_fn_copy(fn);
ArchiveEntry* ent;
// don't want File to contain an ArchiveEntry struct -
// its usize member must be 'loose' for compatibility with File.
// => need to copy ArchiveEntry fields into File.
RETURN_ERR(archive_get_file_info(a, atom_fn, memento, ent));
zip_fixup_lfh(&a->f, ent);
uintptr_t ctx = 0;
// slight optimization: do not allocate context if not compressed
if(ent->method != CM_NONE)
{
ctx = comp_alloc(CT_DECOMPRESSION, ent->method);
if(!ctx)
WARN_RETURN(ERR::NO_MEM);
}
f->flags = flags;
f->size = ent->usize;
f->atom_fn = atom_fn;
ArchiveFile* af = (ArchiveFile*)f->opaque;
af->ofs = ent->ofs;
af->csize = ent->csize;
af->method = ent->method;
af->checksum = ent->checksum;
af->ha = ha;
af->ctx = ctx;
af->is_mapped = 0;
CHECK_AFILE(f);
return INFO::OK;
}
// close file.
LibError afile_close(File* f)
{
CHECK_AFILE(f);
ArchiveFile* af = (ArchiveFile*)f->opaque;
// other File fields don't need to be freed/cleared
comp_free(af->ctx);
af->ctx = 0;
return INFO::OK;
}
///////////////////////////////////////////////////////////////////////////////
//
// sync and async I/O
// uses file_* and inf_*.
//
///////////////////////////////////////////////////////////////////////////////
struct ArchiveFileIo
{
// note: this cannot be embedded into the struct due to the FileIo
// interface (fixed size limit and type field).
// it is passed by afile_read to file_io, so we'll have to allocate
// and point to it.
FileIo* io;
uintptr_t ctx;
size_t max_output_size;
u8* user_buf;
};
cassert(sizeof(ArchiveFileIo) <= FILE_IO_OPAQUE_SIZE);
static const size_t CHUNK_SIZE = 16*KiB;
static SingleAllocator<FileIo> io_allocator;
// begin transferring <size> bytes, starting at <ofs>. get result
// with afile_io_wait; when no longer needed, free via afile_io_discard.
LibError afile_io_issue(File* f, off_t user_ofs, size_t max_output_size, u8* user_buf, FileIo* io)
{
// zero output param in case we fail below.
memset(io, 0, sizeof(FileIo));
CHECK_AFILE(f);
ArchiveFile* af = (ArchiveFile*)f->opaque;
H_DEREF(af->ha, Archive, a);
ArchiveFileIo* aio = (ArchiveFileIo*)io->opaque;
aio->io = io_allocator.alloc();
if(!aio->io)
WARN_RETURN(ERR::NO_MEM);
// not compressed; we'll just read directly from the archive file.
// no need to clamp to EOF - that's done already by the VFS.
if(!is_compressed(af))
{
// aio->ctx is 0 (due to memset)
const off_t ofs = af->ofs+user_ofs;
return file_io_issue(&a->f, ofs, max_output_size, user_buf, aio->io);
}
aio->ctx = af->ctx;
aio->max_output_size = max_output_size;
aio->user_buf = user_buf;
const off_t cofs = af->ofs + af->last_cofs; // needed to determine csize
// read up to next chunk (so that the next read is aligned -
// less work for aio) or up to EOF.
const ssize_t left_in_chunk = CHUNK_SIZE - (cofs % CHUNK_SIZE);
const ssize_t left_in_file = af->csize - af->last_cofs; // both are relative to the member's start
const size_t csize = std::min(left_in_chunk, left_in_file);
u8* cbuf = (u8*)mem_alloc(csize, 4*KiB);
if(!cbuf)
WARN_RETURN(ERR::NO_MEM);
RETURN_ERR(file_io_issue(&a->f, cofs, csize, cbuf, aio->io));
af->last_cofs += (off_t)csize;
return INFO::OK;
}
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
int afile_io_has_completed(FileIo* io)
{
ArchiveFileIo* aio = (ArchiveFileIo*)io->opaque;
return file_io_has_completed(aio->io);
}
// wait until the transfer <io> completes, and return its buffer.
// output parameters are zeroed on error.
LibError afile_io_wait(FileIo* io, u8*& buf, size_t& size)
{
buf = 0;
size = 0;
ArchiveFileIo* aio = (ArchiveFileIo*)io->opaque;
u8* raw_buf;
size_t raw_size;
RETURN_ERR(file_io_wait(aio->io, raw_buf, raw_size));
// file is compressed and we need to decompress
if(aio->ctx)
{
comp_set_output(aio->ctx, aio->user_buf, aio->max_output_size);
const ssize_t ubytes_output = comp_feed(aio->ctx, raw_buf, raw_size);
free(raw_buf);
RETURN_ERR(ubytes_output);
buf = aio->user_buf;
size = ubytes_output;
}
else
{
buf = raw_buf;
size = raw_size;
}
return INFO::OK;
}
// finished with transfer <io> - free its buffer (returned by afile_io_wait)
LibError afile_io_discard(FileIo* io)
{
ArchiveFileIo* aio = (ArchiveFileIo*)io->opaque;
LibError ret = file_io_discard(aio->io);
io_allocator.release(aio->io);
return ret;
}
LibError afile_io_validate(const FileIo* io)
{
ArchiveFileIo* aio = (ArchiveFileIo*)io->opaque;
if(debug_is_pointer_bogus(aio->user_buf))
WARN_RETURN(ERR::_1);
// <ctx> and <max_output_size> have no invariants we could check.
RETURN_ERR(file_io_validate(aio->io));
return INFO::OK;
}
//-----------------------------------------------------------------------------
class Decompressor
{
public:
Decompressor(uintptr_t ctx, FileIOBuf* pbuf, size_t usizeMax, FileIOCB cb, uintptr_t cbData)
: m_ctx(ctx)
, m_udataSize(usizeMax), m_csizeTotal(0), m_usizeTotal(0)
, m_cb(cb), m_cbData(cbData)
{
debug_assert(m_ctx != 0);
if(pbuf == FILE_BUF_TEMP)
{
m_tmpBuf.reset((u8*)page_aligned_alloc(m_udataSize), PageAlignedDeleter(m_udataSize));
m_udata = m_tmpBuf.get();
}
else
m_udata = (u8*)*pbuf; // WARNING: FileIOBuf is nominally const; if that's ever enforced, this may need to change.
}
LibError Feed(const u8* cblock, size_t cblockSize, size_t* bytes_processed)
{
// when decompressing into the temp buffer, always start at ofs=0.
const size_t ofs = m_tmpBuf.get()? 0 : m_usizeTotal;
u8* const ublock = m_udata + ofs;
comp_set_output(m_ctx, ublock, m_udataSize-ofs);
const size_t ublockSize = comp_feed(m_ctx, cblock, cblockSize);
m_csizeTotal += cblockSize;
m_usizeTotal += ublockSize;
debug_assert(m_usizeTotal <= m_udataSize);
*bytes_processed = ublockSize;
LibError ret = INFO::CB_CONTINUE;
if(m_cb)
ret = m_cb(m_cbData, ublock, ublockSize, bytes_processed);
if(m_usizeTotal == m_udataSize)
ret = INFO::OK;
return ret;
}
LibError Finish(u32& checksum)
{
u8* out; size_t outSize; // unused
return comp_finish(m_ctx, &out, &outSize, &checksum);
}
size_t NumCompressedBytesProcessed() const
{
return m_csizeTotal;
}
private:
uintptr_t m_ctx;
size_t m_csizeTotal;
size_t m_usizeTotal;
u8* m_udata;
size_t m_udataSize;
boost::shared_ptr<u8> m_tmpBuf;
// allow user-specified callbacks: "chain" them, because file_io's
// callback mechanism is already used to return blocks.
FileIOCB m_cb;
uintptr_t m_cbData;
};
static LibError decompressor_feed_cb(uintptr_t cbData,
const u8* cblock, size_t cblockSize, size_t* bytes_processed)
{
Decompressor& decompressor = *(Decompressor*)cbData;
return decompressor.Feed(cblock, cblockSize, bytes_processed);
}
// read from the (possibly compressed) file <af> as if it were a normal file.
// starting at the beginning of the logical (decompressed) file,
// skip <ofs> bytes of data; read the next <size> bytes into <*pbuf>.
//
// if non-NULL, <cb> is called for each block read, passing <ctx>.
// if it returns a negative error code,
// the read is aborted and that value is returned.
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return bytes read, or a negative error code.
ssize_t afile_read(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t cbData)
{
CHECK_AFILE(f);
ArchiveFile* af = (ArchiveFile*)f->opaque;
H_DEREF(af->ha, Archive, a);
if(!is_compressed(af))
{
// HACK
// background: file_io will operate according to the
// *archive* file's flags, but the File may contain some overrides
// set via vfs_open. one example is FILE_LONG_LIVED -
// that must be copied over (temporarily) into a->f flags.
//
// we currently copy all flags - this may mean that setting
// global policy flags for all archive files is difficult,
// but that can be worked around by setting them in afile_open.
// this is better than the alternative of copying individual
// flags because it'd need to be updated as new flags are added.
a->f.flags = f->flags;
// this was set in Archive_reload and must be re-enabled for efficiency.
a->f.flags |= FILE_CACHE_BLOCK;
bool we_allocated = (pbuf != FILE_BUF_TEMP) && (*pbuf == FILE_BUF_ALLOC);
// no need to set last_cofs - only checked if compressed.
ssize_t bytes_read = file_io(&a->f, af->ofs+ofs, size, pbuf, cb, cbData);
RETURN_ERR(bytes_read);
if(we_allocated)
(void)file_buf_set_real_fn(*pbuf, f->atom_fn);
return bytes_read;
}
RETURN_ERR(file_io_get_buf(pbuf, size, f->atom_fn, f->flags, cb));
const off_t cofs = af->ofs+af->last_cofs;
// remaining bytes in file. callback will cause IOs to stop when
// enough udata has been produced.
const size_t csize_max = af->csize - af->last_cofs;
Decompressor decompressor(af->ctx, pbuf, size, cb, cbData);
const ssize_t usize_read = file_io(&a->f, cofs, csize_max, FILE_BUF_TEMP, decompressor_feed_cb, (uintptr_t)&decompressor);
u32 checksum;
RETURN_ERR(decompressor.Finish(checksum));
//debug_assert(checksum == af->checksum);
af->last_cofs += (off_t)decompressor.NumCompressedBytesProcessed();
return usize_read;
}
///////////////////////////////////////////////////////////////////////////////
//
// file mapping
//
///////////////////////////////////////////////////////////////////////////////
// map the entire file <af> into memory. mapping compressed files
// isn't allowed, since the compression algorithm is unspecified.
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
LibError afile_map(File* f, u8*& p, size_t& size)
{
p = 0;
size = 0;
CHECK_AFILE(f);
ArchiveFile* af = (ArchiveFile*)f->opaque;
// mapping compressed files doesn't make sense because the
// compression algorithm is unspecified - disallow it.
if(is_compressed(af))
WARN_RETURN(ERR::IS_COMPRESSED);
// note: we mapped the archive in archive_open, but unmapped it
// in the meantime to save memory in case it wasn't going to be mapped.
// now we do so again; it's unmapped in afile_unmap (refcounted).
H_DEREF(af->ha, Archive, a);
u8* archive_p; size_t archive_size;
RETURN_ERR(file_map(&a->f, archive_p, archive_size));
p = archive_p + af->ofs;
size = f->size;
af->is_mapped = 1;
return INFO::OK;
}
// remove the mapping of file <af>; fail if not mapped.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should be paired so that the archive mapping
// may be removed when no longer needed.
LibError afile_unmap(File* f)
{
CHECK_AFILE(f);
ArchiveFile* af = (ArchiveFile*)f->opaque;
// make sure archive mapping refcount remains balanced:
// don't allow multiple or spurious unmaps.
if(!af->is_mapped)
WARN_RETURN(ERR::FILE_NOT_MAPPED);
af->is_mapped = 0;
H_DEREF(af->ha, Archive, a);
return file_unmap(&a->f);
}
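
For orientation, a minimal usage sketch of the reader API implemented above (not part of the deleted file; error handling is abbreviated, and file/VFS initialization is assumed to have happened already):

static LibError ReadOneMember(const char* archive_fn, const char* member_fn)
{
	// builds the in-memory index of all entries (somewhat slow - see archive_open)
	Handle ha = archive_open(archive_fn);
	if(ha <= 0)
		return (LibError)ha;
	File f;
	LibError err = afile_open(ha, member_fn, 0, 0, &f);
	if(err < 0)
	{
		(void)archive_close(ha);
		return err;
	}
	FileIOBuf buf = FILE_BUF_ALLOC;	// let the file layer allocate the buffer
	const ssize_t bytes_read = afile_read(&f, 0, f.size, &buf);
	// ... use the bytes_read bytes at <buf> ...
	(void)file_buf_free(buf);
	(void)afile_close(&f);
	LibError ret = archive_close(ha);	// also invalidates ha
	return (bytes_read < 0)? (LibError)bytes_read : ret;
}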


@@ -1,214 +0,0 @@
/**
* =========================================================================
* File : archive.h
* Project : 0 A.D.
* Description : provide access to archive "resources". allows
* : opening, reading from, and creating them.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_ARCHIVE
#define INCLUDED_ARCHIVE
#include "lib/res/handle.h"
#include "../file.h" // FileCB for afile_enum
#include "compression.h" // CompressionMethod
namespace ERR
{
const LibError IS_COMPRESSED = -110400;
}
// note: filenames are case-insensitive.
//
// archive
//
// open and return a handle to the archive indicated by <fn>.
// somewhat slow - each file is added to an internal index.
extern Handle archive_open(const char* fn);
// close the archive <ha> and set ha to 0
extern LibError archive_close(Handle& ha);
// successively call <cb> for each valid file in the archive <ha>,
// passing the complete path and <user>.
// if it returns a nonzero value, abort and return that, otherwise 0.
//
// FileCB's name parameter will be the full path and unique
// (i.e. returned by file_make_unique_fn_copy).
extern LibError archive_enum(const Handle ha, const FileCB cb, const uintptr_t user);
//
// file
//
// get file status (size, mtime). output param is zeroed on error.
extern LibError afile_stat(Handle ha, const char* fn, struct stat* s);
// open file, and fill *f with information about it.
// return < 0 on error (output param zeroed).
extern LibError afile_open(Handle ha, const char* fn, uintptr_t memento, uint flags, File* f);
// close file.
extern LibError afile_close(File* f);
extern LibError afile_validate(const File* f);
extern LibError afile_open_vfs(const char* fn, uint flags, File* f, TFile* tf);
//
// asynchronous read
//
// begin transferring <size> bytes, starting at <ofs>. get result
// with afile_io_wait; when no longer needed, free via afile_io_discard.
extern LibError afile_io_issue(File* f, off_t ofs, size_t size, u8* buf, FileIo* io);
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
extern int afile_io_has_completed(FileIo* io);
// wait until the transfer <io> completes, and return its buffer.
// output parameters are zeroed on error.
extern LibError afile_io_wait(FileIo* io, u8*& p, size_t& size);
// finished with transfer <io> - free its buffer (returned by afile_io_wait)
extern LibError afile_io_discard(FileIo* io);
extern LibError afile_io_validate(const FileIo* io);
//
// synchronous read
//
// read from the (possibly compressed) file <f> as if it were a normal file.
// starting at the beginning of the logical (decompressed) file,
// skip <ofs> bytes of data; read the next <size> bytes into <*pbuf>.
//
// if non-NULL, <cb> is called for each block read, passing <ctx>.
// if it returns a negative error code,
// the read is aborted and that value is returned.
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return bytes read, or a negative error code.
extern ssize_t afile_read(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb = 0, uintptr_t ctx = 0);
//
// memory mapping
//
// useful for files that are too large to be loaded into memory,
// or if only (non-sequential) portions of a file are needed at a time.
//
// this is of course only possible for uncompressed files - compressed files
// would have to be inflated sequentially, which defeats the point of mapping.
// map the entire file <f> into memory. mapping compressed files
// isn't allowed, since the compression algorithm is unspecified.
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should still be paired so that the archive mapping
// may be removed when no longer needed.
extern LibError afile_map(File* f, u8*& p, size_t& size);
// remove the mapping of file <f>; fail if not mapped.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should be paired so that the archive mapping
// may be removed when no longer needed.
extern LibError afile_unmap(File* f);
//
// interface for backends
//
// the archive-specific backends call back here for each file;
// this module provides storage for the file table.
enum ArchiveFileFlags
{
// indicates ArchiveEntry.ofs points to a "local file header"
// instead of the file data. a fixup routine is called upon
// file open; it skips past LFH and clears this flag.
// this is somewhat of a hack, but vital to archive open
// performance. without it, we'd have to scan through the
// entire Zip file, which can take *seconds*.
// (we cannot use the information in CDFH, because its 'extra' field
// has been observed to differ from that of the LFH)
// by reading LFH when a file in archive is opened, the block cache
// absorbs the IO cost because the file will likely be read anyway.
ZIP_LFH_FIXUP_NEEDED = 1
};
// holds all per-file information extracted from the header.
// this is intended to work for all archive types.
//
// note: File* (state of a currently open file) is separate because
// some of its fields need not be stored here; we'd like to minimize
// size of the file table.
struct ArchiveEntry
{
// these are returned by afile_stat:
off_t usize;
time_t mtime;
// used in IO
off_t ofs;
off_t csize;
CompressionMethod method;
u32 checksum;
uint flags; // ArchiveFileFlags
const char* atom_fn;
// why csize?
// file I/O may be N-buffered, so it's good to know when the raw data
// stops, or else we potentially overshoot by N-1 blocks.
// if we do read too much though, nothing breaks - inflate would just
// ignore it, since Zip files are compressed individually.
//
// we also need a way to check if a file is compressed (e.g. to fail
// mmap requests if the file is compressed). packing a bit in ofs or
// usize is error prone and ugly (1 bit less won't hurt though).
// any other way will mess up the nice 2^n byte size anyway, so
// might as well store csize.
};
// successively called for each valid file in the archive,
// passing the complete path and <user>.
// return INFO::CB_CONTINUE to continue calling; anything else will cause
// the caller to abort and immediately return that value.
//
// HACK: call back with negative index the first time; its abs. value is
// the number of entries in the archive. lookup needs to know this so it can
// preallocate memory. having lookup_init call z_get_num_files and then
// za_enum_files would require passing around a ZipInfo struct, or searching
// for the ECDR twice - both ways aren't nice. nor is expanding on demand -
// we try to minimize allocations (faster, less fragmentation).
// fn (filename) is not necessarily 0-terminated!
// loc is only valid during the callback! must be copied or saved.
typedef LibError (*CDFH_CB)(uintptr_t user, i32 i, const ArchiveEntry* loc, size_t fn_len);
struct Archive;
extern LibError archive_allocate_entries(Archive* a, size_t num_entries);
extern LibError archive_add_file(Archive* a, const ArchiveEntry* ent);
#endif // #ifndef INCLUDED_ARCHIVE
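
As an illustration of archive_enum's contract (an addition, not part of the original header): the callback signature below is inferred from the call site in archive.cpp - name, stat buffer, memento, user data - so verify it against FileCB in file.h before relying on it.

// hypothetical callback: tally the number of files and total uncompressed bytes.
struct EnumTotals { size_t num_files; off_t total_usize; };
static LibError TallyCB(const char* atom_fn, const struct stat* s, uintptr_t memento, uintptr_t cbData)
{
	(void)atom_fn; (void)memento;	// unused here
	EnumTotals* totals = (EnumTotals*)cbData;
	totals->num_files++;
	totals->total_usize += s->st_size;
	return INFO::CB_CONTINUE;	// any other value aborts archive_enum and is returned
}
// usage: EnumTotals t = {0, 0}; (void)archive_enum(ha, TallyCB, (uintptr_t)&t);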


@@ -1,289 +0,0 @@
/**
* =========================================================================
* File : archive_builder.cpp
* Project : 0 A.D.
* Description :
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "archive_builder.h"
#include "lib/timer.h"
#include "../file_internal.h"
// un-nice dependencies:
#include "ps/Loader.h"
// vfs_load callback that compresses the data in parallel with IO
// (for incompressible files, we just calculate the checksum)
class Compressor
{
public:
Compressor(uintptr_t ctx, const char* atom_fn, size_t usize)
: m_ctx(ctx)
, m_usize(usize)
, m_skipCompression(IsFileTypeIncompressible(atom_fn))
, m_cdata(0), m_csize(0), m_checksum(0)
{
comp_reset(m_ctx);
m_csizeBound = comp_max_output_size(m_ctx, usize);
THROW_ERR(comp_alloc_output(m_ctx, m_csizeBound));
}
LibError Feed(const u8* ublock, size_t ublockSize, size_t* bytes_processed)
{
// comp_feed already makes note of total #bytes fed, and we need
// vfs_io to return the usize (to check if all data was read).
*bytes_processed = ublockSize;
if(m_skipCompression)
{
// (since comp_finish returns the checksum, we only need to update this
// when not compressing.)
m_checksum = comp_update_checksum(m_ctx, m_checksum, ublock, ublockSize);
}
else
{
// note: we don't need the return value because comp_finish
// will tell us the total csize.
(void)comp_feed(m_ctx, ublock, ublockSize);
}
return INFO::CB_CONTINUE;
}
LibError Finish()
{
if(m_skipCompression)
return INFO::OK;
RETURN_ERR(comp_finish(m_ctx, &m_cdata, &m_csize, &m_checksum));
debug_assert(m_csize <= m_csizeBound);
return INFO::OK;
}
u32 Checksum() const
{
return m_checksum;
}
// final decision on whether to store the file as compressed,
// given the observed compressed/uncompressed sizes.
bool IsCompressionProfitable() const
{
// file is definitely incompressible.
if(m_skipCompression)
return false;
const float ratio = (float)m_usize / m_csize;
const ssize_t bytes_saved = (ssize_t)m_usize - (ssize_t)m_csize;
UNUSED2(bytes_saved);
// tiny - store compressed regardless of savings.
// rationale:
// - CPU cost is negligible and overlapped with IO anyway;
// - reading from compressed files uses less memory because we
// don't need to allocate space for padding in the final buffer.
if(m_usize < 512)
return true;
// large high-entropy file - store uncompressed.
// rationale:
// - any bigger than this and CPU time becomes a problem: it isn't
// necessarily hidden by IO time anymore.
if(m_usize >= 32*KiB && ratio < 1.02f)
return false;
// we currently store everything else compressed.
return true;
}
void GetOutput(const u8*& cdata, size_t& csize) const
{
debug_assert(!m_skipCompression);
debug_assert(m_cdata && m_csize);
cdata = m_cdata;
csize = m_csize;
// note: no need to free cdata - it is owned by the
// compression context and can be reused.
}
private:
static bool IsFileTypeIncompressible(const char* fn)
{
const char* ext = path_extension(fn);
// this is a selection of file types that are certainly not
// further compressible. we need not include every type under the sun -
// this is only a slight optimization that avoids wasting time
// compressing files. the real decision as to cmethod is made based
// on attained compression ratio.
static const char* incompressible_exts[] =
{
"zip", "rar",
"jpg", "jpeg", "png",
"ogg", "mp3"
};
for(uint i = 0; i < ARRAY_SIZE(incompressible_exts); i++)
{
if(!strcasecmp(ext+1, incompressible_exts[i]))
return true;
}
return false;
}
uintptr_t m_ctx;
size_t m_usize;
size_t m_csizeBound;
bool m_skipCompression;
u8* m_cdata;
size_t m_csize;
u32 m_checksum;
};
static LibError compressor_feed_cb(uintptr_t cbData,
const u8* ublock, size_t ublockSize, size_t* bytes_processed)
{
Compressor& compressor = *(Compressor*)cbData;
return compressor.Feed(ublock, ublockSize, bytes_processed);
}
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
ArchiveEntry& ent, const u8*& file_contents, FileIOBuf& buf) // out
{
struct stat s;
RETURN_ERR(vfs_stat(atom_fn, &s));
const size_t usize = s.st_size;
// skip 0-length files.
// rationale: zip.cpp needs to determine whether a CDFH entry is
// a file or directory (the latter are written by some programs but
// not needed - they'd only pollute the file table).
// it looks like checking for usize=csize=0 is the safest way -
// relying on file attributes (which are system-dependent!) is
// even less safe.
// we thus skip 0-length files to avoid confusing them with directories.
if(!usize)
return INFO::SKIPPED;
Compressor compressor(ctx, atom_fn, usize);
// read file into newly allocated buffer and run compressor.
size_t usize_read;
const uint flags = 0;
RETURN_ERR(vfs_load(atom_fn, buf, usize_read, flags, compressor_feed_cb, (uintptr_t)&compressor));
debug_assert(usize_read == usize);
LibError ret = compressor.Finish();
if(ret < 0)
{
file_buf_free(buf);
return ret;
}
// store file info
ent.usize = (off_t)usize;
ent.mtime = s.st_mtime;
// .. ent.ofs is set by zip_archive_add_file
ent.flags = 0;
ent.atom_fn = atom_fn;
ent.checksum = compressor.Checksum();
if(compressor.IsCompressionProfitable())
{
ent.method = CM_DEFLATE;
size_t csize;
compressor.GetOutput(file_contents, csize);
ent.csize = (off_t)csize;
}
else
{
ent.method = CM_NONE;
ent.csize = (off_t)usize;
file_contents = buf;
}
return INFO::OK;
}
//-----------------------------------------------------------------------------
LibError archive_build_init(const char* P_archive_filename, Filenames V_fns, ArchiveBuildState* ab)
{
RETURN_ERR(zip_archive_create(P_archive_filename, &ab->za));
ab->ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
ab->V_fns = V_fns;
// count number of files (needed to estimate progress)
for(ab->num_files = 0; ab->V_fns[ab->num_files]; ab->num_files++) {}
ab->i = 0;
return INFO::OK;
}
int archive_build_continue(ArchiveBuildState* ab)
{
const double end_time = get_time() + 200e-3;
for(;;)
{
const char* V_fn = ab->V_fns[ab->i];
if(!V_fn)
break;
ArchiveEntry ent; const u8* file_contents; FileIOBuf buf;
if(read_and_compress_file(V_fn, ab->ctx, ent, file_contents, buf) == INFO::OK)
{
(void)zip_archive_add_file(ab->za, &ent, file_contents);
(void)file_buf_free(buf);
}
ab->i++;
LDR_CHECK_TIMEOUT((int)ab->i, (int)ab->num_files);
}
// note: this is currently known to fail if there are no files in the list
// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
// safe to ignore.
comp_free(ab->ctx); ab->ctx = 0;
(void)zip_archive_finish(ab->za);
return INFO::OK;
}
void archive_build_cancel(ArchiveBuildState* ab)
{
// note: the GUI may call us even though no build was ever in progress.
// be sure to make all steps no-op if <ab> is zeroed (initial state) or
// no build is in progress.
comp_free(ab->ctx); ab->ctx = 0;
if(ab->za)
(void)zip_archive_finish(ab->za);
memset(ab, 0, sizeof(*ab));
}
LibError archive_build(const char* P_archive_filename, Filenames V_fns)
{
ArchiveBuildState ab;
RETURN_ERR(archive_build_init(P_archive_filename, V_fns, &ab));
for(;;)
{
int ret = archive_build_continue(&ab);
RETURN_ERR(ret);
if(ret == INFO::OK)
return INFO::OK;
}
}
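
Tying the above together, a sketch of the simple blocking entry point (the filenames and archive name here are made-up VFS paths):

static LibError BuildExampleArchive()
{
	// VFS filenames; the list must end with a NULL entry (see Filenames).
	const char* fns[] = { "art/example.dds", "audio/example.ogg", 0 };
	return archive_build("example-archive.zip", fns);
}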


@@ -1,43 +0,0 @@
/**
* =========================================================================
* File : archive_builder.h
* Project : 0 A.D.
* Description :
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_ARCHIVE_BUILDER
#define INCLUDED_ARCHIVE_BUILDER
// array of pointers to VFS filenames (including path), terminated by a
// NULL entry.
typedef const char** Filenames;
struct ZipArchive;
// rationale: this is fairly lightweight and simple, so we don't bother
// making it opaque.
struct ArchiveBuildState
{
ZipArchive* za;
uintptr_t ctx;
Filenames V_fns;
size_t num_files; // number of filenames in V_fns (excluding final 0)
size_t i;
};
extern LibError archive_build_init(const char* P_archive_filename, Filenames V_fns,
ArchiveBuildState* ab);
// create an archive (overwriting previous file) and fill it with the given
// files. compression method is chosen intelligently based on extension and
// file entropy / achieved compression ratio.
extern int archive_build_continue(ArchiveBuildState* ab);
extern void archive_build_cancel(ArchiveBuildState* ab);
extern LibError archive_build(const char* P_archive_filename, Filenames V_fns);
#endif // #ifndef INCLUDED_ARCHIVE_BUILDER
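
The init/continue/cancel split exists so a frontend can interleave building with rendering or input handling. A sketch of such a driving loop (everything outside this header is hypothetical); as archive_build's own loop shows, archive_build_continue returns INFO::OK when done, a negative LibError on failure, and anything else means "call again":

static void BuildWithProgress(const char* P_archive_filename, Filenames V_fns, volatile bool& abort_requested)
{
	ArchiveBuildState ab;
	if(archive_build_init(P_archive_filename, V_fns, &ab) < 0)
		return;
	for(;;)
	{
		if(abort_requested)	// e.g. set by a GUI button
		{
			archive_build_cancel(&ab);
			return;
		}
		const int ret = archive_build_continue(&ab);	// works for a while, then yields
		if(ret == INFO::OK || ret < 0)
			return;	// finished or failed
		// otherwise: timed out - update the progress display, then resume
	}
}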


@@ -1,693 +0,0 @@
/**
* =========================================================================
* File : compression.cpp
* Project : 0 A.D.
* Description : interface for compressing/decompressing data streams.
* : currently implements "deflate" (RFC1951).
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "compression.h"
#include <deque>
#include "lib/res/mem.h"
#include "lib/allocators.h"
#include "lib/timer.h"
#include "../file_io.h" // IO_EOF
#include <boost/shared_ptr.hpp>
#include <boost/checked_delete.hpp> // checked_array_deleter
// rationale: this layer allows for other compression methods/libraries
// besides ZLib. it also simplifies the interface for user code and
// does error checking, etc.
ERROR_ASSOCIATE(ERR::COMPRESSION_UNKNOWN_METHOD, "Unknown/unsupported compression method", -1);
// provision for removing all ZLib code (all inflate calls will fail).
// used for checking DLL dependency; might also simulate corrupt Zip files.
//#define NO_ZLIB
#ifndef NO_ZLIB
# include "lib/external_libraries/zlib.h"
#else
// several switch statements are going to have all cases removed.
// squelch the corresponding warning.
# pragma warning(disable: 4065)
#endif
TIMER_ADD_CLIENT(tc_zip_inflate);
TIMER_ADD_CLIENT(tc_zip_memcpy);
//-----------------------------------------------------------------------------
class ICodec
{
public:
/**
* note: the implementation should not check whether any data remains -
* codecs are sometimes destroyed without completing a transfer.
**/
virtual ~ICodec()
{
}
/**
* @return an upper bound on the output size for the given amount of input.
* this is used when allocating a single buffer for the whole operation.
**/
virtual size_t MaxOutputSize(size_t inSize) const = 0;
/**
* clear all previous state and prepare for reuse.
*
* this is as if the object were destroyed and re-created, but more
* efficient since memory buffers can be kept, etc.
**/
virtual LibError Reset() = 0;
/**
* process (i.e. compress or decompress) data.
*
* @param outSize bytes remaining in the output buffer; shall not be zero.
* @param inConsumed, outProduced how many bytes in the input and
* output buffers were used. either or both of these can be zero if
* the input size is small or there's not enough output space.
**/
virtual LibError Process(const u8* in, size_t inSize, u8* out, size_t outSize, size_t& inConsumed, size_t& outProduced) = 0;
/**
* flush buffers and make sure all output has been produced.
*
* @param out, outSize - the entire output buffer. this assumes the
* output pointers passed to Process were contiguous; if not, these
* values will not be meaningful.
* @param checksum over all input data.
* @return error status for the entire operation.
**/
virtual LibError Finish(u8*& out, size_t& outSize, u32& checksum) = 0;
/**
* update a checksum to reflect the contents of a buffer.
*
* @param checksum the initial value (must be 0 on first call)
* @return the new checksum.
**/
virtual u32 UpdateChecksum(u32 checksum, const u8* in, size_t inSize) const = 0;
};
//-----------------------------------------------------------------------------
#ifndef NO_ZLIB
class ZLibCodec : public ICodec
{
protected:
ZLibCodec()
{
memset(&m_zs, 0, sizeof(m_zs));
InitializeChecksum();
}
void InitializeChecksum()
{
m_checksum = crc32(0, 0, 0);
}
typedef int ZEXPORT (*ZLibFunc)(z_streamp strm, int flush);
static LibError LibError_from_zlib(int zlib_err, bool warn_if_failed = true)
{
LibError err = ERR::FAIL;
switch(zlib_err)
{
case Z_OK:
return INFO::OK;
case Z_STREAM_END:
err = ERR::IO_EOF; break;
case Z_MEM_ERROR:
err = ERR::NO_MEM; break;
case Z_DATA_ERROR:
err = ERR::CORRUPTED; break;
case Z_STREAM_ERROR:
err = ERR::INVALID_PARAM; break;
default:
err = ERR::FAIL; break;
}
if(warn_if_failed)
DEBUG_WARN_ERR(err);
return err;
}
static void WarnIfZLibError(int zlib_ret)
{
(void)LibError_from_zlib(zlib_ret, true);
}
LibError Process(ZLibFunc func, int flush, const u8* in, const size_t inSize, u8* out, const size_t outSize, size_t& inConsumed, size_t& outConsumed)
{
m_zs.next_in = (Byte*)in;
m_zs.avail_in = (uInt)inSize;
m_zs.next_out = (Byte*)out;
m_zs.avail_out = (uInt)outSize;
int ret = func(&m_zs, flush);
// sanity check: if ZLib reports end of stream, all input data
// must have been consumed.
if(ret == Z_STREAM_END)
{
debug_assert(m_zs.avail_in == 0);
ret = Z_OK;
}
debug_assert(inSize >= m_zs.avail_in && outSize >= m_zs.avail_out);
inConsumed = inSize - m_zs.avail_in;
outConsumed = outSize - m_zs.avail_out;
return LibError_from_zlib(ret);
}
virtual u32 UpdateChecksum(u32 checksum, const u8* in, size_t inSize) const
{
return (u32)crc32(checksum, in, (uInt)inSize);
}
mutable z_stream m_zs;
// note: z_stream does contain an 'adler' checksum field, but that's
// not updated in streams lacking a gzip header, so we'll have to
// calculate a checksum ourselves.
// adler32 is somewhat weaker than CRC32, but a more important argument
// is that we should use the latter for compatibility with Zip archives.
mutable u32 m_checksum;
};
class ZLibCompressor : public ZLibCodec
{
public:
ZLibCompressor()
{
// note: with Z_BEST_COMPRESSION, 78% of
// archive builder CPU time is spent in ZLib, even though
// that is interleaved with IO; everything else is negligible.
// we therefore enable this only in final builds; during
// development, 1.5% bigger archives are definitely worth much
// faster build time.
#if CONFIG_FINAL
const int level = Z_BEST_COMPRESSION;
#else
const int level = Z_BEST_SPEED;
#endif
const int windowBits = -MAX_WBITS; // max window size; omit ZLib header
const int memLevel = 9; // max speed; total mem ~= 384KiB
const int strategy = Z_DEFAULT_STRATEGY; // normal data - not RLE
const int ret = deflateInit2(&m_zs, level, Z_DEFLATED, windowBits, memLevel, strategy);
debug_assert(ret == Z_OK);
}
virtual ~ZLibCompressor()
{
const int ret = deflateEnd(&m_zs);
WarnIfZLibError(ret);
}
virtual size_t MaxOutputSize(size_t inSize) const
{
return (size_t)deflateBound(&m_zs, (uLong)inSize);
}
virtual LibError Reset()
{
ZLibCodec::InitializeChecksum();
const int ret = deflateReset(&m_zs);
return LibError_from_zlib(ret);
}
virtual LibError Process(const u8* in, size_t inSize, u8* out, size_t outSize, size_t& inConsumed, size_t& outConsumed)
{
m_checksum = UpdateChecksum(m_checksum, in, inSize);
return ZLibCodec::Process(deflate, 0, in, inSize, out, outSize, inConsumed, outConsumed);
}
virtual LibError Finish(u8*& out, size_t& outSize, u32& checksum)
{
// notify zlib that no more data is forthcoming and have it flush output.
// our output buffer has enough space due to use of deflateBound;
// therefore, deflate must return Z_STREAM_END.
const int ret = deflate(&m_zs, Z_FINISH);
if(ret != Z_STREAM_END)
debug_warn("deflate: unexpected Z_FINISH behavior");
out = m_zs.next_out - m_zs.total_out;
outSize = m_zs.total_out;
checksum = m_checksum;
return INFO::OK;
}
};
class ZLibDecompressor : public ZLibCodec
{
public:
ZLibDecompressor()
{
const int windowBits = -MAX_WBITS; // max window size; omit ZLib header
const int ret = inflateInit2(&m_zs, windowBits);
debug_assert(ret == Z_OK);
}
virtual ~ZLibDecompressor()
{
const int ret = inflateEnd(&m_zs);
WarnIfZLibError(ret);
}
virtual size_t MaxOutputSize(size_t inSize) const
{
// relying on an upper bound for the output is a really bad idea for
// large files. archive formats store the uncompressed file sizes,
// so callers should use that when allocating the output buffer.
debug_assert(inSize < 1*MiB);
// http://www.zlib.org/zlib_tech.html
return inSize*1032;
}
virtual LibError Reset()
{
ZLibCodec::InitializeChecksum();
const int ret = inflateReset(&m_zs);
return LibError_from_zlib(ret);
}
virtual LibError Process(const u8* in, size_t inSize, u8* out, size_t outSize, size_t& inConsumed, size_t& outConsumed)
{
const LibError ret = ZLibCodec::Process(inflate, Z_SYNC_FLUSH, in, inSize, out, outSize, inConsumed, outConsumed);
m_checksum = UpdateChecksum(m_checksum, in, inSize);
return ret;
}
virtual LibError Finish(u8*& out, size_t& outSize, u32& checksum)
{
// no action needed - decompression always flushes immediately.
out = m_zs.next_out - m_zs.total_out;
outSize = m_zs.total_out;
checksum = m_checksum;
return INFO::OK;
}
};
#endif // #ifndef NO_ZLIB
//-----------------------------------------------------------------------------
#include "lib/nommgr.h" // protect placement new
class CodecFactory
{
public:
ICodec* Create(ContextType type, CompressionMethod method)
{
debug_assert(type == CT_COMPRESSION || type == CT_DECOMPRESSION);
switch(method)
{
#ifndef NO_ZLIB
case CM_DEFLATE:
if(type == CT_COMPRESSION)
{
cassert(sizeof(ZLibCompressor) <= MAX_CODEC_SIZE);
return new(AllocateMemory()) ZLibCompressor;
}
else
{
cassert(sizeof(ZLibDecompressor) <= MAX_CODEC_SIZE);
return new(AllocateMemory()) ZLibDecompressor;
}
break;
#endif
default:
WARN_ERR(ERR::COMPRESSION_UNKNOWN_METHOD);
return 0;
}
}
void Destroy(ICodec* codec)
{
codec->~ICodec();
m_allocator.release((Allocator::value_type*)codec);
}
private:
void* AllocateMemory()
{
void* mem = m_allocator.alloc();
if(!mem)
throw std::bad_alloc();
return mem;
}
// double: see explanation in SingleAllocator
static const size_t MAX_CODEC_SIZE = 100;
typedef SingleAllocator<double[(MAX_CODEC_SIZE+sizeof(double)-1)/sizeof(double)]> Allocator;
Allocator m_allocator;
};
#include "lib/mmgr.h"
//-----------------------------------------------------------------------------
// BufferManager
class BufferManager
{
public:
void Enqueue(const u8* data, size_t size)
{
// note: calling with inSize = 0 is allowed and just means
// we don't enqueue a new buffer. it happens when compressing
// newly decompressed data if nothing was output (due to a
// small compressed input buffer).
if(size != 0)
m_pendingBuffers.push_back(Buffer(data, size));
}
bool GetNext(const u8*& data, size_t& size) const
{
if(m_pendingBuffers.empty())
return false;
const Buffer& buffer = m_pendingBuffers.front();
data = buffer.RemainingData();
size = buffer.RemainingSize();
return true;
}
void MarkAsProcessed(size_t numBytes)
{
Buffer& buffer = m_pendingBuffers.front();
buffer.MarkAsProcessed(numBytes);
if(buffer.RemainingSize() == 0)
m_pendingBuffers.pop_front();
}
void Reset()
{
m_pendingBuffers.clear();
}
private:
class Buffer
{
public:
Buffer(const u8* data, size_t size)
: m_data(data), m_size(size), m_pos(0)
{
}
const u8* RemainingData() const
{
return m_data + m_pos;
}
size_t RemainingSize() const
{
return m_size - m_pos;
}
void MarkAsProcessed(size_t numBytes)
{
m_pos += numBytes;
debug_assert(m_pos <= m_size);
// everything has been consumed. (this buffer will now be
// destroyed by removing it from the deque)
if(m_pos == m_size)
return;
// if there is any data left, the caller must have "choked"
// (i.e. filled their output buffer).
// this buffer currently references data allocated by the caller.
if(!m_copy.get())
{
// since we have to return and they could free it behind our
// back, we'll need to allocate a copy of the remaining data.
m_size = RemainingSize();
m_copy.reset(new u8[m_size], boost::checked_array_deleter<u8>()); // array delete, not scalar
cpu_memcpy(m_copy.get(), RemainingData(), m_size);
m_data = m_copy.get(); // must happen after cpu_memcpy
m_pos = 0;
}
}
private:
const u8* m_data;
size_t m_size;
size_t m_pos;
boost::shared_ptr<u8> m_copy;
};
// note: a 'list' (deque is more efficient) is necessary.
// lack of output space can result in leftover input data;
// since we do not want Feed() to always have to check for and
// use up any previous remnants, we allow queuing them.
std::deque<Buffer> m_pendingBuffers;
};
//-----------------------------------------------------------------------------
class Stream
{
public:
Stream(ContextType type, CompressionMethod method)
: m_out(0), m_outSize(0), m_outPos(0)
, m_codec(m_codecFactory.Create(type, method))
{
}
~Stream()
{
m_codecFactory.Destroy(m_codec);
}
size_t MaxOutputSize(size_t inSize) const
{
return m_codec->MaxOutputSize(inSize);
}
void Reset()
{
m_bufferManager.Reset();
m_out = 0;
m_outSize = 0;
m_outPos = 0;
m_codec->Reset();
}
void SetOutput(u8* out, size_t outSize)
{
debug_assert(IsAllowableOutputBuffer(out, outSize));
m_out = out;
m_outSize = outSize;
m_outPos = 0;
}
LibError AllocOutput(size_t size)
{
// notes:
// - this implementation allows reusing previous buffers if they
// are big enough, which reduces the number of allocations.
// - no further attempts to reduce allocations (e.g. by doubling
// the current size) are made; this strategy is enough.
// - Pool etc. cannot be used because files may be huge (larger
// than the address space of 32-bit systems).
// no buffer or the previous one wasn't big enough: reallocate
if(!m_outMem.get() || m_outMemSize < size)
{
m_outMem.reset((u8*)page_aligned_alloc(size), PageAlignedDeleter(size));
m_outMemSize = size;
}
SetOutput(m_outMem.get(), size);
return INFO::OK;
}
ssize_t Feed(const u8* in, size_t inSize)
{
size_t outTotal = 0; // returned unless error occurs
m_bufferManager.Enqueue(in, inSize);
// work off any pending buffers and the new one
const u8* cdata; size_t csize;
while(m_bufferManager.GetNext(cdata, csize))
{
if(m_outSize == m_outPos) // output buffer full; must not call Process
break;
size_t inConsumed, outProduced;
LibError err = m_codec->Process(cdata, csize, m_out+m_outPos, m_outSize-m_outPos, inConsumed, outProduced);
if(err < 0)
return err;
m_bufferManager.MarkAsProcessed(inConsumed);
outTotal += outProduced;
m_outPos += outProduced;
}
return (ssize_t)outTotal;
}
LibError Finish(u8*& out, size_t& outSize, u32& checksum)
{
return m_codec->Finish(out, outSize, checksum);
}
u32 UpdateChecksum(u32 checksum, const u8* in, size_t inSize) const
{
return m_codec->UpdateChecksum(checksum, in, inSize);
}
private:
// ICodec::Finish is allowed to assume that output buffers were identical
// or contiguous; we verify this here.
bool IsAllowableOutputBuffer(u8* out, size_t outSize)
{
// none yet established
if(m_out == 0 && m_outSize == 0 && m_outPos == 0)
return true;
// same as last time (happens with temp buffers)
if(m_out == out && m_outSize == outSize)
return true;
// located after the last buffer (note: not necessarily after
// the entire buffer; a lack of input can cause the output buffer
// to only partially be used before the next call.)
if((unsigned)(out - m_out) <= m_outSize)
return true;
return false;
}
BufferManager m_bufferManager;
u8* m_out;
size_t m_outSize;
size_t m_outPos;
boost::shared_ptr<u8> m_outMem;
size_t m_outMemSize;
static CodecFactory m_codecFactory;
ICodec* m_codec;
};
/*static*/ CodecFactory Stream::m_codecFactory;
//-----------------------------------------------------------------------------
#include "lib/nommgr.h" // protect placement new
class StreamFactory
{
public:
Stream* Create(ContextType type, CompressionMethod method)
{
void* mem = m_allocator.alloc();
if(!mem)
throw std::bad_alloc();
return new(mem) Stream(type, method);
}
void Destroy(Stream* stream)
{
stream->~Stream();
m_allocator.release(stream);
}
private:
SingleAllocator<Stream> m_allocator;
};
#include "lib/mmgr.h"
//-----------------------------------------------------------------------------
static StreamFactory streamFactory;
uintptr_t comp_alloc(ContextType type, CompressionMethod method)
{
Stream* stream = streamFactory.Create(type, method);
return (uintptr_t)stream;
}
void comp_free(uintptr_t ctx)
{
// no-op if context is 0 (i.e. was never allocated)
if(!ctx)
return;
Stream* stream = (Stream*)ctx;
streamFactory.Destroy(stream);
}
void comp_reset(uintptr_t ctx)
{
Stream* stream = (Stream*)ctx;
stream->Reset();
}
size_t comp_max_output_size(uintptr_t ctx, size_t inSize)
{
Stream* stream = (Stream*)ctx;
return stream->MaxOutputSize(inSize);
}
void comp_set_output(uintptr_t ctx, u8* out, size_t outSize)
{
Stream* stream = (Stream*)ctx;
stream->SetOutput(out, outSize);
}
LibError comp_alloc_output(uintptr_t ctx, size_t inSize)
{
Stream* stream = (Stream*)ctx;
return stream->AllocOutput(inSize);
}
ssize_t comp_feed(uintptr_t ctx, const u8* in, size_t inSize)
{
Stream* stream = (Stream*)ctx;
return stream->Feed(in, inSize);
}
LibError comp_finish(uintptr_t ctx, u8** out, size_t* outSize, u32* checksum)
{
Stream* stream = (Stream*)ctx;
return stream->Finish(*out, *outSize, *checksum);
}
u32 comp_update_checksum(uintptr_t ctx, u32 checksum, const u8* in, size_t inSize)
{
Stream* stream = (Stream*)ctx;
return stream->UpdateChecksum(checksum, in, inSize);
}
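
In summary, the calling convention exposed by the comp_* wrappers is: allocate a context, establish one contiguous output buffer, feed input in arbitrarily-sized chunks, then finish. A sketch for decompression (the helper itself is made up), assuming the uncompressed size is known beforehand, as it is for archive entries:

static LibError DecompressChunked(const u8* cdata, size_t csize, u8* udata, size_t usize)
{
	uintptr_t ctx = comp_alloc(CT_DECOMPRESSION, CM_DEFLATE);
	if(!ctx)
		WARN_RETURN(ERR::NO_MEM);
	comp_set_output(ctx, udata, usize);	// one contiguous buffer, as comp_finish requires
	const size_t chunkSize = 16*KiB;	// arbitrary; mirrors archive.cpp's CHUNK_SIZE
	LibError ret = INFO::OK;
	for(size_t ofs = 0; ofs < csize; ofs += chunkSize)
	{
		const ssize_t produced = comp_feed(ctx, cdata+ofs, std::min(chunkSize, csize-ofs));
		if(produced < 0)
		{
			ret = (LibError)produced;
			break;
		}
	}
	if(ret == INFO::OK)
	{
		u8* out; size_t outSize; u32 checksum;	// out/outSize will mirror udata/usize
		ret = comp_finish(ctx, &out, &outSize, &checksum);
	}
	comp_free(ctx);
	return ret;
}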


@@ -1,118 +0,0 @@
/**
* =========================================================================
* File : compression.h
* Project : 0 A.D.
* Description : interface for compressing/decompressing data streams.
* : currently implements "deflate" (RFC1951).
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_COMPRESSION
#define INCLUDED_COMPRESSION
namespace ERR
{
const LibError COMPRESSION_UNKNOWN_METHOD = -110300;
}
enum ContextType
{
CT_COMPRESSION,
CT_DECOMPRESSION
};
enum CompressionMethod
{
CM_NONE,
// zlib "deflate" (RFC 1951) and CRC32
CM_DEFLATE,
CM_UNSUPPORTED
};
/**
* allocate a new compression/decompression context.
**/
extern uintptr_t comp_alloc(ContextType type, CompressionMethod method);
/**
* free this context and all associated memory.
**/
extern void comp_free(uintptr_t ctx);
/**
* clear all previous state and prepare for reuse.
*
* this is as if the object were destroyed and re-created, but more
* efficient since it avoids reallocating a considerable amount of memory
* (about 200KB for LZ).
**/
extern void comp_reset(uintptr_t ctx);
/**
* @return an upper bound on the output size for the given amount of input.
* this is used when allocating a single buffer for the whole operation.
**/
extern size_t comp_max_output_size(uintptr_t ctx, size_t inSize);
/**
* set output buffer for subsequent comp_feed() calls.
*
* due to the comp_finish interface, output buffers must be contiguous or
* identical (otherwise IsAllowableOutputBuffer will complain).
**/
extern void comp_set_output(uintptr_t ctx, u8* out, size_t outSize);
/**
* allocate a new output buffer.
*
* @param size [bytes] to allocate.
*
* if a buffer had previously been allocated and is large enough, it is
* reused (this reduces the number of allocations). the buffer is
* automatically freed by comp_free.
**/
extern LibError comp_alloc_output(uintptr_t ctx, size_t inSize);
/**
* 'feed' the given buffer to the compressor/decompressor.
*
* @return number of output bytes produced or a negative LibError.
* note that 0 is a legitimate return value - this happens if the input
* buffer is small and the codec hasn't produced any output.
*
* note: after this call returns, the buffer may be overwritten or freed;
* we take care of copying and queuing any data that remains (e.g. due to
* lack of output buffer space).
**/
extern ssize_t comp_feed(uintptr_t ctx, const u8* in, size_t inSize);
/**
* conclude the compression/decompression operation.
*
* @param out, outSize receive the output buffer. this assumes identical or
* contiguous addresses were passed, which comp_set_output ensures.
* @param checksum
*
* note: this must always be called (even if the output buffer is already
* known) because it feeds any remaining queued input buffers.
**/
extern LibError comp_finish(uintptr_t ctx, u8** out, size_t* out_size, u32* checksum);
/**
* update a checksum to reflect the contents of a buffer.
*
* @param checksum the initial value (must be 0 on first call)
* @return the new checksum.
*
* note: this routine is stateless but still requires a context to establish
* the type of checksum to calculate. the results are the same as yielded by
* comp_finish after comp_feed-ing all input buffers.
**/
extern u32 comp_update_checksum(uintptr_t ctx, u32 checksum, const u8* in, size_t inSize);
#endif // #ifndef INCLUDED_COMPRESSION
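
One non-obvious use of this interface, which the archive builder relies on for incompressible files, is computing just the checksum without compressing anything. A sketch (the helper is made up; CM_DEFLATE contexts compute CRC32, per compression.cpp):

// use a compression context purely as a CRC32 calculator over several buffers.
static u32 ChecksumBuffers(const u8* const* bufs, const size_t* sizes, size_t numBufs)
{
	uintptr_t ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
	u32 checksum = 0;	// initial value must be 0 (see comp_update_checksum)
	for(size_t i = 0; i < numBufs; i++)
		checksum = comp_update_checksum(ctx, checksum, bufs[i], sizes[i]);
	comp_free(ctx);
	return checksum;
}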


@@ -1,146 +0,0 @@
#include "lib/self_test.h"
#include "lib/base32.h"
#include "lib/res/file/path.h"
#include "lib/res/file/file.h"
#include "lib/res/file/file_cache.h"
#include "lib/res/file/vfs.h"
#include "lib/res/file/archive/archive.h"
#include "lib/res/file/archive/archive_builder.h"
#include "lib/res/h_mgr.h"
#include "lib/res/mem.h"
#include "lib/rand.h"
class TestArchiveBuilder : public CxxTest::TestSuite
{
const char* const archive_fn;
static const size_t NUM_FILES = 30;
static const size_t MAX_FILE_SIZE = 20000;
std::set<const char*> existing_names;
const char* gen_random_name()
{
// 10 chars is enough for (10-1)*5 bits = 45 bits > u32
char name_tmp[10];
for(;;)
{
u32 rand_num = rand(0, 100000);
base32(4, (const u8*)&rand_num, (u8*)name_tmp);
// store filename in atom pool
const char* atom_fn = file_make_unique_fn_copy(name_tmp);
// done if the filename is unique (not been generated yet)
if(existing_names.find(atom_fn) == existing_names.end())
{
existing_names.insert(atom_fn);
return atom_fn;
}
}
}
struct TestFile
{
off_t size;
u8* data; // must be delete[]-ed after comparing
};
// (must be separate array and end with NULL entry (see Filenames))
const char* filenames[NUM_FILES+1];
TestFile files[NUM_FILES];
void generate_random_files()
{
for(size_t i = 0; i < NUM_FILES; i++)
{
const off_t size = rand(0, MAX_FILE_SIZE);
u8* data = new u8[size];
// random data won't compress at all, and we want to exercise
// the uncompressed codepath as well => make some of the files
// easily compressible (far fewer distinct values).
const bool make_easily_compressible = (rand(0, 100) > 50);
if(make_easily_compressible)
{
for(off_t i = 0; i < size; i++)
data[i] = rand() & 0x0F;
}
else
{
for(off_t i = 0; i < size; i++)
data[i] = rand() & 0xFF;
}
filenames[i] = gen_random_name();
files[i].size = size;
files[i].data = data;
ssize_t bytes_written = vfs_store(filenames[i], data, size, FILE_NO_AIO);
TS_ASSERT_EQUALS(bytes_written, size);
}
// 0-terminate the list - see Filenames decl.
filenames[NUM_FILES] = NULL;
}
public:
TestArchiveBuilder()
: archive_fn("test_archive_random_data.zip") {}
void setUp()
{
(void)file_init();
(void)file_set_root_dir(0, ".");
vfs_init();
}
void tearDown()
{
vfs_shutdown();
file_shutdown();
path_reset_root_dir();
}
void test_create_archive_with_random_files()
{
if(!file_exists("archivetest")) // don't get stuck if this test fails and never deletes the directory it created
TS_ASSERT_OK(dir_create("archivetest"));
TS_ASSERT_OK(vfs_mount("", "archivetest"));
generate_random_files();
TS_ASSERT_OK(archive_build(archive_fn, filenames));
// wipe out the file cache; otherwise we'd just get back
// the file contents read during archive_build.
file_cache_reset();
// read in each file and compare file contents
Handle ha = archive_open(archive_fn);
TS_ASSERT(ha > 0);
for(size_t i = 0; i < NUM_FILES; i++)
{
File f;
TS_ASSERT_OK(afile_open(ha, filenames[i], 0, 0, &f));
FileIOBuf buf = FILE_BUF_ALLOC;
ssize_t bytes_read = afile_read(&f, 0, files[i].size, &buf);
TS_ASSERT_EQUALS(bytes_read, files[i].size);
TS_ASSERT_SAME_DATA(buf, files[i].data, files[i].size);
TS_ASSERT_OK(file_buf_free(buf));
TS_ASSERT_OK(afile_close(&f));
SAFE_ARRAY_DELETE(files[i].data);
}
TS_ASSERT_OK(archive_close(ha));
dir_delete("archivetest");
file_delete(archive_fn);
}
void test_multiple_init_shutdown()
{
// setUp has already called vfs_init and tearDown will call vfs_shutdown.
vfs_shutdown();
vfs_init();
}
};

View File

@ -1,54 +0,0 @@
#include "lib/self_test.h"
#include "lib/self_test.h"
#include "lib/res/file/archive/compression.h"
class TestCompression : public CxxTest::TestSuite
{
public:
void test_compress_decompress_compare()
{
// generate random input data
// (limit values to 0..7 so that the data will actually be compressible)
const size_t data_size = 10000;
u8 data[data_size];
for(size_t i = 0; i < data_size; i++)
data[i] = rand() & 0x07;
u8* cdata; size_t csize;
u8 udata[data_size];
// compress
uintptr_t c = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
{
TS_ASSERT(c != 0);
const size_t csizeBound = comp_max_output_size(c, data_size);
TS_ASSERT_OK(comp_alloc_output(c, csizeBound));
const ssize_t cdata_produced = comp_feed(c, data, data_size);
TS_ASSERT(cdata_produced >= 0);
u32 checksum;
TS_ASSERT_OK(comp_finish(c, &cdata, &csize, &checksum));
TS_ASSERT(cdata_produced <= (ssize_t)csize); // can't have produced more than total
}
// decompress
uintptr_t d = comp_alloc(CT_DECOMPRESSION, CM_DEFLATE);
{
TS_ASSERT(d != 0);
comp_set_output(d, udata, data_size);
const ssize_t udata_produced = comp_feed(d, cdata, csize);
TS_ASSERT(udata_produced >= 0);
u8* udata_final; size_t usize_final; u32 checksum;
TS_ASSERT_OK(comp_finish(d, &udata_final, &usize_final, &checksum));
TS_ASSERT(udata_produced <= (ssize_t)usize_final); // can't have produced more than total
TS_ASSERT_EQUALS(udata_final, udata); // output buffer address is same
TS_ASSERT_EQUALS(usize_final, data_size); // correct amount of output
}
comp_free(c);
comp_free(d);
// verify data survived intact
TS_ASSERT_SAME_DATA(data, udata, data_size);
}
};

View File

@ -1,25 +0,0 @@
#include "lib/self_test.h"
#include <time.h>
#include "lib/res/file/archive/zip.h"
class TestZip : public CxxTest::TestSuite
{
public:
void test_fat_timedate_conversion()
{
// note: FAT time stores second/2, which means converting may
// end up off by 1 second.
time_t t, converted_t;
t = time(0);
converted_t = time_t_from_FAT(FAT_from_time_t(t));
TS_ASSERT_DELTA(t, converted_t, 2);
t++;
converted_t = time_t_from_FAT(FAT_from_time_t(t));
TS_ASSERT_DELTA(t, converted_t, 2);
}
};

View File

@ -1,473 +0,0 @@
/**
* =========================================================================
* File : trace.cpp
* Project : 0 A.D.
* Description : allows recording and 'playing back' a sequence of
* : I/Os - useful for benchmarking and archive builder.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "trace.h"
#include "lib/allocators.h"
#include "lib/timer.h"
#include "lib/sysdep/cpu.h"
#include "../file_internal.h"
ERROR_ASSOCIATE(ERR::TRACE_EMPTY, "No valid entries in trace", -1);
static uintptr_t trace_initialized; // set via CAS
static Pool trace_pool;
// call before using trace_pool. no-op if called more than once.
static inline void trace_init()
{
if(cpu_CAS(&trace_initialized, 0, 1))
(void)pool_create(&trace_pool, 4*MiB, sizeof(TraceEntry));
}
void trace_shutdown()
{
if(cpu_CAS(&trace_initialized, 1, 0))
(void)pool_destroy(&trace_pool);
}
// enabled by default. by the time we can decide whether a trace needs to
// be generated (see should_rebuild_main_archive), file accesses will
// already have occurred; hence the default is enabled, to be disabled later if not needed.
static bool trace_enabled = true;
static bool trace_force_enabled = false; // see below
// note: explicitly enabling trace means the user wants one to be
// generated even if an up-to-date version exists.
// (mechanism: ignore any attempts to disable)
void trace_enable(bool want_enabled)
{
trace_enabled = want_enabled;
if(want_enabled)
trace_force_enabled = true;
if(trace_force_enabled)
trace_enabled = true;
}
static LibError trace_add(TraceOp op, const char* P_fn, size_t size,
uint flags = 0, double timestamp = 0.0)
{
trace_init();
if(!trace_enabled)
return INFO::OK;
if(timestamp == 0.0)
timestamp = get_time();
TraceEntry* t = (TraceEntry*)pool_alloc(&trace_pool, 0);
if(!t)
return ERR::LIMIT; // NOWARN
t->timestamp = timestamp;
t->atom_fn = file_make_unique_fn_copy(P_fn);
t->size = size;
t->op = op;
t->flags = flags;
return INFO::OK;
}
static void trace_get_raw_ents(const TraceEntry*& ents, size_t& num_ents)
{
ents = (const TraceEntry*)trace_pool.da.base;
num_ents = (uint)(trace_pool.da.pos / sizeof(TraceEntry));
}
void trace_notify_io(const char* P_fn, size_t size, uint flags)
{
trace_add(TO_IO, P_fn, size, flags);
}
void trace_notify_free(const char* P_fn, size_t size)
{
trace_add(TO_FREE, P_fn, size);
}
//-----------------------------------------------------------------------------
// put all entries in one trace file: easier to handle; obviates FS enum code
// rationale: don't go through trace in order; instead, process most recent
// run first, to give more weight to it (TSP code should go with first entry
// when #occurrences are equal)
static const TraceEntry delimiter_entry =
{
0.0f, // timestamp
"------------------------------------------------------------",
0, // size
TO_IO, // TraceOp (never seen by user; value doesn't matter)
0 // flags
};
// storage for Trace.runs.
static const uint MAX_RUNS = 100;
static TraceRun runs[MAX_RUNS];
// note: the last index may be one past the number of actual entries.
// WARNING: due to a misfeature in DelimiterAdder, indices may be added twice.
// this is fixed in trace_get; just don't rely on run_start_indices.size()!
static std::vector<size_t> run_start_indices;
class DelimiterAdder
{
public:
enum Consequence
{
SKIP_ADD,
CONTINUE
};
Consequence operator()(size_t i, double timestamp, const char* P_path)
{
// this entry is a delimiter
if(!strcmp(P_path, delimiter_entry.atom_fn))
{
run_start_indices.push_back(i+1); // skip this entry
// note: its timestamp is invalid, so don't set cur_timestamp!
return SKIP_ADD;
}
const double last_timestamp = cur_timestamp;
cur_timestamp = timestamp;
// first item is always start of a run
if((i == 0) ||
// timestamp started over from 0 (e.g. 29, 30, 1) -> start of new run.
(timestamp < last_timestamp))
run_start_indices.push_back(i);
return CONTINUE;
}
private:
double cur_timestamp;
};
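// worked example (illustrative): entries with timestamps 5,7,9, then the
// delimiter, then 2,4 (indices 0..5). the functor pushes 0 (first item),
// 4 (entry after the delimiter) and 4 again (2 < 9 restart heuristic) -
// hence the duplicate indices that trace_get must skip.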
//-----------------------------------------------------------------------------
void trace_get(Trace* t)
{
const TraceEntry* ents; size_t num_ents;
trace_get_raw_ents(ents, num_ents);
// if nobody has split ents up into runs, just create one big 'run'.
if(run_start_indices.empty())
run_start_indices.push_back(0);
t->runs = runs;
t->num_runs = 0; // counted up
t->total_ents = num_ents;
size_t last_start_idx = num_ents;
std::vector<size_t>::reverse_iterator it;
for(it = run_start_indices.rbegin(); it != run_start_indices.rend(); ++it)
{
const size_t start_idx = *it;
// run_start_indices.back() may be = num_ents (could happen if
// a zero-length run gets written out); skip that to avoid
// zero-length run here.
// also fixes DelimiterAdder misbehavior of adding 2 indices per run.
if(last_start_idx == start_idx)
continue;
debug_assert(start_idx < t->total_ents);
TraceRun& run = runs[t->num_runs++];
run.num_ents = last_start_idx - start_idx;
run.ents = &ents[start_idx];
last_start_idx = start_idx;
if(t->num_runs == MAX_RUNS)
break;
}
debug_assert(t->num_runs != 0);
}
void trace_clear()
{
pool_free_all(&trace_pool);
run_start_indices.clear();
memset(runs, 0, sizeof(runs)); // for safety
}
//-----------------------------------------------------------------------------
static void write_entry(FILE* f, const TraceEntry* ent)
{
char opcode = '?';
switch(ent->op)
{
case TO_IO: opcode = 'L'; break;
case TO_FREE: opcode = 'F'; break;
default: debug_warn("invalid TraceOp");
}
debug_assert(ent->op == TO_IO || ent->op == TO_FREE);
fprintf(f, "%#010f: %c \"%s\" %d %04x\n", ent->timestamp, opcode,
ent->atom_fn, ent->size, ent->flags);
}
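// a line written by write_entry looks like this (illustrative values;
// the path is hypothetical):
//   107.056000: L "art/textures/foo.dds" 65536 0000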
// *appends* entire current trace contents to file (with delimiter first)
LibError trace_write_to_file(const char* trace_filename)
{
if(!trace_enabled)
return INFO::SKIPPED;
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
// append at end of file, otherwise we'd only have the most
// recently stored trace. vfs_optimizer correctly deals with
// several trace runs per file.
FILE* f = fopen(N_fn, "at");
if(!f)
WARN_RETURN(ERR::FILE_ACCESS);
write_entry(f, &delimiter_entry);
// somewhat of a hack: write all entries in original order, not the
// reverse order returned by trace_get.
const TraceEntry* ent; size_t num_ents;
trace_get_raw_ents(ent, num_ents);
for(size_t i = 0; i < num_ents; i++, ent++)
write_entry(f, ent);
(void)fclose(f);
return INFO::OK;
}
LibError trace_read_from_file(const char* trace_filename, Trace* t)
{
trace_clear();
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(trace_filename, N_fn));
FILE* f = fopen(N_fn, "rt");
if(!f)
WARN_RETURN(ERR::TNODE_NOT_FOUND);
// we use trace_add, which is the same mechanism called by trace_notify*;
// therefore, tracing needs to be enabled.
trace_enabled = true;
DelimiterAdder delim_adder;
// parse lines and stuff them in trace_pool
// (as if they had been trace_add-ed; replaces any existing data)
// .. bake PATH_MAX limit into string.
char fmt[30];
snprintf(fmt, ARRAY_SIZE(fmt), "%%lf: %%c \"%%%d[^\"]\" %%d %%04x\n", PATH_MAX);
for(size_t i = 0; ; i++)
{
double timestamp; char opcode; char P_path[PATH_MAX]; size_t size; uint flags;
// note: fscanf returns the number of fields assigned, not characters.
int fields_read = fscanf(f, fmt, &timestamp, &opcode, P_path, &size, &flags);
if(fields_read == EOF)
break;
debug_assert(fields_read == 5);
TraceOp op = TO_IO; // default in case file is garbled
switch(opcode)
{
case 'L': op = TO_IO; break;
case 'F': op = TO_FREE; break;
default: debug_warn("invalid TraceOp");
}
if(delim_adder(i, timestamp, P_path) != DelimiterAdder::SKIP_ADD)
{
LibError ret = trace_add(op, P_path, size, flags, timestamp);
// storage in trace pool exhausted. must abort to avoid later
// adding delimiters for items that weren't actually stored
// into the pool.
if(ret == ERR::LIMIT)
break;
}
}
fclose(f);
trace_get(t);
// the previous in-memory trace entries were overwritten by what we just
// read, so there's no sense in continuing to record.
trace_enabled = false;
if(t->total_ents == 0)
WARN_RETURN(ERR::TRACE_EMPTY);
return INFO::OK;
}
void trace_gen_random(size_t num_entries)
{
trace_clear();
for(size_t i = 0; i < num_entries; i++)
{
// generate random names until we get a valid file;
// remember its name and size.
const char* atom_fn;
off_t size;
for(;;)
{
atom_fn = file_get_random_name();
// use vfs_exists instead of vfs_stat to avoid warnings, since some of
// the atom_fn will actually be directory names.
if(vfs_exists(atom_fn))
{
struct stat s;
LibError ret = vfs_stat(atom_fn, &s);
// this ought to hold due to the vfs_exists check above.
debug_assert(ret == INFO::OK && S_ISREG(s.st_mode));
size = s.st_size;
break;
}
}
trace_add(TO_IO, atom_fn, size);
trace_add(TO_FREE, atom_fn, size);
}
}
//-----------------------------------------------------------------------------
// simulate carrying out the entry's TraceOp to determine
// whether this IO would be satisfied by the file_buf cache.
//
// note: TO_IO's handling of uncached buffers means the simulated and
// real cache contents will diverge if the real caller doesn't free their
// buffer immediately.
// this is a bit of a bother, but only slightly influences results
// because it works by affecting the cache allocator's eviction pattern.
// alternatives:
// - only allocate if file_cache_would_add. this would actually
// cause divergence whenever skipping any allocation, which is worse.
// - maintain a list of "buffers we allocated" and use that instead of
// file_cache_retrieve in TO_FREE. this would keep both caches in sync but
// add considerable complexity (function would no longer be "stateless").
bool trace_entry_causes_io(const TraceEntry* ent)
{
uint fb_flags = FB_NO_STATS;
if(ent->flags & FILE_LONG_LIVED)
fb_flags |= FB_LONG_LIVED;
FileIOBuf buf;
size_t size = ent->size;
const char* atom_fn = ent->atom_fn;
uint file_flags = ent->flags;
switch(ent->op)
{
case TO_IO:
{
// we're not interested in writes
if(file_flags & FILE_WRITE)
return false;
buf = file_cache_retrieve(atom_fn, &size, fb_flags);
// would not be in cache
if(!buf)
{
buf = file_buf_alloc(size, atom_fn, fb_flags);
LibError ret = file_cache_add(buf, size, atom_fn, file_flags);
// the cache decided not to add buf (see file_cache_would_add).
// since TO_FREE below uses the cache to find out which
// buffer was allocated for atom_fn, we have to free it manually.
// see note above.
if(ret == INFO::SKIPPED)
(void)file_buf_free(buf, fb_flags);
return true;
}
break;
}
case TO_FREE:
buf = file_cache_retrieve(atom_fn, &size, fb_flags|FB_NO_ACCOUNTING);
// note: if buf == 0, file_buf_free is a no-op. this happens when the
// buffer wasn't actually added to the cache (see note above).
(void)file_buf_free(buf, fb_flags);
break;
default:
debug_warn("unknown TraceOp");
}
return false;
}
// carry out all operations specified in the trace.
// if flags&TRF_SYNC_TO_TIMESTAMP, waits until timestamp for each event is
// reached; otherwise, they are run as fast as possible.
LibError trace_run(const char* trace_filename, uint flags)
{
Trace t;
RETURN_ERR(trace_read_from_file(trace_filename, &t));
// prevent the actions we carry out below from generating
// trace_add-s.
trace_enabled = false;
const double start_time = get_time();
const double first_timestamp = t.runs[t.num_runs-1].ents[0].timestamp;
for(uint r = 0; r < t.num_runs; r++)
{
const TraceRun& run = t.runs[r];
const TraceEntry* ent = run.ents;
for(uint i = 0; i < run.num_ents; i++, ent++)
{
// wait until time for next entry if caller requested this
if(flags & TRF_SYNC_TO_TIMESTAMP)
{
while(get_time()-start_time < ent->timestamp-first_timestamp)
{
// busy-wait (don't sleep - can skew results)
}
}
// carry out this entry's operation
FileIOBuf buf; size_t size;
switch(ent->op)
{
case TO_IO:
// do not 'run' writes - we'd destroy the existing data.
if(ent->flags & FILE_WRITE)
continue;
(void)vfs_load(ent->atom_fn, buf, size, ent->flags);
break;
case TO_FREE:
buf = file_cache_retrieve(ent->atom_fn, &size, FB_NO_STATS|FB_NO_ACCOUNTING);
(void)file_buf_free(buf);
break;
default:
debug_warn("unknown TraceOp");
}
}
}
trace_clear();
return INFO::OK;
}

View File

@ -1,91 +0,0 @@
/**
* =========================================================================
* File : trace.h
* Project : 0 A.D.
* Description : allows recording and 'playing back' a sequence of
* : I/Os - useful for benchmarking and archive builder.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_TRACE
#define INCLUDED_TRACE
namespace ERR
{
const LibError TRACE_EMPTY = -110500;
}
extern void trace_enable(bool want_enabled);
extern void trace_shutdown();
extern void trace_notify_io(const char* P_fn, size_t size, uint flags);
extern void trace_notify_free(const char* P_fn, size_t size);
// TraceEntry operation type.
// note: rather than only a list of accessed files, we also need to
// know the application's behavior WRT caching (e.g. when it releases
// cached buffers). this is necessary so that our simulation can
// yield the same behavior.
enum TraceOp
{
TO_IO,
TO_FREE,
};
// stores one event that is relevant for file IO / caching.
//
// size-optimized a bit since these are all kept in memory
// (to prevent trace file writes from affecting other IOs)
struct TraceEntry
{
// note: float instead of double for nice 16 byte struct size
float timestamp; // returned by get_time before operation starts
const char* atom_fn; // path+name of affected file
// rationale: store size in the trace because other applications
// that use this trace format but not our IO code wouldn't know
// size (since they cannot retrieve the file info given atom_fn).
size_t size; // of IO (usually the entire file)
uint op : 8; // operation - see TraceOp
uint flags : 24; // misc, e.g. file_io flags.
};
struct TraceRun
{
const TraceEntry* ents;
size_t num_ents;
};
struct Trace
{
// most recent first! (see rationale in source)
const TraceRun* runs;
size_t num_runs;
size_t total_ents;
};
extern void trace_get(Trace* t);
extern LibError trace_write_to_file(const char* trace_filename);
extern LibError trace_read_from_file(const char* trace_filename, Trace* t);
extern void trace_gen_random(size_t num_entries);
// simulate carrying out the entry's TraceOp to determine
// whether this IO would be satisfied by the file_buf cache.
extern bool trace_entry_causes_io(const TraceEntry* ent);
enum TraceRunFlags
{
TRF_SYNC_TO_TIMESTAMP = 1
};
// carry out all operations specified in the trace.
// if flags&TRF_SYNC_TO_TIMESTAMP, waits until timestamp for each event is
// reached; otherwise, they are run as fast as possible.
extern LibError trace_run(const char* trace_filename, uint flags = 0);
#endif // #ifndef INCLUDED_TRACE
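// usage sketch (illustrative; the filename is hypothetical): record the
// IOs of a session and persist them for the next archive-builder run.
//
//   trace_enable(true);
//   // ... the file layer reports accesses via trace_notify_io ...
//   (void)trace_write_to_file("../logs/trace.txt");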

View File

@ -1,731 +0,0 @@
/**
* =========================================================================
* File : vfs_optimizer.cpp
* Project : 0 A.D.
* Description : automatically bundles files into archives in order of
* : access to optimize I/O.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "vfs_optimizer.h"
#include <set>
#include <map>
#include <algorithm>
#include <ctime>
#include "../file_internal.h"
// enough for 64K unique files - ought to suffice.
typedef u16 FileId;
static const FileId NULL_ID = 0;
static const size_t MAX_IDS = 0x10000 -1; // -1 due to NULL_ID
struct FileNode
{
const char* atom_fn;
FileId prev_id;
FileId next_id;
u32 visited : 1;
u32 output : 1;
FileNode(const char* atom_fn_)
{
atom_fn = atom_fn_;
prev_id = next_id = NULL_ID;
visited = output = 0;
}
};
typedef std::vector<FileNode> FileNodes;
//-----------------------------------------------------------------------------
// check if the file is supposed to be added to archive.
// this avoids adding e.g. screenshots (wasteful because they're never used)
// or config (bad because they are written to and that's not supported for
// archived files).
static bool is_archivable(const TFile* tf)
{
const Mount* m = tfile_get_mount(tf);
return mount_is_archivable(m);
}
class IdMgr
{
FileId cur;
typedef std::map<const char*, FileId> Map;
Map map;
FileNodes* nodes;
// adds an atom_fn -> FileId mapping for the given node.
void associate_node_with_fn(const FileNode& node)
{
FileId id = id_from_node(&node);
const Map::value_type item = std::make_pair(node.atom_fn, id);
std::pair<Map::iterator, bool> ret = map.insert(item);
if(!ret.second)
debug_warn("atom_fn already associated with node");
}
public:
FileId id_from_node(const FileNode* node) const
{
// +1 to skip NULL_ID value
FileId id = node - &((*nodes)[0]) +1;
debug_assert(id <= nodes->size());
return id;
}
FileNode* node_from_id(FileId id) const
{
debug_assert(id != NULL_ID);
return &(*nodes)[id-1];
}
FileId id_from_fn(const char* atom_fn) const
{
Map::const_iterator cit = map.find(atom_fn);
if(cit == map.end())
{
debug_warn("id_from_fn: not found");
return NULL_ID;
}
return cit->second;
}
void init(FileNodes* nodes_)
{
cur = NULL_ID+1;
map.clear();
nodes = nodes_;
// can't use for_each (mem_fun requires const function and
// non-reference-type argument)
for(FileNodes::const_iterator cit = nodes->begin(); cit != nodes->end(); ++cit)
{
const FileNode& node = *cit;
associate_node_with_fn(node);
}
}
};
static IdMgr id_mgr;
//-----------------------------------------------------------------------------
// build list of FileNode - exactly one per file in VFS.
//
// time cost: 13ms for 5500 files; we therefore do not bother with
// optimizations like reading from vfs_tree container directly.
class FileGatherer
{
static void EntCb(const char* path, const DirEnt* ent, uintptr_t cbData)
{
FileNodes* file_nodes = (FileNodes*)cbData;
// we only want files
if(DIRENT_IS_DIR(ent))
return;
if(is_archivable(ent->tf))
{
const char* atom_fn = file_make_unique_fn_copy(path);
file_nodes->push_back(FileNode(atom_fn));
}
}
public:
FileGatherer(FileNodes& file_nodes)
{
// jump-start allocation (avoids frequent initial reallocs)
file_nodes.reserve(500);
// TODO: only add entries from mount points that have
// VFS_MOUNT_ARCHIVE flag set (avoids adding screenshots etc.)
vfs_dir_enum("", VFS_DIR_RECURSIVE, 0, EntCb, (uintptr_t)&file_nodes);
// MAX_IDS is a rather large limit on number of files, but must not
// be exceeded (otherwise FileId overflows).
// check for this here and not in EntCb because it's not
// expected to happen.
if(file_nodes.size() > MAX_IDS)
{
// note: use this instead of resize because FileNode doesn't have
// a default ctor. NB: this is how resize is implemented anyway.
file_nodes.erase(file_nodes.begin() + MAX_IDS, file_nodes.end());
WARN_ERR(ERR::LIMIT);
}
}
};
//-----------------------------------------------------------------------------
typedef u32 ConnectionId;
cassert(sizeof(FileId)*2 <= sizeof(ConnectionId));
static ConnectionId cid_make(FileId first, FileId second)
{
return u32_from_u16(first, second);
}
static FileId cid_first(ConnectionId id)
{
return u32_hi(id);
}
static FileId cid_second(ConnectionId id)
{
return u32_lo(id);
}
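// example (illustrative): cid_make(3, 7) yields 0x00030007;
// cid_first and cid_second recover 3 and 7, respectively.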
struct Connection
{
ConnectionId id;
// repeated edges ("connections") are reflected in
// the 'occurrences' count; we optimize the ordering so that
// files with frequent connections are nearby.
uint occurrences;
Connection(ConnectionId id_)
: id(id_), occurrences(1) {}
};
typedef std::vector<Connection> Connections;
// builds a list of Connection-s (basically edges in the FileNode graph)
// defined by the trace.
//
// time cost: 70ms for 1000 trace entries. this is rather heavy;
// the main culprit is simulating file_cache to see if an IO would result.
class ConnectionBuilder
{
// functor: on every call except the first, adds a connection between
// the previous file (remembered here) and the current file.
// if the connection already exists, its occurrence count is incremented.
class ConnectionAdder
{
// speeds up "already exists" overhead from n*n to n*log(n).
typedef std::map<ConnectionId, Connection*> Map;
typedef std::pair<ConnectionId, Connection*> MapItem;
typedef Map::const_iterator MapCIt;
Map map;
FileId prev_id;
public:
ConnectionAdder() : prev_id(NULL_ID) {}
void operator()(Connections& connections, const char* new_fn)
{
const bool was_first_call = (prev_id == NULL_ID);
FileId id = id_mgr.id_from_fn(new_fn);
const ConnectionId c_id = cid_make(prev_id, id);
prev_id = id;
if(was_first_call)
return; // bail after setting prev_id
// note: always insert-ing and checking return value would be
// more efficient (saves 1 iteration over map), but would not
// be safe: VC8's STL disallows &vector[0] if empty
// (even though memory has been reserved).
// it doesn't matter much anyway (decently fast and offline task).
MapCIt it = map.find(c_id);
const bool already_exists = (it != map.end());
if(already_exists)
{
Connection* c = it->second; // Map "payload"
c->occurrences++;
}
// seen this connection for the first time: add to map and list.
else
{
connections.push_back(Connection(c_id));
const MapItem item = std::make_pair(c_id, &connections.back());
map.insert(item);
}
stats_ab_connection(already_exists);
}
};
void add_connections_from_runs(const Trace& t, Connections& connections)
{
file_cache_reset();
// (note: lifetime = entire connection build process; if re-created
// in between, entries in Connections will no longer be unique,
// which may break TourBuilder)
ConnectionAdder add_connection;
// extract accesses from each run (starting with most recent
// first. this isn't critical, but may help a bit since
// files that are equally strongly 'connected' are ordered
// according to position in file_nodes. that means files from
// more recent traces tend to go first, which is good.)
for(size_t r = 0; r < t.num_runs; r++)
{
const TraceRun& run = t.runs[r];
for(uint i = 0; i < run.num_ents; i++)
{
const TraceEntry* te = &run.ents[i];
// improvement: postprocess the trace, removing all IOs that would be
// satisfied by our cache; otherwise, frequently repeated IOs could
// end up arranged badly.
if(trace_entry_causes_io(te))
{
// only add connection if this file exists and is in
// file_nodes list. otherwise, ConnectionAdder's
// id_from_fn call will fail.
// note: this happens when trace contains by now
// deleted or unarchivable files.
TFile* tf;
if(tree_lookup(te->atom_fn, &tf) == INFO::OK)
if(is_archivable(tf))
add_connection(connections, te->atom_fn);
}
}
file_cache_reset();
}
}
public:
LibError run(const char* trace_filename, Connections& connections)
{
Trace t;
RETURN_ERR(trace_read_from_file(trace_filename, &t));
// reserve memory for worst-case amount of connections (happens if
// all accesses are unique). this is necessary because we store
// pointers to Connection in the map, which would be invalidated if
// connections[] ever expands.
// may waste up to ~3x the memory (about 1mb) for a short time,
// which is ok.
connections.reserve(t.total_ents-1);
add_connections_from_runs(t, connections);
return INFO::OK;
}
};
//-----------------------------------------------------------------------------
// given graph and known edges, stitch together FileNodes so that
// Hamilton tour (TSP solution) length of the graph is minimized.
// heuristic is greedy adding edges sorted by decreasing 'occurrences'.
//
// time cost: 7ms for 1000 connections; quite fast despite DFS.
//
// could be improved (if there are lots of files) by storing in each node
// a pointer to the end of its chain; when adding an edge, we would only
// need to check whether the chain starting at its end terminates at its start.
class TourBuilder
{
// sort by decreasing occurrence
struct Occurrence_greater: public std::binary_function<const Connection&, const Connection&, bool>
{
bool operator()(const Connection& c1, const Connection& c2) const
{
return (c1.occurrences > c2.occurrences);
}
};
bool has_cycle;
void detect_cycleR(FileId id)
{
FileNode* pnode = id_mgr.node_from_id(id);
pnode->visited = 1;
FileId next_id = pnode->next_id;
if(next_id != NULL_ID)
{
FileNode* pnext = id_mgr.node_from_id(next_id);
if(pnext->visited)
has_cycle = true;
else
detect_cycleR(next_id);
}
}
bool is_cycle_at(FileNodes& file_nodes, FileId node)
{
has_cycle = false;
for(FileNodes::iterator it = file_nodes.begin(); it != file_nodes.end(); ++it)
it->visited = 0;
detect_cycleR(node);
return has_cycle;
}
void try_add_edge(FileNodes& file_nodes, const Connection& c)
{
FileId first_id = cid_first(c.id);
FileId second_id = cid_second(c.id);
FileNode* first = id_mgr.node_from_id(first_id);
FileNode* second = id_mgr.node_from_id(second_id);
// one of them has already been hooked up - bail
if(first->next_id != NULL_ID || second->prev_id != NULL_ID)
return;
first->next_id = second_id;
second->prev_id = first_id;
const bool introduced_cycle = is_cycle_at(file_nodes, second_id);
#ifndef NDEBUG
debug_assert(introduced_cycle == is_cycle_at(file_nodes, first_id));
#endif
if(introduced_cycle)
{
// undo
first->next_id = second->prev_id = NULL_ID;
return;
}
}
void output_chain(FileNode& node, std::vector<const char*>& fn_vector)
{
// early out: if this node has already been output, so has the entire
// chain of which it is a part. bail to save lots of time.
if(node.output)
return;
// follow prev links starting with <node> until no more are left;
// <start> ends up at the beginning of the chain that includes <node>.
FileNode* start = &node;
while(start->prev_id != NULL_ID)
start = id_mgr.node_from_id(start->prev_id);
// iterate over the chain - add to the Filenames list and mark as output
FileNode* cur = start;
for(;;)
{
if(!cur->output)
{
fn_vector.push_back(cur->atom_fn);
cur->output = 1;
}
if(cur->next_id == NULL_ID)
break;
cur = id_mgr.node_from_id(cur->next_id);
}
}
public:
TourBuilder(FileNodes& file_nodes, Connections& connections, std::vector<const char*>& fn_vector)
{
std::stable_sort(connections.begin(), connections.end(), Occurrence_greater());
for(Connections::iterator it = connections.begin(); it != connections.end(); ++it)
try_add_edge(file_nodes, *it);
for(FileNodes::iterator it = file_nodes.begin(); it != file_nodes.end(); ++it)
output_chain(*it, fn_vector);
}
};
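// worked example (illustrative): given connections A->B (x5), B->C (x3) and
// C->A (x2), sorted by decreasing occurrences, A->B and B->C are linked up;
// C->A is rejected because it would close the cycle A->B->C->A.
// output_chain then emits the single chain A, B, C.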
//-----------------------------------------------------------------------------
// autobuild logic: decides when to (re)build an archive.
//-----------------------------------------------------------------------------
// for each loose or archived file encountered during mounting: add to a
// std::set; if there are more than *_THRESHOLD non-archived files, rebuild.
// this ends up costing 50ms for 5000 files, so disable it in final release.
#if CONFIG_FINAL
# define AB_COUNT_LOOSE_FILES 0
#else
# define AB_COUNT_LOOSE_FILES 1
#endif
// rebuild if the archive is much older than most recent VFS timestamp.
// this makes sense during development: the archive will periodically be
// rebuilt with the newest trace. however, it would be annoying in the
// final release, where users will frequently mod things, which should not
// end up rebuilding the main archive.
#if CONFIG_FINAL
# define AB_COMPARE_MTIME 0
#else
# define AB_COMPARE_MTIME 1
#endif
#if AB_COUNT_LOOSE_FILES
static const ssize_t REBUILD_MAIN_ARCHIVE_THRESHOLD = 50;
static const ssize_t BUILD_MINI_ARCHIVE_THRESHOLD = 20;
typedef std::set<const char*> FnSet;
static FnSet loose_files;
static FnSet archived_files;
#endif
void vfs_opt_notify_loose_file(const char* atom_fn)
{
#if AB_COUNT_LOOSE_FILES
// note: files are added before archives, so we can't stop adding to the
// set once one of the above thresholds is reached.
loose_files.insert(atom_fn);
#endif
}
void vfs_opt_notify_non_loose_file(const char* atom_fn)
{
#if AB_COUNT_LOOSE_FILES
archived_files.insert(atom_fn);
#endif
}
static bool should_rebuild_main_archive(const char* trace_filename,
DirEnts& existing_archives)
{
// if there's no trace file, no point in building a main archive.
// (we wouldn't know how to order the files)
if(!file_exists(trace_filename))
return false;
#if AB_COUNT_LOOSE_FILES
// too many (eligible for archiving!) loose files not in archive: rebuild.
const ssize_t loose_files_only = (ssize_t)loose_files.size() - (ssize_t)archived_files.size();
if(loose_files_only >= REBUILD_MAIN_ARCHIVE_THRESHOLD)
return true;
#endif
// scan dir and see what archives are already present..
{
time_t most_recent_archive_mtime = 0;
// note: a loop is more convenient than std::for_each, which would
// require referencing the returned functor (since param is a copy).
for(DirEnts::const_iterator it = existing_archives.begin(); it != existing_archives.end(); ++it)
most_recent_archive_mtime = std::max(it->mtime, most_recent_archive_mtime);
// .. no archive yet OR 'lots' of them: rebuild so that they'll be
// merged into one archive and the rest deleted.
if(existing_archives.empty() || existing_archives.size() >= 4)
return true;
#if AB_COMPARE_MTIME
// .. archive is much older than most recent data: rebuild.
const double max_diff = 14*86400; // 14 days
if(difftime(tree_most_recent_mtime(), most_recent_archive_mtime) > max_diff)
return true;
#endif
}
return false;
}
//-----------------------------------------------------------------------------
static char archive_fn[PATH_MAX];
static ArchiveBuildState ab;
static std::vector<const char*> fn_vector;
static DirEnts existing_archives; // and possibly other entries
class IsArchive
{
const char* archive_ext;
public:
IsArchive(const char* archive_fn)
{
archive_ext = path_extension(archive_fn);
}
bool operator()(DirEnt& ent) const
{
// remove if not file
if(DIRENT_IS_DIR(&ent))
return true;
// remove if not same extension
const char* ext = path_extension(ent.name);
if(strcasecmp(archive_ext, ext) != 0)
return true;
// keep
return false;
}
};
static LibError vfs_opt_init(const char* trace_filename, const char* archive_fn_fmt, bool force_build)
{
// get next not-yet-existing archive filename.
static NextNumberedFilenameInfo archive_nfi;
bool use_vfs = false; // can't use VFS for archive files
next_numbered_filename(archive_fn_fmt, &archive_nfi, archive_fn, use_vfs);
// get list of existing archives in root dir.
// note: this is needed by should_rebuild_main_archive and later in
// vfs_opt_continue; must be done here instead of inside the former
// because that is not called when force_build == true.
{
char dir[PATH_MAX];
path_dir_only(archive_fn_fmt, dir);
RETURN_ERR(file_get_sorted_dirents(dir, existing_archives));
DirEntIt new_end = std::remove_if(existing_archives.begin(), existing_archives.end(), IsArchive(archive_fn));
existing_archives.erase(new_end, existing_archives.end());
}
// bail if we shouldn't rebuild the archive.
if(!force_build && !should_rebuild_main_archive(trace_filename, existing_archives))
return INFO::SKIPPED;
// build 'graph' (nodes only) of all files that must be added.
FileNodes file_nodes;
FileGatherer gatherer(file_nodes);
if(file_nodes.empty())
WARN_RETURN(ERR::DIR_END);
// scan nodes and add them to filename->FileId mapping.
id_mgr.init(&file_nodes);
// build list of edges between FileNodes (referenced via FileId) that
// are defined by trace entries.
Connections connections;
ConnectionBuilder cbuilder;
RETURN_ERR(cbuilder.run(trace_filename, connections));
// create output filename list by first adding the above edges (most
// frequent first) and then adding the rest sequentially.
TourBuilder builder(file_nodes, connections, fn_vector);
fn_vector.push_back(0); // 0-terminate for use as Filenames
Filenames V_fns = &fn_vector[0];
RETURN_ERR(archive_build_init(archive_fn, V_fns, &ab));
return INFO::OK;
}
static int vfs_opt_continue()
{
int ret = archive_build_continue(&ab);
if(ret == INFO::OK)
{
// do NOT delete source files! some apps might want to
// keep them (e.g. for source control), or name them differently.
mount_release_all_archives();
// delete old archives
PathPackage pp; // need path to each existing_archive, not only name
{
char archive_dir[PATH_MAX];
path_dir_only(archive_fn, archive_dir);
(void)path_package_set_dir(&pp, archive_dir);
}
for(DirEntCIt it = existing_archives.begin(); it != existing_archives.end(); ++it)
{
(void)path_package_append_file(&pp, it->name);
(void)file_delete(pp.path);
}
// rebuild is required due to mount_release_all_archives.
// the dir watcher may already have rebuilt the VFS once,
// which is a waste of time here.
(void)mount_rebuild();
// it is believed that wiping out the file cache is not necessary.
// building archive doesn't change the game data files, and any
// cached contents of the previous archives are irrelevant.
}
return ret;
}
static bool should_build_mini_archive(const char* UNUSED(mini_archive_fn_fmt))
{
#if AB_COUNT_LOOSE_FILES
// too many (eligible for archiving!) loose files not in archive
const ssize_t loose_files_only = (ssize_t)loose_files.size() - (ssize_t)archived_files.size();
if(loose_files_only >= BUILD_MINI_ARCHIVE_THRESHOLD)
return true;
#endif
return false;
}
static LibError build_mini_archive(const char* mini_archive_fn_fmt)
{
if(!should_build_mini_archive(mini_archive_fn_fmt))
return INFO::SKIPPED;
#if AB_COUNT_LOOSE_FILES
Filenames V_fns = new const char*[loose_files.size()+1];
std::copy(loose_files.begin(), loose_files.end(), &V_fns[0]);
V_fns[loose_files.size()] = 0; // terminator
// get new unused mini archive name at P_dst_path
char mini_archive_fn[PATH_MAX];
static NextNumberedFilenameInfo nfi;
bool use_vfs = false; // can't use VFS for archive files
next_numbered_filename(mini_archive_fn_fmt, &nfi, mini_archive_fn, use_vfs);
RETURN_ERR(archive_build(mini_archive_fn, V_fns));
delete[] V_fns;
return INFO::OK;
#else
return ERR::NOT_IMPLEMENTED;
#endif
}
static enum
{
DECIDE_IF_BUILD,
IN_PROGRESS,
NOP
}
state = DECIDE_IF_BUILD;
void vfs_opt_auto_build_cancel()
{
archive_build_cancel(&ab);
state = NOP;
}
int vfs_opt_auto_build(const char* trace_filename,
const char* archive_fn_fmt, const char* mini_archive_fn_fmt, bool force_build)
{
if(state == NOP)
return INFO::ALL_COMPLETE;
if(state == DECIDE_IF_BUILD)
{
if(vfs_opt_init(trace_filename, archive_fn_fmt, force_build) != INFO::SKIPPED)
state = IN_PROGRESS;
else
{
// create mini-archive (if needed)
RETURN_ERR(build_mini_archive(mini_archive_fn_fmt));
state = NOP;
return INFO::OK; // "finished"
}
}
if(state == IN_PROGRESS)
{
int ret = vfs_opt_continue();
// just finished
if(ret == INFO::OK)
state = NOP;
return ret;
}
UNREACHABLE;
}
LibError vfs_opt_rebuild_main_archive(const char* trace_filename, const char* archive_fn_fmt)
{
for(;;)
{
int ret = vfs_opt_auto_build(trace_filename, archive_fn_fmt, 0, true);
RETURN_ERR(ret);
if(ret == INFO::OK)
return INFO::OK;
}
}

View File

@ -1,26 +0,0 @@
/**
* =========================================================================
* File : vfs_optimizer.h
* Project : 0 A.D.
* Description : automatically bundles files into archives in order of
* : access to optimize I/O.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_VFS_OPTIMIZER
#define INCLUDED_VFS_OPTIMIZER
extern LibError vfs_opt_rebuild_main_archive(const char* trace_filename, const char* archive_fn_fmt);
extern void vfs_opt_auto_build_cancel();
extern int vfs_opt_auto_build(const char* trace_filename,
const char* archive_fn_fmt, const char* mini_archive_fn_fmt, bool force_build = false);
extern void vfs_opt_notify_loose_file(const char* atom_fn);
extern void vfs_opt_notify_non_loose_file(const char* atom_fn);
#endif // #ifndef INCLUDED_VFS_OPTIMIZER
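// usage sketch (illustrative; filenames are hypothetical): poll once per
// frame until the background build completes.
//
//   int ret = vfs_opt_auto_build("../logs/trace.txt",
//       "mods/official/official%02d.zip", "mods/official/mini%02d.zip");
//   // ret == INFO::OK: a build just finished;
//   // ret == INFO::ALL_COMPLETE: nothing (more) to do.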

View File

@ -1,640 +0,0 @@
/**
* =========================================================================
* File : zip.cpp
* Project : 0 A.D.
* Description : archive backend for Zip files.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "zip.h"
#include <time.h>
#include <limits>
#include "lib/bits.h"
#include "lib/byte_order.h"
#include "lib/allocators.h"
#include "lib/timer.h"
#include "lib/res/res.h"
#include "../file_internal.h"
//-----------------------------------------------------------------------------
// timestamp conversion: DOS FAT <-> Unix time_t
//-----------------------------------------------------------------------------
// must not be static because these are tested by the unit test
time_t time_t_from_FAT(u32 fat_timedate)
{
const uint fat_time = bits(fat_timedate, 0, 15);
const uint fat_date = bits(fat_timedate, 16, 31);
struct tm t; // struct tm format:
t.tm_sec = bits(fat_time, 0,4) * 2; // [0,59]
t.tm_min = bits(fat_time, 5,10); // [0,59]
t.tm_hour = bits(fat_time, 11,15); // [0,23]
t.tm_mday = bits(fat_date, 0,4); // [1,31]
t.tm_mon = bits(fat_date, 5,8) - 1; // [0,11]
t.tm_year = bits(fat_date, 9,15) + 80; // since 1900
t.tm_isdst = -1; // unknown - let libc determine
// otherwise: totally bogus, and at the limit of 32-bit time_t
debug_assert(t.tm_year < 138);
time_t ret = mktime(&t);
if(ret == (time_t)-1)
debug_warn("mktime failed");
return ret;
}
u32 FAT_from_time_t(time_t time)
{
// (values are adjusted for DST)
struct tm* t = localtime(&time);
u16 fat_time = 0;
fat_time |= (t->tm_sec/2); // 5
fat_time |= (t->tm_min) << 5; // 6
fat_time |= (t->tm_hour) << 11; // 5
u16 fat_date = 0;
fat_date |= (t->tm_mday); // 5
fat_date |= (t->tm_mon+1) << 5; // 4
fat_date |= (t->tm_year-80) << 9; // 7
u32 fat_timedate = u32_from_u16(fat_date, fat_time);
return fat_timedate;
}
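// worked example (illustrative): 2007-06-15 12:30:42 packs as
//   fat_time     = 42/2 | 30<<5 | 12<<11        = 0x63D5
//   fat_date     = 15 | 6<<5 | (2007-1980)<<9   = 0x36CF
//   fat_timedate = u32_from_u16(0x36CF, 0x63D5) = 0x36CF63D5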
//-----------------------------------------------------------------------------
// Zip file data structures and signatures
//-----------------------------------------------------------------------------
enum ZipCompressionMethod
{
ZIP_CM_NONE = 0,
ZIP_CM_DEFLATE = 8
};
// translate ArchiveEntry.method to zip_method.
static ZipCompressionMethod zip_method_for(CompressionMethod method)
{
switch(method)
{
case CM_NONE:
return ZIP_CM_NONE;
case CM_DEFLATE:
return ZIP_CM_DEFLATE;
default:
WARN_ERR(ERR::COMPRESSION_UNKNOWN_METHOD);
return ZIP_CM_NONE;
}
}
// translate to (not Zip-specific) CompressionMethod for use in ArchiveEntry.
static CompressionMethod method_for_zip_method(ZipCompressionMethod zip_method)
{
switch(zip_method)
{
case ZIP_CM_NONE:
return CM_NONE;
case ZIP_CM_DEFLATE:
return CM_DEFLATE;
default:
WARN_ERR(ERR::COMPRESSION_UNKNOWN_METHOD);
return CM_UNSUPPORTED;
}
}
static const u32 cdfh_magic = FOURCC_LE('P','K','\1','\2');
static const u32 lfh_magic = FOURCC_LE('P','K','\3','\4');
static const u32 ecdr_magic = FOURCC_LE('P','K','\5','\6');
#pragma pack(push, 1)
struct LFH
{
u32 magic;
u16 x1; // version needed
u16 flags;
u16 method;
u32 fat_mtime; // last modified time (DOS FAT format)
u32 crc;
u32 csize;
u32 usize;
u16 fn_len;
u16 e_len;
};
const size_t LFH_SIZE = sizeof(LFH);
cassert(LFH_SIZE == 30);
// convenience (allows writing out LFH and fn in 1 IO).
// must be declared here to avoid any struct padding.
struct LFH_Package
{
LFH lfh;
char fn[PATH_MAX];
};
struct CDFH
{
u32 magic;
u32 x1; // versions
u16 flags;
u16 method;
u32 fat_mtime; // last modified time (DOS FAT format)
u32 crc;
u32 csize;
u32 usize;
u16 fn_len;
u16 e_len;
u16 c_len;
u32 x2; // spanning
u32 x3; // attributes
u32 lfh_ofs;
};
const size_t CDFH_SIZE = sizeof(CDFH);
cassert(CDFH_SIZE == 46);
// convenience (avoids need for pointer arithmetic)
// must be declared here to avoid any struct padding.
struct CDFH_Package
{
CDFH cdfh;
char fn[PATH_MAX];
};
struct ECDR
{
u32 magic;
u8 x1[6]; // multiple-disk support
u16 cd_entries;
u32 cd_size;
u32 cd_ofs;
u16 comment_len;
};
const size_t ECDR_SIZE = sizeof(ECDR);
cassert(ECDR_SIZE == 22);
#pragma pack(pop)
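// overall layout of an archive produced by this backend (for reference):
//   [LFH+fn+data] ... [LFH+fn+data] [CDFH+fn] ... [CDFH+fn] [ECDR]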
static off_t lfh_total_size(const LFH* lfh_le)
{
debug_assert(lfh_le->magic == lfh_magic);
const size_t fn_len = read_le16(&lfh_le->fn_len);
const size_t e_len = read_le16(&lfh_le->e_len);
// note: LFH doesn't have a comment field!
return (off_t)(LFH_SIZE + fn_len + e_len);
}
static void lfh_assemble(LFH* lfh_le,
CompressionMethod method, time_t mtime, u32 crc,
off_t csize, off_t usize, size_t fn_len)
{
const ZipCompressionMethod zip_method = zip_method_for(method);
const u32 fat_mtime = FAT_from_time_t(mtime);
lfh_le->magic = lfh_magic;
lfh_le->x1 = to_le16(0);
lfh_le->flags = to_le16(0);
lfh_le->method = to_le16(zip_method);
lfh_le->fat_mtime = to_le32(fat_mtime);
lfh_le->crc = to_le32(crc);
lfh_le->csize = to_le32(u32_from_larger(csize));
lfh_le->usize = to_le32(u32_from_larger(usize));
lfh_le->fn_len = to_le16(u16_from_larger(fn_len));
lfh_le->e_len = to_le16(0);
}
static void cdfh_decompose(const CDFH* cdfh_le,
CompressionMethod& method, time_t& mtime, u32& crc, off_t& csize, off_t& usize,
const char*& fn, off_t& lfh_ofs, size_t& total_size)
{
const u16 zip_method = read_le16(&cdfh_le->method);
const u32 fat_mtime = read_le32(&cdfh_le->fat_mtime);
crc = read_le32(&cdfh_le->crc);
csize = (off_t)read_le32(&cdfh_le->csize);
usize = (off_t)read_le32(&cdfh_le->usize);
const u16 fn_len = read_le16(&cdfh_le->fn_len);
const u16 e_len = read_le16(&cdfh_le->e_len);
const u16 c_len = read_le16(&cdfh_le->c_len);
lfh_ofs = (off_t)read_le32(&cdfh_le->lfh_ofs);
method = method_for_zip_method((ZipCompressionMethod)zip_method);
mtime = time_t_from_FAT(fat_mtime);
// return 0-terminated copy of filename
const char* fn_src = (const char*)cdfh_le+CDFH_SIZE; // not 0-terminated!
char fn_buf[PATH_MAX];
cpu_memcpy(fn_buf, fn_src, fn_len*sizeof(char));
fn_buf[fn_len] = '\0';
fn = file_make_unique_fn_copy(fn_buf);
total_size = CDFH_SIZE + fn_len + e_len + c_len;
}
static void cdfh_assemble(CDFH* dst_cdfh_le,
CompressionMethod method, time_t mtime, u32 crc,
size_t csize, size_t usize, size_t fn_len, size_t slack, u32 lfh_ofs)
{
const ZipCompressionMethod zip_method = zip_method_for(method);
const u32 fat_mtime = FAT_from_time_t(mtime);
dst_cdfh_le->magic = cdfh_magic;
dst_cdfh_le->x1 = to_le32(0);
dst_cdfh_le->flags = to_le16(0);
dst_cdfh_le->method = to_le16(zip_method);
dst_cdfh_le->fat_mtime = to_le32(fat_mtime);
dst_cdfh_le->crc = to_le32(crc);
dst_cdfh_le->csize = to_le32(u32_from_larger(csize));
dst_cdfh_le->usize = to_le32(u32_from_larger(usize));
dst_cdfh_le->fn_len = to_le16(u16_from_larger(fn_len));
dst_cdfh_le->e_len = to_le16(0);
dst_cdfh_le->c_len = to_le16(u16_from_larger(slack));
dst_cdfh_le->x2 = to_le32(0);
dst_cdfh_le->x3 = to_le32(0);
dst_cdfh_le->lfh_ofs = to_le32(lfh_ofs);
}
static void ecdr_decompose(ECDR* ecdr_le,
uint& cd_entries, off_t& cd_ofs, size_t& cd_size)
{
cd_entries = (uint)read_le16(&ecdr_le->cd_entries);
cd_ofs = (off_t)read_le32(&ecdr_le->cd_ofs);
cd_size = (size_t)read_le32(&ecdr_le->cd_size);
}
static void ecdr_assemble(ECDR* dst_ecdr_le, uint cd_entries, off_t cd_ofs, size_t cd_size)
{
dst_ecdr_le->magic = ecdr_magic;
memset(dst_ecdr_le->x1, 0, sizeof(dst_ecdr_le->x1));
dst_ecdr_le->cd_entries = to_le16(u16_from_larger(cd_entries));
dst_ecdr_le->cd_size = to_le32(u32_from_larger(cd_size));
dst_ecdr_le->cd_ofs = to_le32(u32_from_larger(cd_ofs));
dst_ecdr_le->comment_len = to_le16(0);
}
//-----------------------------------------------------------------------------
// scan for and return a pointer to a Zip record, or 0 if not found.
// <start> is the expected position; we scan from there until EOF for
// the given ID (fourcc). <record_size> bytes (including the ID field)
// must remain before EOF - this makes sure the record is completely
// within the file. used by za_find_ecdr, za_find_cd and zip_populate_archive.
static const u8* za_find_id(const u8* buf, size_t size, const u8* start, u32 magic, size_t record_size)
{
ssize_t bytes_left = (ssize_t)((buf+size) - (u8*)start - record_size);
const u8* p = (const u8*)start;
// don't increment function argument directly,
// so we can warn the user if we had to scan.
while(bytes_left-- >= 0)
{
// found it
if(*(u32*)p == magic)
{
#ifndef NDEBUG
if(p != start)
debug_warn("archive damaged, but still found next record.");
#endif
return p;
}
p++;
// be careful not to increment before comparison;
// magic may already be found at <start>.
}
// passed EOF, didn't find it.
// note: do not warn - this happens in the initial ECDR search at
// EOF if the archive contains a comment field.
return 0;
}
// search for ECDR in the last <max_scan_amount> bytes of the file.
// if found, fill <dst_ecdr> with a copy of the (little-endian) ECDR and
// return INFO::OK, otherwise IO error or ERR::CORRUPTED.
static LibError za_find_ecdr(File* f, size_t max_scan_amount, ECDR* dst_ecdr_le)
{
// don't scan more than the entire file
const size_t file_size = f->size;
const size_t scan_amount = std::min(max_scan_amount, file_size);
// read desired chunk of file into memory
const off_t ofs = (off_t)(file_size - scan_amount);
FileIOBuf buf = FILE_BUF_ALLOC;
ssize_t bytes_read = file_io(f, ofs, scan_amount, &buf);
RETURN_ERR(bytes_read);
debug_assert(bytes_read == (ssize_t)scan_amount);
// look for ECDR in buffer
LibError ret = ERR::CORRUPTED;
const u8* start = (const u8*)buf;
const ECDR* ecdr_le = (const ECDR*)za_find_id(start, bytes_read, start, ecdr_magic, ECDR_SIZE);
if(ecdr_le)
{
*dst_ecdr_le = *ecdr_le;
ret = INFO::OK;
}
file_buf_free(buf);
return ret;
}
static LibError za_find_cd(File* f, uint& cd_entries, off_t& cd_ofs, size_t& cd_size)
{
// sanity check: file size must be > header size.
// (this speeds up determining if the file is a Zip file at all)
const size_t file_size = f->size;
if(file_size < LFH_SIZE+CDFH_SIZE+ECDR_SIZE)
{
completely_bogus:
// this file is definitely not a valid Zip file.
// note: the VFS blindly opens files when mounting; it needs to open
// all archives, but doesn't know their extension (e.g. ".pk3").
// therefore, do not warn user.
return ERR::RES_UNKNOWN_FORMAT; // NOWARN
}
ECDR ecdr_le;
// expected case: ECDR at EOF; no file comment (=> we only need to
// read ECDR_SIZE bytes)
LibError ret = za_find_ecdr(f, ECDR_SIZE, &ecdr_le);
if(ret == INFO::OK)
{
have_ecdr:
ecdr_decompose(&ecdr_le, cd_entries, cd_ofs, cd_size);
return INFO::OK;
}
// last resort: scan last 66000 bytes of file
// (the Zip archive comment field - up to 64k - may follow ECDR).
// if the zip file is < 66000 bytes, scan the whole file.
ret = za_find_ecdr(f, 66000u, &ecdr_le);
if(ret == INFO::OK)
goto have_ecdr;
// both ECDR scans failed - this is not a valid Zip file.
// now see if the beginning of the file holds a valid LFH:
const off_t ofs = 0; const size_t scan_amount = LFH_SIZE;
FileIOBuf buf = FILE_BUF_ALLOC;
ssize_t bytes_read = file_io(f, ofs, scan_amount, &buf);
RETURN_ERR(bytes_read);
debug_assert(bytes_read == (ssize_t)scan_amount);
const bool has_LFH = (za_find_id(buf, scan_amount, buf, lfh_magic, LFH_SIZE) != 0);
file_buf_free(buf);
if(!has_LFH)
goto completely_bogus;
// the Zip file is mostly valid but lacking an ECDR. (can happen if
// user hard-exits while building an archive)
// notes:
// - return ERR::CORRUPTED so VFS will not include this file.
// - we could work around this by scanning all LFHs, but won't bother
// because it'd be slow.
// - do not warn - the corrupt archive will be deleted on next
// successful archive builder run anyway.
return ERR::CORRUPTED; // NOWARN
}
// analyse an opened Zip file; call back into archive.cpp to
// populate the Archive object with a list of the files it contains.
// returns INFO::OK on success, ERR::CORRUPTED if file is recognizable as
// a Zip file but invalid, otherwise ERR::RES_UNKNOWN_FORMAT or IO error.
//
// fairly slow - must read Central Directory from disk
// (size ~= 60 bytes*num_files); observed time ~= 80ms.
LibError zip_populate_archive(File* f, Archive* a)
{
uint cd_entries; off_t cd_ofs; size_t cd_size;
RETURN_ERR(za_find_cd(f, cd_entries, cd_ofs, cd_size));
// call back with the number of entries in the archive (an upper bound
// on the number of valid files; we're not interested in directory entries).
// counting only the files would require a scan through the central dir;
// we instead skip directories later and waste a bit of preallocated memory.
RETURN_ERR(archive_allocate_entries(a, cd_entries));
FileIOBuf buf = FILE_BUF_ALLOC;
RETURN_ERR(file_io(f, cd_ofs, cd_size, &buf));
// iterate through Central Directory
LibError ret = INFO::OK;
const CDFH* cdfh = (const CDFH*)buf;
size_t ofs_to_next_cdfh = 0;
for(uint i = 0; i < cd_entries; i++)
{
// scan for next CDFH (at or beyond current cdfh position)
cdfh = (const CDFH*)((u8*)cdfh + ofs_to_next_cdfh);
cdfh = (CDFH*)za_find_id((const u8*)buf, cd_size, (const u8*)cdfh, cdfh_magic, CDFH_SIZE);
if(!cdfh) // no (further) CDFH found:
{
ret = ERR::CORRUPTED;
break;
}
// copy translated fields from CDFH into ArchiveEntry.
ArchiveEntry ae;
cdfh_decompose(cdfh, ae.method, ae.mtime, ae.checksum, ae.csize, ae.usize, ae.atom_fn, ae.ofs, ofs_to_next_cdfh);
ae.flags = ZIP_LFH_FIXUP_NEEDED;
// if file (we don't care about directories):
if(ae.csize && ae.usize)
{
ret = archive_add_file(a, &ae);
if(ret != INFO::OK)
break;
}
}
file_buf_free(buf);
return ret;
}
//-----------------------------------------------------------------------------
// this code grabs an LFH struct from file block(s) that are
// passed to the callback. usually, one call copies the whole thing,
// but the LFH may straddle a block boundary.
//
// rationale: this allows using temp buffers for zip_fixup_lfh,
// which avoids involving the file buffer manager and thus
// unclutters the trace and cache contents.
struct LFH_Copier
{
u8* lfh_dst;
size_t lfh_bytes_remaining;
};
static LibError lfh_copier_cb(uintptr_t cbData, const u8* block, size_t size, size_t* bytes_processed)
{
LFH_Copier* p = (LFH_Copier*)cbData;
debug_assert(size <= p->lfh_bytes_remaining);
cpu_memcpy(p->lfh_dst, block, size);
p->lfh_dst += size;
p->lfh_bytes_remaining -= size;
*bytes_processed = size;
return INFO::CB_CONTINUE;
}
// ensures <ent.ofs> points to the actual file contents; it is initially
// the offset of the LFH. we cannot use the CDFH's filename and extra field
// lengths to skip past the LFH, since its fields may not mirror the CDFH's
// (this has happened in practice).
//
// this is called at file-open time instead of while mounting to
// reduce seeks: since reading the file will typically follow, the
// block cache entirely absorbs the IO cost.
void zip_fixup_lfh(File* f, ArchiveEntry* ent)
{
// already fixed up - done.
if(!(ent->flags & ZIP_LFH_FIXUP_NEEDED))
return;
// performance note: this ends up reading one file block, which is
// only in the block cache if the file starts in the same block as a
// previously read file (i.e. both are small).
LFH lfh;
LFH_Copier params = { (u8*)&lfh, sizeof(LFH) };
ssize_t ret = file_io(f, ent->ofs, LFH_SIZE, FILE_BUF_TEMP, lfh_copier_cb, (uintptr_t)&params);
debug_assert(ret == sizeof(LFH));
ent->ofs += lfh_total_size(&lfh);
ent->flags &= ~ZIP_LFH_FIXUP_NEEDED;
}
//-----------------------------------------------------------------------------
// archive builder backend
//-----------------------------------------------------------------------------
// rationale: don't support partial adding, i.e. updating archive with
// only one file. this would require overwriting parts of the Zip archive,
// which is annoying and slow. also, archives are usually built in
// seek-optimal order, which would break if we start inserting files.
// while testing, loose files can be used, so there's no loss.
// we don't want to expose ZipArchive to callers,
// (would require defining File, Pool and CDFH)
// so allocate the storage here and return opaque pointer.
struct ZipArchive
{
File f;
off_t cur_file_size;
Pool cdfhs;
uint cd_entries;
CDFH* prev_cdfh;
};
static SingleAllocator<ZipArchive> za_mgr;
// create a new Zip archive and return a pointer for use in subsequent
// zip_archive_add_file calls. previous archive file is overwritten.
LibError zip_archive_create(const char* zip_filename, ZipArchive** pza)
{
// local za_copy simplifies things - if something fails, no cleanup is
// needed. upon success, we copy into the newly allocated real za.
ZipArchive za_copy;
za_copy.cur_file_size = 0;
za_copy.cd_entries = 0;
za_copy.prev_cdfh = 0;
RETURN_ERR(file_open(zip_filename, FILE_WRITE|FILE_NO_AIO, &za_copy.f));
RETURN_ERR(pool_create(&za_copy.cdfhs, 10*MiB, 0));
ZipArchive* za = za_mgr.alloc();
if(!za)
WARN_RETURN(ERR::NO_MEM);
*za = za_copy;
*pza = za;
return INFO::OK;
}
// add a file (described by ArchiveEntry) to the archive. file_contents
// is the actual file data; its compression method is given in ae->method and
// can be CM_NONE.
// IO cost: writes out <file_contents> to disk (we don't currently attempt
// any sort of write-buffering).
LibError zip_archive_add_file(ZipArchive* za, const ArchiveEntry* ae, const u8* file_contents)
{
const size_t fn_len = strlen(ae->atom_fn);
// write (LFH, filename, file contents) to archive
// .. put LFH and filename into one 'package'
LFH_Package header;
lfh_assemble(&header.lfh, ae->method, ae->mtime, ae->checksum, ae->csize, ae->usize, fn_len);
strcpy_s(header.fn, ARRAY_SIZE(header.fn), ae->atom_fn);
// .. write that out in 1 IO
const off_t lfh_ofs = za->cur_file_size;
FileIOBuf buf;
buf = (FileIOBuf)&header;
file_io(&za->f, lfh_ofs, LFH_SIZE+fn_len, &buf);
// .. write out file contents
buf = (FileIOBuf)file_contents;
file_io(&za->f, lfh_ofs+(off_t)(LFH_SIZE+fn_len), ae->csize, &buf);
za->cur_file_size += (off_t)(LFH_SIZE+fn_len+ae->csize);
// append a CDFH to the central dir (in memory)
// .. note: pool_alloc may round size up for padding purposes.
const size_t prev_pos = za->cdfhs.da.pos;
CDFH_Package* p = (CDFH_Package*)pool_alloc(&za->cdfhs, CDFH_SIZE+fn_len);
if(!p)
WARN_RETURN(ERR::NO_MEM);
const size_t slack = za->cdfhs.da.pos-prev_pos - (CDFH_SIZE+fn_len);
cdfh_assemble(&p->cdfh, ae->method, ae->mtime, ae->checksum, ae->csize, ae->usize, fn_len, slack, lfh_ofs);
cpu_memcpy(p->fn, ae->atom_fn, fn_len);
za->cd_entries++;
return INFO::OK;
}
// write out the archive to disk; only hereafter is it valid.
// frees the ZipArchive instance.
// IO cost: writes out Central Directory to disk (about 70 bytes per file).
LibError zip_archive_finish(ZipArchive* za)
{
const size_t cd_size = za->cdfhs.da.pos;
// append an ECDR to the CDFH list (this allows us to
// write out both to the archive file in one burst)
ECDR* ecdr = (ECDR*)pool_alloc(&za->cdfhs, ECDR_SIZE);
if(!ecdr)
WARN_RETURN(ERR::NO_MEM);
ecdr_assemble(ecdr, za->cd_entries, za->cur_file_size, cd_size);
FileIOBuf buf = za->cdfhs.da.base;
file_io(&za->f, za->cur_file_size, cd_size+ECDR_SIZE, &buf);
(void)file_close(&za->f);
(void)pool_destroy(&za->cdfhs);
za_mgr.release(za);
return INFO::OK;
}
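// usage sketch of the builder backend (hypothetical caller; in 0ad the
// actual driver is archive_builder. the fields set below are exactly
// those consumed by zip_archive_add_file above; size/checksum/mtime are
// assumed to have been computed by the caller):
//   ZipArchive* za;
//   RETURN_ERR(zip_archive_create("mods/test.zip", &za));
//   ArchiveEntry ae; memset(&ae, 0, sizeof(ae));
//   ae.atom_fn = "art/readme.txt"; ae.method = CM_NONE;
//   ae.usize = ae.csize = size; ae.checksum = checksum; ae.mtime = mtime;
//   RETURN_ERR(zip_archive_add_file(za, &ae, file_contents));
//   RETURN_ERR(zip_archive_finish(za));  // archive only now valid; za freed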

View File

@ -1,67 +0,0 @@
/**
* =========================================================================
* File : zip.h
* Project : 0 A.D.
* Description : archive backend for Zip files.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_ZIP
#define INCLUDED_ZIP
struct File;
struct Archive;
struct ArchiveEntry;
// analyse an opened Zip file; call back into archive.cpp to
// populate the Archive object with a list of the files it contains.
// returns INFO::OK on success, ERR::CORRUPTED if file is recognizable as
// a Zip file but invalid, otherwise ERR::RES_UNKNOWN_FORMAT or IO error.
//
// fairly slow - must read Central Directory from disk
// (size ~= 60 bytes*num_files); observed time ~= 80ms.
extern LibError zip_populate_archive(File* f, Archive* a);
// ensures <ent.ofs> points to the actual file contents; it is initially
// the offset of the LFH. we cannot use the CDFH's filename and extra
// field lengths to skip past the LFH, since the LFH may not mirror the
// CDFH (this has been observed in practice).
//
// this is called at file-open time instead of while mounting to
// reduce seeks: since reading the file will typically follow, the
// block cache entirely absorbs the IO cost.
extern void zip_fixup_lfh(File* f, ArchiveEntry* ent);
//
// archive builder backend
//
struct ZipArchive; // opaque
// create a new Zip archive and return a pointer for use in subsequent
// zip_archive_add_file calls. previous archive file is overwritten.
extern LibError zip_archive_create(const char* zip_filename, ZipArchive** pza);
// add a file (described by ArchiveEntry) to the archive. file_contents
// is the actual file data; its compression method is given in ae->method and
// can be CM_NONE.
// IO cost: writes out <file_contents> to disk (we don't currently attempt
// any sort of write-buffering).
extern LibError zip_archive_add_file(ZipArchive* za, const ArchiveEntry* ae, const u8* file_contents);
// write out the archive to disk; only hereafter is it valid.
// frees the ZipArchive instance.
// IO cost: writes out Central Directory to disk (about 70 bytes per file).
extern LibError zip_archive_finish(ZipArchive* za);
// for self-test
extern time_t time_t_from_FAT(u32 fat_timedate);
extern u32 FAT_from_time_t(time_t time);
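// for reference, these convert the standard DOS FAT timedate layout:
//   bits 31..25 = year-1980, 24..21 = month, 20..16 = day,
//   bits 15..11 = hour, 10..5 = minute, 4..0 = second/2.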
#endif // #ifndef INCLUDED_ZIP

View File

@ -1,581 +0,0 @@
/**
* =========================================================================
* File : file.cpp
* Project : 0 A.D.
* Description : file layer on top of POSIX. avoids the need for
* : absolute paths.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "file.h"
#include <vector>
#include <algorithm>
#include <string>
#include "lib/posix/posix_filesystem.h"
#include "lib/posix/posix_aio.h"
#include "lib/posix/posix_mman.h"
#include "lib/adts.h"
#include "lib/sysdep/sysdep.h"
#include "lib/byte_order.h"
#include "lib/allocators.h"
#include "file_internal.h"
ERROR_ASSOCIATE(ERR::FILE_ACCESS, "Insufficient access rights to open file", EACCES);
ERROR_ASSOCIATE(ERR::DIR_END, "End of directory reached (no more files)", -1);
ERROR_ASSOCIATE(ERR::FILE_NOT_MAPPED, "File was not mapped", -1);
// rationale for aio, instead of only using mmap:
// - parallelism: instead of just waiting for the transfer to complete,
// other work can be done in the meantime.
// example: decompressing from a Zip archive is practically free,
// because we inflate one block while reading the next.
// - throughput: with aio, the drive always has something to do, as opposed
// to read requests triggered by the OS for mapped files, which come
// in smaller chunks. this leads to much higher transfer rates.
// - memory: when used with VFS, aio makes better use of a file cache.
// data is generally compressed in an archive. a cache should store the
// decompressed and decoded (e.g. TGA colour swapping) data; mmap would
// keep the original, compressed data in memory, which doesn't help.
// we bypass the OS file cache via aio, and store partial blocks here (*);
// higher level routines will cache the actual useful data.
// * requests for part of a block are usually followed by another.
// layer on top of POSIX opendir/readdir/closedir that handles
// portable -> native path conversion, ignores non-file/directory entries,
// and additionally returns the file status (size and mtime).
// rationale: see DirIterator definition in header.
struct PosixDirIterator
{
DIR* os_dir;
// to support stat(), we need to either chdir or store the complete path.
// the former is unacceptable because it isn't thread-safe. therefore,
// we latch dir_open's path and append entry name every dir_next_ent call.
// this is also the storage to which DirEnt.name points!
// PathPackage avoids repeated memory allocs and strlen() overhead.
//
// it can't be stored here directly because then the struct would
// no longer fit in HDATA; we'll allocate it separately.
PathPackage* pp;
};
cassert(sizeof(PosixDirIterator) <= DIR_ITERATOR_OPAQUE_SIZE);
static SingleAllocator<PathPackage> pp_allocator;
// prepare to iterate (once) over entries in the given directory.
// if INFO::OK is returned, <d> is ready for subsequent dir_next_ent calls and
// must be freed via dir_close.
LibError dir_open(const char* P_path, DirIterator* di)
{
PosixDirIterator* pdi = (PosixDirIterator*)di->opaque;
// note: copying to n_path and then pp.path is inefficient but
// more clear/robust. this is only called a few hundred times anyway.
char n_path[PATH_MAX];
RETURN_ERR(file_make_full_native_path(P_path, n_path));
pdi->pp = pp_allocator.alloc();
if(!pdi->pp)
WARN_RETURN(ERR::NO_MEM);
errno = 0;
pdi->os_dir = opendir(n_path);
if(!pdi->os_dir)
return LibError_from_errno();
(void)path_package_set_dir(pdi->pp, n_path);
return INFO::OK;
}
// return ERR::DIR_END if all entries have already been returned once,
// another negative error code, or INFO::OK on success, in which case <ent>
// describes the next (order is unspecified) directory entry.
LibError dir_next_ent(DirIterator* di, DirEnt* ent)
{
PosixDirIterator* pdi = (PosixDirIterator*)di->opaque;
get_another_entry:
errno = 0;
struct dirent* os_ent = readdir(pdi->os_dir);
if(!os_ent)
{
// no error, just no more entries to return
if(!errno)
return ERR::DIR_END; // NOWARN
return LibError_from_errno();
}
// copy os_ent->d_name: we need it for stat() in the !OS_WIN case and
// return it as ent->name (os_ent->d_name is volatile storage).
path_package_append_file(pdi->pp, os_ent->d_name);
const char* name = pdi->pp->end;
// get file information (mode, size, mtime)
struct stat s;
#if OS_WIN
// .. wposix readdir has enough information to return dirent
// status directly (much faster than calling stat).
CHECK_ERR(readdir_stat_np(pdi->os_dir, &s));
#else
// .. call regular stat().
// we need the full pathname for this. don't use path_append because
// it would unnecessarily call strlen.
CHECK_ERR(stat(pdi->pp->path, &s));
#endif
// skip "undesirable" entries that POSIX readdir returns:
if(S_ISDIR(s.st_mode))
{
// .. dummy directory entries ("." and "..")
if(name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
goto get_another_entry;
s.st_size = -1; // our way of indicating it's a directory
}
// .. neither dir nor file
else if(!S_ISREG(s.st_mode))
goto get_another_entry;
ent->size = s.st_size;
ent->mtime = s.st_mtime;
ent->name = name;
return INFO::OK;
}
// indicate the directory iterator is no longer needed; all resources it
// held are freed.
LibError dir_close(DirIterator* di)
{
PosixDirIterator* pdi = (PosixDirIterator*)di->opaque;
pp_allocator.release(pdi->pp);
errno = 0;
if(closedir(pdi->os_dir) < 0)
return LibError_from_errno();
return INFO::OK;
}
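// usage sketch (hypothetical caller; the portable path is an example):
//   DirIterator di;
//   RETURN_ERR(dir_open("art/textures", &di));
//   DirEnt ent;
//   while(dir_next_ent(&di, &ent) == INFO::OK)  // ERR::DIR_END ends the loop
//     debug_printf("%s size=%d\n", ent.name, (int)ent.size); // size -1 => dir
//   (void)dir_close(&di);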
bool dir_exists(const char* P_path)
{
// modified from file_stat_impl - we don't want errors to be raised here.
char N_path[PATH_MAX];
THROW_ERR(file_make_full_native_path(P_path, N_path));
// if path ends in slash, remove it (required by stat)
char* last_char = N_path+strlen(N_path)-1;
if(path_is_dir_sep(*last_char))
*last_char = '\0';
struct stat s;
if(stat(N_path, &s) != 0)
return false;
debug_assert(S_ISDIR(s.st_mode));
return true;
}
LibError dir_create(const char* P_path)
{
char N_path[PATH_MAX];
RETURN_ERR(file_make_full_native_path(P_path, N_path));
struct stat s;
int ret = stat(N_path, &s);
if(ret == 0)
return INFO::ALREADY_EXISTS;
errno = 0;
ret = mkdir(N_path, S_IRWXO|S_IRWXU|S_IRWXG);
return LibError_from_posix(ret);
}
// note: we have to recursively empty the directory before it can
// be deleted (required by Windows and POSIX rmdir()).
LibError dir_delete(const char* P_path)
{
char N_path[PATH_MAX];
RETURN_ERR(file_make_full_native_path(P_path, N_path));
PathPackage N_pp;
RETURN_ERR(path_package_set_dir(&N_pp, N_path));
DirIterator di;
RETURN_ERR(dir_open(P_path, &di));
LibError ret;
for(;;)
{
DirEnt ent;
ret = dir_next_ent(&di, &ent);
if(ret == ERR::DIR_END)
break;
if(ret != INFO::OK) goto fail;
if(DIRENT_IS_DIR(&ent))
{
char P_subdir[PATH_MAX];
ret = path_append(P_subdir, P_path, ent.name);
if(ret != INFO::OK) goto fail;
ret = dir_delete(P_subdir);
if(ret != INFO::OK) goto fail;
}
else
{
ret = path_package_append_file(&N_pp, ent.name);
if(ret != INFO::OK) goto fail;
errno = 0;
int posix_ret = unlink(N_pp.path);
ret = LibError_from_posix(posix_ret);
if(ret != INFO::OK) goto fail;
}
}
// must happen before rmdir
RETURN_ERR(dir_close(&di));
{
errno = 0;
int posix_ret = rmdir(N_path);
return LibError_from_posix(posix_ret);
}
fail:
RETURN_ERR(dir_close(&di));
return ret;
}
// get file information. output param is zeroed on error.
static LibError file_stat_impl(const char* fn, struct stat* s, bool warn_if_failed = true)
{
memset(s, 0, sizeof(struct stat));
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(fn, N_fn));
errno = 0;
int ret = stat(N_fn, s);
return LibError_from_posix(ret, warn_if_failed);
}
LibError file_stat(const char* fn, struct stat* s)
{
return file_stat_impl(fn, s);
}
// does the given file exist? (implemented via file_stat)
bool file_exists(const char* fn)
{
struct stat s;
const bool warn_if_failed = false;
return file_stat_impl(fn, &s, warn_if_failed) == INFO::OK;
}
// permanently delete the file. be very careful with this!
LibError file_delete(const char* fn)
{
char N_fn[PATH_MAX+1];
RETURN_ERR(file_make_full_native_path(fn, N_fn));
errno = 0;
int ret = unlink(N_fn);
return LibError_from_posix(ret);
}
///////////////////////////////////////////////////////////////////////////////
//
// file open/close
// stores information about file (e.g. size) in File struct
//
///////////////////////////////////////////////////////////////////////////////
// interface rationale:
// - this module depends on the handle manager for IO management,
// but should be useable without the VFS (even if they are designed
// to work together).
// - allocating a Handle for the file info would solve several problems
// (see below), but we don't want to allocate 2..3 (VFS, file, Zip file)
// for every file opened - that'd add up quickly.
// the Files are always freed at exit though, since they're part of
// VFile handles in the VFS.
// - we want the VFS open logic to be triggered on file invalidate
// (if the dev. file is deleted, we should use what's in the archives).
// we don't want to make this module depend on VFS, so we don't
// have access to the file location DB; VFS needs to allocate the handle.
// - no problem exposing our internals via File struct -
// we're only used by the VFS and Zip modules. don't bother making
// an opaque struct - that'd have to be kept in sync with the real thing.
// - when Zip opens its archives via file_open, a handle isn't needed -
// the Zip module hides its File struct (required to close the file),
// and the Handle approach doesn't guard against some idiot calling
// close(our_fd_value) directly, either.
struct PosixFile
{
int fd;
// for reference counted memory-mapping
u8* mapping;
uint map_refs;
};
cassert(sizeof(PosixFile) < FILE_OPAQUE_SIZE);
int file_fd_from_PosixFile(File* f)
{
const PosixFile* pf = (const PosixFile*)f->opaque;
return pf->fd;
}
LibError file_validate(const File* f)
{
if(!f)
WARN_RETURN(ERR::INVALID_PARAM);
const PosixFile* pf = (PosixFile*)f->opaque;
if(pf->fd < 0)
WARN_RETURN(ERR::_1);
// mapped but refcount is invalid
else if((pf->mapping != 0) ^ (pf->map_refs != 0))
WARN_RETURN(ERR::_2);
// note: don't check atom_fn - that check would complain at the end of
// file_open whenever flags & FILE_DONT_SET_FN, and has no real benefit.
return INFO::OK;
}
LibError file_open(const char* P_fn, uint flags, File* f)
{
// zero output param in case we fail below.
memset(f, 0, sizeof(*f));
if(flags > FILE_FLAG_ALL)
WARN_RETURN(ERR::INVALID_PARAM);
char N_fn[PATH_MAX];
RETURN_ERR(file_make_full_native_path(P_fn, N_fn));
// don't stat if opening for writing - the file may not exist yet
off_t size = 0;
int oflag = O_RDONLY;
if(flags & FILE_WRITE)
oflag = O_WRONLY|O_CREAT|O_TRUNC;
// read access requested
else
{
// get file size
struct stat s;
if(stat(N_fn, &s) < 0)
WARN_RETURN(ERR::TNODE_NOT_FOUND);
size = s.st_size;
// note: despite increased overhead, the AIO read method is still
// significantly faster, even with small files.
// we therefore don't automatically disable AIO.
// notes:
// - up to 32KB can be read by one SCSI request.
// - flags are stored below and will influence file_io.
//if(size <= 32*KiB)
// flags |= FILE_NO_AIO;
// make sure <N_fn> is a regular file
if(!S_ISREG(s.st_mode))
WARN_RETURN(ERR::TNODE_WRONG_TYPE);
}
#if OS_WIN
if(flags & FILE_TEXT)
oflag |= O_TEXT_NP;
else
oflag |= O_BINARY_NP;
// if AIO is disabled at the user's behest, inform wposix.
if(flags & FILE_NO_AIO)
oflag |= O_NO_AIO_NP;
#endif
int fd = open(N_fn, oflag, S_IRWXO|S_IRWXU|S_IRWXG);
if(fd < 0)
WARN_RETURN(ERR::FILE_ACCESS);
f->flags = flags;
f->size = size;
// see FILE_DONT_SET_FN decl.
if(!(flags & FILE_DONT_SET_FN))
f->atom_fn = file_make_unique_fn_copy(P_fn);
PosixFile* pf = (PosixFile*)f->opaque;
pf->mapping = 0;
pf->map_refs = 0;
pf->fd = fd;
CHECK_FILE(f);
return INFO::OK;
}
LibError file_close(File* f)
{
CHECK_FILE(f);
PosixFile* pf = (PosixFile*)f->opaque;
// make sure the mapping is actually freed,
// regardless of how many references remain.
if(pf->map_refs > 1)
pf->map_refs = 1;
if(pf->mapping) // only free if necessary (unmap complains if not mapped)
file_unmap(f);
// return final file size (required by VFS after writing files).
// this is much easier than updating when writing, because we'd have
// to add accounting code to both (sync and async) paths.
f->size = lseek(pf->fd, 0, SEEK_END);
// (check fd to avoid BoundsChecker warning about invalid close() param)
if(pf->fd != -1)
{
close(pf->fd);
pf->fd = -1;
}
// wipe out any cached blocks. this is necessary to cover the (rare) case
// of file cache contents predating the file write.
if(f->flags & FILE_WRITE)
file_cache_invalidate(f->atom_fn);
return INFO::OK;
}
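// usage sketch for a whole-file read (hypothetical caller; buffer
// conventions are in file_io.h, and file_buf_free is assumed to be the
// companion of file_buf_alloc declared there):
//   File f;
//   RETURN_ERR(file_open("art/test.txt", 0, &f));        // read-only
//   FileIOBuf buf = FILE_BUF_ALLOC;
//   ssize_t bytes = file_io(&f, 0, f.size, &buf, 0, 0);  // no callback
//   if(bytes >= 0)
//     file_buf_free(buf);  // pairs with the alloc done inside file_io
//   (void)file_close(&f);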
///////////////////////////////////////////////////////////////////////////////
//
// memory mapping
//
///////////////////////////////////////////////////////////////////////////////
// no significance aside from preventing uint overflow.
static const uint MAX_MAP_REFS = 255;
// map the entire file <f> into memory. if already currently mapped,
// return the previous mapping (reference-counted).
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
//
// rationale: reference counting is required for zip_map: several
// Zip "mappings" each reference one ZArchive's actual file mapping.
// implement it here so that we also get refcounting for normal files.
LibError file_map(File* f, u8*& p, size_t& size)
{
p = 0;
size = 0;
CHECK_FILE(f);
PosixFile* pf = (PosixFile*)f->opaque;
const int prot = (f->flags & FILE_WRITE)? PROT_WRITE : PROT_READ;
// already mapped - increase refcount and return previous mapping.
if(pf->mapping)
{
// prevent overflow; if we have this many refs, should find out why.
if(pf->map_refs >= MAX_MAP_REFS)
WARN_RETURN(ERR::LIMIT);
pf->map_refs++;
goto have_mapping;
}
// don't allow mapping zero-length files (doesn't make sense,
// and BoundsChecker warns about wposix mmap failing).
// then again, don't complain, because this might happen when mounting
// a dir containing empty files; each is opened as a Zip file.
if(f->size == 0)
return ERR::FAIL; // NOWARN
errno = 0;
pf->mapping = (u8*)mmap(0, f->size, prot, MAP_PRIVATE, pf->fd, (off_t)0);
if(pf->mapping == MAP_FAILED)
return LibError_from_errno();
pf->map_refs = 1;
have_mapping:
p = pf->mapping;
size = f->size;
return INFO::OK;
}
// decrement the reference count for the mapping belonging to file <f>.
// fail if there are no references; remove the mapping if the count reaches 0.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
LibError file_unmap(File* f)
{
CHECK_FILE(f);
PosixFile* pf = (PosixFile*)f->opaque;
// file is not currently mapped
if(pf->map_refs == 0)
WARN_RETURN(ERR::FILE_NOT_MAPPED);
// still more than one reference remaining - done.
if(--pf->map_refs > 0)
return INFO::OK;
// no more references: remove the mapping
u8* p = pf->mapping;
pf->mapping = 0;
// don't clear f->size - the file is still open.
errno = 0;
int ret = munmap(p, f->size);
return LibError_from_posix(ret);
}
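// usage sketch (hypothetical caller; map/unmap calls must be paired):
//   u8* p; size_t size;
//   RETURN_ERR(file_map(&f, p, size));
//   // .. read from p[0 .. size-1]
//   (void)file_unmap(&f);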
LibError file_init()
{
path_init();
file_cache_init();
file_io_init();
// convenience
file_sector_size = sys_max_sector_size();
return INFO::OK;
}
LibError file_shutdown()
{
file_stats_dump();
path_shutdown();
file_io_shutdown();
return INFO::OK;
}

View File

@ -1,357 +0,0 @@
/**
* =========================================================================
* File : file.h
* Project : 0 A.D.
* Description : file layer on top of POSIX. avoids the need for
* : absolute paths and provides fast I/O.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_FILE
#define INCLUDED_FILE
#include "lib/posix/posix_filesystem.h" // struct stat
namespace ERR
{
const LibError FILE_ACCESS = -110000;
const LibError FILE_NOT_MAPPED = -110001;
const LibError DIR_END = -110002;
}
extern LibError file_init();
// used by vfs_redirector to call various file objects' methods.
struct FileProvider_VTbl;
//
// path conversion functions (native <--> portable),
// for external libraries that require the real filename.
//
// replaces '/' with platform's directory separator and vice versa.
// verifies path length < PATH_MAX (otherwise return ERR::PATH_LENGTH).
//
// relative paths (relative to root dir)
extern LibError file_make_native_path(const char* path, char* n_path);
extern LibError file_make_portable_path(const char* n_path, char* path);
// as above, but with full native paths (portable paths are always relative):
// prepends the current directory when converting to native, or verifies
// that the given full path matches it when converting back.
extern LibError file_make_full_native_path(const char* path, char* n_full_path);
extern LibError file_make_full_portable_path(const char* n_full_path, char* path);
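// example (assuming the platform's separator is '\\', as on Windows):
//   file_make_native_path("art/test.txt", n_path)   => "art\\test.txt"
//   file_make_portable_path("art\\test.txt", path)  => "art/test.txt"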
// establish the root directory from <rel_path>, which is treated as
// relative to the executable's directory (determined via argv[0]).
// all relative file paths passed to this module will be based from
// this root dir.
//
// example: executable in "$install_dir/system"; desired root dir is
// "$install_dir/data" => rel_path = "../data".
//
// argv[0] is necessary because the current directory is unknown at startup
// (e.g. it isn't set when invoked via batch file), and this is the
// easiest portable way to find our install directory.
//
// can only be called once, by design (see below). rel_path is trusted.
extern LibError file_set_root_dir(const char* argv0, const char* rel_path);
// allocate a copy of P_fn in our string pool. strings are equal iff
// their addresses are equal, thus allowing fast comparison.
//
// if the (generous) filename storage is full, 0 is returned.
// this is not ever expected to happen; callers need not check the
// return value because a warning is raised anyway.
extern const char* file_make_unique_fn_copy(const char* P_fn);
extern const char* file_get_random_name();
//
// directory
//
const size_t DIR_ITERATOR_OPAQUE_SIZE = 40;
// layer on top of POSIX opendir/readdir/closedir that handles
// portable -> native path conversion, ignores non-file/directory entries,
// and additionally returns the file status (size and mtime).
// directory state initialized by dir_open.
// rationale: some private storage apart from opendir's DIR* is required
// to support stat(). we prefer having the caller reserve room (on the stack)
// rather than allocating dynamically (less efficient or more complicated).
//
// this is an opaque struct to avoid exposing our internals and insulate
// user code against changes; we verify at compile-time that the
// public/private definitions match.
// note: cannot just typedef to DirIterator_ because other modules
// instantiate this.
struct DirIterator
{
// safety check - used to verify correct calling of dir_filtered_next_ent
const char* filter;
// .. has filter been assigned? this flag is necessary because
// there are no "invalid" filter values we can use.
uint filter_latched : 1;
const FileProvider_VTbl* type;
char opaque[DIR_ITERATOR_OPAQUE_SIZE];
};
class TFile;
// information about a directory entry filled by dir_next_ent.
struct DirEnt
{
// we want to keep this as small as possible because
// file_enum allocates one copy for each file in the directory.
// store only required stat fields (in VC's order of decl)
off_t size;
time_t mtime;
// name (not including path!) of this entry.
// valid until a subsequent dir_next_ent or dir_close call for the
// current dir state.
// rationale: we don't want to return a pointer to a copy because
// users would have to free it (won't happen).
const char* name;
const TFile* tf;
};
// return [bool] indicating whether the given DirEnt* (filled by
// dir_next_ent) represents a directory.
#define DIRENT_IS_DIR(p_ent) ((p_ent)->size == -1)
// prepare to iterate (once) over entries in the given directory.
// if INFO::OK is returned, <d> is ready for subsequent dir_next_ent calls and
// must be freed via dir_close.
extern LibError dir_open(const char* P_path, DirIterator* d);
// return ERR::DIR_END if all entries have already been returned once,
// another negative error code, or INFO::OK on success, in which case <ent>
// describes the next (order is unspecified) directory entry.
extern LibError dir_next_ent(DirIterator* d, DirEnt* ent);
// indicate the directory iterator is no longer needed; all resources it
// held are freed.
extern LibError dir_close(DirIterator* d);
extern bool dir_exists(const char* P_path);
extern LibError dir_create(const char* P_path);
extern LibError dir_delete(const char* P_path);
#ifdef __cplusplus
typedef std::vector<DirEnt> DirEnts;
typedef DirEnts::iterator DirEntIt;
typedef DirEnts::const_iterator DirEntCIt;
// enumerate all directory entries in <P_path>; add to container and
// then sort it by filename.
extern LibError file_get_sorted_dirents(const char* P_path, DirEnts& dirents);
#endif // #ifdef __cplusplus
// called by file_enum for each entry in the directory.
// name doesn't include path!
// return INFO::CB_CONTINUE to continue calling; anything else will cause
// file_enum to abort and immediately return that value.
typedef LibError (*FileCB)(const char* name, const struct stat* s, uintptr_t memento, const uintptr_t user);
// call <cb> for each file and subdirectory in <dir> (alphabetical order),
// passing the entry name (not full path!), stat info, and <user>.
//
// first builds a list of entries (sorted) and remembers if an error occurred.
// if <cb> returns non-zero, abort immediately and return that; otherwise,
// return first error encountered while listing files, or 0 on success.
extern LibError file_enum(const char* dir, FileCB cb, uintptr_t user);
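// example callback (hypothetical; counts plain files via <user>):
//   static LibError count_cb(const char* name, const struct stat* s, uintptr_t memento, const uintptr_t user)
//   {
//     if(!S_ISDIR(s->st_mode))
//       ++*(size_t*)user;
//     return INFO::CB_CONTINUE;
//   }
//   size_t num_files = 0;
//   RETURN_ERR(file_enum("art", count_cb, (uintptr_t)&num_files));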
// chosen for a semi-nice total struct File size.
// each implementation checks if this is enough.
const size_t FILE_OPAQUE_SIZE = 52;
// represents an open file of any type (OS, archive, VFS).
// contains common fields and opaque storage for type-specific fields.
//
// this cannot merely be added in a separate VFS layer: it would want to
// share some common fields, which either requires this approach
// (one publically visible struct with space for private storage), or
// a common struct layout / embedding a FileCommon struct at
// the beginning. the latter is a bit messy since fields must be accessed
// as e.g. af->fc.flags. one shared struct also makes for a common
// interface.
struct File
{
uint flags;
off_t size;
// copy of the filename that is uniquely identified by its address.
// used as key for file cache.
// NOTE: not set by file_open! (because the path passed there is
// a native path; it has no use within VFS and would only
// unnecessarily clutter the filename storage)
const char* atom_fn;
// can be 0 if not currently in use; otherwise, points to
// the file provider's vtbl.
const FileProvider_VTbl* type;
// storage for the provider-specific fields.
// the implementations cast this to their e.g. PosixFile struct.
//
// note: when doing so, there's no need to verify type - if
// vfs_io dispatches to afile_read, then the File.type must obviously
// have been "archive".
// if users call the e.g. archive.h methods directly, we assume they
// know what they're doing and don't check that.
u8 opaque[FILE_OPAQUE_SIZE];
};
// note: these are all set during file_open and cannot be changed thereafter.
enum FileFlags
{
// IO:
// ------------------------------------------------------------------------
// write-only access; otherwise, read only.
//
// unless FILE_NO_AIO is set, data that is to be written must be
// aligned and padded to a multiple of file_sector_size bytes;
// this requirement avoids the need for align buffers.
//
// note: only allowing either reads or writes simplifies file cache
// coherency (need only invalidate when closing a FILE_WRITE file).
FILE_WRITE = 0x01,
// translate newlines: convert from/to native representation when
// reading/writing. this is useful if files we create need to be
// edited externally - e.g. Notepad requires \r\n.
// caveats:
// - FILE_NO_AIO must be set; translation is done by OS read()/write().
// - not supported by POSIX, so this currently only has meaning on Win32.
FILE_TEXT = 0x02,
// skip the aio path and use the OS-provided synchronous blocking
// read()/write() calls. this avoids the need for buffer alignment
// set out below, so it's useful for writing small text files.
FILE_NO_AIO = 0x04,
// caching:
// ------------------------------------------------------------------------
// do not add the (entire) contents of this file to the cache.
// this flag should be specified when the data is cached at a higher
// level (e.g. OpenGL textures) to avoid wasting precious cache space.
FILE_CACHED_AT_HIGHER_LEVEL = 0x10,
// enable caching individual blocks read from a file. the block cache
// is small, organized as LRU and incurs some copying overhead, so it
// should only be enabled when needed. this is the case for archives,
// where the cache absorbs overhead of block-aligning all IOs.
FILE_CACHE_BLOCK = 0x20,
// notify us that the file buffer returned by file_io will not be
// freed immediately (i.e. before the next allocation).
// allocation policy may differ and a warning is suppressed.
FILE_LONG_LIVED = 0x40,
// misc:
// ------------------------------------------------------------------------
// instruct file_open not to set FileCommon.atom_fn.
// this is a slight optimization used by VFS code: file_open
// would store the portable name, which is only used when calling
// the OS's open(); this would unnecessarily waste atom_fn memory.
//
// note: other file.cpp functions require atom_fn to be set,
// so this behavior is only triggered via flag (caller is
// promising they will set atom_fn).
FILE_DONT_SET_FN = 0x80,
// (only relevant for VFS) file will be written into the
// appropriate subdirectory of the mount point established by
// vfs_set_write_target. see documentation there.
FILE_WRITE_TO_TARGET = FILE_WRITE|0x100,
// sum of all flags above. used when validating flag parameters.
FILE_FLAG_ALL = 0x1FF
};
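// example combinations (hypothetical callers):
//   file_open(fn, 0, &f);                                // plain read via aio
//   file_open(fn, FILE_WRITE|FILE_NO_AIO|FILE_TEXT, &f); // small text output
// (FILE_TEXT requires FILE_NO_AIO, as noted above.)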
// get file information. output param is zeroed on error.
extern LibError file_stat(const char* path, struct stat*);
// does the given file exist? (implemented via file_stat)
extern bool file_exists(const char* fn);
// permanently delete the file. be very careful with this!
extern LibError file_delete(const char* fn);
// <tf> is ignored here.
// rationale: all file providers' open() routines should ideally take the
// same parameters. since afile_open requires archive Handle and
// memento, we need some way of passing them; TFile is sufficient
// (via vfs_tree accessor methods).
extern LibError file_open(const char* fn, uint flags, File* f);
// note: final file size is calculated and returned in f->size.
// see implementation for rationale.
extern LibError file_close(File* f);
extern LibError file_validate(const File* f);
#define CHECK_FILE(f) RETURN_ERR(file_validate(f))
// remove all blocks loaded from the file <fn>. used when reloading the file.
extern LibError file_cache_invalidate(const char* fn);
#include "file_io.h"
//
// memory mapping
//
// useful for files that are too large to be loaded into memory,
// or if only (non-sequential) portions of a file are needed at a time.
// map the entire file <f> into memory. if already currently mapped,
// return the previous mapping (reference-counted).
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
//
// rationale: reference counting is required for zip_map: several
// Zip "mappings" each reference one ZArchive's actual file mapping.
// implement it here so that we also get refcounting for normal files.
extern LibError file_map(File* f, u8*& p, size_t& size);
// decrement the reference count for the mapping belonging to file <f>.
// fail if there are no references; remove the mapping if the count reaches 0.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
extern LibError file_unmap(File* f);
extern LibError file_shutdown();
#endif // #ifndef INCLUDED_FILE

File diff suppressed because it is too large

View File

@ -1,107 +0,0 @@
/**
* =========================================================================
* File : file_cache.h
* Project : 0 A.D.
* Description : cache for entire files and I/O blocks. also allocates
* : file buffers, allowing zero-copy I/O.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_FILE_CACHE
#define INCLUDED_FILE_CACHE
#include "file.h" // FileIOBuf
struct BlockId
{
const char* atom_fn;
u32 block_num;
};
extern bool block_eq(BlockId b1, BlockId b2);
// create an id for use with the cache that uniquely identifies
// the block from the file <atom_fn> starting at <ofs>.
extern BlockId block_cache_make_id(const char* atom_fn, const off_t ofs);
extern void* block_cache_alloc(BlockId id);
extern void block_cache_mark_completed(BlockId id);
extern void* block_cache_find(BlockId id);
extern void block_cache_release(BlockId id);
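// typical lifecycle, mirroring file_io's IOManager (sketch):
//   BlockId id = block_cache_make_id(atom_fn, ofs);
//   void* p = block_cache_find(id);
//   if(p)                         // hit: use the block, then drop the ref
//     block_cache_release(id);
//   else                          // miss: read into a fresh block
//   {
//     p = block_cache_alloc(id);
//     // .. issue IO into p; once it completes and is processed:
//     block_cache_mark_completed(id);
//   }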
// interpret file_io parameters (pbuf, size, flags, cb) and allocate a
// file buffer if necessary.
// called by file_io and afile_read.
extern LibError file_io_get_buf(FileIOBuf* pbuf, size_t size,
const char* atom_fn, uint flags, FileIOCB cb);
// inform us that the buffer address will be increased by <padding>-bytes.
// this happens when reading uncompressed files from archive: they
// start at unaligned offsets and file_io rounds offset down to
// next block boundary. the buffer therefore starts with padding, which
// is skipped so the user only sees their data.
// we make note of the new buffer address so that the buffer can still be
// freed correctly when the padded address is later passed back to us.
extern void file_buf_add_padding(FileIOBuf exact_buf, size_t exact_size, size_t padding);
// if buf is not in extant list, complain; otherwise, mark it as
// coming from the file <atom_fn>.
// this is needed in the following case: uncompressed reads from archive
// boil down to a file_io of the archive file. the buffer is therefore
// tagged with the archive filename instead of the desired filename.
// afile_read sets things right by calling this.
extern LibError file_buf_set_real_fn(FileIOBuf buf, const char* atom_fn);
// if file_cache_add-ing the given buffer, would it be added?
// this is referenced by trace_entry_causes_io; see explanation there.
extern bool file_cache_would_add(size_t size, const char* atom_fn,
uint file_flags);
// "give" <buf> to the cache, specifying its size and owner filename.
// since this data may be shared among users of the cache, it is made
// read-only (via MMU) to make sure no one can corrupt/change it.
//
// note: the reference added by file_buf_alloc still exists! it must
// still be file_buf_free-d after calling this.
extern LibError file_cache_add(FileIOBuf buf, size_t size,
const char* atom_fn, uint file_flags);
// check if the contents of the file <atom_fn> are in file cache.
// if not, return 0; otherwise, return buffer address and optionally
// pass back its size.
//
// note: does not call stats_cache because it does not know the file size
// in case of cache miss! doing so is left to the caller.
extern FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* psize, uint fb_flags = 0);
// invalidate all data loaded from the file <fn>. this ensures the next
// load of this file gets the (presumably new) contents of the file,
// not previous stale cache contents.
// call after hotloading code detects file has been changed.
extern LibError file_cache_invalidate(const char* P_fn);
// reset entire state of the file cache to what it was after initialization.
// that means completely emptying the extant list and cache.
// used after simulating cache operation, which fills the cache with
// invalid data.
extern void file_cache_reset();
extern void file_cache_init();
extern void file_cache_shutdown();
// test access mechanism
extern void* file_cache_allocator_alloc(size_t size);
extern void file_cache_allocator_free(void* p, size_t size);
extern void file_cache_allocator_reset();
#endif // #ifndef INCLUDED_FILE_CACHE

View File

@ -1,75 +0,0 @@
/**
* =========================================================================
* File : file_internal.h
* Project : 0 A.D.
* Description : master (private) header for all file code.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "lib/path_util.h"
#include "path.h"
#include "file.h"
#include "file_cache.h"
#include "file_io.h"
#include "file_stats.h" // must come after file and file_cache
#include "archive/compression.h"
#include "archive/zip.h"
#include "archive/archive.h"
#include "archive/archive_builder.h"
#include "archive/trace.h"
#include "archive/vfs_optimizer.h"
#include "vfs.h"
#include "vfs_mount.h"
#include "vfs_tree.h"
#include "vfs_redirector.h"
const size_t AIO_SECTOR_SIZE = 512;
// block := power-of-two sized chunk of a file.
// all transfers are expanded to naturally aligned, whole blocks
// (this makes caching parts of files feasible; it is also much faster
// for some aio implementations, e.g. wposix).
//
// this is not exposed to users because it's an implementation detail and
// they shouldn't care.
//
// measurements show this value to yield best read throughput.
const size_t FILE_BLOCK_SIZE = 32*KiB;
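// example: a read at ofs=0x9000 of size=0x5000 has misalign=0x1000;
// it is expanded to start_ofs=0x8000 and
// size = round_up(0x1000+0x5000, 0x8000) = 0x8000, i.e. one whole,
// naturally aligned block. the leading 0x1000 padding bytes are skipped
// before data is returned to the caller (see IOManager::prepare/wait).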
// helper routine used by functions that call back to a FileIOCB.
//
// bytes_processed is 0 if the return value is not one of
// { INFO::OK, INFO::CB_CONTINUE }.
// note: don't abort if bytes_processed = 0: the zip callback may not
// produce any output if passed very little data.
extern LibError file_io_call_back(const u8* block, size_t size,
FileIOCB cb, uintptr_t cbData, size_t& bytes_processed);
// retrieve the next (order is unspecified) dir entry matching <filter>.
// return 0 on success, ERR::DIR_END if no matching entry was found,
// or a negative error code on failure.
// filter values:
// - 0: anything;
// - "/": any subdirectory;
// - "/|<pattern>": any subdirectory, or as below with <pattern>;
// - <pattern>: any file whose name matches; ? and * wildcards are allowed.
//
// note that the directory entries are only scanned once; after the
// end is reached (-> ERR::DIR_END returned), no further entries can
// be retrieved, even if filter changes (which shouldn't happen - see impl).
//
// rationale: we do not sort directory entries alphabetically here.
// most callers don't need it and the overhead is considerable
// (we'd have to store all entries in a vector). it is left up to
// higher-level code such as VfsUtil.
extern LibError dir_filtered_next_ent(DirIterator* di, DirEnt* ent, const char* filter);
// returns file descriptor (int) given File (assumed to represent PosixFile).
// this avoids the need for declaring PosixFile here for file_io's use.
extern int file_fd_from_PosixFile(File* f);

View File

@ -1,656 +0,0 @@
/**
* =========================================================================
* File : file_io.cpp
* Project : 0 A.D.
* Description : provide fast I/O via POSIX aio and splitting into blocks.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "file_io.h"
#include <deque>
#include <boost/shared_ptr.hpp>
#include "lib/posix/posix_aio.h"
#include "lib/bits.h"
#include "lib/allocators.h"
#include "lib/adts.h"
#include "file_internal.h"
ERROR_ASSOCIATE(ERR::IO, "Error during IO", EIO);
ERROR_ASSOCIATE(ERR::IO_EOF, "Reading beyond end of file", -1);
//-----------------------------------------------------------------------------
// async I/O
//-----------------------------------------------------------------------------
struct PosixFileIo
{
void* cb; // aiocb
};
cassert(sizeof(PosixFileIo) <= FILE_IO_OPAQUE_SIZE);
// we don't do any caching or alignment here - this is just a thin AIO wrapper.
// rationale:
// - aligning the transfer isn't possible here since we have no control
// over the buffer, i.e. we cannot read more data than requested.
// instead, this is done in file_io.
// - transfer sizes here are arbitrary (viz. not block-aligned);
// that means the cache would have to handle this or also split them up
// into blocks, which is redundant (already done by file_io).
// - if caching here, we'd also have to handle "forwarding" (i.e.
// desired block has been issued but isn't yet complete). again, it
// is easier to let the synchronous file_io manager handle this.
// - finally, file_io knows more about whether the block should be cached
// (e.g. whether another block request will follow), but we don't
// currently make use of this.
//
// disadvantages:
// - streamed data will always be read from disk. no problem, because
// such data (e.g. music, long speech) is unlikely to be used again soon.
// - prefetching (issuing the next few blocks from archive/file during
// idle time to satisfy potential future IOs) requires extra buffers;
// this is a bit more complicated than just using the cache as storage.
// FileIO must reference an aiocb, which is used to pass IO params to the OS.
// unfortunately it is 144 bytes on Linux - too much to put in FileIO,
// since that is stored in a 'resource control block' (see h_mgr.h).
// we therefore allocate dynamically, but via suballocator to avoid
// hitting the heap on every IO.
class AiocbAllocator
{
Pool pool;
public:
void init()
{
(void)pool_create(&pool, 32*sizeof(aiocb), sizeof(aiocb));
}
void shutdown()
{
(void)pool_destroy(&pool);
}
aiocb* alloc()
{
return (aiocb*)pool_alloc(&pool, 0);
}
// weird name to avoid trouble with mem tracker macros
// (renaming is less annoying than #include "lib/nommgr.h")
void free_(void* cb)
{
pool_free(&pool, cb);
}
};
static AiocbAllocator aiocb_allocator;
// starts transferring to/from the given buffer.
// no attempt is made at aligning or padding the transfer.
LibError file_io_issue(File* f, off_t ofs, size_t size, u8* p, FileIo* io)
{
debug_printf("FILE| issue ofs=0x%X size=0x%X\n", ofs, size);
// zero output param in case we fail below.
memset(io, 0, sizeof(FileIo));
// check params
CHECK_FILE(f);
if(!size || !p || !io)
WARN_RETURN(ERR::INVALID_PARAM);
const bool is_write = (f->flags & FILE_WRITE) != 0;
PosixFileIo* pio = (PosixFileIo*)io;
// note: cutting off at EOF is necessary to avoid transfer errors,
// but makes size no longer sector-aligned, which would force
// waio to realign (slow). we want to pad back to sector boundaries
// afterwards (to avoid realignment), but that is not possible here
// since we have no control over the buffer (there might not be
// enough room in it). hence, do cut-off in IOManager.
//
// example: 200-byte file. IOManager issues (large) blocks;
// that ends up way beyond EOF, so ReadFile fails.
// limiting size to 200 bytes works, but causes waio to pad the
// transfer and use align buffer (slow).
// rounding up to 512 bytes avoids realignment and does not fail
// (apparently since NTFS files are sector-padded anyway?)
// (we can't store the whole aiocb directly - glibc's version is
// 144 bytes large)
aiocb* cb = aiocb_allocator.alloc();
pio->cb = cb;
if(!cb)
WARN_RETURN(ERR::NO_MEM);
memset(cb, 0, sizeof(*cb));
// send off async read/write request
cb->aio_lio_opcode = is_write? LIO_WRITE : LIO_READ;
cb->aio_buf = (volatile void*)p;
cb->aio_fildes = file_fd_from_PosixFile(f);
cb->aio_offset = ofs;
cb->aio_nbytes = size;
int err = lio_listio(LIO_NOWAIT, &cb, 1, (struct sigevent*)0);
if(err < 0)
{
debug_printf("lio_listio: %d, %d[%s]\n", err, errno, strerror(errno));
(void)file_io_discard(io);
return LibError_from_errno();
}
const BlockId disk_pos = block_cache_make_id(f->atom_fn, ofs);
stats_io_check_seek(disk_pos.atom_fn, disk_pos.block_num);
return INFO::OK;
}
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
int file_io_has_completed(FileIo* io)
{
PosixFileIo* pio = (PosixFileIo*)io;
aiocb* cb = (aiocb*)pio->cb;
int ret = aio_error(cb);
if(ret == EINPROGRESS)
return 0;
if(ret == 0)
return 1;
WARN_RETURN(ERR::FAIL);
}
LibError file_io_wait(FileIo* io, u8*& p, size_t& size)
{
PosixFileIo* pio = (PosixFileIo*)io;
// debug_printf("FILE| wait io=%p\n", io);
// zero output params in case something (e.g. H_DEREF) fails.
p = 0;
size = 0;
aiocb* cb = (aiocb*)pio->cb;
// wait for transfer to complete.
const aiocb** cbs = (const aiocb**)&cb; // pass in an "array"
while(aio_error(cb) == EINPROGRESS)
aio_suspend(cbs, 1, (timespec*)0); // wait indefinitely
// query number of bytes transferred (-1 if the transfer failed)
const ssize_t bytes_transferred = aio_return(cb);
// debug_printf("FILE| bytes_transferred=%d aio_nbytes=%u\n", bytes_transferred, cb->aio_nbytes);
// see if actual transfer count matches requested size.
// note: most callers clamp to EOF but round back up to sector size
// (see explanation in file_io_issue).
debug_assert(bytes_transferred >= (ssize_t)(cb->aio_nbytes-AIO_SECTOR_SIZE));
p = (u8*)cb->aio_buf; // cast from volatile void*
size = bytes_transferred;
return INFO::OK;
}
LibError file_io_discard(FileIo* io)
{
PosixFileIo* pio = (PosixFileIo*)io;
memset(pio->cb, 0, sizeof(aiocb)); // prevent further use.
aiocb_allocator.free_(pio->cb);
pio->cb = 0;
return INFO::OK;
}
LibError file_io_validate(const FileIo* io)
{
PosixFileIo* pio = (PosixFileIo*)io;
const aiocb* cb = (const aiocb*)pio->cb;
// >= 0x100 is not necessarily bogus, but suspicious.
// this also catches negative values.
if((uint)cb->aio_fildes >= 0x100)
WARN_RETURN(ERR::_1);
if(debug_is_pointer_bogus((void*)cb->aio_buf))
WARN_RETURN(ERR::_2);
if(cb->aio_lio_opcode != LIO_WRITE && cb->aio_lio_opcode != LIO_READ && cb->aio_lio_opcode != LIO_NOP)
WARN_RETURN(ERR::_3);
// all other aiocb fields have no invariants we could check.
return INFO::OK;
}
//-----------------------------------------------------------------------------
// sync I/O
//-----------------------------------------------------------------------------
// set from sys_max_sector_size(); see documentation there.
size_t file_sector_size;
// the underlying aio implementation likes buffer and offset to be
// sector-aligned; if not, the transfer goes through an align buffer,
// and requires an extra cpu_memcpy.
//
// if the user specifies an unaligned buffer, there's not much we can
// do - we can't assume the buffer contains padding. therefore,
// callers should let us allocate the buffer if possible.
//
// if the offset's misalignment equals the buffer's, only the first and
// last blocks will need to be copied by aio, since we read up to the
// next block boundary. otherwise, everything will have to be copied;
// at least we split the read into blocks, so aio's buffer won't have
// to cover the whole file.
// helper routine used by functions that call back to a FileIOCB.
//
// bytes_processed is 0 if the return value is not one of
// { INFO::OK, INFO::CB_CONTINUE }.
// note: don't abort if bytes_processed = 0: the zip callback may not
// produce any output if passed very little data.
LibError file_io_call_back(const u8* block, size_t size,
FileIOCB cb, uintptr_t cbData, size_t& bytes_processed)
{
if(cb)
{
stats_cb_start();
LibError ret = cb(cbData, block, size, &bytes_processed);
stats_cb_finish();
// failed - reset byte count in case callback didn't
if(ret != INFO::OK && ret != INFO::CB_CONTINUE)
bytes_processed = 0;
CHECK_ERR(ret); // user might not have raised a warning; make sure
return ret;
}
// no callback to process data: raw = actual
else
{
bytes_processed = size;
return INFO::CB_CONTINUE;
}
}
// interpret file_io parameters (pbuf, size, flags, cb) and allocate a
// file buffer if necessary.
// called by file_io and afile_read.
LibError file_io_get_buf(FileIOBuf* pbuf, size_t size,
const char* atom_fn, uint file_flags, FileIOCB cb)
{
// decode *pbuf - exactly one of these is true
const bool temp = (pbuf == FILE_BUF_TEMP);
const bool alloc = !temp && (*pbuf == FILE_BUF_ALLOC);
const bool user = !temp && !alloc;
const bool is_write = (file_flags & FILE_WRITE) != 0;
const uint fb_flags = (file_flags & FILE_LONG_LIVED)? FB_LONG_LIVED : 0;
// reading into temp buffers - ok.
if(!is_write && temp && cb != 0)
return INFO::OK;
// reading and want buffer allocated.
if(!is_write && alloc)
{
*pbuf = file_buf_alloc(size, atom_fn, fb_flags);
if(!*pbuf) // very unlikely (size totally bogus or cache hosed)
WARN_RETURN(ERR::NO_MEM);
return INFO::OK;
}
// writing from user-specified buffer - ok
if(is_write && user)
return INFO::OK;
WARN_RETURN(ERR::INVALID_PARAM);
}
class IOManager
{
File* f;
bool is_write;
bool no_aio;
FileIOCB cb;
uintptr_t cbData;
off_t start_ofs;
FileIOBuf* pbuf;
size_t user_size;
size_t ofs_misalign;
size_t size;
// (useful, raw data: possibly compressed, but doesn't count padding)
size_t total_issued;
size_t total_transferred;
// if callback, sum of what it reports; otherwise, = total_transferred
// this is what we'll return.
size_t total_processed;
struct IOSlot
{
FileIo io;
const void* cached_block;
BlockId block_id;
// needed so that we can add the block to the cache when
// its IO is complete. if we add it when issuing, we'd no longer be
// thread-safe: someone else might find it in the cache before its
// transfer has completed. don't want to add an "is_complete" flag,
// because that'd be hard to update (on every wait_io).
void* temp_buf;
IOSlot()
{
reset();
}
void reset()
{
memset(&io, 0, sizeof(io));
cached_block = 0;
memset(&block_id, 0, sizeof(block_id));
temp_buf = 0;
}
};
static const uint MAX_PENDING_IOS = 4;
//RingBuf<IOSlot, MAX_PENDING_IOS> queue;
std::deque<IOSlot> queue;
// stop issuing and processing as soon as this changes
LibError err;
ssize_t lowio()
{
const int fd = file_fd_from_PosixFile(f);
lseek(fd, start_ofs, SEEK_SET);
// emulate temp buffers - we take care of allocating and freeing.
u8* dst;
boost::shared_ptr<u8> dstMem;
if(pbuf == FILE_BUF_TEMP)
{
dstMem.reset((u8*)page_aligned_alloc(size), PageAlignedDeleter(size));
dst = dstMem.get();
}
else
dst = (u8*)*pbuf; // WARNING: FileIOBuf is nominally const; if that's ever enforced, this may need to change.
const ssize_t total_transferred = is_write? write(fd, dst, size) : read(fd, dst, size);
if(total_transferred < 0)
return LibError_from_errno();
size_t total_processed;
LibError ret = file_io_call_back(dst, total_transferred, cb, cbData, total_processed);
RETURN_ERR(ret);
return (ssize_t)total_processed;
}
// align and pad the IO to FILE_BLOCK_SIZE
// (reduces work for AIO implementation).
LibError prepare()
{
ofs_misalign = 0;
size = user_size;
if(!is_write && !no_aio)
{
// note: we go to the trouble of aligning the first block (instead of
// just reading up to the next block and letting aio realign it),
// so that it can be taken from the cache.
// this is not possible if we don't allocate the buffer because
// extra space must be added for the padding.
ofs_misalign = start_ofs % FILE_BLOCK_SIZE;
start_ofs -= (off_t)ofs_misalign;
size = round_up(ofs_misalign + user_size, FILE_BLOCK_SIZE);
// but cut off at EOF (necessary to prevent IO error).
const off_t bytes_left = f->size - start_ofs;
if(bytes_left < 0)
WARN_RETURN(ERR::IO_EOF);
size = std::min(size, (size_t)bytes_left);
// and round back up to sector size.
// see rationale in file_io_issue.
size = round_up(size, AIO_SECTOR_SIZE);
}
RETURN_ERR(file_io_get_buf(pbuf, size, f->atom_fn, f->flags, cb));
return INFO::OK;
}
void issue(IOSlot& slot)
{
const off_t ofs = start_ofs+(off_t)total_issued;
// for both reads and writes, do not issue beyond end of file/data
const size_t issue_size = std::min(FILE_BLOCK_SIZE, size - total_issued);
// try to grab whole blocks (so we can put them in the cache).
// any excess data (can only be within first or last) is
// discarded in wait().
// check if in cache
slot.block_id = block_cache_make_id(f->atom_fn, ofs);
slot.cached_block = block_cache_find(slot.block_id);
if(!slot.cached_block)
{
void* buf;
// if using buffer, set position in it; otherwise, use temp buffer
if(pbuf == FILE_BUF_TEMP)
buf = slot.temp_buf = block_cache_alloc(slot.block_id);
else
buf = (char*)*pbuf + total_issued;
LibError ret = file_io_issue(f, ofs, issue_size, (u8*)buf, &slot.io);
// transfer failed - loop will now terminate after
// waiting for all pending transfers to complete.
if(ret != INFO::OK)
err = ret;
}
total_issued += issue_size;
}
void wait(IOSlot& slot, u8*& block, size_t& block_size)
{
// get completed block address/size
if(slot.cached_block)
{
block = (u8*)slot.cached_block;
block_size = FILE_BLOCK_SIZE;
}
// .. wasn't in cache; it was issued, so wait for it
else
{
LibError ret = file_io_wait(&slot.io, block, block_size);
if(ret < 0)
err = ret;
}
// special forwarding path: copy into the block cache from the
// user's buffer. this is necessary to efficiently support direct
// IO of uncompressed files in archives.
// note: must occur before skipping padding below.
if(!slot.cached_block && pbuf != FILE_BUF_TEMP && f->flags & FILE_CACHE_BLOCK)
{
slot.temp_buf = block_cache_alloc(slot.block_id);
cpu_memcpy(slot.temp_buf, block, block_size);
// block_cache_mark_completed will be called in process()
}
// first time; skip past padding
if(total_transferred == 0)
{
block = (u8*)block + ofs_misalign;
block_size -= ofs_misalign;
}
// last time: don't include trailing padding
if(total_transferred + block_size > user_size)
block_size = user_size - total_transferred;
// we have useable data from a previous temp buffer,
// but it needs to be copied into the user's buffer
if(slot.cached_block && pbuf != FILE_BUF_TEMP)
cpu_memcpy((char*)*pbuf+ofs_misalign+total_transferred, block, block_size);
total_transferred += block_size;
}
void process(IOSlot& slot, u8* block, size_t block_size, FileIOCB cb, uintptr_t cbData)
{
if(err == INFO::CB_CONTINUE)
{
size_t bytes_processed;
err = file_io_call_back(block, block_size, cb, cbData, bytes_processed);
if(err == INFO::CB_CONTINUE || err == INFO::OK)
total_processed += bytes_processed;
// else: processing failed.
// loop will now terminate after waiting for all
// pending transfers to complete.
}
if(slot.cached_block)
block_cache_release(slot.block_id);
else
{
file_io_discard(&slot.io);
if(slot.temp_buf)
block_cache_mark_completed(slot.block_id);
}
}
ssize_t aio()
{
again:
{
// data remaining to transfer, and no error:
// start transferring next block.
if(total_issued < size && err == INFO::CB_CONTINUE && queue.size() < MAX_PENDING_IOS)
{
queue.push_back(IOSlot());
IOSlot& slot = queue.back();
issue(slot);
goto again;
}
// IO pending: wait for it to complete, and process it.
if(!queue.empty())
{
IOSlot& slot = queue.front();
u8* block; size_t block_size;
wait(slot, block, block_size);
process(slot, block, block_size, cb, cbData);
queue.pop_front();
goto again;
}
}
// (all issued OR error) AND no pending transfers - done.
debug_assert(total_issued >= total_transferred && total_transferred >= user_size);
return (ssize_t)total_processed;
}
public:
IOManager(File* f_, off_t ofs_, size_t size_, FileIOBuf* pbuf_,
FileIOCB cb_, uintptr_t cbData_)
{
f = f_;
is_write = (f->flags & FILE_WRITE ) != 0;
no_aio = (f->flags & FILE_NO_AIO) != 0;
cb = cb_;
cbData = cbData_;
start_ofs = ofs_;
user_size = size_;
pbuf = pbuf_;
total_issued = 0;
total_transferred = 0;
total_processed = 0;
err = INFO::CB_CONTINUE;
}
// now we read the file in FILE_BLOCK_SIZE (32 KiB) chunks, N-buffered.
// if reading from Zip, inflate while reading the next block.
ssize_t run()
{
RETURN_ERR(prepare());
const FileIOImplentation fi = no_aio? FI_LOWIO : FI_AIO;
const FileOp fo = is_write? FO_WRITE : FO_READ;
double start_time = 0.0;
stats_io_sync_start(&start_time);
ssize_t bytes_transferred = no_aio? lowio() : aio();
stats_io_sync_finish(fi, fo, bytes_transferred, &start_time);
// we allocated the memory: skip any leading padding
if(pbuf != FILE_BUF_TEMP && !is_write)
{
FileIOBuf org_buf = *pbuf;
*pbuf = (u8*)org_buf + ofs_misalign;
if(ofs_misalign || size != user_size)
file_buf_add_padding(org_buf, size, ofs_misalign);
}
if(err != INFO::CB_CONTINUE && err != INFO::OK)
return (ssize_t)err;
return bytes_transferred;
}
}; // IOManager
// transfer <size> bytes, starting at <ofs>, to/from the given file.
// (read or write access was chosen at file-open time).
//
// if non-NULL, <cb> is called for each block transferred, passing <cbData>.
// it returns how much data was actually transferred, or a negative error
// code (in which case we abort the transfer and return that value).
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return number of bytes transferred (see above), or a negative error code.
ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf,
FileIOCB cb, uintptr_t cbData) // optional
{
CHECK_FILE(f);
// note: do not update stats/trace here: this includes Zip IOs,
// which shouldn't be reported.
IOManager mgr(f, ofs, size, pbuf, cb, cbData);
return mgr.run();
}
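// example: processing a file block-by-block without retaining a buffer
// (sketch; the byte-sum "checksum" is a stand-in for real processing):
//   static LibError sum_cb(uintptr_t cbData, const u8* block, size_t size, size_t* bytes_processed)
//   {
//     u32* sum = (u32*)cbData;
//     for(size_t i = 0; i < size; i++) *sum += block[i];
//     *bytes_processed = size;
//     return INFO::CB_CONTINUE;
//   }
//   u32 sum = 0;
//   ssize_t ret = file_io(&f, 0, f.size, FILE_BUF_TEMP, sum_cb, (uintptr_t)&sum);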
void file_io_init()
{
aiocb_allocator.init();
}
void file_io_shutdown()
{
aiocb_allocator.shutdown();
}

View File

@ -1,149 +0,0 @@
/**
* =========================================================================
* File : file_io.h
* Project : 0 A.D.
* Description : provide fast I/O via POSIX aio and splitting into blocks.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_FILE_IO
#define INCLUDED_FILE_IO
struct FileProvider_VTbl;
struct File;
namespace ERR
{
const LibError IO = -110100;
const LibError IO_EOF = -110101;
}
extern void file_io_init();
extern void file_io_shutdown();
//
// asynchronous IO
//
// this is a thin wrapper on top of the system AIO calls.
// IOs are carried out exactly as requested - there is no caching or
// alignment done here. rationale: see source.
// opaque storage size, again chosen for nice alignment; each user checks that it is big enough.
const size_t FILE_IO_OPAQUE_SIZE = 28;
struct FileIo
{
const FileProvider_VTbl* type;
u8 opaque[FILE_IO_OPAQUE_SIZE];
};
// queue the IO; it begins after the previous ones (if any) complete.
//
// rationale: this interface is more convenient than implicitly advancing a
// file pointer because archive.cpp often accesses random offsets.
extern LibError file_io_issue(File* f, off_t ofs, size_t size, u8* buf, FileIo* io);
// indicates if the given IO has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
extern int file_io_has_completed(FileIo* io);
// wait for the given IO to complete. passes back its buffer and size.
extern LibError file_io_wait(FileIo* io, u8*& p, size_t& size);
// indicates the IO's buffer is no longer needed and frees that memory.
extern LibError file_io_discard(FileIo* io);
extern LibError file_io_validate(const FileIo* io);
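// usage sketch (illustrative): issue an asynchronous read, overlap other
// work with it, then retrieve the result. <f> must be open and <buf>
// sufficiently large and aligned for the underlying aio implementation.
//
//   FileIo io;
//   RETURN_ERR(file_io_issue(f, ofs, size, buf, &io));
//   // ... other work while the transfer is in flight ...
//   u8* p; size_t bytes;
//   RETURN_ERR(file_io_wait(&io, p, bytes));
//   // ... use p[0..bytes) ...
//   (void)file_io_discard(&io);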
//
// synchronous IO
//
extern size_t file_sector_size;
// called by file_io after a block IO has completed.
// *bytes_processed must be set; file_io will return the sum of these values.
// example: when reading compressed data and decompressing in the callback,
// indicate #bytes decompressed.
// return value: INFO::CB_CONTINUE to continue calling; anything else:
// abort immediately and return that.
// note: in situations where the entire IO is not split into blocks
// (e.g. when reading from cache or not using AIO), this is still called but
// for the entire IO. we do not split into fake blocks because it is
// advantageous (e.g. for decompressors) to have all data at once, if available
// anyway.
typedef LibError (*FileIOCB)(uintptr_t cbData, const u8* block, size_t size, size_t* bytes_processed);
typedef const u8* FileIOBuf;
FileIOBuf* const FILE_BUF_TEMP = (FileIOBuf*)1;
const FileIOBuf FILE_BUF_ALLOC = (FileIOBuf)2;
enum FileBufFlags
{
// indicates the buffer will not be freed immediately
// (i.e. before the next buffer alloc) as it normally should.
// this flag serves to suppress a warning and better avoid fragmentation.
// caller sets this when FILE_LONG_LIVED is specified.
//
// also used by file_cache_retrieve because it may have to
// 'reactivate' the buffer (transfer from cache to extant list),
// which requires knowing whether the buffer is long-lived or not.
FB_LONG_LIVED = 1,
// statistics (e.g. # buffer allocs) should not be updated.
// (useful for simulation, e.g. trace_entry_causes_io)
FB_NO_STATS = 2,
// file_cache_retrieve should not update item credit.
// (useful when just looking up buffer given atom_fn)
FB_NO_ACCOUNTING = 4,
// memory will be allocated from the heap, not the (limited) file cache.
// this makes sense for write buffers that are never used again,
// because we avoid having to displace some other cached items.
FB_FROM_HEAP = 8
};
// allocate a new buffer of <size> bytes (possibly more due to internal
// fragmentation). never returns 0.
// <atom_fn>: owner filename (buffer is intended to be used for data from
// this file).
extern FileIOBuf file_buf_alloc(size_t size, const char* atom_fn, uint fb_flags = 0);
// mark <buf> as no longer needed. if its reference count drops to 0,
// it will be removed from the extant list. if it had been added to the
// cache, it remains there until evicted in favor of another buffer.
extern LibError file_buf_free(FileIOBuf buf, uint fb_flags = 0);
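// usage sketch (illustrative; <data> and <size> are assumed inputs):
// allocate a buffer for data belonging to <atom_fn>, fill it, then
// release our reference.
//
//   FileIOBuf buf = file_buf_alloc(size, atom_fn);
//   cpu_memcpy((void*)buf, data, size);
//   (void)file_buf_free(buf);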
// transfer <size> bytes, starting at <ofs>, to/from the given file.
// (read or write access was chosen at file-open time).
//
// if non-NULL, <cb> is called for each block transferred, passing <cbData>.
// it returns how much data was actually transferred, or a negative error
// code (in which case we abort the transfer and return that value).
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return number of bytes transferred (see above), or a negative error code.
extern ssize_t file_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb = 0, uintptr_t cbData = 0);
extern ssize_t file_read_from_cache(const char* atom_fn, off_t ofs, size_t size,
FileIOBuf* pbuf, FileIOCB cb, uintptr_t cbData);
extern LibError file_io_get_buf(FileIOBuf* pbuf, size_t size,
const char* atom_fn, uint file_flags, FileIOCB cb);
#endif // #ifndef INCLUDED_FILE_IO

View File

@ -1,349 +0,0 @@
/**
* =========================================================================
* File : file_stats.cpp
* Project : 0 A.D.
* Description : gathers statistics from all file modules.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "file_stats.h"
#include <set>
#include "lib/timer.h"
#include "file_internal.h"
typedef std::set<const char*> AtomFnSet;
typedef std::pair<AtomFnSet::iterator, bool> PairIB;
// vfs
static uint vfs_files;
static size_t vfs_size_total;
static double vfs_init_elapsed_time;
// file
static uint unique_names;
static size_t unique_name_len_total;
static uint open_files_cur, open_files_max; // total = opened_files.size()
static double opened_file_size_total;
static AtomFnSet opened_files;
// file_buf
static uint extant_bufs_cur, extant_bufs_max, extant_bufs_total;
static double buf_user_size_total, buf_padded_size_total;
// file_io
static uint user_ios;
static double user_io_size_total;
static double io_actual_size_total[FI_MAX_IDX][2];
static double io_elapsed_time[FI_MAX_IDX][2];
static double io_process_time_total;
static uint io_seeks;
// file_cache
static uint cache_count[2];
static double cache_size_total[2];
static AtomFnSet ever_cached_files;
static uint conflict_misses;
static double conflict_miss_size_total;
static uint block_cache_count[2];
// archive builder
static uint ab_connection_attempts; // total number of trace entries
static uint ab_repeated_connections; // how many of these were not unique
// convenience functions for measuring elapsed time in an interval.
// by exposing start/finish calls, we spare callers the cost of
// querying timestamps when stats are disabled.
static double start_time;
static void timer_start(double* start_time_storage = &start_time)
{
// make sure no measurement is currently active
// (since start_time is shared static storage)
debug_assert(*start_time_storage == 0.0);
*start_time_storage = get_time();
}
static double timer_reset(double* start_time_storage = &start_time)
{
double elapsed = get_time() - *start_time_storage;
*start_time_storage = 0.0;
return elapsed;
}
//-----------------------------------------------------------------------------
//
// vfs
//
void stats_vfs_file_add(size_t file_size)
{
vfs_files++;
vfs_size_total += file_size;
}
void stats_vfs_file_remove(size_t file_size)
{
vfs_files--;
vfs_size_total -= file_size;
}
void stats_vfs_init_start()
{
timer_start();
}
void stats_vfs_init_finish()
{
vfs_init_elapsed_time += timer_reset();
}
//
// file
//
void stats_unique_name(size_t name_len)
{
unique_names++;
unique_name_len_total += name_len;
}
void stats_open(const char* atom_fn, size_t file_size)
{
open_files_cur++;
open_files_max = std::max(open_files_max, open_files_cur);
PairIB ret = opened_files.insert(atom_fn);
// hadn't been opened yet
if(ret.second)
opened_file_size_total += file_size;
}
void stats_close()
{
debug_assert(open_files_cur > 0);
open_files_cur--;
}
//
// file_buf
//
void stats_buf_alloc(size_t user_size, size_t padded_size)
{
extant_bufs_cur++;
extant_bufs_max = std::max(extant_bufs_max, extant_bufs_cur);
extant_bufs_total++;
buf_user_size_total += user_size;
buf_padded_size_total += padded_size;
}
void stats_buf_free()
{
debug_assert(extant_bufs_cur > 0);
extant_bufs_cur--;
}
void stats_buf_ref()
{
extant_bufs_cur++;
}
//
// file_io
//
void stats_io_user_request(size_t user_size)
{
user_ios++;
user_io_size_total += user_size;
}
// these bracket file_io's IOManager::run and measure effective throughput.
// note: cannot be called from aio issue/finish because IOManager's
// decompression may cause us to miss the exact end of IO, thus throwing off
// throughput measurements.
void stats_io_sync_start(double* start_time_storage)
{
timer_start(start_time_storage);
}
void stats_io_sync_finish(FileIOImplementation fi, FileOp fo, ssize_t user_size, double* start_time_storage)
{
debug_assert(fi < FI_MAX_IDX);
debug_assert(fo == FO_READ || fo == FO_WRITE);
// ignore IOs that failed (nothing we can do)
if(user_size > 0)
{
io_actual_size_total[fi][fo] += user_size;
io_elapsed_time[fi][fo] += timer_reset(start_time_storage);
}
}
void stats_io_check_seek(const char* atom_fn, u32 block_num)
{
static const char* last_atom_fn;
static u32 last_block_num;
// makes debugging ("why are there seeks") a bit nicer by suppressing
// the first (bogus) seek.
if(!last_atom_fn)
goto dont_count_first_seek;
if(atom_fn != last_atom_fn || // different file OR
block_num != last_block_num+1) // nonsequential
io_seeks++;
dont_count_first_seek:
last_atom_fn = atom_fn;
last_block_num = block_num;
}
void stats_cb_start()
{
timer_start();
}
void stats_cb_finish()
{
io_process_time_total += timer_reset();
}
//
// file_cache
//
void stats_cache(CacheRet cr, size_t size, const char* atom_fn)
{
debug_assert(cr == CR_HIT || cr == CR_MISS);
if(cr == CR_MISS)
{
PairIB ret = ever_cached_files.insert(atom_fn);
if(!ret.second) // was already cached once
{
conflict_miss_size_total += size;
conflict_misses++;
}
}
cache_count[cr]++;
cache_size_total[cr] += size;
}
void stats_block_cache(CacheRet cr)
{
debug_assert(cr == CR_HIT || cr == CR_MISS);
block_cache_count[cr]++;
}
//
// archive builder
//
void stats_ab_connection(bool already_exists)
{
ab_connection_attempts++;
if(already_exists)
ab_repeated_connections++;
}
//-----------------------------------------------------------------------------
template<typename T> int percent(T num, T divisor)
{
if(!divisor)
return 0;
return (int)(100*num / divisor);
}
void file_stats_dump()
{
if(!debug_filter_allows("FILE_STATS|"))
return;
const double KB = 1e3; const double MB = 1e6; const double ms = 1e-3;
debug_printf("--------------------------------------------------------------------------------\n");
debug_printf("File statistics:\n");
// note: we split the reports into several debug_printfs for clarity;
// this is necessary anyway due to fixed-size buffer.
debug_printf(
"\nvfs:\n"
"Total files: %u (%g MB)\n"
"Init/mount time: %g ms\n",
vfs_files, vfs_size_total/MB,
vfs_init_elapsed_time/ms
);
debug_printf(
"\nfile:\n"
"Total names: %u (%u KB)\n"
"Accessed files: %u (%g MB) -- %u%% of data set\n"
"Max. concurrent: %u; leaked: %u.\n",
unique_names, unique_name_len_total/1000,
opened_files.size(), opened_file_size_total/MB, percent(opened_files.size(), (size_t)vfs_files),
open_files_max, open_files_cur
);
debug_printf(
"\nfile_buf:\n"
"Total buffers used: %u (%g MB)\n"
"Max concurrent: %u; leaked: %u\n"
"Internal fragmentation: %d%%\n",
extant_bufs_total, buf_user_size_total/MB,
extant_bufs_max, extant_bufs_cur,
percent(buf_padded_size_total-buf_user_size_total, buf_user_size_total)
);
debug_printf(
"\nfile_io:\n"
"Total user load requests: %u (%g MB)\n"
"IO thoughput [MB/s; 0=never happened]:\n"
" lowio: R=%.3g, W=%.3g\n"
" aio: R=%.3g, W=%.3g\n"
"Average size = %g KB; seeks: %u; total callback time: %g ms\n"
"Total data actually read from disk = %g MB\n",
user_ios, user_io_size_total/MB,
#define THROUGHPUT(impl, op) (io_elapsed_time[impl][op] == 0.0)? 0.0 : (io_actual_size_total[impl][op] / io_elapsed_time[impl][op] / MB)
THROUGHPUT(FI_LOWIO, FO_READ), THROUGHPUT(FI_LOWIO, FO_WRITE),
THROUGHPUT(FI_AIO , FO_READ), THROUGHPUT(FI_AIO , FO_WRITE),
user_io_size_total/user_ios/KB, io_seeks, io_process_time_total/ms,
(io_actual_size_total[FI_LOWIO][FO_READ]+io_actual_size_total[FI_AIO][FO_READ])/MB
);
debug_printf(
"\nfile_cache:\n"
"Hits: %u (%g MB); misses %u (%g MB); ratio: %u%%\n"
"Percent of requested bytes satisfied by cache: %u%%; non-compulsory misses: %u (%u%% of misses)\n"
"Block hits: %u; misses: %u; ratio: %u%%\n",
cache_count[CR_HIT], cache_size_total[CR_HIT]/MB, cache_count[CR_MISS], cache_size_total[CR_MISS]/MB, percent(cache_count[CR_HIT], cache_count[CR_HIT]+cache_count[CR_MISS]),
percent(cache_size_total[CR_HIT], cache_size_total[CR_HIT]+cache_size_total[CR_MISS]), conflict_misses, percent(conflict_misses, cache_count[CR_MISS]),
block_cache_count[CR_HIT], block_cache_count[CR_MISS], percent(block_cache_count[CR_HIT], block_cache_count[CR_HIT]+block_cache_count[CR_MISS])
);
debug_printf(
"\nvfs_optimizer:\n"
"Total trace entries: %u; repeated connections: %u; unique files: %u\n",
ab_connection_attempts, ab_repeated_connections, ab_connection_attempts-ab_repeated_connections
);
}

View File

@ -1,82 +0,0 @@
/**
* =========================================================================
* File : file_stats.h
* Project : 0 A.D.
* Description : gathers statistics from all file modules.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_FILE_STATS
#define INCLUDED_FILE_STATS
#define FILE_STATS_ENABLED 1
enum FileIOImplementation { FI_LOWIO, FI_AIO, FI_BCACHE, FI_MAX_IDX };
enum FileOp { FO_READ, FO_WRITE };
enum CacheRet { CR_HIT, CR_MISS };
#if FILE_STATS_ENABLED
// vfs
extern void stats_vfs_file_add(size_t file_size);
extern void stats_vfs_file_remove(size_t file_size);
extern void stats_vfs_init_start();
extern void stats_vfs_init_finish();
// file
extern void stats_unique_name(size_t name_len);
extern void stats_open(const char* atom_fn, size_t file_size);
extern void stats_close();
// file_buf
extern void stats_buf_alloc(size_t user_size, size_t padded_size);
extern void stats_buf_free();
extern void stats_buf_ref();
// file_io
extern void stats_io_user_request(size_t user_size);
extern void stats_io_sync_start(double* start_time_storage);
extern void stats_io_sync_finish(FileIOImplementation fi, FileOp fo, ssize_t user_size, double* start_time_storage);
extern void stats_io_check_seek(const char* atom_fn, u32 block_num);
extern void stats_cb_start();
extern void stats_cb_finish();
// file_cache
extern void stats_cache(CacheRet cr, size_t size, const char* atom_fn);
extern void stats_block_cache(CacheRet cr);
// archive builder
extern void stats_ab_connection(bool already_exists);
extern void file_stats_dump();
#else
#define stats_vfs_file_add(file_size)
#define stats_vfs_file_remove(file_size)
#define stats_vfs_init_start()
#define stats_vfs_init_finish()
#define stats_unique_name(name_len)
#define stats_open(atom_fn, file_size)
#define stats_close()
#define stats_buf_alloc(user_size, padded_size)
#define stats_buf_free()
#define stats_buf_ref()
#define stats_io_user_request(user_size)
#define stats_io_sync_start(start_time_storage)
#define stats_io_sync_finish(fi, fo, user_size, start_time_storage)
#define stats_io_check_seek(atom_fn, block_num)
#define stats_cb_start()
#define stats_cb_finish()
#define stats_cache(cr, size, atom_fn)
#define stats_block_cache(cr)
#define stats_ab_connection(already_exists)
#define file_stats_dump()
#endif
#endif // #ifndef INCLUDED_FILE_STATS

View File

@ -1,319 +0,0 @@
/**
* =========================================================================
* File : file_util.cpp
* Project : 0 A.D.
* Description : utility functions for file and path handling modules
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include <queue>
#include "lib/regex.h"
#include "file_internal.h"
static bool dirent_less(const DirEnt& d1, const DirEnt& d2)
{
return strcmp(d1.name, d2.name) < 0;
}
// enumerate all directory entries in <P_path>; add to container and
// then sort it by filename.
LibError file_get_sorted_dirents(const char* P_path, DirEnts& dirents)
{
DirIterator d;
RETURN_ERR(dir_open(P_path, &d));
dirents.reserve(50); // preallocate for efficiency
DirEnt ent;
for(;;)
{
LibError ret = dir_next_ent(&d, &ent);
if(ret == ERR::DIR_END)
break;
RETURN_ERR(ret);
ent.name = file_make_unique_fn_copy(ent.name);
dirents.push_back(ent);
}
std::sort(dirents.begin(), dirents.end(), dirent_less);
(void)dir_close(&d);
return INFO::OK;
}
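// usage sketch (illustrative; example_* helper is hypothetical): print a
// directory's entries in alphabetical order.
static LibError example_print_sorted(const char* P_path)
{
	DirEnts dirents;
	RETURN_ERR(file_get_sorted_dirents(P_path, dirents));
	for(DirEntCIt it = dirents.begin(); it != dirents.end(); ++it)
		debug_printf("%s\n", it->name);
	return INFO::OK;
}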
// call <cb> for each file and subdirectory in <dir> (alphabetical order),
// passing the entry name (not full path!), stat info, and <user>.
//
// first builds a list of entries (sorted) and remembers if an error occurred.
// if <cb> returns non-zero, abort immediately and return that; otherwise,
// return first error encountered while listing files, or 0 on success.
//
// rationale:
// this makes file_enum and zip_enum slightly incompatible, since zip_enum
// returns the full path. that's necessary because VFS zip_cb
// has no other way of determining what VFS dir a Zip file is in,
// since zip_enum enumerates all files in the archive (not only those
// in a given dir). no big deal though, since add_ent has to
// special-case Zip files anyway.
// the advantage here is simplicity, and sparing callbacks the trouble
// of converting from/to native path (we just give 'em the dirent name).
LibError file_enum(const char* P_path, const FileCB cb, const uintptr_t user)
{
LibError stat_err = INFO::OK; // first error encountered by stat()
LibError cb_err = INFO::OK; // first error returned by cb
DirEnts dirents;
RETURN_ERR(file_get_sorted_dirents(P_path, dirents));
// call back for each entry (now sorted);
// first, expand each DirEnt to full struct stat (we store as such to
// reduce memory use and therefore speed up sorting)
struct stat s;
memset(&s, 0, sizeof(s));
// .. not needed for plain files (OS opens them; memento doesn't help)
const uintptr_t memento = 0;
for(DirEntCIt it = dirents.begin(); it != dirents.end(); ++it)
{
const DirEnt& dirent = *it;
s.st_mode = (dirent.size == -1)? S_IFDIR : S_IFREG;
s.st_size = dirent.size;
s.st_mtime = dirent.mtime;
LibError ret = cb(dirent.name, &s, memento, user);
if(ret != INFO::CB_CONTINUE)
{
cb_err = ret; // first error (since we now abort)
break;
}
}
if(cb_err != INFO::OK)
return cb_err;
return stat_err;
}
// retrieve the next (order is unspecified) dir entry matching <filter>.
// return 0 on success, ERR::DIR_END if no matching entry was found,
// or a negative error code on failure.
// filter values:
// - 0: anything;
// - "/": any subdirectory;
// - "/|<pattern>": any subdirectory, or as below with <pattern>;
// - <pattern>: any file whose name matches; ? and * wildcards are allowed.
//
// note that the directory entries are only scanned once; after the
// end is reached (-> ERR::DIR_END returned), no further entries can
// be retrieved, even if filter changes (which shouldn't happen - see impl).
//
// rationale: we do not sort directory entries alphabetically here.
// most callers don't need it and the overhead is considerable
// (we'd have to store all entries in a vector). it is left up to
// higher-level code such as VfsUtil.
LibError dir_filtered_next_ent(DirIterator* di, DirEnt* ent, const char* filter)
{
// warn if scanning the directory twice with different filters
// (this used to work with dir/file because they were stored separately).
// it is imaginable that someone will want to change it, but until
// there's a good reason, leave this check in. note: only comparing
// pointers isn't 100% certain, but it's safe enough and easy.
if(!di->filter_latched)
{
di->filter = filter;
di->filter_latched = 1;
}
if(di->filter != filter)
debug_warn("filter has changed for this directory. are you scanning it twice?");
bool want_dir = true;
if(filter)
{
// directory
if(filter[0] == '/')
{
// .. and also files
if(filter[1] == '|')
filter += 2;
}
// file only
else
want_dir = false;
}
// loop until ent matches what is requested, or end of directory.
for(;;)
{
RETURN_ERR(xdir_next_ent(di, ent));
if(DIRENT_IS_DIR(ent))
{
if(want_dir)
break;
}
else
{
// (note: filter = 0 matches anything)
if(match_wildcard(ent->name, filter))
break;
}
}
return INFO::OK;
}
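// usage sketch (illustrative; helper is hypothetical): enumerate every
// subdirectory and every *.png file in an already-opened directory.
// the loop ends when xdir_next_ent reports ERR::DIR_END.
static void example_enum_png(DirIterator* di)
{
	DirEnt ent;
	while(dir_filtered_next_ent(di, &ent, "/|*.png") == INFO::OK)
		debug_printf("%s\n", ent.name);
}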
// call <cb> for each entry matching <user_filter> (see vfs_next_dirent) in
// directory <path>; if flags & VFS_DIR_RECURSIVE, entries in
// subdirectories are also returned.
//
// note: EnumDirEntsCB path and ent are only valid during the callback.
LibError vfs_dir_enum(const char* start_path, uint flags, const char* user_filter,
DirEnumCB cb, uintptr_t cbData)
{
debug_assert((flags & ~(VFS_DIR_RECURSIVE)) == 0);
const bool recursive = (flags & VFS_DIR_RECURSIVE) != 0;
char filter_buf[PATH_MAX];
const char* filter = user_filter;
bool user_filter_wants_dirs = true;
if(user_filter)
{
if(user_filter[0] != '/')
user_filter_wants_dirs = false;
// we need subdirectories and the caller hasn't already requested them
if(recursive && !user_filter_wants_dirs)
{
snprintf(filter_buf, sizeof(filter_buf), "/|%s", user_filter);
filter = filter_buf;
}
}
// note: FIFO queue instead of recursion is much more efficient
// (less stack usage; avoids seeks by reading all entries in a
// directory consecutively)
std::queue<const char*> dir_queue;
dir_queue.push(file_make_unique_fn_copy(start_path));
// for each directory:
do
{
// get current directory path from queue
// note: can't refer to the queue contents - those are invalidated
// as soon as a directory is pushed onto it.
PathPackage pp;
(void)path_package_set_dir(&pp, dir_queue.front());
dir_queue.pop();
Handle hdir = vfs_dir_open(pp.path);
if(hdir <= 0)
{
debug_warn("vfs_open_dir failed");
continue;
}
// for each entry (file, subdir) in directory:
DirEnt ent;
while(vfs_dir_next_ent(hdir, &ent, filter) == 0)
{
// build complete path (DirEnt only stores entry name)
(void)path_package_append_file(&pp, ent.name);
const char* atom_path = file_make_unique_fn_copy(pp.path);
if(DIRENT_IS_DIR(&ent))
{
if(recursive)
dir_queue.push(atom_path);
if(user_filter_wants_dirs)
cb(atom_path, &ent, cbData);
}
else
cb(atom_path, &ent, cbData);
}
vfs_dir_close(hdir);
}
while(!dir_queue.empty());
return INFO::OK;
}
// fill V_next_fn (which must be big enough for PATH_MAX chars) with
// the next numbered filename according to the pattern defined by V_fn_fmt.
// <nfi> must be initially zeroed (e.g. by defining as static) and passed
// each time.
// if <use_vfs> (default), the paths are treated as VFS paths; otherwise,
// file.cpp's functions are used. this is necessary because one of
// our callers needs a filename for VFS archive files.
//
// this function is useful when creating new files which are not to
// overwrite the previous ones, e.g. screenshots.
// example for V_fn_fmt: "screenshots/screenshot%04d.png".
void next_numbered_filename(const char* fn_fmt,
NextNumberedFilenameInfo* nfi, char* next_fn, bool use_vfs)
{
// (first call only:) scan directory and set next_num according to
// highest matching filename found. this avoids filling "holes" in
// the number series due to deleted files, which could be confusing.
// example: add 1st and 2nd; [exit] delete 1st; [restart]
// add 3rd -> without this measure it would get number 1, not 3.
if(nfi->next_num == 0)
{
char dir[PATH_MAX];
path_dir_only(fn_fmt, dir);
const char* name_fmt = path_name_only(fn_fmt);
int max_num = -1; int num;
DirEnt ent;
if(use_vfs)
{
Handle hd = vfs_dir_open(dir);
if(hd > 0)
{
while(vfs_dir_next_ent(hd, &ent, 0) == INFO::OK)
{
if(!DIRENT_IS_DIR(&ent) && sscanf(ent.name, name_fmt, &num) == 1)
max_num = std::max(num, max_num);
}
(void)vfs_dir_close(hd);
}
}
else
{
DirIterator it;
if(dir_open(dir, &it) == INFO::OK)
{
while(dir_next_ent(&it, &ent) == INFO::OK)
if(!DIRENT_IS_DIR(&ent) && sscanf(ent.name, name_fmt, &num) == 1)
max_num = std::max(num, max_num);
(void)dir_close(&it);
}
}
nfi->next_num = max_num+1;
}
bool (*exists)(const char* fn) = use_vfs? vfs_exists : file_exists;
// now increment number until that file doesn't yet exist.
// this is fairly slow, but typically only happens once due
// to scan loop above. (we still need to provide for looping since
// someone may have added files in the meantime)
// binary search isn't expected to improve things.
do
snprintf(next_fn, PATH_MAX, fn_fmt, nfi->next_num++);
while(exists(next_fn));
}
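// usage sketch (illustrative; helper is hypothetical, format string taken
// from the example above; relies on use_vfs defaulting to true):
static void example_next_screenshot(char* V_next_fn)	// >= PATH_MAX chars
{
	static NextNumberedFilenameInfo nfi;	// zeroed once, as required
	next_numbered_filename("screenshots/screenshot%04d.png", &nfi, V_next_fn);
}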

View File

@ -1,313 +0,0 @@
/**
* =========================================================================
* File : path.cpp
* Project : 0 A.D.
* Description : helper functions for VFS paths.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "path.h"
#include <string.h>
#include "lib/posix/posix_filesystem.h"
#include "lib/adts.h"
#include "lib/rand.h"
#include "lib/allocators.h"
#include "lib/sysdep/sysdep.h"
#include "lib/module_init.h"
#include "file_internal.h"
ERROR_ASSOCIATE(ERR::ROOT_DIR_ALREADY_SET, "Attempting to set FS root dir more than once", -1);
// path types:
// p_*: posix (e.g. mount object name or for open())
// v_*: vfs (e.g. mount point)
// fn : filename only (e.g. from readdir)
// dir_name: directory only, no path (e.g. subdir name)
//
// all paths must be relative (no leading '/'); components are separated
// by '/'; no ':', '\\', "." or ".." allowed; root dir is "".
//
// grammar:
// path ::= dir*file?
// dir ::= name/
// file ::= name
// name ::= [^/]
enum Conversion
{
TO_NATIVE,
TO_PORTABLE
};
static LibError convert_path(char* dst, const char* src, Conversion conv = TO_NATIVE)
{
// SYS_DIR_SEP is assumed to be a single character!
const char* s = src;
char* d = dst;
char from = SYS_DIR_SEP, to = '/';
if(conv == TO_NATIVE)
from = '/', to = SYS_DIR_SEP;
size_t len = 0;
for(;;)
{
len++;
if(len >= PATH_MAX)
WARN_RETURN(ERR::PATH_LENGTH);
char c = *s++;
if(c == from)
c = to;
*d++ = c;
// end of string - done
if(c == '\0')
return INFO::OK;
}
}
// set by file_set_root_dir
static char n_root_dir[PATH_MAX];
static size_t n_root_dir_len;
// return the native equivalent of the given relative portable path
// (i.e. convert all '/' to the platform's directory separator)
// makes sure length < PATH_MAX.
LibError file_make_native_path(const char* path, char* n_path)
{
return convert_path(n_path, path, TO_NATIVE);
}
// return the portable equivalent of the given relative native path
// (i.e. convert the platform's directory separators to '/')
// makes sure length < PATH_MAX.
LibError file_make_portable_path(const char* n_path, char* path)
{
return convert_path(path, n_path, TO_PORTABLE);
}
// return the native equivalent of the given portable path
// (i.e. convert all '/' to the platform's directory separator).
// also prepends current directory => n_full_path is absolute.
// makes sure length < PATH_MAX.
LibError file_make_full_native_path(const char* path, char* n_full_path)
{
debug_assert(path != n_full_path); // doesn't work in-place
strcpy_s(n_full_path, PATH_MAX, n_root_dir);
return convert_path(n_full_path+n_root_dir_len, path, TO_NATIVE);
}
// return the portable equivalent of the given relative native path
// (i.e. convert the platform's directory separators to '/')
// n_full_path is absolute; if it doesn't match the current dir, fail.
// (note: portable paths are always relative to the file root dir).
// makes sure length < PATH_MAX.
LibError file_make_full_portable_path(const char* n_full_path, char* path)
{
debug_assert(path != n_full_path); // doesn't work in-place
if(strncmp(n_full_path, n_root_dir, n_root_dir_len) != 0)
WARN_RETURN(ERR::TNODE_NOT_FOUND);
return convert_path(path, n_full_path+n_root_dir_len, TO_PORTABLE);
}
// security check: only allow attempting to chdir once, so that malicious
// code cannot circumvent the VFS checks that disallow access to anything
// above the current directory (set here).
// this routine is called early at startup, so any subsequent attempts
// are likely bogus.
// we provide for resetting this from the self-test to allow clean
// re-init of the individual tests.
static bool root_dir_established;
// establish the root directory from <rel_path>, which is treated as
// relative to the executable's directory (determined via argv[0]).
// all relative file paths passed to this module will be based from
// this root dir.
//
// example: executable in "$install_dir/system"; desired root dir is
// "$install_dir/data" => rel_path = "../data".
//
// argv[0] is necessary because the current directory is unknown at startup
// (e.g. it isn't set when invoked via batch file), and this is the
// easiest portable way to find our install directory.
//
// can only be called once, by design (see below). rel_path is trusted.
LibError file_set_root_dir(const char* argv0, const char* rel_path)
{
if(root_dir_established)
WARN_RETURN(ERR::ROOT_DIR_ALREADY_SET);
root_dir_established = true;
// get full path to executable
char n_path[PATH_MAX];
// .. first try safe, but system-dependent version
if(sys_get_executable_name(n_path, PATH_MAX) < 0)
{
// .. failed; use argv[0]
if(!realpath(argv0, n_path))
return LibError_from_errno();
}
// make sure it's valid
if(access(n_path, X_OK) < 0)
return LibError_from_errno();
// strip executable name, append rel_path, convert to native
char* start_of_fn = (char*)path_name_only(n_path);
RETURN_ERR(file_make_native_path(rel_path, start_of_fn));
// get actual root dir - previous n_path may include ".."
// (slight optimization, speeds up path lookup)
if(!realpath(n_path, n_root_dir))
return LibError_from_errno();
// .. append SYS_DIR_SEP to simplify code that uses n_root_dir
n_root_dir_len = strlen(n_root_dir)+1; // +1 for trailing SYS_DIR_SEP
debug_assert((n_root_dir_len+1) < sizeof(n_root_dir)); // Just checking
n_root_dir[n_root_dir_len-1] = SYS_DIR_SEP;
// You might think that n_root_dir is already 0-terminated, since it's
// static - but that might not be true after calling file_reset_root_dir!
n_root_dir[n_root_dir_len] = 0;
return INFO::OK;
}
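// usage sketch (taken from the example above; illustrative only):
// executable resides in "$install_dir/system", data files in
// "$install_dir/data":
//
//   (void)file_set_root_dir(argv[0], "../data");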
void path_reset_root_dir()
{
// see comment at root_dir_established.
debug_assert(root_dir_established);
n_root_dir[0] = '\0';
n_root_dir_len = 0;
root_dir_established = false;
}
//-----------------------------------------------------------------------------
// storage for path strings
//-----------------------------------------------------------------------------
// rationale: we want a constant-time IsAtomFn(string pointer) lookup:
// this avoids any overhead of calling file_make_unique_fn_copy on
// already-atomized strings. that requires allocating from one contiguous
// arena, which is also more memory-efficient than the heap (no headers).
static Pool atom_pool;
typedef DynHashTbl<const char*, const char*> AtomMap;
static AtomMap atom_map;
bool path_is_atom_fn(const char* fn)
{
return pool_contains(&atom_pool, (void*)fn);
}
// allocate a copy of P_fn in our string pool. strings are equal iff
// their addresses are equal, thus allowing fast comparison.
//
// if the (generous) filename storage is full, 0 is returned.
// this is not ever expected to happen; callers need not check the
// return value because a warning is raised anyway.
const char* file_make_unique_fn_copy(const char* P_fn)
{
// early out: if already an atom, return immediately.
if(path_is_atom_fn(P_fn))
return P_fn;
const size_t fn_len = strlen(P_fn);
const char* unique_fn;
// check if already allocated; return existing copy if so.
//
// rationale: the entire storage could be done via container,
// rather than simply using it as a lookup mapping.
// however, DynHashTbl together with Pool (see above) is more efficient.
unique_fn = atom_map.find(P_fn);
if(unique_fn)
return unique_fn;
unique_fn = (const char*)pool_alloc(&atom_pool, fn_len+1);
if(!unique_fn)
{
DEBUG_WARN_ERR(ERR::NO_MEM);
return 0;
}
cpu_memcpy((void*)unique_fn, P_fn, fn_len);
((char*)unique_fn)[fn_len] = '\0';
atom_map.insert(unique_fn, unique_fn);
stats_unique_name(fn_len);
return unique_fn;
}
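// usage sketch (illustrative; helper is hypothetical): since atomized
// strings are unique, equality can be tested by comparing pointers
// instead of calling strcmp.
static bool example_same_path(const char* P_fn1, const char* P_fn2)
{
	return file_make_unique_fn_copy(P_fn1) == file_make_unique_fn_copy(P_fn2);
}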
static ModuleInitState initState;
void path_init()
{
if(!ModuleShouldInitialize(&initState))
return;
pool_create(&atom_pool, 8*MiB, POOL_VARIABLE_ALLOCS);
}
void path_shutdown()
{
if(!ModuleShouldShutdown(&initState))
return;
atom_map.clear();
(void)pool_destroy(&atom_pool);
}
const char* file_get_random_name()
{
// there had better be names in atom_pool, else this will fail.
debug_assert(atom_pool.da.pos != 0);
again:
const size_t start_ofs = (size_t)rand(0, (uint)atom_pool.da.pos);
// scan back to start of string (don't scan ahead; this must
// work even if atom_pool only contains one entry).
const char* start = (const char*)atom_pool.da.base+start_ofs;
for(size_t i = 0; i < start_ofs; i++)
{
if(*start == '\0')
break;
start--;
}
// skip past the '\0' we found. loop is needed because there may be
// several if we land in padding (due to pool alignment).
size_t chars_left = atom_pool.da.pos - start_ofs;
for(; *start == '\0'; start++)
{
// we had landed in padding at the end of the buffer.
if(chars_left-- == 0)
goto again;
}
const char* next_name = start;
return next_name;
}

View File

@ -1,63 +0,0 @@
/**
* =========================================================================
* File : path.h
* Project : 0 A.D.
* Description : helper functions for VFS paths.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_PATH
#define INCLUDED_PATH
namespace ERR
{
const LibError ROOT_DIR_ALREADY_SET = -110200;
}
#define VFS_PATH_IS_DIR(path) (*path == '\0' || path[strlen(path)-1] == '/')
struct NextNumberedFilenameInfo
{
int next_num;
};
// fill V_next_fn (which must be big enough for PATH_MAX chars) with
// the next numbered filename according to the pattern defined by V_fn_fmt.
// <nfi> must be initially zeroed (e.g. by defining as static) and passed
// each time.
// if <use_vfs> (default), the paths are treated as VFS paths; otherwise,
// file.cpp's functions are used. this is necessary because one of
// our callers needs a filename for VFS archive files.
//
// this function is useful when creating new files which are not to
// overwrite the previous ones, e.g. screenshots.
// example for V_fn_fmt: "screenshots/screenshot%04d.png".
extern void next_numbered_filename(const char* V_fn_fmt,
NextNumberedFilenameInfo* nfi, char* V_next_fn, bool use_vfs = true);
extern bool path_is_atom_fn(const char* fn);
extern const char* file_get_random_name();
/**
* reset root directory that was previously established via file_set_root_dir.
*
* this function avoids the security complaint that would be raised if
* file_set_root_dir is called twice; it is provided for the
* legitimate application of a self-test setUp()/tearDown().
**/
extern void path_reset_root_dir();
// note: other functions are declared directly in the public file.h header.
extern void path_init();
extern void path_shutdown();
#endif // #ifndef INCLUDED_PATH

View File

@ -1,51 +0,0 @@
#include "lib/self_test.h"
#include "lib/res/file/file_cache.h"
#include "lib/rand.h"
class TestFileCache : public CxxTest::TestSuite
{
enum { TEST_ALLOC_TOTAL = 100*1000*1000 };
public:
void test_cache_allocator()
{
// allocated address -> its size
typedef std::map<void*, size_t> AllocMap;
AllocMap allocations;
// put allocator through its paces by allocating several times
// its capacity (this ensures memory is reused)
srand(1);
size_t total_size_used = 0;
while(total_size_used < TEST_ALLOC_TOTAL)
{
size_t size = rand(1, TEST_ALLOC_TOTAL/16);
total_size_used += size;
void* p;
// until successful alloc:
for(;;)
{
p = file_cache_allocator_alloc(size);
if(p)
break;
// out of room - remove a previous allocation
// .. choose one at random
size_t chosen_idx = (size_t)rand(0, (uint)allocations.size());
AllocMap::iterator it = allocations.begin();
for(; chosen_idx != 0; chosen_idx--)
++it;
file_cache_allocator_free(it->first, it->second);
allocations.erase(it);
}
// must not already have been allocated
TS_ASSERT_EQUALS(allocations.find(p), allocations.end());
allocations[p] = size;
}
// reset to virginal state
// note: even though everything has now been freed, this is
// necessary since the freelists may be a bit scattered already.
file_cache_allocator_reset();
}
};

View File

@ -1,68 +0,0 @@
#include "lib/self_test.h"
#include "lib/self_test.h"
#include "lib/res/file/path.h"
#include "lib/res/file/file.h"
class TestPath : public CxxTest::TestSuite
{
public:
void test_conversion()
{
char N_path[PATH_MAX] = {0};
TS_ASSERT_OK(file_make_native_path("a/b/c", N_path));
#if OS_WIN
TS_ASSERT_STR_EQUALS(N_path, "a\\b\\c");
#else
TS_ASSERT_STR_EQUALS(N_path, "a/b/c");
#endif
char P_path[PATH_MAX] = {0};
TS_ASSERT_OK(file_make_portable_path("a\\b\\c", P_path));
#if OS_WIN
TS_ASSERT_STR_EQUALS(P_path, "a/b/c");
#else
// sounds strange, but correct: on non-Windows, '\\' wasn't
// recognized as a separator and therefore wasn't converted.
TS_ASSERT_STR_EQUALS(P_path, "a\\b\\c");
#endif
}
// file_make_full_*_path is left untested (hard to do so)
void test_atom()
{
path_init();
// file_make_unique_fn_copy
// .. return same address for same string?
const char* atom1 = file_make_unique_fn_copy("a/bc/def");
const char* atom2 = file_make_unique_fn_copy("a/bc/def");
TS_ASSERT_EQUALS(atom1, atom2);
// .. early out (already in pool) check works?
const char* atom3 = file_make_unique_fn_copy(atom1);
TS_ASSERT_EQUALS(atom3, atom1);
// path_is_atom_fn
// is it reported as in pool?
TS_ASSERT(path_is_atom_fn(atom1));
// file_get_random_name
// see if the atom added above eventually comes out when a
// random one is returned from the pool.
int tries_left;
for(tries_left = 1000; tries_left != 0; tries_left--)
{
const char* random_name = file_get_random_name();
if(random_name == atom1)
break;
}
TS_ASSERT(tries_left != 0);
path_shutdown();
}
};

View File

@ -1,758 +0,0 @@
/**
* =========================================================================
* File : vfs.cpp
* Project : 0 A.D.
* Description : Handle-based wrapper on top of the vfs_mount API.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "vfs.h"
#include <string.h>
#include <time.h>
#include <math.h>
#include <map>
#include <list>
#include <deque>
#include <vector>
#include <string>
#include <algorithm>
#include "lib/adts.h"
#include "lib/timer.h"
#include "lib/res/res.h"
#include "lib/sysdep/dir_watch.h"
#include "file_internal.h"
#include "lib/module_init.h"
// not safe to call before main!
// pathnames are case-insensitive.
// implementation:
// when mounting, we get the exact filenames as reported by the OS;
// we allow open requests with mixed case to match those,
// but still use the correct case when passing to other libraries
// (e.g. the actual open() syscall, called via file_open).
// rationale:
// necessary, because some exporters output .EXT uppercase extensions
// and it's unreasonable to expect that users will always get it right.
// rationale for no forcibly-close support:
// issue:
// we might want to edit files while the game has them open.
// usual case: edit file, notify engine that it should be reloaded.
// here: need to tell the engine to stop what it's doing and close the file;
// only then can the artist write to the file, and trigger a reload.
//
// work involved:
// since closing a file with pending aios results in undefined
// behavior on Win32, we would have to keep track of all aios from each file,
// and cancel them. we'd also need to notify the higher level resource user
// that its read was cancelled, as opposed to failing due to read errors
// (which might cause the game to terminate).
//
// this is just more work than benefit. cases where the game holds on to files
// are rare:
// - streaming music (artist can use regular commands to stop the current
// track, or all music)
// - if the engine happens to be reading that file at the moment (expected
// to happen only during loading, and these are usually one-shot anyway,
// i.e. it'll be done soon)
// - bug (someone didn't close a file - tough luck, and should be fixed
// instead of hacking around it).
// - archives (these remain open. allowing reload would mean we'd have to keep
// track of all files from an archive, and reload them all. another hassle.
// anyway, if files are to be changed in-game, then change the plain-file
// version - that's what they're for).
///////////////////////////////////////////////////////////////////////////////
//
// directory
//
///////////////////////////////////////////////////////////////////////////////
struct VDir
{
DirIterator di;
uint di_valid : 1; // <di> will be closed iff == 1
};
H_TYPE_DEFINE(VDir);
static void VDir_init(VDir* UNUSED(vd), va_list UNUSED(args))
{
}
static void VDir_dtor(VDir* vd)
{
// note: DirIterator has no way of checking if it's valid;
// we must therefore only free it if reload() succeeded.
if(vd->di_valid)
{
xdir_close(&vd->di);
vd->di_valid = 0;
}
}
static LibError VDir_reload(VDir* vd, const char* V_dir_path, Handle UNUSED(hvd))
{
debug_assert(VFS_PATH_IS_DIR(V_dir_path));
RETURN_ERR(xdir_open(V_dir_path, &vd->di));
vd->di_valid = 1;
return INFO::OK;
}
static LibError VDir_validate(const VDir* vd)
{
// note: <di> is mostly opaque and cannot be validated.
if(vd->di.filter && !isprint(vd->di.filter[0]))
WARN_RETURN(ERR::_1);
return INFO::OK;
}
static LibError VDir_to_string(const VDir* vd, char* buf)
{
const char* filter = vd->di.filter;
if(!vd->di.filter_latched)
filter = "?";
if(!filter)
filter = "*";
snprintf(buf, H_STRING_LEN, "(\"%s\")", filter);
return INFO::OK;
}
// open a directory for reading its entries via vfs_next_dirent.
// V_dir must end in '/' to indicate it's a directory path.
Handle vfs_dir_open(const char* V_dir_path)
{
// must disallow handle caching because this object is not
// copy-equivalent (since the iterator is advanced by each user).
return h_alloc(H_VDir, V_dir_path, RES_NO_CACHE);
}
// close the handle to a directory.
LibError vfs_dir_close(Handle& hd)
{
return h_free(hd, H_VDir);
}
// retrieve the next (order is unspecified) dir entry matching <filter>.
// return 0 on success, ERR::DIR_END if no matching entry was found,
// or a negative error code on failure.
// filter values:
// - 0: anything;
// - "/": any subdirectory;
// - "/|<pattern>": any subdirectory, or as below with <pattern>;
// - <pattern>: any file whose name matches; ? and * wildcards are allowed.
//
// note that the directory entries are only scanned once; after the
// end is reached (-> ERR::DIR_END returned), no further entries can
// be retrieved, even if filter changes (which shouldn't happen - see impl).
//
// rationale: we do not sort directory entries alphabetically here.
// most callers don't need it and the overhead is considerable
// (we'd have to store all entries in a vector). it is left up to
// higher-level code such as VfsUtil.
LibError vfs_dir_next_ent(const Handle hd, DirEnt* ent, const char* filter)
{
H_DEREF(hd, VDir, vd);
return dir_filtered_next_ent(&vd->di, ent, filter);
}
///////////////////////////////////////////////////////////////////////////////
//
// file
//
///////////////////////////////////////////////////////////////////////////////
// return actual path to the specified file:
// "<real_directory>/fn" or "<archive_name>/fn".
LibError vfs_realpath(const char* V_path, char* realpath)
{
TFile* tf;
CHECK_ERR(tree_lookup(V_path, &tf));
const char* atom_fn = tfile_get_atom_fn(tf);
const Mount* m = tfile_get_mount(tf);
return mount_realpath(atom_fn, m, realpath);
}
// does the specified file exist? return false on error.
// useful because a "file not found" warning is not raised, unlike vfs_stat.
bool vfs_exists(const char* V_fn)
{
TFile* tf;
return (tree_lookup(V_fn, &tf) == 0);
}
// get file status (mode, size, mtime). output param is zeroed on error.
LibError vfs_stat(const char* V_path, struct stat* s)
{
memset(s, 0, sizeof(*s));
TFile* tf;
CHECK_ERR(tree_lookup(V_path, &tf));
return tree_stat(tf, s);
}
//-----------------------------------------------------------------------------
struct VFile
{
File f;
// current file pointer. this is necessary because file.cpp's interface
// requires passing an offset for every VIo; see file_io_issue.
off_t ofs;
// pointer to VFS file info storage; used to update size/mtime
// after a newly written file is closed.
TFile* tf;
uint is_valid : 1;
// be aware when adding fields that this struct is quite large,
// and may require increasing the control block size limit.
};
H_TYPE_DEFINE(VFile);
static void VFile_init(VFile* vf, va_list args)
{
vf->f.flags = va_arg(args, int);
}
static void VFile_dtor(VFile* vf)
{
// note: checking if reload() succeeded is unnecessary because
// xfile_close and mem_free_h safely handle 0-initialized data.
WARN_ERR(xfile_close(&vf->f));
// update file state in VFS tree
// (must be done after close, since that calculates the size)
if(vf->f.flags & FILE_WRITE)
tree_update_file(vf->tf, vf->f.size, time(0)); // can't fail
if(vf->is_valid)
stats_close();
}
static LibError VFile_reload(VFile* vf, const char* V_path, Handle)
{
const uint flags = vf->f.flags;
// we're done if file is already open. need to check this because
// reload order (e.g. if resource opens a file) is unspecified.
if(xfile_is_open(&vf->f))
return INFO::OK;
TFile* tf;
uint lf = (flags & FILE_WRITE)? LF_CREATE_MISSING : 0;
LibError err = tree_lookup(V_path, &tf, lf);
if(err < 0)
{
// don't CHECK_ERR - this happens often and the dialog is annoying
debug_printf("lookup failed for %s\n", V_path);
return err;
}
// careful! FILE_WRITE_TO_TARGET consists of 2 bits; they must both be
// set (one of them is FILE_WRITE, which can be set independently).
// this is a bit ugly but better than requiring users to write
// FILE_WRITE|FILE_WRITE_TO_TARGET.
if((flags & FILE_WRITE_TO_TARGET) == FILE_WRITE_TO_TARGET)
RETURN_ERR(set_mount_to_write_target(tf));
RETURN_ERR(xfile_open(V_path, flags, tf, &vf->f));
stats_open(vf->f.atom_fn, vf->f.size);
vf->is_valid = 1;
vf->tf = tf;
return INFO::OK;
}
static LibError VFile_validate(const VFile* vf)
{
// <ofs> doesn't have any invariant we can check.
RETURN_ERR(xfile_validate(&vf->f));
return INFO::OK;
}
static LibError VFile_to_string(const VFile* UNUSED(vf), char* buf)
{
strcpy(buf, ""); // safe
return INFO::OK;
}
// return the size of an already opened file, or a negative error code.
ssize_t vfs_size(Handle hf)
{
H_DEREF(hf, VFile, vf);
return vf->f.size;
}
// open the file for synchronous or asynchronous VIo. write access is
// requested via FILE_WRITE flag, and is not possible for files in archives.
// file_flags: default 0
//
// on failure, a debug_warn is generated and a negative error code returned.
Handle vfs_open(const char* V_fn, uint file_flags)
{
// keeping files open doesn't make sense in most cases (because the
// file is used to load resources, which are cached at a higher level).
uint res_flags = RES_NO_CACHE;
// res_flags is for h_alloc and file_flags goes to VFile_init.
// h_alloc already complains on error.
return h_alloc(H_VFile, V_fn, res_flags, file_flags);
}
// close the handle to a file.
LibError vfs_close(Handle& hf)
{
// h_free already complains on error.
return h_free(hf, H_VFile);
}
// transfer the next <size> bytes to/from the given file.
// (read or write access was chosen at file-open time).
//
// if non-NULL, <cb> is called for each block transferred, passing <cbData>.
// it returns how much data was actually transferred, or a negative error
// code (in which case we abort the transfer and return that value).
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// p (value-return) indicates the buffer mode:
// - *p == 0: read into buffer we allocate; set *p.
// caller should mem_free it when no longer needed.
// - *p != 0: read into or write into the buffer *p.
// - p == 0: only read into temp buffers. useful if the callback
// is responsible for processing/copying the transferred blocks.
// since only temp buffers can be added to the cache,
// this is the preferred read method.
//
// return number of bytes transferred (see above), or a negative error code.
ssize_t vfs_io(const Handle hf, const size_t size, FileIOBuf* pbuf,
FileIOCB cb, uintptr_t cbData)
{
debug_printf("VFS| io: size=%d\n", size);
H_DEREF(hf, VFile, vf);
File* f = &vf->f;
stats_io_user_request(size);
trace_notify_io(f->atom_fn, size, f->flags);
off_t ofs = vf->ofs;
vf->ofs += (off_t)size;
ssize_t nbytes = xfile_io(&vf->f, ofs, size, pbuf, cb, cbData);
RETURN_ERR(nbytes);
return nbytes;
}
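// usage sketch (illustrative; helper is hypothetical): read an entire
// file by letting vfs_io allocate the buffer (the FILE_BUF_ALLOC mode
// described above); caller must release it via file_buf_free.
static ssize_t example_read_all(Handle hf, FileIOBuf& buf)
{
	const ssize_t size = vfs_size(hf);
	if(size < 0)
		return size;
	buf = FILE_BUF_ALLOC;
	return vfs_io(hf, (size_t)size, &buf, 0, 0);
}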
// load the entire file <fn> into memory.
// p and size are filled with address/size of buffer (0 on failure).
// flags influences IO mode and is typically 0.
// when the file contents are no longer needed, call file_buf_free(buf).
LibError vfs_load(const char* V_fn, FileIOBuf& buf, size_t& size,
uint file_flags, FileIOCB cb, uintptr_t cbData) // all default 0
{
debug_printf("VFS| load: V_fn=%s\n", V_fn);
const char* atom_fn = file_make_unique_fn_copy(V_fn);
const uint fb_flags = (file_flags & FILE_LONG_LIVED)? FB_LONG_LIVED : 0;
buf = file_cache_retrieve(atom_fn, &size, fb_flags);
if(buf)
{
// we want to skip the below code (especially vfs_open) for
// efficiency. that includes stats/trace accounting, though,
// so duplicate that here:
stats_cache(CR_HIT, size, atom_fn);
stats_io_user_request(size);
trace_notify_io(atom_fn, size, file_flags);
size_t actual_size;
LibError ret = file_io_call_back(buf, size, cb, cbData, actual_size);
if(ret < 0)
file_buf_free(buf);
// we don't care if the cb has "had enough" or whether it would
// accept more data - this is all it gets and we need to
// translate return value to avoid confusing callers.
if(ret == INFO::CB_CONTINUE)
ret = INFO::OK;
size = actual_size;
return ret;
}
buf = 0; size = 0; // initialize in case something below fails
Handle hf = vfs_open(atom_fn, file_flags);
H_DEREF(hf, VFile, vf);
size = vf->f.size;
buf = FILE_BUF_ALLOC;
ssize_t nread = vfs_io(hf, size, &buf, cb, cbData);
// IO failed
if(nread < 0)
{
file_buf_free(buf);
(void)vfs_close(hf);
buf = 0, size = 0; // make sure they are zeroed
return (LibError)nread;
}
debug_assert(nread == (ssize_t)size);
(void)file_cache_add(buf, size, atom_fn, file_flags);
stats_cache(CR_MISS, size, atom_fn);
(void)vfs_close(hf);
return INFO::OK;
}
// caveat: pads file to next max(4kb, sector_size) boundary
// (due to limitation of Win32 FILE_FLAG_NO_BUFFERING I/O).
// if that's a problem, specify FILE_NO_AIO when opening.
ssize_t vfs_store(const char* V_fn, const u8* p, const size_t size, uint flags /* default 0 */)
{
Handle hf = vfs_open(V_fn, flags|FILE_WRITE);
H_DEREF(hf, VFile, vf);
FileIOBuf buf = (FileIOBuf)p;
const ssize_t ret = vfs_io(hf, size, &buf);
WARN_ERR(vfs_close(hf));
return ret;
}
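// usage sketch (illustrative; helper is hypothetical): write a buffer to
// a new VFS file, then load it back via the cache-aware vfs_load
// (relying on its default cb/cbData/flags arguments).
static LibError example_roundtrip(const char* V_fn, const u8* data, size_t size)
{
	const ssize_t written = vfs_store(V_fn, data, size);
	if(written < 0)
		return (LibError)written;
	FileIOBuf buf; size_t loaded_size;
	RETURN_ERR(vfs_load(V_fn, buf, loaded_size));
	// ... use buf[0..loaded_size) ...
	return file_buf_free(buf);
}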
///////////////////////////////////////////////////////////////////////////////
//
// asynchronous I/O
//
///////////////////////////////////////////////////////////////////////////////
// we don't support forcibly closing files => don't need to keep track of
// all IOs pending for each file. too much work, little benefit.
struct VIo
{
Handle hf;
size_t size;
u8* buf;
FileIo io;
};
H_TYPE_DEFINE(VIo);
static void VIo_init(VIo* vio, va_list args)
{
vio->hf = va_arg(args, Handle);
vio->size = va_arg(args, size_t);
vio->buf = va_arg(args, u8*);
}
static void VIo_dtor(VIo* vio)
{
// note: checking if reload() succeeded is unnecessary because
// xfile_io_discard safely handles 0-initialized data.
WARN_ERR(xfile_io_discard(&vio->io));
}
// we don't support transparent read resume after file invalidation.
// if the file has changed, we'd risk returning inconsistent data.
// doesn't look possible without controlling the AIO implementation:
// when we cancel, we can't prevent the app from calling
// aio_result, which would terminate the read.
static LibError VIo_reload(VIo* vio, const char* UNUSED(fn), Handle UNUSED(h))
{
size_t size = vio->size;
u8* buf = vio->buf;
H_DEREF(vio->hf, VFile, vf);
off_t ofs = vf->ofs;
vf->ofs += (off_t)size;
return xfile_io_issue(&vf->f, ofs, size, buf, &vio->io);
}
static LibError VIo_validate(const VIo* vio)
{
if(vio->hf < 0)
WARN_RETURN(ERR::_21);
// <size> doesn't have any invariant we can check.
if(debug_is_pointer_bogus(vio->buf))
WARN_RETURN(ERR::_22);
return xfile_io_validate(&vio->io);
}
static LibError VIo_to_string(const VIo* vio, char* buf)
{
snprintf(buf, H_STRING_LEN, "buf=%p size=%d", vio->buf, vio->size);
return INFO::OK;
}
// begin transferring <size> bytes, starting at <ofs>. get result
// with vfs_io_wait; when no longer needed, free via vfs_io_discard.
Handle vfs_io_issue(Handle hf, size_t size, u8* buf)
{
const char* fn = 0;
uint flags = 0;
return h_alloc(H_VIo, fn, flags, hf, size, buf);
}
// finished with transfer <hio> - free its buffer (returned by vfs_io_wait)
LibError vfs_io_discard(Handle& hio)
{
return h_free(hio, H_VIo);
}
// indicates if the VIo referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
int vfs_io_has_completed(Handle hio)
{
H_DEREF(hio, VIo, vio);
return xfile_io_has_completed(&vio->io);
}
// wait until the transfer <hio> completes, and return its buffer.
// output parameters are zeroed on error.
LibError vfs_io_wait(Handle hio, u8*& p, size_t& size)
{
H_DEREF(hio, VIo, vio);
return xfile_io_wait(&vio->io, p, size);
}
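// usage sketch (illustrative; helper is hypothetical): issue an
// asynchronous read, poll until it completes, then retrieve the buffer
// and free the IO handle. an error return from has_completed simply
// causes vfs_io_wait to fail below.
static LibError example_async_read(Handle hf, size_t size, u8* buf)
{
	Handle hio = vfs_io_issue(hf, size, buf);
	if(hio <= 0)
		return (LibError)hio;
	while(vfs_io_has_completed(hio) == 0)
	{
		// ... do other work while the transfer is pending ...
	}
	u8* p; size_t bytes;
	LibError ret = vfs_io_wait(hio, p, bytes);
	(void)vfs_io_discard(hio);
	return ret;
}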
///////////////////////////////////////////////////////////////////////////////
//
// memory mapping
//
///////////////////////////////////////////////////////////////////////////////
// map the entire (uncompressed!) file <hf> into memory. if currently
// already mapped, return the previous mapping (reference-counted).
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
LibError vfs_map(const Handle hf, const uint UNUSED(flags), u8*& p, size_t& size)
{
p = 0;
size = 0;
// need to zero these here in case H_DEREF fails
H_DEREF(hf, VFile, vf);
return xfile_map(&vf->f, p, size);
}
// decrement the reference count for the mapping belonging to file <f>.
// fail if there are no references; remove the mapping if the count reaches 0.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
LibError vfs_unmap(const Handle hf)
{
H_DEREF(hf, VFile, vf);
return xfile_unmap(&vf->f);
}
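// usage sketch (illustrative; helper is hypothetical): map a file, use
// its contents, then pair the map call with unmap as recommended above.
static LibError example_use_mapping(Handle hf)
{
	u8* p; size_t size;
	RETURN_ERR(vfs_map(hf, 0, p, size));
	// ... read from p[0..size) ...
	return vfs_unmap(hf);
}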
//-----------------------------------------------------------------------------
// hotloading
//-----------------------------------------------------------------------------
// called by vfs_reload and vfs_reload_changed_files (which will already
// have rebuilt the VFS - doing so more than once a frame is unnecessary).
static LibError reload_without_rebuild(const char* fn)
{
// invalidate this file's cached blocks to make sure its contents are
// loaded anew.
RETURN_ERR(file_cache_invalidate(fn));
RETURN_ERR(h_reload(fn));
return INFO::OK;
}
// called via console command.
LibError vfs_reload(const char* fn)
{
// if <fn> currently maps to an archive, the VFS must switch
// over to using the loose file (that was presumably changed).
RETURN_ERR(mount_rebuild());
return reload_without_rebuild(fn);
}
// array of reloads requested this frame (see 'do we really need to
// reload' below). go through gyrations to avoid heap allocs.
const size_t MAX_RELOADS_PER_FRAME = 12;
typedef char Path[PATH_MAX];
typedef Path PathList[MAX_RELOADS_PER_FRAME];
// do we really need to reload? try to avoid the considerable cost of
// rebuilding VFS and scanning all Handles.
static bool can_ignore_reload(const char* V_path, PathList pending_reloads, uint num_pending)
{
// note: be careful to avoid 'race conditions' depending on the
// timeframe in which notifications reach us.
// example: editor deletes a.tga; we are notified; reload is
// triggered but fails since the file isn't found; further
// notifications (e.g. renamed a.tmp to a.tga) come within x [ms] and
// are ignored due to a time limit.
// therefore, we can only check for multiple reload requests a frame;
// to that purpose, an array is built and duplicates ignored.
const char* ext = path_extension(V_path);
// .. directory change notification; ignore because we get
// per-file notifications anyway. (note: assume no extension =>
// it's a directory).
if(ext[0] == '\0')
return true;
// .. compiled XML files the engine writes out by the hundreds;
// skipping them is a big performance gain.
if(!strcasecmp(ext, "xmb"))
return true;
// .. temp files, usually created when an editor saves a file
// (delete, create temp, rename temp); no need to reload those.
if(!strcasecmp(ext, "tmp"))
return true;
// .. more than one notification for a file; only reload once.
// note: this doesn't suffer from the 'reloaded too early'
// problem described above; if there's more than one
// request in the array, the file has since been written.
for(uint i = 0; i < num_pending; i++)
{
if(!strcmp(pending_reloads[i], V_path))
return true;
}
return false;
}
// get directory change notifications, and reload all affected files.
// must be called regularly (e.g. once a frame). this is much simpler
// than asynchronous notifications: everything would need to be thread-safe.
LibError vfs_reload_changed_files()
{
PathList pending_reloads;
uint num_pending = 0;
// process only as many notifications as we have room for; the others
// will be handled next frame. it's unlikely that they'll pile up.
while(num_pending < MAX_RELOADS_PER_FRAME)
{
// get next notification
char N_path[PATH_MAX];
LibError ret = dir_get_changed_file(N_path);
if(ret == ERR::AGAIN) // none available; done.
break;
RETURN_ERR(ret);
// convert to VFS path
char P_path[PATH_MAX];
RETURN_ERR(file_make_full_portable_path(N_path, P_path));
char* V_path = pending_reloads[num_pending];
RETURN_ERR(mount_make_vfs_path(P_path, V_path));
if(can_ignore_reload(V_path, pending_reloads, num_pending))
continue;
// path has already been written to pending_reloads,
// so just mark it valid.
num_pending++;
}
// rebuild VFS, in case a file that has been changed is currently
// mounted from an archive (reloading would just grab the unchanged
// version in the archive). the rebuild sees differing mtimes and
// always chooses the loose file version. only do this once
// (instead of per reload request) because it's slow (> 1s)!
if(num_pending != 0)
RETURN_ERR(mount_rebuild());
// now actually reload all files in the array we built
for(uint i = 0; i < num_pending; i++)
RETURN_ERR(reload_without_rebuild(pending_reloads[i]));
return INFO::OK;
}
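// usage sketch: call once per frame from the main loop (the frame()
// function shown here is hypothetical):
//
//   static void frame()
//   {
//       WARN_ERR(vfs_reload_changed_files());
//       // .. simulate, render, etc.
//   }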
//-----------------------------------------------------------------------------
void vfs_display()
{
tree_display();
}
static ModuleInitState initState;
// make the VFS tree ready for use. must be called before all other
// functions below, barring explicit mentions to the contrary.
//
// rationale: initialization could be done implicitly by calling this
// from all VFS APIs. we refrain from that and require the user to
// call this because a central point of initialization (file_set_root_dir)
// is necessary anyway and this way is simpler/easier to maintain.
void vfs_init()
{
if(!ModuleShouldInitialize(&initState))
return;
h_mgr_init();
stats_vfs_init_start();
mount_init();
stats_vfs_init_finish();
}
void vfs_shutdown()
{
if(!ModuleShouldShutdown(&initState))
return;
trace_shutdown();
mount_shutdown();
h_mgr_shutdown();
}

View File

@ -1,458 +0,0 @@
/**
* =========================================================================
* File : vfs.h
* Project : 0 A.D.
* Description : Virtual File System API - allows transparent access to
* : files in archives and modding via multiple mount points.
* =========================================================================
*/
// license: GPL; see lib/license.txt
/*
[KEEP IN SYNC WITH WIKI!]
Introduction
------------
The VFS (Virtual File System) is a layer between the application and
file.cpp's API. Its main purpose is to decrease the cost of file access;
also provided for are "hotloading" and "modding" via overriding files
(explained below).
The interface is almost identical to that of file.cpp, except that
it works with Handles for safety (see h_mgr.h).
File Access Cost
----------------
Games typically encompass thousands of files. Such heavy loads expose
2 problems with current file systems:
- wasted disk space. An average of half a cluster (>= 1 sector, typically
512 bytes) is lost per file due to internal fragmentation.
- lengthy file open times. Permissions checks and overhead added by
antivirus scanners combine to make these slow. Additionally, files are
typically not arranged in order of access, which induces costly
disk seeks.
The solution is to put all files in archives: internal fragmentation is
eliminated since they are packed end-to-end; open is much faster;
seeks are avoided by arranging in order of access. For more information,
see 'Archive Details' below.
Note that a good file system (Reiser3 comes close) could also deliver the
above. However, this code is available now on all platforms; there is
no disadvantage to using it and the other features remain.
Hotloading
----------
During development, artists and programmers typically follow an edit /
see how it looks in-game / repeat cycle. Unfortunately, changes to a
file are not immediately noticed by the game; the usual workaround is to
restart the map (or worse, entire game) to make sure they are reloaded.
Since decreases in edit cycle time improve productivity, we want changes to
files to be picked up immediately. To that end, we support hotloading -
as soon as the OS reports changes, all Handle objects that ensued from that
file are reloaded.
The VFS's part in this is registering "watches" that report changes to
any mounted real directory. Since the file notification backend
(currently SGI FAM and a Win32 port) cannot watch an entire directory tree,
we need to do so for every single directory. The VFS traverses each and
stores information anyway, so we do that here.
Modding
-------
1) Motivation
When users tweak game parameters or even create an entirely new game
principle with the same underlying engine, it is called modding.
As evidenced by the Counter-Strike mod for Half-Life, this can greatly
prolong the life of a game. Additionally, since we started out as a
mod group, great value is placed on giving users all the tools to make
modding easy.
2) Means
The actual method of overriding game data is quite simple: a mod directory
is mounted into the file system with a higher priority than original data.
These files therefore temporarily (as long as the mod is active) replace the
originals. This allows multiple (non-overlapping!) mods to be active at the
same time and also makes switching between them easy.
The same mechanism is also used for patches to game data.
3) Rationale
Older games did not provide any support for modding other than
directly editing game data. Obviously this is risky and insufficient.
Requiring mods to provide an entire new copy of all game logic/scripts
would obviate file system support, but is too much work for the
modder (since all files would first have to be copied somewhere).
Allowing overriding individual files is much safer (since game data is
never touched) and easier (more fine-grained control for modders).
Patching
--------
As mentioned above, patching is also done via mounting.
Alternatives would be to completely replace the game data archive
(infeasible due to size) or apply a binary patch (complicated and
brittle WRT versioning). We are therefore happy to use the
already existing mod mechanism.
Note however that multiple patches do impact performance (despite
constant-time VFS path -> file location lookup) simply due to locality;
files are no longer arranged in order of access. Fortunately there is an
easy way to avoid this: simply run the archive builder script; all
patched files will be merged into the archive. However, be warned that
reverting to previous versions (e.g. to watch old replays) would no longer
be possible! This is because their changes have been 'baked into' the
main archive, whereas previously the patch could simply be deleted.
Mount Details
-------------
"Mounting" is understood to mean populating a given VFS directory (the
"mount point") with the contents of e.g. a real directory or archive
(the "mounted object" - for a list of supported types, see enum MountType).
It is important to note that the VFS is a full-fledged tree storing
information about each file, e.g. its last-modified time or actual location.
The advantage is that file open time does not increase with the number of
mounts, which is important because multiple patches and mods may be active.
This is in contrast to e.g. PhysicsFS, which just maintains a list of
mountings and scans it when opening each file.
Each file object in the VFS tree stores its current location; there is no
way to access files of the same name but lower priority residing in other
mounted objects. For this reason, the entire VFS must be rebuilt (i.e.
repopulating all mount points) when a mounting is removed. Fortunately
this is rare and does not happen in-game; we optimize for the common case.
Archive Details
---------------
1) Rationale
An open format (.zip) was chosen instead of a proprietary solution for the
following reasons:
- interoperability: anyone can view or add files without the need for
special tools, which is important for modding.
- less work: freely available decompression code (ZLib) eases implementation.
Disadvantages are efficiency (only adequate; an in-house format would offer
more potential for optimization) and the lack of protection for data files.
Interoperability is a double-edged sword, since anyone can change critical
files or use game assets. However, obfuscating archive contents doesn't
solve anything, because the application needs to access them and a cracker
need only reverse-engineer that. Regardless, the application can call its
archives e.g. ".pk3" (as does Quake III) for minimal protection.
2) Archive Builder
Arranging archive contents in order of access was mentioned above. To that
end, the VFS can log all file open calls into a text file (one per line).
This is then processed by an archive builder script, which needs to
collect all files by VFS lookup rules, then add them to the archive in
the order specified in that file (all remaining files that weren't triggered
in the logging test run should be added thereafter).
Note that the script need only be a simple frontend for e.g. infozip, and
that a plain user-created archive will work as well (advantage of using Zip);
this is just an optimization.
3) Misc. Notes
To ease development, files may additionally be stored in normal directories.
The VFS transparently provides access to the correct (newest) version.
This is to allow keeping data files in SCM - developers can get the latest
version without always having to update archives afterwards.
One additional advantage of archives over loose files is that I/O throughput
is increased - since files are compressed, there is less to read from disk.
Decompression is free because it is done in parallel with IOs.
*/
#ifndef INCLUDED_VFS
#define INCLUDED_VFS
#include "../handle.h" // Handle def
#include "lib/posix/posix_filesystem.h" // struct stat
#include "file.h" // file open flags
// upper bound on number of files; used as size of TNode pool and
// enables an optimization in the cache if it fits in 16 bits
// (each block stores a 16-bit ID instead of pointer to TNode).
// -1 allows for an "invalid/free" value.
//
// must be #define instead of const because we check whether it
// fits in 16-bits via #if.
#define VFS_MAX_FILES ((1u << 16) - 1)
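// sketch of the 16-bit check mentioned above (names are hypothetical;
// the real cache code may differ):
//
//   #if VFS_MAX_FILES <= 0xFFFF
//   typedef u16 TNodeId;	// node references fit in a block's 16-bit slot
//   #else
//   typedef const TNode* TNodeId;
//   #endif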
// make the VFS tree ready for use. must be called before all other
// functions below, barring explicit mentions to the contrary.
extern void vfs_init();
extern void vfs_shutdown(void);
// enable/disable logging each file open event - used by the archive builder.
// this should only be done when necessary for performance reasons and is
// typically triggered via command line param. safe to call before vfs_init.
extern void vfs_enable_file_listing(bool want_enabled);
// write a representation of the VFS tree to stdout.
extern void vfs_display(void);
//
// paths
//
// note: the VFS doesn't specify any path length restriction -
// internal filename storage is not fixed-length.
// for an indication of how large fixed-size user buffers should be,
// use PATH_MAX.
// VFS paths are of the form: "(dir/)*file?"
// in English: '/' as path separator; trailing '/' required for dir names;
// no leading '/', since "" is the root dir.
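// examples (hypothetical paths):
//   ""                    - the root directory
//   "art/textures/"       - a directory (note the trailing '/')
//   "art/textures/a.dds"  - a file within it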
//
// mount
//
enum VfsMountFlags
{
// the directory being mounted (but not its subdirs! see impl) will be
// searched for archives, and their contents added.
// use only if necessary, since this is slow (we need to check if
// each file is an archive, which entails reading the header).
VFS_MOUNT_ARCHIVES = 1,
// when mounting a directory, all directories beneath it are
// added recursively as well.
VFS_MOUNT_RECURSIVE = 2,
// all real directories mounted during this operation will be watched
// for changes. this flag is provided to avoid watches in output-only
// directories, e.g. screenshots/ (only causes unnecessary overhead).
VFS_MOUNT_WATCH = 4,
// anything mounted from here should be added to archive when
// building via vfs_optimizer.
VFS_MOUNT_ARCHIVABLE = 8
};
// mount <P_real_dir> into the VFS at <V_mount_point>,
// which is created if it does not yet exist.
// files in that directory override the previous VFS contents if
// <pri>(ority) is not lower.
// all archives in <P_real_dir> are also mounted, in alphabetical order.
//
// flags determines extra actions to perform; see VfsMountFlags.
//
// P_real_dir = "." or "./" isn't allowed - see implementation for rationale.
extern LibError vfs_mount(const char* V_mount_point, const char* P_real_dir, uint flags = 0, uint pri = 0);
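// usage sketch (hypothetical paths; flags per VfsMountFlags above):
//
//   // base game data: recurse, load archives, watch for changes
//   (void)vfs_mount("", "data", VFS_MOUNT_ARCHIVES|VFS_MOUNT_RECURSIVE|VFS_MOUNT_WATCH);
//   // overlay a mod with higher priority so its files override
//   (void)vfs_mount("", "mods/mymod", VFS_MOUNT_RECURSIVE, 1);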
// unmount a previously mounted item, and rebuild the VFS afterwards.
extern LibError vfs_unmount(const char* name);
// set current "mod write directory" to P_target_dir, which must
// already have been mounted into the VFS.
// all files opened for writing with the FILE_WRITE_TO_TARGET flag set will
// be written into the appropriate subdirectory of this mount point.
//
// this allows e.g. the editor to write files that are already
// stored in archives, which are read-only.
extern LibError vfs_set_write_target(const char* P_target_dir);
//
// directory entry
//
// open the directory for reading its entries via vfs_next_dirent.
// V_dir must end in '/' to indicate it's a directory path.
extern Handle vfs_dir_open(const char* V_dir_path);
// close the handle to a directory.
// all DirEnt.name strings are now invalid.
extern LibError vfs_dir_close(Handle& hd);
// retrieve the next (order is unspecified) dir entry matching <filter>.
// return 0 on success, ERR::DIR_END if no matching entry was found,
// or a negative error code on failure.
// filter values:
// - 0: anything;
// - "/": any subdirectory;
// - "/|<pattern>": any subdirectory, or as below with <pattern>;
// - <pattern>: any file whose name matches; ? and * wildcards are allowed.
//
// note that the directory entries are only scanned once; after the
// end is reached (-> ERR::DIR_END returned), no further entries can
// be retrieved, even if filter changes (which shouldn't happen - see impl).
//
// see also the definition of DirEnt in file.h.
//
// rationale: we do not sort directory entries alphabetically here.
// most callers don't need it and the overhead is considerable
// (we'd have to store all entries in a vector). it is left up to
// higher-level code such as VfsUtil.
extern LibError vfs_dir_next_ent(Handle hd, DirEnt* ent, const char* filter = 0);
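// enumeration sketch (hypothetical directory and pattern):
//
//   Handle hd = vfs_dir_open("art/textures/");
//   if(hd > 0)
//   {
//       DirEnt ent;
//       while(vfs_dir_next_ent(hd, &ent, "*.dds") == INFO::OK)
//           debug_printf("%s\n", ent.name);
//       (void)vfs_dir_close(hd);
//   }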
// called by EnumDirEnts for each entry in a directory (optionally those in
// its subdirectories as well), passing their complete path+name, the info
// that would be returned by vfs_next_dirent, and user-specified context.
// note: path and ent parameters are only valid during the callback.
typedef void (*DirEnumCB)(const char* path, const DirEnt* ent, uintptr_t cbData);
enum DirEnumFlags
{
VFS_DIR_RECURSIVE = 1
};
// call <cb> for each entry matching <user_filter> (see vfs_next_dirent) in
// directory <path>; if flags & VFS_DIR_RECURSIVE, entries in
// subdirectories are also returned.
extern LibError vfs_dir_enum(const char* path, uint enum_flags, const char* filter,
DirEnumCB cb, uintptr_t cbData);
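// callback sketch (hypothetical; counts all files beneath a subtree):
//
//   static void count_cb(const char* path, const DirEnt* ent, uintptr_t cbData)
//   {
//       UNUSED2(path);
//       if(!DIRENT_IS_DIR(ent))
//           ++*(size_t*)cbData;
//   }
//   // size_t count = 0;
//   // (void)vfs_dir_enum("art/", VFS_DIR_RECURSIVE, 0, count_cb, (uintptr_t)&count);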
//
// file
//
// return actual path to the specified file:
// "<real_directory>/fn" or "<archive_name>/fn".
extern LibError vfs_realpath(const char* fn, char* realpath);
// does the specified file exist? return false on error.
// useful because a "file not found" warning is not raised, unlike vfs_stat.
extern bool vfs_exists(const char* fn);
// get file status (size, mtime). output param is zeroed on error.
extern LibError vfs_stat(const char* fn, struct stat*);
// return the size of an already opened file, or a negative error code.
extern ssize_t vfs_size(Handle hf);
// open the file for synchronous or asynchronous IO. write access is
// requested via FILE_WRITE flag, and is not possible for files in archives.
// flags defined in file.h
extern Handle vfs_open(const char* fn, uint flags = 0);
// close the handle to a file.
extern LibError vfs_close(Handle& h);
//
// asynchronous I/O
//
// low-level file routines - no caching or alignment.
// begin transferring the next <size> bytes. get the result
// with vfs_io_wait; when no longer needed, free via vfs_io_discard.
extern Handle vfs_io_issue(Handle hf, size_t size, u8* buf);
// indicates if the given IO has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
extern int vfs_io_has_completed(Handle hio);
// wait until the transfer <hio> completes, and return its buffer.
// output parameters are zeroed on error.
extern LibError vfs_io_wait(Handle hio, u8*& p, size_t& size);
// finished with transfer <hio> - free its buffer (returned by vfs_io_wait).
extern LibError vfs_io_discard(Handle& hio);
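// async usage sketch (hypothetical; most error handling omitted):
//
//   u8 buf[4096];
//   Handle hio = vfs_io_issue(hf, sizeof(buf), buf);
//   // .. do other work while the transfer is in flight ..
//   u8* p; size_t size;
//   if(vfs_io_wait(hio, p, size) == INFO::OK)
//   {
//       // .. process p[0..size) ..
//   }
//   (void)vfs_io_discard(hio);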
//
// synchronous I/O
//
// transfer the next <size> bytes to/from the given file.
// (read or write access was chosen at file-open time).
//
// if non-NULL, <cb> is called for each block transferred, passing <cbData>.
// it returns how much data was actually transferred, or a negative error
// code (in which case we abort the transfer and return that value).
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// p (value-return) indicates the buffer mode:
// - *p == 0: read into buffer we allocate; set *p.
// caller should mem_free it when no longer needed.
// - *p != 0: read into or write into the buffer *p.
// - p == 0: only read into temp buffers. useful if the callback
// is responsible for processing/copying the transferred blocks.
// since only temp buffers can be added to the cache,
// this is the preferred read method.
//
// return number of bytes transferred (see above), or a negative error code.
extern ssize_t vfs_io(Handle hf, size_t size, FileIOBuf* p, FileIOCB cb = 0, uintptr_t cbData = 0);
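// buffer-mode sketch (hypothetical; shows the "*p == 0" mode, with
// file_buf_free assumed to be the matching free call, as for vfs_load):
//
//   FileIOBuf buf = 0;	// 0 => vfs_io allocates the buffer
//   ssize_t bytes = vfs_io(hf, size, &buf);
//   if(bytes >= 0)
//   {
//       // .. process the first <bytes> bytes of buf ..
//       (void)file_buf_free(buf);
//   }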
// convenience functions that replace vfs_open / vfs_io / vfs_close:
// load the entire file <fn> into memory.
// p and size are filled with address/size of buffer (0 on failure).
// flags influences IO mode and is typically 0.
// when the file contents are no longer needed, call file_buf_free(buf).
extern LibError vfs_load(const char* V_fn, FileIOBuf& buf, size_t& size,
uint flags = 0, FileIOCB cb = 0, uintptr_t cbData = 0);
extern ssize_t vfs_store(const char* fn, const u8* p, size_t size, uint flags = 0);
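// load sketch (hypothetical file name):
//
//   FileIOBuf buf; size_t size;
//   if(vfs_load("gui/setup.xml", buf, size) == INFO::OK)
//   {
//       // .. parse buf[0..size) ..
//       (void)file_buf_free(buf);
//   }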
//
// memory mapping
//
// useful for files that are too large to be loaded into memory,
// or if only (non-sequential) portions of a file are needed at a time.
//
// this is of course only possible for uncompressed files - compressed files
// would have to be inflated sequentially, which defeats the point of mapping.
// map the entire (uncompressed!) file <hf> into memory. if currently
// already mapped, return the previous mapping (reference-counted).
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
extern LibError vfs_map(Handle hf, uint flags, u8*& p, size_t& size);
// decrement the reference count for the mapping belonging to file <f>.
// fail if there are no references; remove the mapping if the count reaches 0.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
extern LibError vfs_unmap(Handle hf);
//
// hotloading
//
extern LibError vfs_reload(const char* fn);
// note: this probably must be called from the main thread (wdir_watch limitation).
extern LibError vfs_reload_changed_files(void);
#endif // #ifndef INCLUDED_VFS

View File

@ -1,904 +0,0 @@
/**
* =========================================================================
* File : vfs_mount.cpp
* Project : 0 A.D.
* Description : mounts files and archives into VFS; provides x_* API
* : that dispatches to file or archive implementation.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "vfs_mount.h"
#include <deque>
#include <list>
#include <string>
#include <algorithm>
#include <ctime>
#include <cmath>	// fabs (used by mount_should_replace)
#include "lib/sysdep/dir_watch.h"
#include "lib/res/h_mgr.h"
#include "file_internal.h"
// we add/cancel directory watches from the VFS mount code for convenience -
// it iterates through all subdirectories anyway (*) and provides storage for
// a key to identify the watch (obviates separate TDir -> watch mapping).
//
// define this to strip out that code - removes .watch from struct TDir,
// and calls to res_watch_dir / res_cancel_watch.
//
// *: the add_watch code would need to iterate through subdirs and watch
// each one, because the monitor API (e.g. FAM) may only be able to
// watch single directories, instead of a whole subdirectory tree.
#define NO_DIR_WATCH
// location of a file: either archive or a real directory.
// not many instances => don't worry about efficiency.
struct Mount
{
// mounting into this VFS directory;
// must end in '/' (unless it is the root td, i.e. "")
std::string V_mount_point;
// real directory being mounted.
// if this Mount represents an archive, this is the real directory
// containing the Zip file (required so that this Mount is unmounted).
std::string P_name;
Handle archive;
uint pri;
// see enum VfsMountFlags
uint flags;
MountType type;
Mount(const char* V_mount_point_, const char* P_name_, Handle archive_, uint flags_, uint pri_)
: V_mount_point(V_mount_point_), P_name(P_name_)
{
archive = archive_;
flags = flags_;
pri = pri_;
if(archive > 0)
{
h_add_ref(archive);
type = MT_ARCHIVE;
}
else
type = MT_FILE;
}
~Mount()
{
if(archive > 0) // avoid h_mgr warning
archive_close(archive);
}
Mount& operator=(const Mount& rhs)
{
V_mount_point = rhs.V_mount_point;
P_name = rhs.P_name;
archive = rhs.archive;
pri = rhs.pri;
flags = rhs.flags;
type = rhs.type;
if(archive > 0) // avoid h_mgr warning
h_add_ref(archive);
return *this;
}
struct equal_to : public std::binary_function<Mount, const char*, bool>
{
bool operator()(const Mount& m, const char* P_name) const
{
return (m.P_name == P_name);
}
};
private:
Mount();
};
char mount_get_type(const Mount* m)
{
switch(m->type)
{
case MT_ARCHIVE:
return 'A';
case MT_FILE:
return 'F';
default:
return '?';
}
}
Handle mount_get_archive(const Mount* m)
{
return m->archive;
}
bool mount_is_archivable(const Mount* m)
{
// note: test VFS_MOUNT_ARCHIVABLE (not VFS_MOUNT_ARCHIVES, which only
// controls whether archives are opened when mounting).
return (m->flags & VFS_MOUNT_ARCHIVABLE) != 0;
}
bool mount_should_replace(const Mount* m_old, const Mount* m_new,
size_t size_old, size_t size_new, time_t mtime_old, time_t mtime_new)
{
// 1) "replace" if not yet associated with a Mount.
if(!m_old)
return true;
// 2) keep old if new priority is lower.
if(m_new->pri < m_old->pri)
return false;
// assume they're the same if size and last-modified time match.
// note: FAT timestamp only has 2 second resolution
const double mtime_diff = difftime(mtime_old, mtime_new);
const bool identical = (size_old == size_new) &&
fabs(mtime_diff) <= 2.0;
// 3) go with more efficient source (if files are identical)
//
// since priority is not less, we really ought to always go with m_new.
// however, there is one special case we handle for performance reasons:
// if the file contents are the same, prefer the more efficient source.
// note that priority doesn't automatically take care of this,
// especially if set incorrectly.
//
// note: see MountType for explanation of type > type2.
if(identical && m_old->type > m_new->type)
return false;
// 4) don't replace "old" file if modified more recently than "new".
// (still provide for 2 sec. FAT tolerance - see above)
if(mtime_diff > 2.0)
return false;
return true;
}
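// worked example (hypothetical values): m_old is an archived file
// (MT_ARCHIVE), m_new a loose file (MT_FILE) of equal priority. if their
// sizes match and mtimes agree within the 2 s FAT tolerance, rule 3 keeps
// the archived version because it is the more efficient source; otherwise
// rule 4 decides by recency.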
// given Mount and V_path, return its actual location (portable path).
// works for any type of path: file or directory.
LibError mount_realpath(const char* V_path, const Mount* m, char* P_real_path)
{
const char* remove = m->V_mount_point.c_str();
const char* replace = m->P_name.c_str(); // P_parent_path
CHECK_ERR(path_replace(P_real_path, V_path, remove, replace));
// if P_real_path ends with '/' (a remnant from V_path), strip
// it because that's not acceptable for portable paths.
const size_t P_len = strlen(P_real_path);
if(P_len != 0 && P_real_path[P_len-1] == '/')
P_real_path[P_len-1] = '\0';
return INFO::OK;
}
///////////////////////////////////////////////////////////////////////////////
//
// populate the directory being mounted with files from real subdirectories
// and archives.
//
///////////////////////////////////////////////////////////////////////////////
static const Mount& add_mount(const char* V_mount_point, const char* P_real_path, Handle archive,
uint flags, uint pri);
// passed (via mount_archive's archive_enum) to each afile_cb call
struct ZipCBParams : boost::noncopyable
{
// tree directory into which we are adding the archive's files
TDir* const td;
// archive's location; assigned to all files added from here
const Mount* const m;
// storage for directory lookup optimization (see below).
// held across one afile_enum's afile_cb calls.
const char* last_path;
TDir* last_td;
ZipCBParams(TDir* dir_, const Mount* loc_)
: td(dir_), m(loc_)
{
last_path = 0;
last_td = 0;
}
};
// called by mount_archive's archive_enum for each file in the archive.
// we get the full path, since that's what is stored in Zip archives.
//
// [total time 21ms with ~2000 files (includes add_file cost)]
static LibError afile_cb(const char* atom_fn, const struct stat* s, uintptr_t memento, uintptr_t user)
{
CHECK_PATH(atom_fn);
const char* name = path_name_only(atom_fn);
char path[PATH_MAX];
path_dir_only(atom_fn, path);
const char* atom_path = file_make_unique_fn_copy(path);
ZipCBParams* params = (ZipCBParams*)user;
TDir* td = params->td;
const Mount* m = params->m;
const char* last_path = params->last_path;
TDir* last_td = params->last_td;
// into which directory should the file be inserted?
// naive approach: tree_lookup_dir the path (slow!)
// optimization: store the last file's path; if it's the same,
// use the directory we looked up last time (much faster!)
// .. same as last time
if(last_path == atom_path)
td = last_td;
// .. last != current: need to do lookup
else
{
// we have to create them if missing, since we can't rely on the
// archiver placing directories before subdirs or files that
// reference them (WinZip doesn't always).
// we also need to start at the mount point (td).
const uint flags = LF_CREATE_MISSING|LF_START_DIR;
CHECK_ERR(tree_lookup_dir(atom_path, &td, flags));
params->last_path = atom_path;
params->last_td = td;
}
WARN_ERR(tree_add_file(td, name, m, s->st_size, s->st_mtime, memento));
vfs_opt_notify_non_loose_file(atom_fn);
return INFO::CB_CONTINUE;
}
static bool archive_less(Handle hza1, Handle hza2)
{
const char* fn1 = h_filename(hza1);
const char* fn2 = h_filename(hza2);
return strcmp(fn1, fn2) < 0;
}
typedef std::vector<Handle> Archives;
typedef Archives::const_iterator ArchiveCIt;
// return value is INFO::OK iff archives != 0 and the file should not be
// added to VFS (e.g. because it is an archive).
static LibError enqueue_archive(const char* name, const char* P_archive_dir, Archives* archives)
{
// caller doesn't want us to check if this is a Zip file. this is the
// case in all subdirectories of the mount point, since checking for all
// mounted files would be slow. see mount_dir_tree.
if(!archives)
return INFO::SKIPPED;
// get complete path for archive_open.
// this doesn't (need to) work for subdirectories of the mounted td!
// we can't use mount_get_path because we don't have the VFS path.
char P_path[PATH_MAX];
RETURN_ERR(path_append(P_path, P_archive_dir, name));
// just open the Zip file and see if it's valid. we don't bother
// checking the extension because archives won't necessarily be
// called .zip (e.g. Quake III .pk3).
Handle archive = archive_open(P_path);
// .. special case: <name> is recognizable as a Zip file but is
// invalid and can't be opened. avoid adding it to
// archive list and/or VFS.
if(archive == ERR::CORRUPTED)
goto do_not_add_to_VFS_or_list;
RETURN_ERR(archive);
archives->push_back(archive);
// avoid also adding the archive file itself to VFS.
// (when caller sees INFO::OK, they skip the file)
do_not_add_to_VFS_or_list:
return INFO::OK;
}
static LibError mount_archive(TDir* td, const Mount& m)
{
ZipCBParams params(td, &m);
archive_enum(m.archive, afile_cb, (uintptr_t)&params);
return INFO::OK;
}
static LibError mount_archives(TDir* td, Archives* archives, const Mount* mount)
{
// VFS_MOUNT_ARCHIVES flag wasn't set, or no archives present
if(archives->empty())
return INFO::OK;
// load archives in alphabetical filename order to allow patches
std::sort(archives->begin(), archives->end(), archive_less);
for(ArchiveCIt it = archives->begin(); it != archives->end(); ++it)
{
Handle hza = *it;
// add this archive to the mount list (address is guaranteed to
// remain valid).
const Mount& m = add_mount(mount->V_mount_point.c_str(), mount->P_name.c_str(), hza, mount->flags, mount->pri);
mount_archive(td, m);
}
return INFO::OK;
}
//-----------------------------------------------------------------------------
struct TDirAndPath
{
TDir* td;
std::string path;
TDirAndPath(TDir* d, const char* p)
: td(d), path(p)
{
}
};
typedef std::deque<TDirAndPath> DirQueue;
static LibError enqueue_dir(TDir* parent_td, const char* name,
const char* P_parent_path, DirQueue* dir_queue)
{
// caller doesn't want us to enqueue subdirectories; bail.
if(!dir_queue)
return INFO::OK;
// skip versioning system directories - this avoids cluttering the
// VFS with hundreds of irrelevant files.
// we don't do this for Zip files because it's harder (we'd have to
// strstr the entire path) and it is assumed the Zip file builder
// will take care of it.
if(!strcmp(name, "CVS") || !strcmp(name, ".svn"))
return INFO::OK;
// prepend parent path to get complete pathname.
char P_path[PATH_MAX];
CHECK_ERR(path_append(P_path, P_parent_path, name));
// create subdirectory..
TDir* td;
CHECK_ERR(tree_add_dir(parent_td, name, &td));
// .. and add it to the list of directories to visit.
dir_queue->push_back(TDirAndPath(td, P_path));
return INFO::OK;
}
// called by TDir::addR's file_enum for each entry in a real directory.
//
// if called for a real directory, it is added to VFS.
// else if called for a loose file that is a valid archive (*),
// it is mounted (all of its files are added)
// else the file is added to VFS.
//
// * we only perform this check in the directory being mounted,
// i.e. passed in by tree_add_dir. to determine if a file is an archive,
// we have to open it and read the header, which is slow.
// can't just check extension, because it might not be .zip (e.g. Quake3 .pk3).
//
// td - tree td into which the dirent is to be added
// m - real td's location; assigned to all files added from this mounting
// archives - if the dirent is an archive, its Mount is added here.
static LibError add_ent(TDir* td, DirEnt* ent, const char* P_parent_path, const Mount* m,
DirQueue* dir_queue, Archives* archives)
{
const char* name = ent->name;
// it's a directory entry.
if(DIRENT_IS_DIR(ent))
return enqueue_dir(td, name, P_parent_path, dir_queue);
// else: it's a file (dir_next_ent discards everything except for
// file and subdirectory entries).
if(enqueue_archive(name, m->P_name.c_str(), archives) == INFO::OK)
// return value indicates this file shouldn't be added to VFS
// (see enqueue_archive)
return INFO::OK;
// notify archive builder that this file could be archived but
// currently isn't; if there are too many of these, archive will be
// rebuilt.
// note: check if archivable to exclude stuff like screenshots
// from counting towards the threshold.
if(mount_is_archivable(m))
{
// prepend parent path to get complete pathname.
char V_path[PATH_MAX];
CHECK_ERR(path_append(V_path, tfile_get_atom_fn((TFile*)td), name));
const char* atom_fn = file_make_unique_fn_copy(V_path);
vfs_opt_notify_loose_file(atom_fn);
}
// it's a regular data file; add it to the directory.
return tree_add_file(td, name, m, ent->size, ent->mtime, 0);
}
// note: full path is needed for the dir watch.
static LibError populate_dir(TDir* td, const char* P_path, const Mount* m,
DirQueue* dir_queue, Archives* archives, uint flags)
{
LibError err;
RealDir* rd = tree_get_real_dir(td);
RETURN_ERR(mount_attach_real_dir(rd, P_path, m, flags));
DirIterator d;
RETURN_ERR(dir_open(P_path, &d));
DirEnt ent;
for(;;)
{
// don't RETURN_ERR since we need to close d.
err = dir_next_ent(&d, &ent);
if(err != INFO::OK)
break;
err = add_ent(td, &ent, P_path, m, dir_queue, archives);
WARN_ERR(err);
}
WARN_ERR(dir_close(&d));
return INFO::OK;
}
// actually mount the specified entry. split out of vfs_mount,
// because when invalidating (reloading) the VFS, we need to be
// able to mount without changing the mount list.
// adds the contents of the directory <m.P_name> to <td_start>, marking
// the files' locations as <m>; recurses into subdirectories and also
// mounts all archives found in the root directory. flags: see VfsMountFlags.
//
// note: we are only able to add archives found in the root directory,
// due to the add_ent implementation. that's ok - we don't want to check
// every single file to see if it's an archive (slow!).
static LibError mount_dir_tree(TDir* td_start, const Mount& m)
{
LibError err = INFO::OK;
// add_ent fills these queues with dirs/archives if the corresponding
// flags are set.
DirQueue dir_queue; // don't preallocate (not supported by TDirAndPath)
Archives archives;
archives.reserve(8); // preallocate for efficiency.
// instead of propagating flags down to add_dir, prevent recursing
// and adding archives by setting the destination pointers to 0 (easier).
DirQueue* const pdir_queue = (m.flags & VFS_MOUNT_RECURSIVE)? &dir_queue : 0;
Archives* parchives = (m.flags & VFS_MOUNT_ARCHIVES)? &archives : 0;
// kickoff (less efficient than goto, but c_str reference requires
// pop to come at end of loop => this is easiest)
dir_queue.push_back(TDirAndPath(td_start, m.P_name.c_str()));
do
{
TDir* const td = dir_queue.front().td;
const char* P_path = dir_queue.front().path.c_str();
LibError ret = populate_dir(td, P_path, &m, pdir_queue, parchives, m.flags);
if(err == INFO::OK)
err = ret;
// prevent searching for archives in subdirectories (slow!). this
// is currently required by the implementation anyway.
parchives = 0;
dir_queue.pop_front();
// pop at end of loop, because we hold a c_str() reference.
}
while(!dir_queue.empty());
// do not pass parchives because that has been set to 0!
mount_archives(td_start, &archives, &m);
return err;	// first error encountered while populating (if any)
}
// the VFS stores the location (archive or directory) of each file;
// this allows multiple search paths without having to check each one
// when opening a file (slow).
//
// one Mount is allocated for each archive or directory mounted.
// therefore, files only /point/ to a (possibly shared) Mount.
// if a file's location changes (e.g. after mounting a higher-priority
// directory), the VFS entry will point to the new Mount; the priority
// of both locations is unchanged.
//
// allocate via add_mount, passing the location. do not free!
// we keep track of all Mounts allocated; they are freed at exit,
// and by mount_unmount_all (useful when rebuilding the VFS).
// this is much easier and safer than walking the VFS tree and
// freeing every location we find.
///////////////////////////////////////////////////////////////////////////////
//
// mount list (allows multiple mountings, e.g. for mods)
//
///////////////////////////////////////////////////////////////////////////////
// every mounting results in at least one Mount (and possibly more, e.g.
// if the directory contains Zip archives, which each get a Mount).
//
// requirements for container:
// - must not invalidate iterators after insertion!
// (TFile holds a pointer to the Mount from which it was added)
// - must store items in order of insertion
typedef std::list<Mount> Mounts;
typedef Mounts::iterator MountIt;
static Mounts mounts;
static const Mount& add_mount(const char* V_mount_point, const char* P_real_path, Handle hza,
uint flags, uint pri)
{
mounts.push_back(Mount(V_mount_point, P_real_path, hza, flags, pri));
return mounts.back();
}
// note: this is not a member function of Mount to avoid having to
// forward-declare mount_archive, mount_dir_tree.
static LibError remount(const Mount& m)
{
TDir* td;
CHECK_ERR(tree_add_path(m.V_mount_point.c_str(), &m, &td));
switch(m.type)
{
case MT_ARCHIVE:
return mount_archive(td, m);
case MT_FILE:
return mount_dir_tree(td, m);
default:
WARN_RETURN(ERR::MOUNT_INVALID_TYPE);
}
}
static void mount_unmount_all(void)
{
mounts.clear();
}
static inline void remount_all()
{
std::for_each(mounts.begin(), mounts.end(), remount);
}
// mount <P_real_path> into the VFS at <V_mount_point>,
// which is created if it does not yet exist.
// files in that directory override the previous VFS contents if
// <pri>(ority) is not lower.
// all archives in <P_real_path> are also mounted, in alphabetical order.
//
// flags determines extra actions to perform; see VfsMountFlags.
//
// P_real_path = "." or "./" isn't allowed - see implementation for rationale.
LibError vfs_mount(const char* V_mount_point, const char* P_real_path, uint flags, uint pri)
{
// make sure caller didn't forget the required trailing '/'.
debug_assert(VFS_PATH_IS_DIR(V_mount_point));
// make sure it's not already mounted, i.e. in mounts.
// also prevents mounting a parent directory of a previously mounted
// directory, or vice versa. example: mount $install/data and then
// $install/data/mods/official - mods/official would also be accessible
// from the first mount point - bad.
// no matter if it's an archive - still shouldn't be a "subpath".
for(MountIt it = mounts.begin(); it != mounts.end(); ++it)
{
if(path_is_subpath(P_real_path, it->P_name.c_str()))
WARN_RETURN(ERR::ALREADY_MOUNTED);
}
// disallow "." because "./" isn't supported on Windows.
// it would also create a loophole for the parent td check above.
// "./" and "/." are caught by CHECK_PATH.
if(!strcmp(P_real_path, "."))
WARN_RETURN(ERR::PATH_NON_CANONICAL);
// (count this as "init" to obviate a separate timer)
stats_vfs_init_start();
const Mount& m = add_mount(V_mount_point, P_real_path, 0, flags, pri);
LibError ret = remount(m);
stats_vfs_init_finish();
return ret;
}
// rebuild the VFS, i.e. re-mount everything. open files are not affected.
// necessary after loose files or directories change, so that the VFS
// "notices" the changes and updates file locations. res calls this after
// dir_watch reports changes; can also be called from the console after a
// rebuild command. there is no provision for updating single VFS dirs -
// it's not worth the trouble.
LibError mount_rebuild()
{
tree_clear();
remount_all();
return INFO::OK;
}
struct IsArchiveMount
{
bool operator()(const Mount& m) const
{
return (m.type == MT_ARCHIVE);
}
};
// "backs off of" all archives - closes their files and allows them to
// be rewritten or deleted (required by archive builder).
// must call mount_rebuild when done with the rewrite/deletes,
// because this call leaves the VFS in limbo!!
//
// note: this works because archives are not "first-class" mount objects -
// they are added to the list whenever a real mount point's root directory
// contains archives. hence, we can just remove them from the list.
void mount_release_all_archives()
{
mounts.remove_if(IsArchiveMount());
}
// unmount a previously mounted item, and rebuild the VFS afterwards.
LibError vfs_unmount(const char* P_name)
{
// this removes all Mounts ensuing from the given mounting. their dtors
// free all resources and there's no need to remove the files from
// VFS (nor is this possible), since it is completely rebuilt afterwards.
MountIt begin = mounts.begin(), end = mounts.end();
MountIt last = std::remove_if(begin, end,
std::bind2nd(Mount::equal_to(), P_name));
// none were removed - need to complain so that the caller notices.
if(last == end)
WARN_RETURN(ERR::TNODE_NOT_FOUND);
// trim list and actually remove 'invalidated' entries.
mounts.erase(last, end);
return mount_rebuild();
}
// if <path> or its ancestors are mounted,
// return a VFS path that accesses it.
// used when receiving paths from external code.
LibError mount_make_vfs_path(const char* P_path, char* V_path)
{
debug_printf("mount_make_vfs_path %s %s\n", P_path, V_path);
for(MountIt it = mounts.begin(); it != mounts.end(); ++it)
{
const Mount& m = *it;
if(m.type != MT_FILE)
continue;
const char* remove = m.P_name.c_str();
const char* replace = m.V_mount_point.c_str();
if(path_replace(V_path, P_path, remove, replace) == INFO::OK)
return INFO::OK;
}
WARN_RETURN(ERR::TNODE_NOT_FOUND);
}
static const Mount* write_target;
// 2006-05-09 JW note: we are wanting to move XMB files into a separate
// folder tree (no longer interspersed with XML), so that deleting them is
// easier and dirs are less cluttered.
//
// if several mods are active, VFS would have several RealDirs mounted
// and could no longer automatically determine the write target.
//
// one solution would be to use this set_write_target support to choose the
// correct dir; however, XMB files may be generated whilst editing
// (which also requires a write_target to write files that are actually
// currently in archives), so we'd need to save/restore write_target.
// this wouldn't be thread-safe => disaster.
//
// a vfs_store_to(filename, flags, N_actual_path) API would work, but it'd
// impose a significant burden on users (finding the actual native dir),
// and be prone to abuse. additionally, it would be difficult to
// propagate N_actual_path to VFile_reload where it is needed;
// this would end up messy.
//
// instead, we'll write XMB files into VFS path "mods/$MODNAME/..",
// into which the realdir of the same name (located in some writable folder)
// is mounted; VFS therefore can write without problems.
//
// however, other code (e.g. archive builder) doesn't know about this
// trick - it only sees the flat VFS namespace, which doesn't
// include mods/$MODNAME (that is hidden). to solve this, we also mount
// any active mod's XMB dir into VFS root for read access.
// set current "mod write directory" to P_target_dir, which must
// already have been mounted into the VFS.
// all files opened for writing with the FILE_WRITE_TO_TARGET flag set will
// be written into the appropriate subdirectory of this mount point.
//
// this allows e.g. the editor to write files that are already
// stored in archives, which are read-only.
LibError vfs_set_write_target(const char* P_target_dir)
{
for(MountIt it = mounts.begin(); it != mounts.end(); ++it)
{
const Mount& m = *it;
// skip if not a directory mounting
if(m.type != MT_FILE)
continue;
// found it in list of mounted dirs
if(!strcmp(m.P_name.c_str(), P_target_dir))
{
write_target = &m;
return INFO::OK;
}
}
WARN_RETURN(ERR::NOT_MOUNTED);
}
// 'relocate' tf to the mounting established by vfs_set_write_target.
// call if <tf> is being opened with FILE_WRITE_TO_TARGET flag set.
LibError set_mount_to_write_target(TFile* tf)
{
if(!write_target)
WARN_RETURN(ERR::NOT_MOUNTED);
tfile_set_mount(tf, write_target);
// invalidate the previous values. we don't need to be clever and
// set size to that of the file in the new write_target mount point.
// this is because we're only called for files that are being
// opened for writing, which will change these values anyway.
tree_update_file(tf, 0, 0);
return INFO::OK;
}
void mount_init()
{
tree_init();
}
void mount_shutdown()
{
tree_shutdown();
mount_unmount_all();
}
static const Mount* MULTIPLE_MOUNTINGS = (const Mount*)-1;
// RDTODO: when should this be called? TDir ctor can already set this.
LibError mount_attach_real_dir(RealDir* rd, const char* P_path, const Mount* m, uint flags)
{
// more than one real dir mounted into VFS dir
// (=> can't create files for writing here)
if(rd->m)
{
// HACK: until RealDir reorg is done, we're going to have to deal with
// "attaching" to real dirs twice. don't mess up rd->m if m is the same.
if(rd->m != m)
rd->m = MULTIPLE_MOUNTINGS;
}
else
rd->m = m;
#ifndef NO_DIR_WATCH
if(flags & VFS_MOUNT_WATCH)
{
// 'watch' this directory for changes to support hotloading.
// note: do not cause this function to return an error if
// something goes wrong - this step is basically optional.
char N_path[PATH_MAX];
if(file_make_full_native_path(P_path, N_path) == INFO::OK)
(void)dir_add_watch(N_path, &rd->watch);
}
#endif
return INFO::OK;
}
void mount_detach_real_dir(RealDir* rd)
{
rd->m = 0;
#ifndef NO_DIR_WATCH
if(rd->watch) // avoid dir_cancel_watch complaining
WARN_ERR(dir_cancel_watch(rd->watch));
rd->watch = 0;
#endif
}
LibError mount_create_real_dir(const char* V_path, const Mount* m)
{
debug_assert(VFS_PATH_IS_DIR(V_path));
if(!m || m == MULTIPLE_MOUNTINGS || m->type != MT_FILE)
return INFO::OK;
char P_path[PATH_MAX];
RETURN_ERR(mount_realpath(V_path, m, P_path));
return dir_create(P_path);
}
LibError mount_populate(TDir* td, RealDir* rd)
{
UNUSED2(td);
UNUSED2(rd);
return INFO::OK;
}

View File

@ -1,116 +0,0 @@
/**
* =========================================================================
* File : vfs_mount.h
* Project : 0 A.D.
* Description : mounts files and archives into VFS; provides x_* API
* : that dispatches to file or archive implementation.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_VFS_MOUNT
#define INCLUDED_VFS_MOUNT
struct Mount; // must come before vfs_tree.h
#include "lib/res/handle.h"
#include "file.h"
#include "archive/zip.h"
#include "vfs_tree.h"
namespace ERR
{
const LibError ALREADY_MOUNTED = -110700;
const LibError NOT_MOUNTED = -110701;
const LibError MOUNT_INVALID_TYPE = -110702;
}
extern void mount_init();
extern void mount_shutdown();
// If it were possible to forward-declare enums in GCC, this one wouldn't be in
// the header. Don't use.
enum MountType
{
// the relative ordering of values expresses efficiency of the sources
// (e.g. archives are faster than loose files). mount_should_replace
// makes use of this.
MT_NONE = 0,
MT_FILE = 1,
MT_ARCHIVE = 2
};
//
// accessors that obviate the need to access Mount fields directly:
//
extern bool mount_is_archivable(const Mount* m);
extern bool mount_should_replace(const Mount* m_old, const Mount* m_new,
size_t size_old, size_t size_new, time_t mtime_old, time_t mtime_new);
extern char mount_get_type(const Mount* m);
extern Handle mount_get_archive(const Mount* m);
// given Mount and V_path, return its actual location (portable path).
// works for any type of path: file or directory.
extern LibError mount_realpath(const char* V_path, const Mount* m, char* P_real_path);
// stored by vfs_tree in TDir
struct RealDir
{
// if exactly one real directory is mounted into this virtual dir,
// this points to its location. used to add files to VFS when writing.
//
// the Mount is actually in the mount info and is invalid when
// that's unmounted, but the VFS would then be rebuilt anyway.
//
// = 0 if no real dir mounted here; = -1 if more than one.
const Mount* m;
#ifndef NO_DIR_WATCH
intptr_t watch;
#endif
};
extern LibError mount_attach_real_dir(RealDir* rd, const char* P_path, const Mount* m, uint flags);
extern void mount_detach_real_dir(RealDir* rd);
extern LibError mount_create_real_dir(const char* V_path, const Mount* m);
extern LibError mount_populate(TDir* td, RealDir* rd);
// "backs off of" all archives - closes their files and allows them to
// be rewritten or deleted (required by archive builder).
// must call mount_rebuild when done with the rewrite/deletes,
// because this call leaves the VFS in limbo!!
extern void mount_release_all_archives();
// 'relocate' tf to the mounting established by vfs_set_write_target.
// call if <tf> is being opened with FILE_WRITE_TO_TARGET flag set.
extern LibError set_mount_to_write_target(TFile* tf);
// rebuild the VFS, i.e. re-mount everything. open files are not affected.
// necessary after loose files or directories change, so that the VFS
// "notices" the changes and updates file locations. res calls this after
// dir_watch reports changes; can also be called from the console after a
// rebuild command. there is no provision for updating single VFS dirs -
// it's not worth the trouble.
extern LibError mount_rebuild();
// if <path> or its ancestors are mounted,
// return a VFS path that accesses it.
// used when receiving paths from external code.
extern LibError mount_make_vfs_path(const char* P_path, char* V_path);
#endif // #ifndef INCLUDED_VFS_MOUNT

View File

@ -1,239 +0,0 @@
/**
* =========================================================================
* File : vfs_redirector.cpp
* Project : 0 A.D.
* Description :
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "vfs_redirector.h"
#include "lib/byte_order.h" // FOURCC
#include "file_internal.h"
static const u32 vtbl_magic = FOURCC('F','P','V','T');
// HACK: these thunks and the vtbls are implemented here,
// although they belong in their respective provider's source file.
// this is currently necessary because vfs_mount doesn't yet
// abstract away the file provider (it's hardcoded for files+archives).
LibError afile_open_vfs(const char* fn, uint flags, TFile* tf,
File* f) // out
{
const uintptr_t memento = tfile_get_memento(tf);
const Mount* m = tfile_get_mount(tf);
const Handle ha = mount_get_archive(m);
return afile_open(ha, fn, memento, flags, f);
}
LibError file_open_vfs(const char* V_path, uint flags, TFile* tf,
File* f) // out
{
char N_path[PATH_MAX];
const Mount* m = tfile_get_mount(tf);
RETURN_ERR(mount_realpath(V_path, m, N_path));
RETURN_ERR(file_open(N_path, flags|FILE_DONT_SET_FN, f));
// file_open didn't set fc.atom_fn due to FILE_DONT_SET_FN.
f->atom_fn = file_make_unique_fn_copy(V_path);
return INFO::OK;
}
static const FileProvider_VTbl archive_vtbl =
{
vtbl_magic,
0,0,0, // not supported for archives ATM
afile_open_vfs, afile_close, afile_validate,
afile_io_issue, afile_io_has_completed, afile_io_wait, afile_io_discard, afile_io_validate,
afile_read,
afile_map, afile_unmap
};
static const FileProvider_VTbl file_vtbl =
{
vtbl_magic,
dir_open, dir_next_ent, dir_close,
file_open_vfs, file_close, file_validate,
file_io_issue, file_io_has_completed, file_io_wait, file_io_discard, file_io_validate,
file_io,
file_map, file_unmap
};
// see FileProvider_VTbl decl for details on why this is so empty.
static const FileProvider_VTbl tree_vtbl =
{
vtbl_magic,
tree_dir_open, tree_dir_next_ent, tree_dir_close,
0, 0, 0,
0, 0, 0, 0, 0,
0,
0, 0
};
// rationale for not using virtual functions for file_open vs afile_open:
// it would spread out the implementation of each function and make
// keeping them in sync harder. we will very rarely add new sources and
// all these functions are in one spot anyway.
static LibError vtbl_validate(const FileProvider_VTbl* vtbl)
{
if(!vtbl)
WARN_RETURN(ERR::INVALID_PARAM);
if(vtbl->magic != vtbl_magic)
WARN_RETURN(ERR::CORRUPTED);
return INFO::OK;
}
#define CHECK_VTBL(type) RETURN_ERR(vtbl_validate(type))
//
// directory entry enumeration
//
LibError xdir_open(const char* dir, DirIterator* di)
{
// HACK: it is unclear ATM how to set this properly; assume tree_dir_*
// is the only user.
di->type = &tree_vtbl;
CHECK_VTBL(di->type);
return di->type->dir_open(dir, di);
}
LibError xdir_next_ent(DirIterator* di, DirEnt* ent)
{
CHECK_VTBL(di->type);
return di->type->dir_next_ent(di, ent);
}
LibError xdir_close(DirIterator* di)
{
CHECK_VTBL(di->type);
return di->type->dir_close(di);
}
//
// file object
//
bool xfile_is_open(const File* f)
{
// not currently in use
if(f->type == 0)
return false;
WARN_ERR(vtbl_validate(f->type));
return true;
}
LibError xfile_open(const char* V_path, uint flags, TFile* tf, File* f)
{
// find out who is providing this file
const Mount* m = tfile_get_mount(tf);
debug_assert(m != 0);
// HACK: see decl of vtbls. ideally vtbl would already be stored in
// Mount, but that's not implemented yet.
char c = mount_get_type(m);
const FileProvider_VTbl* vtbl = (c == 'F')? &file_vtbl : &archive_vtbl;
CHECK_VTBL(vtbl);
RETURN_ERR(vtbl->file_open(V_path, flags, tf, f));
// success
// note: don't assign these unless we succeed to avoid the
// false impression that all is well.
f->type = vtbl;
return INFO::OK;
}
LibError xfile_close(File* f)
{
// we must not complain if the file is not open. this happens if
// attempting to open a nonexistent file: h_mgr automatically calls
// the dtor after reload fails.
// note: this takes care of checking the vtbl.
if(!xfile_is_open(f))
return INFO::OK;
LibError ret = f->type->file_close(f);
f->type = 0;
return ret;
}
LibError xfile_validate(const File* f)
{
CHECK_VTBL(f->type);
return f->type->file_validate(f);
}
//
// IO
//
LibError xfile_io_issue(File* f, off_t ofs, size_t size, u8* buf, FileIo* io)
{
io->type = f->type;
CHECK_VTBL(io->type);
return io->type->io_issue(f, ofs, size, buf, io);
}
int xfile_io_has_completed(FileIo* io)
{
CHECK_VTBL(io->type);
return io->type->io_has_completed(io);
}
LibError xfile_io_wait(FileIo* io, u8*& p, size_t& size)
{
CHECK_VTBL(io->type);
return io->type->io_wait(io, p, size);
}
LibError xfile_io_discard(FileIo* io)
{
CHECK_VTBL(io->type);
return io->type->io_discard(io);
}
LibError xfile_io_validate(const FileIo* io)
{
CHECK_VTBL(io->type);
return io->type->io_validate(io);
}
ssize_t xfile_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t cbData)
{
CHECK_VTBL(f->type);
// notes:
// - for archive file: vfs_open makes sure it wasn't opened for writing
// - normal file: let file_io alloc the buffer if the caller didn't
// (i.e. p = 0), because it knows about alignment / padding requirements
return f->type->io(f, ofs, size, pbuf, cb, cbData);
}
//
// file mapping
//
LibError xfile_map(File* f, u8*& p, size_t& size)
{
CHECK_VTBL(f->type);
return f->type->map(f, p, size);
}
LibError xfile_unmap(File* f)
{
CHECK_VTBL(f->type);
return f->type->unmap(f);
}

View File

@ -1,72 +0,0 @@
/**
* =========================================================================
* File : vfs_redirector.h
* Project : 0 A.D.
* Description :
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_VFS_REDIRECTOR
#define INCLUDED_VFS_REDIRECTOR
#include "file.h"
struct FileIo;
struct FileProvider_VTbl
{
// FOURCC that is checked on each access to ensure this is a valid vtbl.
u32 magic;
// note: no need to store name of this provider for debugging purposes;
// that can be deduced from the function pointers below.
// directory entry enumeration
// note: these don't really fit in with the other methods.
// they make sense both for the VFS tree and for the concrete
// file providers underlying it. due to this overlap and to allow
// file.cpp's next_ent function to access dir_filtered_next_ent,
// they are included anyway.
LibError (*dir_open)(const char* dir, DirIterator* di);
LibError (*dir_next_ent)(DirIterator* di, DirEnt* ent);
LibError (*dir_close)(DirIterator* di);
// file objects
LibError (*file_open)(const char* V_path, uint flags, TFile* tf, File* f);
LibError (*file_close)(File* f);
LibError (*file_validate)(const File* f);
// IO
LibError (*io_issue)(File* f, off_t ofs, size_t size, u8* buf, FileIo* io);
int (*io_has_completed)(FileIo* io);
LibError (*io_wait)(FileIo* io, u8*& p, size_t& size);
LibError (*io_discard)(FileIo* io);
LibError (*io_validate)(const FileIo* io);
ssize_t (*io)(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t cbData);
// file mapping
LibError (*map)(File* f, u8*& p, size_t& size);
LibError (*unmap)(File* f);
};
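// sketch of a minimal additional provider (the mem_* functions are
// hypothetical; it would live next to the vtbls in vfs_redirector.cpp
// so it can reuse vtbl_magic):
//
//   static const FileProvider_VTbl mem_vtbl =
//   {
//       vtbl_magic,
//       0, 0, 0,           // no directory enumeration
//       mem_open, mem_close, mem_validate,
//       0, 0, 0, 0, 0,     // no async IO
//       mem_io,            // sync IO only
//       0, 0               // no file mapping
//   };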
extern LibError xdir_open(const char* dir, DirIterator* di);
extern LibError xdir_next_ent(DirIterator* di, DirEnt* ent);
extern LibError xdir_close(DirIterator* di);
extern bool xfile_is_open(const File* f);
extern LibError xfile_open(const char* V_path, uint flags, TFile* tf, File* f);
extern LibError xfile_close(File* f);
extern LibError xfile_validate(const File* f);
extern LibError xfile_io_issue(File* f, off_t ofs, size_t size, u8* buf, FileIo* io);
extern int xfile_io_has_completed(FileIo* io);
extern LibError xfile_io_wait(FileIo* io, u8*& p, size_t& size);
extern LibError xfile_io_discard(FileIo* io);
extern LibError xfile_io_validate(const FileIo* io);
extern ssize_t xfile_io(File* f, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t cbData);
extern LibError xfile_map(File* f, u8*& p, size_t& size);
extern LibError xfile_unmap(File* f);
#endif // #ifndef INCLUDED_VFS_REDIRECTOR

View File

@ -1,771 +0,0 @@
/**
* =========================================================================
* File : vfs_tree.cpp
* Project : 0 A.D.
* Description : the actual 'filesystem' and its tree of directories.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "vfs_tree.h"
#include <string.h>
#include <time.h>
#include <string>
#include <vector>
#include <algorithm>
#include "lib/posix/posix_pthread.h"
#include "lib/allocators.h"
#include "lib/adts.h"
#include "file_internal.h"
ERROR_ASSOCIATE(ERR::TNODE_NOT_FOUND, "File/directory not found", ENOENT);
ERROR_ASSOCIATE(ERR::TNODE_WRONG_TYPE, "Using a directory as file or vice versa", -1);
// Mount = location of a file in the tree.
// TFile = all information about a file stored in the tree.
// TDir = container holding TFile-s representing a dir. in the tree.
static void* node_alloc();
// remembers the most recent modification time of any file in the VFS.
static time_t most_recent_mtime;
static void set_most_recent_if_newer(time_t mtime)
{
most_recent_mtime = std::max(most_recent_mtime, mtime);
}
time_t tree_most_recent_mtime()
{
return most_recent_mtime;
}
//-----------------------------------------------------------------------------
// locking
// these are exported to protect the vfs_mount list; apart from that, it is
// sufficient for VFS thread-safety to lock all of this module's APIs.
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
void tree_lock()
{
pthread_mutex_lock(&mutex);
}
void tree_unlock()
{
pthread_mutex_unlock(&mutex);
}
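// sketch: lock/unlock must be balanced on every path. a minimal RAII
// guard along these lines (no such helper exists in this module) makes
// that automatic for callers with early returns:
class TreeLockGuard
{
public:
    TreeLockGuard() { tree_lock(); }
    ~TreeLockGuard() { tree_unlock(); }
};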
//-----------------------------------------------------------------------------
enum TNodeType
{
NT_DIR,
NT_FILE
};
class TNode
{
public:
TNodeType type;
// allocated and owned by vfs_mount
const Mount* m;
// rationale: we store both entire path and name component.
// this increases size of VFS (2 pointers needed here) and
// filename storage, but allows getting path without having to
// iterate over all dir name components.
// we could retrieve name via strrchr(path, '/'), but that is slow.
const char* V_path;
// this is compared as a normal string (not pointer comparison), but
// the pointer passed must obviously remain valid, so it is
// usually an atom_fn.
const char* name;
TNode(TNodeType type_, const char* V_path_, const char* name_, const Mount* m_)
: type(type_), V_path(V_path_), name(name_), m(m_)
{
}
};
class TFile : public TNode
{
public:
off_t size;
time_t mtime;
uintptr_t memento;
TFile(const char* V_path, const char* name, const Mount* m)
: TNode(NT_FILE, V_path, name, m)
{
size = 0;
mtime = 0;
memento = 0;
}
};
template<> class DHT_Traits<const char*, TNode*>
{
public:
static const size_t initial_entries = 32;
size_t hash(const char* key) const
{
return (size_t)fnv_lc_hash(key);
}
bool equal(const char* k1, const char* k2) const
{
// note: in theory, we could take advantage of the atom_fn
// mechanism to only compare string pointers. however, we're
// dealing with path *components* here. adding these as atoms would
// about double the memory used (to ~1 MB) and require a bit of
// care in the implementation of file_make_unique_path_copy
// (must not early-out before checking the hash table).
//
// given that path components are rather short, string comparisons
// are not expensive and we'll just go with that for simplicity.
if(!strcmp(k1, k2))
return true;
#ifndef NDEBUG
// matched except for case: this can have 2 causes:
// - intentional. that would be legitimate but doesn't make much
// sense and isn't expected.
// - bug, e.g. discarding filename case in a filelist.
// this risks not being able to find the file (since VFS and
// possibly OS are case-sensitive) and wastes memory here.
// what we'll do is warn and treat as separate filename
// (least surprise).
// if(!strcasecmp(k1, k2))
// debug_warn("filenames differ only in case: bug?");
#endif
return false;
}
const char* get_key(TNode* t) const
{
return t->name;
}
};
typedef DynHashTbl<const char*, TNode*, DHT_Traits<const char*, TNode*> > TChildren;
typedef TChildren::iterator TChildrenIt;
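// sketch of how TChildren is driven (cf. TDir::find and TDir::add below).
// the key is the node's own name pointer, which must outlive the table;
// that is why names are atom_fn copies.
static void example_children_usage(TChildren& children, TNode* node)
{
    children.insert(node->name, node);
    TNode* found = children.find(node->name);    // O(1) if no collision
    debug_assert(found == node);
}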
enum TDirFlags
{
TD_POPULATED = 1
};
class TDir : public TNode
{
uint flags; // enum TDirFlags
TChildren children;
public:
RealDir rd; // HACK; removeme
TDir(const char* V_path, const char* name, const Mount* m_)
: TNode(NT_DIR, V_path, name, 0), children()
{
flags = 0;
rd.m = m_;
rd.watch = 0;
mount_create_real_dir(V_path, rd.m);
}
TChildrenIt begin() const { return children.begin(); }
TChildrenIt end() const { return children.end(); }
// non-const - caller may change e.g. rd.watch
RealDir& get_rd() { return rd; }
void populate()
{
// the caller may potentially access this directory.
// make sure it has been populated with loose files/directories.
if(!(flags & TD_POPULATED))
{
WARN_ERR(mount_populate(this, &rd));
flags |= TD_POPULATED;
}
}
TNode* find(const char* name) const
{
return children.find(name);
}
// must not be called if already exists! use find() first or
// find_and_add instead.
LibError add(const char* name_tmp, TNodeType type, TNode** pnode, const Mount* m_override = 0)
{
// note: must be done before path_append for security
// (otherwise, '/' in <name_tmp> wouldn't be caught)
RETURN_ERR(path_component_validate(name_tmp));
char V_new_path_tmp[PATH_MAX];
const uint flags = (type == NT_DIR)? PATH_APPEND_SLASH : 0;
RETURN_ERR(path_append(V_new_path_tmp, V_path, name_tmp, flags));
const char* V_new_path = file_make_unique_fn_copy(V_new_path_tmp);
const char* name = path_name_only(V_new_path);
// for directory nodes, V_path ends in slash, so name cannot be
// derived via path_last_component. instead, we have to make an
// atom_fn out of name_tmp.
// this effectively doubles the amount of directory path text,
// but it's not that bad.
if(type == NT_DIR)
name = file_make_unique_fn_copy(name_tmp);
const Mount* m = rd.m;
if(m_override)
m = m_override;
// note: if anything below fails, this mem remains allocated in the
// pool, but that "can't happen" and is OK because pool is big enough.
void* mem = node_alloc();
if(!mem)
WARN_RETURN(ERR::NO_MEM);
TNode* node;
#include "lib/nommgr.h"
if(type == NT_FILE)
node = new(mem) TFile(V_new_path, name, m);
else
node = new(mem) TDir (V_new_path, name, m);
#include "lib/mmgr.h"
children.insert(name, node);
*pnode = node;
return INFO::OK;
}
LibError find_and_add(const char* name, TNodeType type, TNode** pnode, const Mount* m = 0)
{
TNode* node = children.find(name);
if(node)
{
// wrong type (dir vs. file)
if(node->type != type)
WARN_RETURN(ERR::TNODE_WRONG_TYPE);
*pnode = node;
return INFO::ALREADY_EXISTS;
}
return add(name, type, pnode, m);
}
// empty this directory and all subdirectories; used when rebuilding VFS.
void clearR()
{
// recurse for all subdirs
// (preorder traversal - need to do this before clearing the list)
for(TChildrenIt it = children.begin(); it != children.end(); ++it)
{
TNode* node = *it;
if(node->type == NT_DIR)
{
((TDir*)node)->clearR();
((TDir*)node)->~TDir();
}
}
// wipe out this directory
children.clear();
// the watch is restored when this directory is repopulated; we must
// remove it in case the real directory backing this one was deleted.
mount_detach_real_dir(&rd);
}
};
static Pool node_pool;
static inline void node_init()
{
const size_t el_size = std::max(sizeof(TDir), sizeof(TFile));
(void)pool_create(&node_pool, VFS_MAX_FILES*el_size, el_size);
}
static inline void node_shutdown()
{
(void)pool_destroy(&node_pool);
}
static void* node_alloc()
{
return pool_alloc(&node_pool, 0);
}
static inline void node_free_all()
{
pool_free_all(&node_pool);
}
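// sketch of the node lifecycle these four functions support: raw memory
// from the pool, placement new (bracketed by nommgr/mmgr, as in TDir::add),
// explicit dtor, bulk recycling. the empty strings are illustrative only.
static void example_node_lifecycle()
{
    void* mem = node_alloc();
    if(!mem)
        return;
#include "lib/nommgr.h"
    TFile* tf = new(mem) TFile("", "", 0);
#include "lib/mmgr.h"
    tf->~TFile();       // explicit dtor; raw memory stays in node_pool
    node_free_all();    // recycles every node allocation at once
}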
//////////////////////////////////////////////////////////////////////////////
//
//
//
//////////////////////////////////////////////////////////////////////////////
static void displayR(TDir* td, int indent_level)
{
const char indent[] = " ";
TChildrenIt it;
// list all files in this dir
for(it = td->begin(); it != td->end(); ++it)
{
TNode* node = (*it);
if(node->type != NT_FILE)
continue;
const char* name = node->name;
TFile& file = *((TFile*)node);
char file_location = mount_get_type(file.m);
char* timestamp = ctime(&file.mtime);
timestamp[24] = '\0'; // remove '\n'
const off_t size = file.size;
// build format string: tell it how long the filename may be,
// so that it takes up all space before file info column.
char fmt[25];
int chars = 80 - indent_level*(sizeof(indent)-1);
sprintf(fmt, "%%-%d.%ds (%%c; %%6d; %%s)\n", chars, chars);
for(int i = 0; i < indent_level; i++)
printf(indent);
printf(fmt, name, file_location, (int)size, timestamp);	// cast: %d expects int, but off_t may be wider
}
// recurse over all subdirs
for(it = td->begin(); it != td->end(); ++it)
{
TNode* node = (*it);
if(node->type != NT_DIR)
continue;
const char* subdir_name = node->name;
// write subdir's name
// note: do it now, instead of in recursive call so that:
// - we don't have to pass dir_name parameter;
// - the VFS root node isn't displayed.
for(int i = 0; i < indent_level; i++)
printf(indent);
printf("[%s/]\n", subdir_name);
TDir* subdir = ((TDir*)node);
displayR(subdir, indent_level+1);
}
}
struct LookupCbParams : boost::noncopyable
{
const bool create_missing;
TDir* td; // current dir; assigned from node
TNode* node; // latest node returned (dir or file)
LookupCbParams(uint flags, TDir* td_)
: create_missing((flags & LF_CREATE_MISSING) != 0), td(td_)
{
// init in case lookup's <path> is "".
// this works because TDir is derived from TNode.
node = (TNode*)td;
}
};
static LibError lookup_cb(const char* component, bool is_dir, uintptr_t cbData)
{
LookupCbParams* p = (LookupCbParams*)cbData;
const TNodeType type = is_dir? NT_DIR : NT_FILE;
p->td->populate();
p->node = p->td->find(component);
if(!p->node)
{
if(p->create_missing)
RETURN_ERR(p->td->add(component, type, &p->node));
else
// complaining is left to callers; vfs_exists must be
// able to fail quietly.
return ERR::TNODE_NOT_FOUND; // NOWARN
}
if(p->node->type != type)
WARN_RETURN(ERR::TNODE_WRONG_TYPE);
if(is_dir)
p->td = (TDir*)p->node;
return INFO::CB_CONTINUE;
}
static LibError lookup(TDir* td, const char* path, uint flags, TNode** pnode)
{
// no undefined bits set
debug_assert( (flags & ~(LF_CREATE_MISSING|LF_START_DIR)) == 0 );
LookupCbParams p(flags, td);
RETURN_ERR(path_foreach_component(path, lookup_cb, (uintptr_t)&p));
// success.
*pnode = p.node;
return INFO::OK;
}
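// sketch: resolving a file path with the machinery above (the path is
// illustrative). no trailing slash, so lookup_cb treats the last
// component as a file.
static LibError example_lookup(TDir* start)
{
    TNode* node;
    RETURN_ERR(lookup(start, "art/textures/foo.dds", 0, &node));
    debug_assert(node->type == NT_FILE);
    return INFO::OK;
}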
//////////////////////////////////////////////////////////////////////////////
//
//
//
//////////////////////////////////////////////////////////////////////////////
// this is a pointer to node_alloc-ed memory instead of a static TDir for
// 2 reasons:
// - no NLSO (non-local static object) shutdown order issues; validity is
//   well defined (namely between tree_init and tree_shutdown)
// - bonus: tree_init can use it when checking if called twice.
//
// this means we must be extremely careful during tree_clear to ensure
// its memory remains valid.
static TDir* tree_root;
// make tree_root valid.
static void tree_root_init()
{
// must not be called more than once without intervening tree_shutdown.
debug_assert(!tree_root);
#include "lib/nommgr.h" // placement new
void* mem = node_alloc();
if(mem)
tree_root = new(mem) TDir("", "", 0);
#include "lib/mmgr.h"
}
// destroy the tree root node and free any extra memory held by it.
// note that its node memory still remains allocated.
static void tree_root_shutdown()
{
// must not be called without previous tree_root_init.
debug_assert(tree_root);
// this frees the root node's hash table, which would otherwise leak.
tree_root->~TDir();
tree_root = 0;
}
// establish a root node and prepare node_allocator for use.
//
// rationale: calling this from every tree_add* is ugly, so require
// manual init.
void tree_init()
{
node_init();
tree_root_init();
}
// empty all directories and free their memory.
// however, node_allocator's DynArray still remains initialized and
// the root directory is usable (albeit empty).
// use when remounting.
void tree_clear()
{
tree_root->clearR();
tree_root_shutdown(); // must come before tree_root_init
node_free_all();
// note: this is necessary because node_free_all
// pulls the rug out from under tree_root.
tree_root_init();
}
// shut down entirely; destroys node_allocator. any further use after this
// requires another tree_init.
void tree_shutdown()
{
// note: can't use tree_clear because that restores a root node
// ready for use, which allocates memory.
// wipe out all dirs (including root node), thus
// freeing memory they hold.
tree_root->clearR();
tree_root_shutdown();
// free memory underlying the nodes themselves.
node_shutdown();
}
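// sketch: the intended call sequence over the module's lifetime, as
// implied by the comments above:
//	tree_init();		// once, before any use of the tree
//	/* tree_add_*, lookups, ... */
//	tree_clear();		// on each remount: tree emptied, root stays usable
//	tree_shutdown();	// final; reuse requires another tree_init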
// write a representation of the VFS tree to stdout.
void tree_display()
{
displayR(tree_root, 0);
}
LibError tree_add_file(TDir* td, const char* name,
const Mount* m, off_t size, time_t mtime, uintptr_t memento)
{
TNode* node;
LibError ret = td->find_and_add(name, NT_FILE, &node);
RETURN_ERR(ret);
if(ret == INFO::ALREADY_EXISTS)
{
TFile* tf = (TFile*)node;
if(!mount_should_replace(tf->m, m, tf->size, size, tf->mtime, mtime))
return INFO::ALREADY_EXISTS;
stats_vfs_file_remove(tf->size);
}
TFile* tf = (TFile*)node;
tf->m = m;
tf->mtime = mtime;
tf->size = size;
tf->memento = memento;
stats_vfs_file_add(size);
set_most_recent_if_newer(mtime);
return INFO::OK;
}
LibError tree_add_dir(TDir* td, const char* name, TDir** ptd)
{
TNode* node;
RETURN_ERR(td->find_and_add(name, NT_DIR, &node));
*ptd = (TDir*)node;
return INFO::OK;
}
LibError tree_lookup_dir(const char* V_path, TDir** ptd, uint flags)
{
// reject non-directory paths (no trailing '/') up front; lookup might
// otherwise return a file node.
if(!VFS_PATH_IS_DIR(V_path))
WARN_RETURN(ERR::TNODE_WRONG_TYPE);
TDir* td = (flags & LF_START_DIR)? *ptd : tree_root;
TNode* node = NULL;
// directories should exist, so warn (via CHECK_ERR) if lookup fails.
CHECK_ERR(lookup(td, V_path, flags, &node));
*ptd = (TDir*)node;
return INFO::OK;
}
LibError tree_lookup(const char* V_path, TFile** pfile, uint flags)
{
// reject directory paths (trailing '/') up front; lookup might otherwise
// return a directory node.
if(VFS_PATH_IS_DIR(V_path))
WARN_RETURN(ERR::TNODE_WRONG_TYPE);
TNode* node = NULL;
LibError ret = lookup(tree_root, V_path, flags, &node);
RETURN_ERR(ret);
*pfile = (TFile*)node;
return INFO::OK;
}
struct AddPathCbParams : boost::noncopyable
{
const Mount* const m;
TDir* td;
AddPathCbParams(const Mount* m_)
: m(m_), td(tree_root) {}
};
static LibError add_path_cb(const char* component, bool is_dir, uintptr_t cbData)
{
AddPathCbParams* p = (AddPathCbParams*)cbData;
// should only be called for directory paths, so complain if not dir.
if(!is_dir)
WARN_RETURN(ERR::TNODE_WRONG_TYPE);
TNode* node;
RETURN_ERR(p->td->find_and_add(component, NT_DIR, &node, p->m));
p->td = (TDir*)node;
return INFO::CB_CONTINUE;
}
// iterate over all components in V_dir_path (must reference a directory,
// i.e. end in slash). for any that are missing, add them with the
// specified mount point. this is useful for mounting directories.
//
// passes back the last directory encountered.
LibError tree_add_path(const char* V_dir_path, const Mount* m, TDir** ptd)
{
debug_assert(VFS_PATH_IS_DIR(V_dir_path));
AddPathCbParams p(m);
RETURN_ERR(path_foreach_component(V_dir_path, add_path_cb, (uintptr_t)&p));
*ptd = p.td;
return INFO::OK;
}
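// sketch: a mount operation would materialize its directory chain like
// this ("mods/official/" is illustrative; the trailing slash is required):
static LibError example_mount_dir_chain(const Mount* m, TDir** ptd)
{
    return tree_add_path("mods/official/", m, ptd);
}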
//////////////////////////////////////////////////////////////////////////////
// rationale: see DirIterator definition in file.h.
struct TreeDirIterator
{
TChildren::iterator it;
// cache end() to avoid needless copies
TChildren::iterator end;
// the directory we're iterating over; this is used to lock/unlock it,
// i.e. prevent modifications that would invalidate the iterator.
TDir* td;
};
cassert(sizeof(TreeDirIterator) <= DIR_ITERATOR_OPAQUE_SIZE);
LibError tree_dir_open(const char* V_dir_path, DirIterator* di)
{
debug_assert(VFS_PATH_IS_DIR(V_dir_path));
TreeDirIterator* tdi = (TreeDirIterator*)di->opaque;
TDir* td;
CHECK_ERR(tree_lookup_dir(V_dir_path, &td));
// we need to prevent modifications to this directory while an iterator is
// active, otherwise entries may be skipped or no longer valid addresses
// accessed. blocking other threads is much more convenient for callers
// than having to check for ERR::AGAIN on every call, so we use a mutex
// instead of a simple refcount. we don't bother with fine-grained locking
// (e.g. per directory or read/write locks) because it would result in
// more overhead (we have hundreds of directories) and is unnecessary.
tree_lock();
tdi->it = td->begin();
tdi->end = td->end();
tdi->td = td;
return INFO::OK;
}
LibError tree_dir_next_ent(DirIterator* di, DirEnt* ent)
{
TreeDirIterator* tdi = (TreeDirIterator*)di->opaque;
if(tdi->it == tdi->end)
return ERR::DIR_END; // NOWARN
const TNode* node = *(tdi->it++);
ent->name = node->name;
// set size and mtime fields depending on node type:
switch(node->type)
{
case NT_DIR:
ent->size = -1;
ent->mtime = 0; // not currently supported for dirs
ent->tf = 0;
break;
case NT_FILE:
{
TFile* tf = (TFile*)node;
ent->size = tf->size;
ent->mtime = tf->mtime;
ent->tf = tf;
break;
}
default:
debug_warn("invalid TNode type");
}
return INFO::OK;
}
LibError tree_dir_close(DirIterator* UNUSED(d))
{
tree_unlock();
// no further cleanup needed. we could zero out d but that might
// hide bugs; the iterator is safe (will not go beyond end) anyway.
return INFO::OK;
}
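// sketch: the canonical enumeration loop for the three functions above.
// the tree stays locked between open and close, so the loop body must
// not re-enter tree-modifying APIs.
static LibError example_list_dir(const char* V_dir_path)
{
    DirIterator di;
    RETURN_ERR(tree_dir_open(V_dir_path, &di));
    DirEnt ent;
    while(tree_dir_next_ent(&di, &ent) == INFO::OK)
        printf("%s%s\n", ent.name, (ent.size == -1)? "/" : "");
    return tree_dir_close(&di);
}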
//-----------------------------------------------------------------------------
// get/set
const Mount* tfile_get_mount(const TFile* tf)
{
return tf->m;
}
uintptr_t tfile_get_memento(const TFile* tf)
{
return tf->memento;
}
const char* tfile_get_atom_fn(const TFile* tf)
{
return ((TNode*)tf)->V_path;
}
void tfile_set_mount(TFile* tf, const Mount* m)
{
tf->m = m;
}
void tree_update_file(TFile* tf, off_t size, time_t mtime)
{
tf->size = size;
tf->mtime = mtime;
}
// get file status (mode, size, mtime). output param is undefined on error.
LibError tree_stat(const TFile* tf, struct stat* s)
{
// all stat members currently supported are stored in TFile, so we
// can return them directly without having to call file|zip_stat.
s->st_mode = S_IFREG;
s->st_size = tf->size;
s->st_mtime = tf->mtime;
return INFO::OK;
}
RealDir* tree_get_real_dir(TDir* td)
{
return &td->get_rd();
}

View File

@ -1,124 +0,0 @@
/**
* =========================================================================
* File : vfs_tree.h
* Project : 0 A.D.
* Description : the actual 'filesystem' and its tree of directories.
* =========================================================================
*/
// license: GPL; see lib/license.txt
#ifndef INCLUDED_VFS_TREE
#define INCLUDED_VFS_TREE
class TFile; // must come before vfs_mount.h
class TDir;
#include "file.h" // DirEnt
#include "vfs_mount.h" // Mount
namespace ERR
{
const LibError TNODE_NOT_FOUND = -110600;
// attempted to treat a file as directory or vice versa.
const LibError TNODE_WRONG_TYPE = -110601;
}
// establish a root node and prepare node_allocator for use.
extern void tree_init();
// shut down entirely; destroys node_allocator. any further use after this
// requires another tree_init.
extern void tree_shutdown();
extern void tree_display();
// empty all directories and free their memory.
// however, node_allocator's DynArray still remains initialized and
// the root directory is usable (albeit empty).
// use when remounting.
extern void tree_clear();
extern time_t tree_most_recent_mtime();
// attempt to add <name> to <td>, storing its attributes.
// overrides previously existing files of the same name if the new one
// is more important, determined via priority and file location.
// called by zip_cb and add_ent.
//
// note: if "priority" is the same, replace!
// this makes sure mods/patches etc. actually replace files.
extern LibError tree_add_file(TDir* td, const char* name, const Mount* m,
off_t size, time_t mtime, uintptr_t memento);
extern LibError tree_add_dir(TDir* dir, const char* name, TDir** ptd);
enum TreeLookupFlags
{
LF_CREATE_MISSING = 1,
LF_START_DIR = 2
};
// pass back file information for <path> (relative to VFS root).
//
// if <flags> & LF_CREATE_MISSING, the file is added to VFS unless
// a higher-priority file of the same name already exists
// (used by VFile_reload when opening for writing).
//
// output params are only valid if INFO::OK is returned.
extern LibError tree_lookup(const char* path, TFile** ptf, uint flags = 0);
// starting at VFS root, traverse <path> and pass back information
// for its last directory component.
//
// if <flags> & LF_CREATE_MISSING, all missing subdirectory components are
// added to the VFS.
// if <flags> & LF_START_DIR, traversal starts at *ptd
// (used when looking up paths relative to a mount point).
//
// <path> can be to a file or dir (in which case it must end in '/',
// to make sure the last component is treated as a directory).
//
// output params are only valid if INFO::OK is returned.
extern LibError tree_lookup_dir(const char* V_path, TDir** ptd, uint flags = 0);
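// sketch of typical flag usage (paths and <mount_dir> are illustrative):
//	TFile* tf;
//	(void)tree_lookup("config/user.cfg", &tf, LF_CREATE_MISSING);
//	TDir* td = mount_dir;	// hypothetical mount point's TDir
//	(void)tree_lookup_dir("textures/", &td, LF_START_DIR);	// relative to *ptd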
// iterate over all components in V_dir_path (must reference a directory,
// i.e. end in slash). for any that are missing, add them with the
// specified mount point. this is useful for mounting directories.
//
// passes back the last directory encountered.
extern LibError tree_add_path(const char* V_dir_path, const Mount* m, TDir** ptd);
extern LibError tree_dir_open(const char* V_dir_path, DirIterator* di);
extern LibError tree_dir_next_ent(DirIterator* di, DirEnt* ent);
extern LibError tree_dir_close(DirIterator* di);
// given a file that is stored on disk and its VFS path,
// return its OS path (for use with file.cpp).
// used by vfs_realpath and VFile_reopen.
extern LibError tree_realpath(TFile* tf, const char* V_path, char* P_real_path);
extern LibError tree_stat(const TFile* tf, struct stat* s);
extern const Mount* tfile_get_mount(const TFile* tf);
extern uintptr_t tfile_get_memento(const TFile* tf);
extern const char* tfile_get_atom_fn(const TFile* tf);
extern void tfile_set_mount(TFile* tf, const Mount* m);
extern void tree_update_file(TFile* tf, off_t size, time_t mtime);
struct RealDir;
extern RealDir* tree_get_real_dir(TDir* td);
// for use in vfs_mount
extern void tree_lock();
extern void tree_unlock();
#endif // #ifndef INCLUDED_VFS_TREE