janwas
f5f4670d7f
file: change block size to 32kb (=> yields highest throughput) archive, file_io: add additional block cache forwarding path when reading uncompressed data from archive (avoids repetitive IOs) file_io, file_stats: fix inaccurate and incorrect IO throughput calculation file_stats: refactor some calculations to avoid division by 0 This was SVN commit r3665.
860 lines
23 KiB
C++
860 lines
23 KiB
C++
#include "precompiled.h"
|
|
|
|
#include "lib/timer.h"
|
|
#include "lib/allocators.h"
|
|
#include "lib/res/res.h"
|
|
#include "file_internal.h"
|
|
|
|
|
|
// components:
|
|
// - za_*: Zip archive handling
|
|
// passes the list of files in an archive to lookup.
|
|
// - lookup_*: file lookup
|
|
// per archive: return file info (e.g. offset, size), given filename.
|
|
// - Archive_*: Handle-based container for archive info
|
|
// owns archive file and its lookup mechanism.
|
|
// - inf_*: in-memory inflate routines (zlib wrapper)
|
|
// decompresses blocks from file_io callback.
|
|
// - afile_*: file from Zip archive
|
|
// uses lookup to get file information; holds inflate state.
|
|
// - sync and async I/O
|
|
// uses file_* and inf_*.
|
|
// - file mapping
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// lookup_*: file lookup
|
|
// per archive: return file info (e.g. offset, size), given filename.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// rationale:
|
|
// - we don't export a "key" (currently array index) that would allow faster
|
|
// file lookup. this would only be useful if higher-level code were to
|
|
// store the key and use it more than once. also, lookup is currently fast
|
|
// enough. finally, this would also make our file enumerate callback
|
|
// incompatible with the others (due to the extra key param).
|
|
//
|
|
// - we don't bother with a directory tree to speed up lookup. the above
|
|
// is fast enough: O(1) if accessed sequentially, otherwise O(log(files)).
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Archive_*: Handle-based container for archive info
|
|
// owns archive file and its lookup mechanism.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// Handle-based container for an open archive: owns the archive file
// and the lookup array built from its central directory.
struct Archive
{
	// the archive file itself (opened with FILE_CACHE_BLOCK)
	File f;

	// array of file entries; allocated by archive_allocate_entries,
	// filled by archive_add_file, freed in Archive_dtor.
	ArchiveEntry* ents;
	// number of valid entries in above array (see lookup_add_file_cb)
	uint num_files;

	// note: we need to keep track of what resources reload() allocated,
	// so the dtor can free everything correctly.
	uint is_open : 1;	// f has been file_open-ed
	uint is_loaded : 1;	// ents has been allocated and populated
};
|
|
|
|
H_TYPE_DEFINE(Archive);
|
|
|
|
|
|
// H_TYPE init hook: Archive needs no per-instance initialization -
// all fields are set up by Archive_reload.
static void Archive_init(Archive*, va_list)
{
}
|
|
|
|
// H_TYPE dtor hook: release whatever Archive_reload managed to acquire.
// each resource is guarded by its own flag so that a partially
// completed reload is still torn down correctly.
static void Archive_dtor(Archive* a)
{
	if(a->is_loaded)
	{
		a->is_loaded = 0;
		(void)mem_free(a->ents);
	}
	if(a->is_open)
	{
		a->is_open = 0;
		(void)file_close(&a->f);
	}
}
|
|
|
|
// H_TYPE reload hook: open the archive file and build the entry list.
// the is_open/is_loaded flags record progress so Archive_dtor can free
// exactly what was acquired if we fail partway through.
static LibError Archive_reload(Archive* a, const char* fn, Handle)
{
	// (note: don't warn on failure - this happens when
	// vfs_mount blindly archive_open-s a dir)
	RETURN_ERR(file_open(fn, FILE_CACHE_BLOCK, &a->f));
	a->is_open = 1;

	// reads the archive's directory and fills a->ents / a->num_files
	// (via archive_allocate_entries and archive_add_file callbacks).
	RETURN_ERR(zip_populate_archive(&a->f, a));
	a->is_loaded = 1;

	return ERR_OK;
}
|
|
|
|
// H_TYPE validate hook: sanity-check the archive's invariants.
static LibError Archive_validate(const Archive* a)
{
	// the underlying file must itself pass validation.
	RETURN_ERR(file_validate(&a->f));

	// the entry array pointer must at least look plausible.
	if(debug_is_pointer_bogus(a->ents))
		return ERR_1;
	return ERR_OK;
}
|
|
|
|
// H_TYPE to_string hook: brief summary for the handle manager's
// debug display.
static LibError Archive_to_string(const Archive* a, char* buf)
{
	const uint count = a->num_files;
	snprintf(buf, H_STRING_LEN, "(%u files)", count);
	return ERR_OK;
}
|
|
|
|
|
|
|
|
// open and return a handle to the archive indicated by <fn>.
// somewhat slow - each file is added to an internal index.
Handle archive_open(const char* fn)
{
	TIMER("archive_open");
	// all actual work happens in Archive_reload, invoked via h_alloc.
	return h_alloc(H_Archive, fn);
}
|
|
|
|
|
|
// close the archive <ha> and set ha to 0
LibError archive_close(Handle& ha)
{
	// Archive_dtor runs when the handle's refcount reaches zero.
	return h_free(ha, H_Archive);
}
|
|
|
|
|
|
|
|
|
|
// look up ArchiveEntry, given filename (untrusted!).
|
|
static LibError archive_get_file_info(Archive* a, const char* fn, uintptr_t memento, ArchiveEntry*& ent)
|
|
{
|
|
if(memento)
|
|
{
|
|
ent = (ArchiveEntry*)memento;
|
|
return ERR_OK;
|
|
}
|
|
else
|
|
{
|
|
const char* atom_fn = file_make_unique_fn_copy(fn);
|
|
for(uint i = 0; i < a->num_files; i++)
|
|
if(a->ents[i].atom_fn == atom_fn)
|
|
{
|
|
ent = &a->ents[i];
|
|
return ERR_OK;
|
|
}
|
|
}
|
|
|
|
return ERR_FILE_NOT_FOUND;
|
|
}
|
|
|
|
|
|
// successively call <cb> for each valid file in the archive <ha>,
|
|
// passing the complete path and <user>.
|
|
// if it returns a nonzero value, abort and return that, otherwise 0.
|
|
//
|
|
// FileCB's name parameter will be the full path and unique
|
|
// (i.e. returned by file_make_unique_fn_copy).
|
|
LibError archive_enum(const Handle ha, const FileCB cb, const uintptr_t user)
|
|
{
|
|
H_DEREF(ha, Archive, a);
|
|
|
|
struct stat s;
|
|
memset(&s, 0, sizeof(s));
|
|
|
|
for(uint i = 0; i < a->num_files; i++)
|
|
{
|
|
const ArchiveEntry* ent = &a->ents[i];
|
|
s.st_mode = S_IFREG;
|
|
s.st_size = (off_t)ent->ucsize;
|
|
s.st_mtime = ent->mtime;
|
|
const uintptr_t memento = (uintptr_t)ent;
|
|
LibError ret = cb(ent->atom_fn, &s, memento, user);
|
|
if(ret != INFO_CB_CONTINUE)
|
|
return ret;
|
|
}
|
|
|
|
return ERR_OK;
|
|
}
|
|
|
|
|
|
// allocate the archive's entry array for <num_entries> files.
// called (once) by the populate code before archive_add_file.
LibError archive_allocate_entries(Archive* a, size_t num_entries)
{
	debug_assert(num_entries != 0);	// =0 makes no sense but wouldn't be fatal

	debug_assert(a->ents == 0);	// must not have been allocated yet

	// guard against size_t overflow in the allocation size below -
	// a wrapped product would silently allocate too small a buffer
	// and archive_add_file would then write out of bounds.
	if(num_entries > (size_t)-1 / sizeof(ArchiveEntry))
		WARN_RETURN(ERR_NO_MEM);

	a->ents = (ArchiveEntry*)mem_alloc(num_entries * sizeof(ArchiveEntry), 32);
	if(!a->ents)
		WARN_RETURN(ERR_NO_MEM);
	return ERR_OK;
}
|
|
|
|
|
|
// add file <fn> to the lookup data structure.
// called from za_enum_files in order (0 <= idx < num_entries).
// the first call notifies us of # entries, so we can allocate memory.
//
// note: ent is only valid during the callback! must be copied or saved.
LibError archive_add_file(Archive* a, const ArchiveEntry* ent)
{
	// capacity is not stored in Archive, so bounds cannot be checked
	// here; archive_allocate_entries must have been called with a
	// sufficient num_entries (the populate code's responsibility).
	a->ents[a->num_files++] = *ent;
	return ERR_OK;
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// afile_*: file from Zip archive
|
|
// uses lookup to get file information; holds inflate state.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// convenience function, allows implementation change in AFile.
|
|
// note that size == ucsize isn't foolproof, and adding a flag to
|
|
// ofs or size is ugly and error-prone.
|
|
// no error checking - always called from functions that check af.
|
|
static inline bool is_compressed(AFile* af)
|
|
{
|
|
return af->method != CM_NONE;
|
|
}
|
|
|
|
|
|
|
|
|
|
// get file status (size, mtime). output param is zeroed on error.
LibError afile_stat(Handle ha, const char* fn, struct stat* s)
{
	// zero output param in case we fail below.
	memset(s, 0, sizeof(struct stat));

	H_DEREF(ha, Archive, a);

	// look up by name (no memento available to callers of this API).
	ArchiveEntry* ent;
	CHECK_ERR(archive_get_file_info(a, fn, 0, ent));

	// only size and mtime are known for archived files;
	// the other stat fields remain zero.
	s->st_size = ent->ucsize;
	s->st_mtime = ent->mtime;
	return ERR_OK;
}
|
|
|
|
|
|
|
|
|
|
// sanity-check an AFile's invariants.
LibError afile_validate(const AFile* af)
{
	if(!af)
		return ERR_INVALID_PARAM;

	// a valid archived file always has nonzero uncompressed size.
	// note: don't check af->ha - it may be freed at shutdown before
	// its files. TODO: revisit once dependency support is added.
	if(af->fc.size == 0)
		return ERR_1;

	// note: af->ctx is 0 if file is not compressed - no invariant there.
	return ERR_OK;
}
|
|
|
|
#define CHECK_AFILE(af) CHECK_ERR(afile_validate(af))
|
|
|
|
|
|
// open file, and fill *af with information about it.
// <memento> is either 0 (look up by name) or the ArchiveEntry* that
// archive_enum passed to its callback (skips the lookup).
// return < 0 on error (output param zeroed).
LibError afile_open(const Handle ha, const char* fn, uintptr_t memento, int flags, AFile* af)
{
	// zero output param in case we fail below.
	memset(af, 0, sizeof(*af));

	H_DEREF(ha, Archive, a);

	// this is needed for AFile below. optimization: archive_get_file_info
	// wants the original filename, but by passing the unique copy
	// we avoid work there (its file_make_unique_fn_copy returns immediately)
	const char* atom_fn = file_make_unique_fn_copy(fn);

	ArchiveEntry* ent;
	// don't want AFile to contain a ArchiveEntry struct -
	// its ucsize member must be 'loose' for compatibility with File.
	// => need to copy ArchiveEntry fields into AFile.
	RETURN_ERR(archive_get_file_info(a, atom_fn, memento, ent));

	// presumably corrects ent->ofs from the zip local file header
	// (zip stores redundant copies) - confirm against zip.cpp.
	zip_fixup_lfh(&a->f, ent);

	uintptr_t ctx = 0;
	// slight optimization: do not allocate context if not compressed
	if(ent->method != CM_NONE)
	{
		ctx = comp_alloc(CT_DECOMPRESSION, ent->method);
		if(!ctx)
			return ERR_NO_MEM;
	}

	// all archive reads go through the block cache (avoids repeated
	// IOs when several small files share a block).
	flags |= FILE_CACHE_BLOCK;

	// copy the 'loose' fields into the AFile (see rationale above).
	af->fc.flags = flags;
	af->fc.size = ent->ucsize;
	af->fc.atom_fn = atom_fn;
	af->ofs = ent->ofs;
	af->csize = ent->csize;
	af->method = ent->method;
	af->ha = ha;
	af->ctx = ctx;
	af->is_mapped = 0;
	// final self-check; returns (and reports) if the above left
	// the AFile in an invalid state.
	CHECK_AFILE(af);
	return ERR_OK;
}
|
|
|
|
|
|
// close file.
|
|
LibError afile_close(AFile* af)
|
|
{
|
|
CHECK_AFILE(af);
|
|
// other AFile fields don't need to be freed/cleared
|
|
comp_free(af->ctx);
|
|
af->ctx = 0;
|
|
return ERR_OK;
|
|
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// sync and async I/O
|
|
// uses file_* and inf_*.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
static const size_t CHUNK_SIZE = 16*KiB;
|
|
|
|
// begin transferring <size> bytes, starting at <ofs>. get result
|
|
// with afile_io_wait; when no longer needed, free via afile_io_discard.
|
|
LibError afile_io_issue(AFile* af, off_t user_ofs, size_t max_output_size, void* user_buf, AFileIo* io)
|
|
{
|
|
// zero output param in case we fail below.
|
|
memset(io, 0, sizeof(AFileIo));
|
|
|
|
CHECK_AFILE(af);
|
|
H_DEREF(af->ha, Archive, a);
|
|
|
|
// not compressed; we'll just read directly from the archive file.
|
|
// no need to clamp to EOF - that's done already by the VFS.
|
|
if(!is_compressed(af))
|
|
{
|
|
// io->ctx is 0 (due to memset)
|
|
const off_t ofs = af->ofs+user_ofs;
|
|
return file_io_issue(&a->f, ofs, max_output_size, user_buf, &io->io);
|
|
}
|
|
|
|
|
|
io->ctx = af->ctx;
|
|
io->max_output_size = max_output_size;
|
|
io->user_buf = user_buf;
|
|
|
|
const off_t cofs = af->ofs + af->last_cofs; // needed to determine csize
|
|
|
|
// read up to next chunk (so that the next read is aligned -
|
|
// less work for aio) or up to EOF.
|
|
const ssize_t left_in_chunk = CHUNK_SIZE - (cofs % CHUNK_SIZE);
|
|
const ssize_t left_in_file = af->csize - cofs;
|
|
const size_t csize = MIN(left_in_chunk, left_in_file);
|
|
|
|
void* cbuf = mem_alloc(csize, 4*KiB);
|
|
if(!cbuf)
|
|
return ERR_NO_MEM;
|
|
|
|
CHECK_ERR(file_io_issue(&a->f, cofs, csize, cbuf, &io->io));
|
|
|
|
af->last_cofs += (off_t)csize;
|
|
return ERR_OK;
|
|
}
|
|
|
|
|
|
// indicates if the IO referenced by <io> has completed.
// return value: 0 if pending, 1 if complete, < 0 on error.
int afile_io_has_completed(AFileIo* io)
{
	// thin forwarder; decompression (if any) is deferred until
	// afile_io_wait.
	return file_io_has_completed(&io->io);
}
|
|
|
|
|
|
// wait until the transfer <io> completes, and return its buffer.
// output parameters are zeroed on error.
LibError afile_io_wait(AFileIo* io, void*& buf, size_t& size)
{
	// zero output params in case we fail below.
	buf = 0;
	size = 0;

	void* raw_buf;
	size_t raw_size;
	CHECK_ERR(file_io_wait(&io->io, raw_buf, raw_size));

	// file is compressed and we need to decompress
	if(io->ctx)
	{
		// inflate the raw block directly into the caller's buffer
		// (registered at issue time) ..
		comp_set_output(io->ctx, (void*)io->user_buf, io->max_output_size);
		ssize_t ucbytes_output = comp_feed(io->ctx, raw_buf, raw_size);
		// .. and release the temporary compressed buffer, which is
		// no longer needed whether comp_feed succeeded or not.
		// NOTE(review): raw_buf was allocated via mem_alloc in
		// afile_io_issue but released with free() here - confirm
		// mem_alloc is malloc-compatible, else this pairs allocators
		// incorrectly.
		free(raw_buf);
		RETURN_ERR(ucbytes_output);

		buf = io->user_buf;
		size = ucbytes_output;
	}
	else
	{
		// uncompressed: hand the raw transfer buffer straight through.
		buf = raw_buf;
		size = raw_size;
	}

	return ERR_OK;
}
|
|
|
|
|
|
// finished with transfer <io> - free its buffer (returned by afile_io_wait)
LibError afile_io_discard(AFileIo* io)
{
	// forward to the file layer, which owns the raw IO buffer.
	return file_io_discard(&io->io);
}
|
|
|
|
|
|
// sanity-check an AFileIo's invariants.
LibError afile_io_validate(const AFileIo* io)
{
	// <ctx> and <max_output_size> have no invariants we could check.
	if(debug_is_pointer_bogus(io->user_buf))
		return ERR_1;

	RETURN_ERR(file_io_validate(&io->io));
	return ERR_OK;
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
class Decompressor
|
|
{
|
|
public:
|
|
Decompressor(uintptr_t comp_ctx_, size_t ucsize_max, bool use_temp_buf_, FileIOCB cb, uintptr_t cb_ctx)
|
|
{
|
|
comp_ctx = comp_ctx_;
|
|
|
|
csize_total = 0;
|
|
ucsize_left = ucsize_max;
|
|
|
|
use_temp_buf = use_temp_buf_;
|
|
|
|
user_cb = cb;
|
|
user_cb_ctx = cb_ctx;
|
|
}
|
|
|
|
LibError feed(const void* cblock, size_t csize, size_t* bytes_processed)
|
|
{
|
|
if(use_temp_buf)
|
|
RETURN_ERR(comp_alloc_output(comp_ctx, csize));
|
|
|
|
void* ucblock = comp_get_output(comp_ctx);
|
|
|
|
const size_t ucsize = comp_feed(comp_ctx, cblock, csize);
|
|
*bytes_processed = ucsize;
|
|
debug_assert(ucsize <= ucsize_left);
|
|
ucsize_left -= ucsize;
|
|
|
|
LibError ret = INFO_CB_CONTINUE;
|
|
if(user_cb)
|
|
ret = user_cb(user_cb_ctx, ucblock, ucsize, bytes_processed);
|
|
if(ucsize_left == 0)
|
|
ret = ERR_OK;
|
|
return ret;
|
|
}
|
|
|
|
size_t total_csize_fed() const { return csize_total; }
|
|
|
|
private:
|
|
uintptr_t comp_ctx;
|
|
|
|
size_t csize_total;
|
|
size_t ucsize_left;
|
|
|
|
bool use_temp_buf;
|
|
|
|
// allow user-specified callbacks: "chain" them, because file_io's
|
|
// callback mechanism is already used to return blocks.
|
|
FileIOCB user_cb;
|
|
uintptr_t user_cb_ctx;
|
|
};
|
|
|
|
|
|
// trampoline from file_io's C-style callback to Decompressor::feed.
static LibError decompressor_feed_cb(uintptr_t cb_ctx,
	const void* cblock, size_t csize, size_t* bytes_processed)
{
	return ((Decompressor*)cb_ctx)->feed(cblock, csize, bytes_processed);
}
|
|
|
|
|
|
// read from the (possibly compressed) file <af> as if it were a normal file.
// starting at the beginning of the logical (decompressed) file,
// skip <ofs> bytes of data; read the next <size> bytes into <*pbuf>.
//
// if non-NULL, <cb> is called for each block read, passing <ctx>.
// if it returns a negative error code,
// the read is aborted and that value is returned.
// the callback mechanism is useful for user progress notification or
// processing data while waiting for the next I/O to complete
// (quasi-parallel, without the complexity of threads).
//
// return bytes read, or a negative error code.
//
// note: in the compressed path below, <ofs> is not used - reads
// continue sequentially from the current stream position (last_cofs);
// random access would require restarting the decompression stream.
ssize_t afile_read(AFile* af, off_t ofs, size_t size, FileIOBuf* pbuf, FileIOCB cb, uintptr_t cb_ctx)
{
	CHECK_AFILE(af);
	H_DEREF(af->ha, Archive, a);

	if(!is_compressed(af))
	{
		// HACK
		// background: file_io will operate according to the
		// *archive* file's flags, but the AFile may contain some overrides
		// set via vfs_open. one example is FILE_LONG_LIVED -
		// that must be copied over (temporarily) into a->f flags.
		//
		// we currently copy all flags - this may mean that setting
		// global policy (e.g. "don't cache") for all archive files is
		// difficult, but that can be worked around by forcing
		// flag to be set in afile_open.
		// this is better than the alternative of copying individual
		// flags because it'd need to be updated as new flags are added.
		a->f.fc.flags = af->fc.flags;

		bool we_allocated = (pbuf != FILE_BUF_TEMP) && (*pbuf == FILE_BUF_ALLOC);
		// no need to set last_cofs - only checked if compressed.
		ssize_t bytes_read = file_io(&a->f, af->ofs+ofs, size, pbuf, cb, cb_ctx);
		RETURN_ERR(bytes_read);
		// the buffer was read under the archive's name; re-tag it with
		// the member file's name (presumably so cache lookups match -
		// see file_buf_set_real_fn).
		if(we_allocated)
			(void)file_buf_set_real_fn(*pbuf, af->fc.atom_fn);
		return bytes_read;
	}

	// compressed: afile_open must have allocated a context.
	debug_assert(af->ctx != 0);

	// obtain the output buffer for the decompressed data.
	RETURN_ERR(file_buf_get(pbuf, size, af->fc.atom_fn, af->fc.flags, cb));
	const bool use_temp_buf = (pbuf == FILE_BUF_TEMP);
	if(!use_temp_buf)
		comp_set_output(af->ctx, (void*)*pbuf, size);

	// archive-absolute position of the next compressed data.
	const off_t cofs = af->ofs+af->last_cofs;
	// remaining bytes in file. callback will cause IOs to stop when
	// enough ucdata has been produced.
	const size_t csize_max = af->csize - af->last_cofs;

	// stream compressed blocks through the Decompressor, which chains
	// the user callback after inflating each block.
	Decompressor d(af->ctx, size, use_temp_buf, cb, cb_ctx);
	ssize_t uc_transferred = file_io(&a->f, cofs, csize_max, FILE_BUF_TEMP, decompressor_feed_cb, (uintptr_t)&d);

	// advance the compressed-stream position for the next call.
	af->last_cofs += (off_t)d.total_csize_fed();

	return uc_transferred;
}
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// file mapping
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
// map the entire file <af> into memory. mapping compressed files
// isn't allowed, since the compression algorithm is unspecified.
// output parameters are zeroed on failure.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
LibError afile_map(AFile* af, void*& p, size_t& size)
{
	// zero output params in case we fail below.
	p = 0;
	size = 0;

	CHECK_AFILE(af);

	// mapping compressed files doesn't make sense because the
	// compression algorithm is unspecified - disallow it.
	if(is_compressed(af))
		WARN_RETURN(ERR_IS_COMPRESSED);

	// note: we mapped the archive in archive_open, but unmapped it
	// in the meantime to save memory in case it wasn't going to be mapped.
	// now we do so again; it's unmapped in afile_unmap (refcounted).
	H_DEREF(af->ha, Archive, a);
	void* archive_p;
	size_t archive_size;
	CHECK_ERR(file_map(&a->f, archive_p, archive_size));

	// the file's (uncompressed == stored) data lies within the
	// archive mapping at its recorded offset.
	p = (char*)archive_p + af->ofs;
	size = af->fc.size;

	// remember for afile_unmap's balance check.
	af->is_mapped = 1;
	return ERR_OK;
}
|
|
|
|
|
|
// remove the mapping of file <af>; fail if not mapped.
//
// the mapping will be removed (if still open) when its archive is closed.
// however, map/unmap calls should be paired so that the archive mapping
// may be removed when no longer needed.
LibError afile_unmap(AFile* af)
{
	CHECK_AFILE(af);

	// make sure archive mapping refcount remains balanced:
	// don't allow multiple|"false" unmaps.
	if(!af->is_mapped)
		return ERR_FAIL;
	// clear the flag before H_DEREF (which can return early) so a
	// failed dereference cannot later lead to a double-unmap.
	af->is_mapped = 0;

	// decrements the archive file's mapping refcount (see afile_map).
	H_DEREF(af->ha, Archive, a);
	return file_unmap(&a->f);
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// archive builder
|
|
//-----------------------------------------------------------------------------
|
|
|
|
static inline bool file_type_is_uncompressible(const char* fn)
|
|
{
|
|
const char* ext = strrchr(fn, '.');
|
|
// no extension? bail; assume compressible
|
|
if(!ext)
|
|
return true;
|
|
|
|
// this is a selection of file types that are certainly not
|
|
// further compressible. we need not include every type under the sun -
|
|
// this is only a slight optimization that avoids wasting time
|
|
// compressing files. the real decision as to cmethod is made based
|
|
// on attained compression ratio.
|
|
static const char* uncompressible_exts[] =
|
|
{
|
|
"zip", "rar",
|
|
"jpg", "jpeg", "png",
|
|
"ogg", "mp3"
|
|
};
|
|
|
|
for(uint i = 0; i < ARRAY_SIZE(uncompressible_exts); i++)
|
|
{
|
|
if(!stricmp(ext+1, uncompressible_exts[i]))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
// per-file state for compress_cb: carried across vfs_load's block
// callbacks while a file is read and trial-compressed.
struct CompressParams
{
	// if false, blocks are only checksummed, not fed to the compressor
	bool attempt_compress;
	// compression context (CT_COMPRESSION)
	uintptr_t ctx;
	// running zlib crc32 over the uncompressed data
	u32 crc;
};
|
|
|
|
#include <zlib.h>
|
|
|
|
// vfs_load block callback: checksum each uncompressed block and
// (optionally) feed it to the trial compressor.
static LibError compress_cb(uintptr_t cb_ctx, const void* block, size_t size, size_t* bytes_processed)
{
	CompressParams* p = (CompressParams*)cb_ctx;

	// comp_feed already makes note of total #bytes fed, and we need
	// vfs_io to return the uc size (to check if all data was read).
	*bytes_processed = size;

	// update checksum (stored in the archive entry later)
	p->crc = crc32(p->crc, (const Bytef*)block, (uInt)size);

	// trial-compress while waiting for the next IO to complete.
	if(p->attempt_compress)
		(void)comp_feed(p->ctx, block, size);

	return INFO_CB_CONTINUE;
}
|
|
|
|
|
|
// final decision on whether to store the file as compressed,
|
|
// given the observed compressed/uncompressed sizes.
|
|
static bool should_store_compressed(size_t ucsize, size_t csize)
|
|
{
|
|
const float ratio = (float)ucsize / csize;
|
|
const ssize_t bytes_saved = (ssize_t)ucsize - (ssize_t)csize;
|
|
UNUSED2(bytes_saved);
|
|
|
|
// tiny - store compressed regardless of savings.
|
|
// rationale:
|
|
// - CPU cost is negligible and overlapped with IO anyway;
|
|
// - reading from compressed files uses less memory because we
|
|
// don't need to allocate space for padding in the final buffer.
|
|
if(ucsize < 512)
|
|
return true;
|
|
|
|
// large high-entropy file - store uncompressed.
|
|
// rationale:
|
|
// - any bigger than this and CPU time becomes a problem: it isn't
|
|
// necessarily hidden by IO time anymore.
|
|
if(ucsize >= 32*KiB && ratio < 1.02f)
|
|
return false;
|
|
|
|
// TODO: any other cases?
|
|
// we currently store everything else compressed.
|
|
return true;
|
|
}
|
|
|
|
// read <atom_fn> via the VFS and trial-compress it with context <ctx>.
// on success, fills <ent> (sizes, mtime, crc, method) and points
// <file_contents> at the data to store: either the compressed stream
// (owned by the compression context - do not free) or the raw file
// buffer <buf> (caller frees via file_buf_free in both cases).
// returns INFO_SKIPPED for 0-length files.
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
	ArchiveEntry& ent, void*& file_contents, FileIOBuf& buf) // out
{
	struct stat s;
	RETURN_ERR(vfs_stat(atom_fn, &s));
	const size_t ucsize = s.st_size;
	// skip 0-length files.
	// rationale: zip.cpp needs to determine whether a CDFH entry is
	// a file or directory (the latter are written by some programs but
	// not needed - they'd only pollute the file table).
	// it looks like checking for ucsize=csize=0 is the safest way -
	// relying on file attributes (which are system-dependent!) is
	// even less safe.
	// we thus skip 0-length files to avoid confusing them with dirs.
	if(!ucsize)
		return INFO_SKIPPED;

	const bool attempt_compress = !file_type_is_uncompressible(atom_fn);
	if(attempt_compress)
	{
		RETURN_ERR(comp_reset(ctx));
		RETURN_ERR(comp_alloc_output(ctx, ucsize));
	}

	// read file into newly allocated buffer. if attempt_compress, also
	// compress the file into another buffer while waiting for IOs.
	size_t ucsize_read;
	const uint flags = 0;
	CompressParams params = { attempt_compress, ctx, 0 };
	RETURN_ERR(vfs_load(atom_fn, buf, ucsize_read, flags, compress_cb, (uintptr_t)&params));
	debug_assert(ucsize_read == ucsize);

	// if we compressed the file trial-wise, check results and
	// decide whether to store as such or not (based on compression ratio)
	bool store_compressed = false;
	void* cdata = 0; size_t csize = 0;
	if(attempt_compress)
	{
		LibError ret = comp_finish(ctx, &cdata, &csize);
		if(ret < 0)
		{
			// release the buffer we own before propagating the error.
			file_buf_free(buf);
			return ret;
		}

		store_compressed = should_store_compressed(ucsize, csize);
	}

	// store file info
	ent.ucsize = (off_t)ucsize;
	ent.mtime = s.st_mtime;
	// .. ent.ofs is set by zip_archive_add_file
	ent.flags = 0;
	ent.atom_fn = atom_fn;
	ent.crc = params.crc;
	if(store_compressed)
	{
		ent.method = CM_DEFLATE;
		ent.csize = (off_t)csize;
		file_contents = cdata;
	}
	else
	{
		ent.method = CM_NONE;
		ent.csize = (off_t)ucsize;
		file_contents = (void*)buf;
	}

	// note: no need to free cdata - it is owned by the
	// compression context and can be reused.

	return ERR_OK;
}
|
|
|
|
|
|
|
|
// prepare to build the archive <P_archive_filename> from the
// NULL-terminated list of VFS filenames <V_fns>.
// progress state lives in <ab>; drive it with archive_build_continue.
LibError archive_build_init(const char* P_archive_filename, Filenames V_fns,
	ArchiveBuildState* ab)
{
	RETURN_ERR(zip_archive_create(P_archive_filename, &ab->za));
	// NOTE(review): comp_alloc failure (returning 0) isn't handled
	// here; read_and_compress_file would then run with a null context -
	// confirm the comp_* functions tolerate ctx == 0.
	ab->ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
	ab->V_fns = V_fns;

	// count number of files (needed to estimate progress)
	for(ab->num_files = 0; ab->V_fns[ab->num_files]; ab->num_files++) {}

	ab->i = 0;
	return ERR_OK;
}
|
|
|
|
|
|
#include "ps/Loader.h"
|
|
|
|
// add files to the archive until the timeslice expires or all files
// are done. on timeout, LDR_CHECK_TIMEOUT returns from this function
// early (with a nonzero progress/timeout code - see ps/Loader.h);
// call again until ERR_OK is returned, which means the archive has
// been finalized.
int archive_build_continue(ArchiveBuildState* ab)
{
	// timeslice budget: roughly 200ms per call.
	// NOTE(review): end_time appears unused below, but
	// LDR_CHECK_TIMEOUT is a macro and may reference it by name -
	// confirm before removing.
	const double end_time = get_time() + 200e-3;

	for(;;)
	{
		const char* V_fn = ab->V_fns[ab->i];
		if(!V_fn)
			break;

		// read + trial-compress; on any failure (incl. INFO_SKIPPED
		// for 0-length files) the file is simply omitted.
		ArchiveEntry ent; void* file_contents; FileIOBuf buf;
		if(read_and_compress_file(V_fn, ab->ctx, ent, file_contents, buf) == ERR_OK)
		{
			(void)zip_archive_add_file(ab->za, &ent, file_contents);
			(void)file_buf_free(buf);
		}

		ab->i++;
		LDR_CHECK_TIMEOUT((int)ab->i, (int)ab->num_files);
	}

	// all files processed: tear down compressor and finalize archive.
	// note: this is currently known to fail if there are no files in the list
	// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
	// safe to ignore.
	comp_free(ab->ctx); ab->ctx = 0;
	(void)zip_archive_finish(ab->za);

	return ERR_OK;
}
|
|
|
|
|
|
// abort a build in progress and release its resources.
void archive_build_cancel(ArchiveBuildState* ab)
{
	// note: the GUI may call us even though no build was ever in progress.
	// be sure to make all steps no-op if <ab> is zeroed (initial state) or
	// no build is in progress.

	comp_free(ab->ctx);
	ab->ctx = 0;

	if(ab->za)
		(void)zip_archive_finish(ab->za);

	// return to the pristine initial state.
	memset(ab, 0, sizeof(*ab));
}
|
|
|
|
|
|
// convenience wrapper: build the archive in one blocking call.
LibError archive_build(const char* P_archive_filename, Filenames V_fns)
{
	ArchiveBuildState ab;
	RETURN_ERR(archive_build_init(P_archive_filename, V_fns, &ab));
	for(;;)
	{
		int ret = archive_build_continue(&ab);
		// bail on real (negative) errors ..
		RETURN_ERR(ret);
		// .. and keep looping on timeslice expiry until done.
		if(ret == ERR_OK)
			return ERR_OK;
	}
}