1
0
forked from 0ad/0ad
0ad/source/lib/res/file/archive/archive_builder.cpp

290 lines
7.6 KiB
C++

/**
* =========================================================================
* File : archive_builder.cpp
* Project : 0 A.D.
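* Description : builds a Zip archive from a list of files, compressing
*             : each file's data while it is being read.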
* =========================================================================
*/
// license: GPL; see lib/license.txt
#include "precompiled.h"
#include "archive_builder.h"
#include "lib/timer.h"
#include "../file_internal.h"
// un-nice dependencies:
#include "ps/Loader.h"
// callback object handed to vfs_load: compresses each block of file data
// as it arrives, i.e. in parallel with IO.
// (files of a known-incompressible type are not compressed at all;
// only their checksum is calculated.)
class Compressor
{
public:
	// prepares the compression context <ctx> for a file called <atom_fn>
	// that is <usize> bytes long: resets the context and allocates an
	// output buffer of the maximum size the compressor may need.
	// raises an error via THROW_ERR if that allocation fails.
	Compressor(uintptr_t ctx, const char* atom_fn, size_t usize)
		: m_ctx(ctx)
		, m_usize(usize)
		, m_skipCompression(IsFileTypeIncompressible(atom_fn))
		, m_cdata(0), m_csize(0), m_checksum(0)
	{
		comp_reset(m_ctx);
		m_csizeBound = comp_max_output_size(m_ctx, usize);
		THROW_ERR(comp_alloc_output(m_ctx, m_csizeBound));
	}

	// consumes one block of uncompressed data.
	// always reports the entire block as processed and asks the caller
	// to continue.
	LibError Feed(const u8* ublock, size_t ublockSize, size_t* bytes_processed)
	{
		// vfs_io must end up returning the usize (that is how we check
		// whether all data was read); comp_feed keeps its own tally of
		// the total #bytes fed.
		*bytes_processed = ublockSize;

		if(!m_skipCompression)
		{
			// the return value is not needed: comp_finish will report
			// the total csize.
			(void)comp_feed(m_ctx, ublock, ublockSize);
			return INFO::CB_CONTINUE;
		}

		// not compressing => comp_finish will not be delivering the
		// checksum, so it has to be accumulated here.
		m_checksum = comp_update_checksum(m_ctx, m_checksum, ublock, ublockSize);
		return INFO::CB_CONTINUE;
	}

	// to be called once all data has been fed.
	// when compressing, retrieves the compressed data, its size and the
	// checksum from the context; otherwise there is nothing left to do.
	LibError Finish()
	{
		if(!m_skipCompression)
		{
			RETURN_ERR(comp_finish(m_ctx, &m_cdata, &m_csize, &m_checksum));
			debug_assert(m_csize <= m_csizeBound);
		}
		return INFO::OK;
	}

	// checksum as accumulated by Feed (not compressing) or
	// as reported by comp_finish (compressing).
	u32 Checksum() const
	{
		return m_checksum;
	}

	// final verdict on whether the file should be stored compressed,
	// based on the observed compressed/uncompressed sizes.
	bool IsCompressionProfitable() const
	{
		// known-incompressible file type: nothing was compressed.
		if(m_skipCompression)
			return false;

		// tiny files are stored compressed no matter what they save:
		// - the CPU cost is negligible and overlapped with IO anyway;
		// - reading from compressed files uses less memory because no
		//   space for padding needs to be allocated in the final buffer.
		if(m_usize < 512)
			return true;

		const float ratio = (float)m_usize / m_csize;
		const ssize_t bytes_saved = (ssize_t)m_usize - (ssize_t)m_csize;
		UNUSED2(bytes_saved);

		// large high-entropy files are stored uncompressed: beyond this
		// size, CPU time becomes a problem because it isn't necessarily
		// hidden by IO time anymore.
		const bool isLargeAndHighEntropy = (m_usize >= 32*KiB) && (ratio < 1.02f);

		// everything else is currently stored compressed.
		return !isLargeAndHighEntropy;
	}

	// hands out the compressed data and its size.
	// only valid if compression was actually performed and has finished.
	void GetOutput(const u8*& cdata, size_t& csize) const
	{
		debug_assert(!m_skipCompression);
		debug_assert(m_cdata && m_csize);

		// note: the caller need not free cdata - it is owned by the
		// compression context and can be reused.
		csize = m_csize;
		cdata = m_cdata;
	}

private:
	// returns true if the extension of <fn> is on a short list of
	// file types that are certainly not further compressible.
	static bool IsFileTypeIncompressible(const char* fn)
	{
		// the list need not cover every type under the sun - this is only
		// a slight optimization that avoids wasting time compressing
		// such files. the real decision as to cmethod is made based on
		// the attained compression ratio.
		static const char* const known_exts[] =
		{
			"zip", "rar",
			"jpg", "jpeg", "png",
			"ogg", "mp3"
		};

		// NOTE(review): the comparison below starts at ext+1, i.e. it
		// assumes path_extension returns a pointer to the '.' rather than
		// to the character after it - TODO confirm against path_extension.
		const char* ext = path_extension(fn);

		const char* const* const end = known_exts + ARRAY_SIZE(known_exts);
		for(const char* const* it = known_exts; it != end; ++it)
		{
			if(strcasecmp(ext+1, *it) == 0)
				return true;
		}
		return false;
	}

	uintptr_t m_ctx;		// compression context (not owned)
	size_t m_usize;			// uncompressed file size
	size_t m_csizeBound;	// size of the allocated output buffer
	bool m_skipCompression;	// file type is known to be incompressible
	u8* m_cdata;			// compressed data (set by Finish)
	size_t m_csize;			// compressed size (set by Finish)
	u32 m_checksum;
};
// trampoline with the callback signature passed to vfs_load:
// <cbData> carries the address of the Compressor that is to receive the block.
static LibError compressor_feed_cb(uintptr_t cbData,
	const u8* ublock, size_t ublockSize, size_t* bytes_processed)
{
	Compressor* const self = reinterpret_cast<Compressor*>(cbData);
	return self->Feed(ublock, ublockSize, bytes_processed);
}
// loads the file <atom_fn> into the newly allocated <buf>, running it
// through a Compressor (using compression context <ctx>) on the way,
// and fills in <ent> with the resulting archive entry information.
// <file_contents> receives a pointer to the bytes that are to be stored:
// the compressed output if compression paid off, otherwise <buf>.
//
// returns INFO::SKIPPED for 0-length files (the out parameters are then
// left untouched), INFO::OK on success, or else the error reported by
// vfs_stat, vfs_load or Compressor::Finish.
static LibError read_and_compress_file(const char* atom_fn, uintptr_t ctx,
	ArchiveEntry& ent, const u8*& file_contents, FileIOBuf& buf)	// out
{
	struct stat file_info;
	RETURN_ERR(vfs_stat(atom_fn, &file_info));
	const size_t usize = file_info.st_size;

	// 0-length files are left out of the archive.
	// rationale: zip.cpp needs to determine whether a CDFH entry is
	// a file or directory (the latter are written by some programs but
	// not needed - they'd only pollute the file table).
	// checking for usize=csize=0 looks like the safest way to do so -
	// relying on file attributes (which are system-dependent!) is
	// even less safe. hence, 0-length files must not be added, or they
	// would be mistaken for directories.
	if(usize == 0)
		return INFO::SKIPPED;

	Compressor compressor(ctx, atom_fn, usize);

	// read the file into a newly allocated buffer; every block that
	// arrives is passed on to the compressor.
	size_t usize_read;
	const uint flags = 0;
	RETURN_ERR(vfs_load(atom_fn, buf, usize_read, flags, compressor_feed_cb, (uintptr_t)&compressor));
	debug_assert(usize_read == usize);

	const LibError err = compressor.Finish();
	if(err < 0)
	{
		// the buffer was already allocated by vfs_load; don't leak it.
		file_buf_free(buf);
		return err;
	}

	// file info that is independent of the storage method
	// (ent.ofs is set later by zip_archive_add_file)
	ent.atom_fn = atom_fn;
	ent.usize = (off_t)usize;
	ent.mtime = file_info.st_mtime;
	ent.flags = 0;
	ent.checksum = compressor.Checksum();

	if(!compressor.IsCompressionProfitable())
	{
		// store the data exactly as it was read.
		ent.method = CM_NONE;
		ent.csize = (off_t)usize;
		file_contents = buf;
		return INFO::OK;
	}

	// store the compressor's output.
	size_t csize;
	compressor.GetOutput(file_contents, csize);
	ent.method = CM_DEFLATE;
	ent.csize = (off_t)csize;
	return INFO::OK;
}
//-----------------------------------------------------------------------------
// begins building the archive <P_archive_filename> from the files named
// in <V_fns> (a list terminated by a null entry).
// sets up <ab> for subsequent calls to archive_build_continue.
// returns the error from zip_archive_create if the archive could not be
// created (in that case, the remaining fields of <ab> are not set up).
LibError archive_build_init(const char* P_archive_filename, Filenames V_fns, ArchiveBuildState* ab)
{
	RETURN_ERR(zip_archive_create(P_archive_filename, &ab->za));

	ab->ctx = comp_alloc(CT_COMPRESSION, CM_DEFLATE);
	ab->V_fns = V_fns;

	// the total number of files is needed to estimate progress;
	// count up to the terminating null entry.
	ab->num_files = 0;
	while(ab->V_fns[ab->num_files])
		ab->num_files++;

	// start with the first file.
	ab->i = 0;
	return INFO::OK;
}
// adds files to the archive, starting at index ab->i, until either the
// list is exhausted or LDR_CHECK_TIMEOUT decides to return early.
//
// returns INFO::OK once all files have been processed and the archive has
// been finished. any other non-negative value means there is more work to
// do and the function should be called again (see archive_build).
// NOTE(review): the early-return value comes from LDR_CHECK_TIMEOUT, which
// is defined elsewhere - presumably a progress figure; confirm in ps/Loader.h.
//
// files that cannot be read/compressed (or are skipped because they are
// empty) are silently left out of the archive.
int archive_build_continue(ArchiveBuildState* ab)
{
	// time budget for this call.
	// NOTE(review): not referenced directly below - presumably read by
	// LDR_CHECK_TIMEOUT, in which case the name must not be changed.
	const double end_time = get_time() + 200e-3;

	for(;;)
	{
		// end of list reached?
		const char* V_fn = ab->V_fns[ab->i];
		if(!V_fn)
			break;

		ArchiveEntry ent; const u8* file_contents; FileIOBuf buf;
		if(read_and_compress_file(V_fn, ab->ctx, ent, file_contents, buf) == INFO::OK)
		{
			(void)zip_archive_add_file(ab->za, &ent, file_contents);
			(void)file_buf_free(buf);
		}

		ab->i++;
		LDR_CHECK_TIMEOUT((int)ab->i, (int)ab->num_files);
	}

	// note: this is currently known to fail if there are no files in the list
	// - zlib.h says: Z_DATA_ERROR is returned if freed prematurely.
	// safe to ignore.
	comp_free(ab->ctx); ab->ctx = 0;

	(void)zip_archive_finish(ab->za);
	// the build is over. clear the archive handle (as is done with ctx above)
	// so that a later archive_build_cancel - which must be a no-op when no
	// build is in progress - does not finish the same archive a second time.
	ab->za = 0;

	return INFO::OK;
}
// aborts a build: releases the compression context, finishes the archive
// (if there is one) and returns <ab> to its zeroed initial state.
void archive_build_cancel(ArchiveBuildState* ab)
{
	// note: the GUI may call us even though no build was ever in progress.
	// every step below must therefore be a no-op if <ab> is zeroed
	// (initial state) or no build is in progress.

	comp_free(ab->ctx);
	ab->ctx = 0;

	if(ab->za != 0)
		(void)zip_archive_finish(ab->za);

	memset(ab, 0, sizeof(ArchiveBuildState));
}
// convenience wrapper: builds the archive <P_archive_filename> from the
// files in <V_fns> in one go, i.e. keeps calling archive_build_continue
// until it reports completion.
// returns INFO::OK on success, or the first error encountered by
// archive_build_init / archive_build_continue.
LibError archive_build(const char* P_archive_filename, Filenames V_fns)
{
	ArchiveBuildState ab;
	RETURN_ERR(archive_build_init(P_archive_filename, V_fns, &ab));

	int ret;
	do
	{
		ret = archive_build_continue(&ab);
		// negative => error; bail.
		RETURN_ERR(ret);
	}
	while(ret != INFO::OK);

	return INFO::OK;
}