0ad/source/lib/res/vfs.cpp

1023 lines
22 KiB
C++
Raw Normal View History

// virtual file system - transparent access to files in archives;
// allows multiple search paths
//
// Copyright (c) 2003 Jan Wassenberg
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// Contact info:
// Jan.Wassenberg@stud.uni-karlsruhe.de
// http://www.stud.uni-karlsruhe.de/~urkt/
#include <cstdio>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include "lib.h"
#include "file.h"
#include "zip.h"
#include "misc.h"
#include "vfs.h"
#include "mem.h"
#include "adts.h"
#include <string>
#include <vector>
#include <stack>
#include <map>
#include <list>
#include <algorithm>
// currently not thread safe, but that will most likely change
// (if prefetch thread is to be used).
// not safe to call before main!
// rationale for no forcibly-close support:
// issue:
// we might want to edit files while the game has them open.
// usual case: edit file, notify engine that it should be reloaded.
// here: need to tell the engine to stop what it's doing and close the file;
// only then can the artist write to the file, and trigger a reload.
//
// work involved:
// since closing a file with pending aios results in undefined
// behavior on Win32, we would have to keep track of all aios from each file,
// and cancel them. we'd also need to notify the higher level resource user
// that its read was cancelled, as opposed to failing due to read errors
// (which might cause the game to terminate).
//
// this is just more work than benefit. cases where the game holds on to files
// are rare:
// - streaming music (artist can use regular commands to stop the current
// track, or all music)
// - if the engine happens to be reading that file at the moment (expected
// to happen only during loading, and these are usually one-shot anway,
// i.e. it'll be done soon)
// - bug (someone didn't close a file - tough luck, and should be fixed
// instead of hacking around it).
// - archives (these remain open. allowing reload would mean we'd have to keep
// track of all files from an archive, and reload them all. another hassle.
// anyway, if files are to be changed in-game, then change the plain-file
// version - that's what they're for).
///////////////////////////////////////////////////////////////////////////////
//
// path
//
///////////////////////////////////////////////////////////////////////////////
// path types:
// p_* : portable
// v_* : VFS
// f_* : no path at all, filename only
static int path_append(char* dst, const char* path, const char* path2)
{
const size_t path_len = strlen(path);
const size_t path2_len = strlen(path2);
if(path_len+path2_len+1 > PATH_MAX)
return -1;
char* p = dst;
strcpy(p, path);
p += path_len;
if(path_len > 0 && p[-1] != '/')
*p++ = '/';
strcpy(p, path2);
return 0;
}
static int path_validate(const uint line, const char* const path)
{
size_t path_len = 0;
const char* msg = 0; // error occurred <==> != 0
int err = -1; // pass error code to caller
// disallow absolute path
if(path[0] == '/')
{
msg = "absolute path";
goto fail;
}
// scan each char in path string; count length.
for(;;)
{
const int c = path[path_len++];
// whole path is too long
if(path_len >= VFS_MAX_PATH)
{
msg = "path too long";
goto fail;
}
// disallow ".." to prevent going above the VFS root dir
static bool last_was_dot;
if(c == '.')
{
if(last_was_dot)
{
msg = "contains \"..\"";
goto fail;
}
last_was_dot = true;
}
else
last_was_dot = false;
// disallow OS-specific dir separators
if(c == '\\' || c == ':')
{
msg = "contains OS-specific dir separator (e.g. '\\', ':')";
goto fail;
}
// end of string, all is well.
if(c == '\0')
goto ok;
}
// failed somewhere - err is the error code,
// or -1 if not set specifically above.
fail:
debug_out("path_validate at line %d failed: %s", err);
assert(0 && "path_validate failed");
return err;
ok:
return 0;
}
#define CHECK_PATH(p_path) CHECK_ERR(path_validate(__LINE__, p_path))
///////////////////////////////////////////////////////////////////////////////
//
// file location
//
///////////////////////////////////////////////////////////////////////////////
// the VFS stores the location (archive or directory) of each file;
// this allows multiple search paths without having to check each one
// when opening a file (slow).
//
// one Loc is allocated for each archive or directory mounted, and all
// real subdirectories; all files in an archive share the same location.
// therefore, files only /point/ to a (possibly shared) Loc.
// if a file's location changes (e.g. after mounting a higher-priority
// directory), the VFS entry will point to the new Loc; the priority
// of both locations is unchanged.
//
// allocate via loc_create, passing the location. do not free!
// we keep track of all Locs allocated; they are freed at exit,
// and by loc_free_all (useful when rebuilding the VFS).
// this is much easier and safer than walking the VFS tree and
// freeing every location we find.
struct Loc;
typedef std::vector<Loc*> Locs;
// not many instances and allocated via new =>
// don't worry about struct size / alignment.
struct Loc
{
Handle ha;
std::string path;
uint pri;
Loc(Handle _ha, const char* _path, uint _pri)
: ha(_ha), path(_path), pri(_pri)
{
ONCE(atexit2(loc_free_all));
// add to list for later deletion
locs.push_back(this);
}
friend int loc_free_all();
private:
static Locs locs;
};
Locs Loc::locs;
static inline void loc_free(Loc* const loc)
{ delete loc; }
static int loc_free_all()
{
// we don't expect many Locs to be added (one per loose-file dir
// or archive), so don't worry about reallocating memory.
// return value is also irrelevant - can't fail as currently implemented.
Locs& locs = Loc::locs;
std::for_each(locs.begin(), locs.end(), loc_free);
// could use smart ptr (boost or loki) instead, but this'll do
locs.clear();
return 0;
}
// wrapper on top of new + ctor to emphasize that
// the caller must not free the Loc pointer.
// (if they do, VFS entries point to freed memory => disaster)
static Loc* loc_create(const Handle ha, const char* const p_path, const uint pri)
{
return new Loc(ha, p_path, pri);
}
///////////////////////////////////////////////////////////////////////////////
//
// "file system" (tree structure; stores location of each file)
//
///////////////////////////////////////////////////////////////////////////////
struct VDir;
typedef std::map<std::string, VDir> SubDirs;
typedef SubDirs::iterator SubDirIt;
typedef std::map<std::string, Loc*> Files;
typedef Files::iterator FileIt;
// note: priority is accessed by following the Loc pointer.
// keeping a copy in the map would lead to better cache coherency,
// but it's a bit more clumsy (map filename to struct {pri, Loc*}).
// revisit if file lookup open is too slow (unlikely).
struct VDir
{
std::string v_name;
void* watch;
int file_add(const char* const fn, const uint pri, Loc* const loc)
{
std::string _fn(fn);
typedef std::pair<std::string, Loc*> Ent;
Ent ent = std::make_pair(_fn, loc);
std::pair<FileIt, bool> ret;
ret = files.insert(ent);
// file already in dir
if(!ret.second)
{
FileIt it = ret.first;
Loc*& old_loc = it->second;
// new Loc is of higher priority; replace pointer
if(old_loc->pri <= loc->pri)
{
old_loc = loc;
return 0;
}
// new Loc is of lower priority; keep old pointer
else
return 1;
}
return 0;
}
Loc* file_find(const char* fn)
{
std::string _fn(fn);
FileIt it = files.find(_fn);
if(it == files.end())
return 0;
return it->second;
}
VDir* subdir_add(const char* fn, VDir& _dir)
{
std::string _fn(fn);
_dir.v_name = _fn;
std::pair<std::string, VDir> item = std::make_pair(_fn, _dir);
std::pair<SubDirIt, bool> res;
res = subdirs.insert(item);
// already in container
if(!res.second)
assert(0 && "already in subdir");
SubDirIt it = res.first;
return &it->second;
}
VDir* subdir_find(const char* fn)
{
std::string _fn(fn);
SubDirIt it = subdirs.find(_fn);
if(it == subdirs.end())
return 0;
return &it->second;
}
void clear_tree()
{
SubDirIt it;
for(it = subdirs.begin(); it != subdirs.end(); ++it)
{
VDir& dir = it->second;
dir.clear_tree();
}
files.clear();
subdirs.clear();
}
SubDirs subdirs; // can't make private; needed for iterator
private:
Files files;
};
VDir vfs_root;
enum LookupFlags
{
PF_DEFAULT,
LF_CREATE_MISSING_COMPONENTS = 1
};
static int tree_lookup(const char* vfs_path, Loc** loc = 0, VDir** dir = 0, LookupFlags flags = PF_DEFAULT)
{
CHECK_PATH(vfs_path);
// copy into (writeable) buffer so we can 'tokenize' path components
// by replacing '/' with '\0'.
// note: CHECK_PATH does length checking
char buf[VFS_MAX_PATH];
strcpy(buf, vfs_path);
const char* cur_component = buf;
const bool create_missing_components = flags & LF_CREATE_MISSING_COMPONENTS;
VDir* cur_dir = &vfs_root;
for(;;)
{
char* slash = strchr(cur_component, '/');
// we have followed all path components.
// cur_component is the filename or ""
if(!slash)
{
// caller wants pointer to file location returned
if(loc)
{
const char* fn = cur_component;
*loc = cur_dir->file_find(fn);
// user wanted its loc, but it's not found - fail
if(!*loc)
return ERR_FILE_NOT_FOUND;
}
// caller wants pointer to this dir returned
if(dir)
*dir = cur_dir;
return 0;
}
// cur_component is a subdirectory name; change to it
else
{
const char* subdir_name = cur_component;
*slash = 0;
VDir* subdir = cur_dir->subdir_find(subdir_name);
if(!subdir)
{
if(create_missing_components)
{
VDir _dir;
subdir = cur_dir->subdir_add(subdir_name, _dir);
}
else
return ERR_PATH_NOT_FOUND;
}
cur_dir = subdir;
cur_component = slash+1;
}
}
}
typedef std::vector<Handle> Archives;
typedef Archives::iterator ArchiveIt;
struct FileCBParams
{
VDir* dir;
Loc* loc;
// somewhat of a hack. which archives are mounted into the VFS is stored
// in an Archives list in the Mount struct; they don't have anything to
// do with a VFS dir. we want to enumerate the archives in a dir via the
// normal populate(), though, so have to pass this to its callback.
Archives* archives;
};
// called for each OS dir ent.
// add each file and directory to the VFS dir.
//
// note:
// we don't mount archives here for performance reasons.
// that means archives in subdirectories of mount points aren't added!
// rationale: can't determine if file is an archive via extension -
// they might be called .pk3 or whatnot. for every file in the tree, we'd have
// to try to open it as an archive - not good.
// this restriction also simplifies the code a bit, but if it's a problem,
// just generate a list of archives here and mount them from the caller.
static int file_cb(const char* fn, uint flags, ssize_t size, uintptr_t user)
{
FileCBParams* params = (FileCBParams*)user;
VDir* cur_dir = params->dir;
Loc* cur_loc = params->loc;
Archives* archives = params->archives;
// directory
if(flags & LOC_DIR)
{
VDir _dir;
cur_dir->subdir_add(fn, _dir);
}
// file
else
{
// only add to list; don't enumerate its files yet for easier debugging
// (we see which files are in a dir / archives)
// also somewhat faster, due to better locality.
//
// don't check filename extension - archives won't necessarily
// be called .zip (example: Quake III .pk3).
// just try to open the file.
if(archives)
{
const Handle ha = zip_archive_open(fn);
if(ha > 0)
archives->push_back(ha);
}
cur_dir->file_add(fn, cur_loc->pri, cur_loc);
}
return 0;
}
static int vdir_add_from_archive(VDir* dir, const Handle ha, const uint pri, Archives* archives)
{
// all files in the archive share this location
Loc* loc = loc_create(ha, 0, pri-1);
// add all files in archive to the VFS dir
FileCBParams params = { dir, loc, archives };
CHECK_ERR(zip_enum_files(ha, file_cb, (uintptr_t)&params));
return 0;
}
static int addR(VDir* vdir, const char* path, Loc* loc, const uint pri, Archives* archives)
{
// add watch
if(!vdir->watch)
vdir->watch = 0;
// add files and subdirs to dir; gather list of all archives
FileCBParams params = { vdir, loc, archives };
file_enum_dirents(path, file_cb, (uintptr_t)&params);
// loc will now be used for files in Zip archives
loc->pri--;
// loc->path = 0;
for(SubDirIt it = vdir->subdirs.begin(); it != vdir->subdirs.end(); ++it)
{
VDir* subdir = &it->second;
char v_subdir_path[PATH_MAX];
const char* v_subdir_name_c = subdir->v_name.c_str();
CHECK_ERR(path_append(v_subdir_path, path, v_subdir_name_c));
addR(subdir, v_subdir_path, loc, pri, archives);
}
// for each archive:
// (already sorted due to file_enum_dirents)
/*
{
for(ArchiveIt it = archives->begin(); it != archives->end(); ++it)
{
const Handle ha = *it;
CHECK_ERR(vdir_add_from_archive(vdir, ha, pri, archives));
}
}
*/
return 0;
}
// parent param not reference to allow passing 0 root node?
static int vdir_add_from_dir(VDir* vdir, const char* path, const uint pri, Archives* archives)
{
// all loose files in the new dir and its subdirs share this location
Loc* loc = loc_create(0, path, pri+1);
return addR(vdir, path, loc, pri, archives);
}
///////////////////////////////////////////////////////////////////////////////
//
// mount archives and directories into the VFS
//
///////////////////////////////////////////////////////////////////////////////
// need a list of all mounted dirs or archives so we can vfs_reload at
// any time. it's also nice, but not necessary, to unmount at exit
// (so resources aren't reported as leaked).
struct Mount
{
std::string vfs_path;
std::string name; // of OS dir or archive being mounted
uint pri;
Archives archives;
Mount(const char* _vfs_path, const char* _name, uint _pri)
: vfs_path(_vfs_path), name(_name), pri(_pri), archives() {}
};
typedef std::list<Mount> Mounts;
typedef Mounts::iterator MountIt;
static Mounts mounts;
// actually mount the specified entry (either Zip archive or dir).
// split out of vfs_mount because we need to mount without changing the
// mount list, when invalidating (reloading) the VFS.
static int remount(Mount& m)
{
const char* vfs_path = m.vfs_path.c_str();
const char* name = m.name.c_str();
const uint pri = m.pri;
CHECK_PATH(name);
VDir* vdir;
CHECK_ERR(tree_lookup(vfs_path, 0, &vdir, LF_CREATE_MISSING_COMPONENTS));
int err;
// add files and subdirectories to the VFS dir
err = vdir_add_from_dir(vdir, name, pri, &m.archives);
// success
// if(err == 0)
return 0;
const Handle ha = zip_archive_open(name);
err = vdir_add_from_archive(vdir, ha, pri, &m.archives);
if(err == 0)
return 0;
return ERR_PATH_NOT_FOUND;
}
static int unmount(Mount& m)
{
std::for_each(m.archives.begin(), m.archives.end(), zip_archive_close);
return 0;
}
static void unmount_all(void)
{
std::for_each(mounts.begin(), mounts.end(), unmount);
}
int vfs_mount(const char* const vfs_path, const char* const name, const uint pri)
{
ONCE(atexit(unmount_all));
// make sure it's not already mounted, i.e. in mounts
{
for(MountIt it = mounts.begin(); it != mounts.end(); ++it)
if(it->name == name)
{
assert(0 && "vfs_mount: already mounted");
return -1;
}
}
mounts.push_back(Mount(vfs_path, name, pri));
// actually mount the entry
MountIt it = mounts.end();
Mount& m = *(--it);
return remount(m);
}
// eventually returns the first error that occurred; does not abort.
int vfs_rebuild()
{
int err = 0;
vfs_root.clear_tree();
loc_free_all();
// need error return. manual loop is easier than functor + for_each.
for(MountIt it = mounts.begin(); it != mounts.end(); ++it)
{
int ret = remount(*it);
if(err == 0)
err = ret;
}
return err;
}
int vfs_unmount(const char* name)
{
for(MountIt it = mounts.begin(); it != mounts.end(); ++it)
// found the corresponding entry
if(it->name == name)
{
int unmount_err = unmount(*it);
mounts.erase(it);
int rebuild_err = vfs_rebuild();
return (unmount_err < 0)? unmount_err : rebuild_err;
}
return ERR_PATH_NOT_FOUND;
}
///////////////////////////////////////////////////////////////////////////////
//
//
//
///////////////////////////////////////////////////////////////////////////////
// OLD
// rationale for n-archives per PATH entry:
// We need to be able to unmount specific paths (e.g. when switching mods).
// Don't want to remount everything (slow), or specify a mod tag when mounting
// (not this module's job). Instead, we include all archives in one path entry;
// the game keeps track of what path(s) it mounted for a mod,
// and unmounts those when needed.
int vfs_realpath(const char* fn, char* full_path)
{
Loc* loc;
CHECK_ERR(tree_lookup(fn, &loc));
if(loc->ha <= 0)
{
strncpy(full_path, loc->path.c_str(), PATH_MAX);
}
else
{
const char* archive_fn = h_filename(loc->ha);
if(!archive_fn)
return -1;
strncpy(full_path, archive_fn, PATH_MAX);
}
return 0;
}
int vfs_stat(const char* fn, struct stat* s)
{
Loc* loc;
CHECK_ERR(tree_lookup(fn, &loc));
if(loc->ha <= 0)
return stat(loc->path.c_str(), s);
else
return zip_stat(loc->ha, fn, s);
}
///////////////////////////////////////////////////////////////////////////////
//
// file
//
///////////////////////////////////////////////////////////////////////////////
enum
{
2004-03-05 17:23:31 +01:00
// internal file state flags
// make sure these don't conflict with vfs.h flags
VF_OPEN = 0x100,
VF_ZIP = 0x200,
};
struct VFile
{
// cached contents of file from vfs_load
// (can't just use pointer - may be freed behind our back)
Handle hm;
union
{
File f;
ZFile zf;
};
2004-03-05 17:23:31 +01:00
// be aware when adding fields that we're already pushing the size limit
// (especially in PARANOIA builds, which add a member!)
};
H_TYPE_DEFINE(VFile)
2004-03-05 17:23:31 +01:00
// with #define PARANOIA, File and ZFile get an additional member,
// and VFile was exceeding HDATA_USER_SIZE. flags and size (required
// in File as well as VFile) are now moved into the union.
// use the functions below to insulate against change a bit.
static size_t& vf_size(VFile* vf)
{
assert(offsetof(struct File, size) == offsetof(struct ZFile, ucsize));
return vf->f.size;
}
static int& vf_flags(VFile* vf)
{
assert(offsetof(struct File, flags) == offsetof(struct ZFile, flags));
return vf->f.flags;
}
static void VFile_init(VFile* vf, va_list args)
{
2004-03-05 17:23:31 +01:00
int flags = va_arg(args, int);
vf_flags(vf) = flags;
}
static void VFile_dtor(VFile* vf)
{
2004-03-05 17:23:31 +01:00
int& flags = vf_flags(vf);
if(flags & VF_OPEN)
{
2004-03-05 17:23:31 +01:00
if(flags & VF_ZIP)
zip_close(&vf->zf);
else
file_close(&vf->f);
2004-03-05 17:23:31 +01:00
flags &= ~(VF_OPEN);
}
mem_free_h(vf->hm);
}
static int VFile_reload(VFile* vf, const char* fn)
{
2004-03-05 17:23:31 +01:00
int& flags = vf_flags(vf);
// we're done if file is already open. need to check this because reload order
// (e.g. if resource opens a file) is unspecified.
2004-03-05 17:23:31 +01:00
if(flags & VF_OPEN)
return 0;
int err = -1;
Loc* loc;
CHECK_ERR(tree_lookup(fn, &loc));
if(loc->ha <= 0)
{
const char* path;
char buf[PATH_MAX+1];
// (it's in the VFS root dir)
if(loc->path[0] == '\0')
path = fn;
else
{
const char* loc_path = loc->path.c_str();
CHECK_ERR(path_append(buf, loc_path, fn));
path = buf;
}
CHECK_ERR(file_open(path, vf_flags(vf), &vf->f));
}
else
{
if(flags & VFS_WRITE)
{
assert(0 && "requesting write access to file in archive");
return -1;
}
CHECK_ERR(zip_open(loc->ha, fn, &vf->zf));
flags |= VF_ZIP;
}
// success
2004-03-05 17:23:31 +01:00
flags |= VF_OPEN;
return 0;
}
Handle vfs_open(const char* fn, int flags /* = 0 */)
{
Handle h = h_alloc(H_VFile, fn, 0, flags);
// pass file flags to init
debug_out("vfs_open fn=%s %I64x\n", fn, h);
return h;
}
inline int vfs_close(Handle& h)
{
debug_out("vfs_close %I64x\n", h);
return h_free(h, H_VFile);
}
ssize_t vfs_io(Handle hf, size_t ofs, size_t size, void*& p)
{
debug_out("vfs_io ofs=%d size=%d\n", ofs, size);
H_DEREF(hf, VFile, vf);
// (vfs_open makes sure it's not opened for writing if zip)
2004-03-05 17:23:31 +01:00
if(vf_flags(vf) & VF_ZIP)
return zip_read(&vf->zf, ofs, size, p);
// normal file:
// let file_io alloc the buffer if the caller didn't (i.e. p = 0),
// because it knows about alignment / padding requirements
return file_io(&vf->f, ofs, size, &p);
}
Handle vfs_load(const char* fn, void*& p, size_t& size)
{
debug_out("vfs_load fn=%s\n", fn);
p = 0; // vfs_io needs initial 0 value
2004-03-05 17:23:31 +01:00
size = 0; // in case open or deref fails
Handle hf = vfs_open(fn);
if(hf <= 0)
return hf; // error code
H_DEREF(hf, VFile, vf);
Handle hm = 0;
2004-03-05 17:23:31 +01:00
size = vf_size(vf);
// already read into mem - return existing mem handle
// TODO: what if mapped?
if(vf->hm > 0)
{
p = mem_get_ptr(vf->hm, &size);
if(p)
{
2004-03-05 17:23:31 +01:00
assert(vf_size(vf) == size && "vfs_load: mismatch between File and Mem size");
hm = vf->hm;
goto skip_read;
}
else
assert(0 && "vfs_load: invalid MEM attached to vfile (0 pointer)");
// happens if someone frees the pointer. not an error!
}
{ // VC6 goto fix
2004-03-03 16:16:20 +01:00
ssize_t nread = vfs_io(hf, 0, size, p);
if(nread > 0)
hm = mem_assign(p, size);
}
skip_read:
vfs_close(hf);
// if we fail, make sure these are set to 0
// (they may have been assigned values above)
if(hm <= 0)
p = 0, size = 0;
return hm;
}
int vfs_store(const char* fn, void* p, size_t size)
{
Handle hf = vfs_open(fn, VFS_WRITE);
if(hf <= 0)
return (int)hf; // error code
H_DEREF(hf, VFile, vf);
int ret = vfs_io(hf, 0, size, p);
vfs_close(hf);
return ret;
}
Handle vfs_map(const char* fn, int flags, void*& p, size_t& size)
{
Handle hf = vfs_open(fn, flags);
H_DEREF(hf, VFile, vf);
CHECK_ERR(file_map(&vf->f, p, size));
MEM_DTOR dtor = 0;
uintptr_t ctx = 0;
return mem_assign(p, size, 0, dtor, ctx);
}
int vfs_unmap(Handle& hm)
{
2004-03-05 17:23:31 +01:00
return -1;
// return h_free(hm, H_MMap);
}