// virtual file system - transparent access to files in archives;
// allows multiple search paths
//
// Copyright (c) 2003 Jan Wassenberg
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// Contact info:
//   Jan.Wassenberg@stud.uni-karlsruhe.de
//   http://www.stud.uni-karlsruhe.de/~urkt/

// NOTE(review): the names of the system headers were lost (only the bare
// "#include" tokens survived). the headers below are inferred from the
// standard-library names this file uses (assert, va_list, atexit, strlen /
// strcpy / strchr / strncpy; std::for_each, std::list, std::map,
// std::string, std::pair, std::vector) - confirm against the original.
// offsetof, PATH_MAX, ssize_t and stat() are assumed to be provided via the
// project headers.
#include <cassert>
#include <cstdarg>
#include <cstdlib>
#include <cstring>

#include "lib.h"
#include "file.h"
#include "zip.h"
#include "misc.h"
#include "vfs.h"
#include "mem.h"
#include "adts.h"

#include <algorithm>
#include <list>
#include <map>
#include <string>
#include <utility>
#include <vector>

// currently not thread safe, but that will most likely change
// (if prefetch thread is to be used).
// not safe to call before main!


// rationale for no forcibly-close support:
// issue:
// we might want to edit files while the game has them open.
// usual case: edit file, notify engine that it should be reloaded.
// here: need to tell the engine to stop what it's doing and close the file;
// only then can the artist write to the file, and trigger a reload.
//
// work involved:
// since closing a file with pending aios results in undefined
// behavior on Win32, we would have to keep track of all aios from each file,
// and cancel them. we'd also need to notify the higher level resource user
// that its read was cancelled, as opposed to failing due to read errors
// (which might cause the game to terminate).
//
// this is just more work than benefit.
cases where the game holds on to files // are rare: // - streaming music (artist can use regular commands to stop the current // track, or all music) // - if the engine happens to be reading that file at the moment (expected // to happen only during loading, and these are usually one-shot anway, // i.e. it'll be done soon) // - bug (someone didn't close a file - tough luck, and should be fixed // instead of hacking around it). // - archives (these remain open. allowing reload would mean we'd have to keep // track of all files from an archive, and reload them all. another hassle. // anyway, if files are to be changed in-game, then change the plain-file // version - that's what they're for). /////////////////////////////////////////////////////////////////////////////// // // path // /////////////////////////////////////////////////////////////////////////////// // path types: // p_* : portable // v_* : VFS // f_* : no path at all, filename only static int path_append(char* dst, const char* path, const char* path2) { const size_t path_len = strlen(path); const size_t path2_len = strlen(path2); if(path_len+path2_len+1 > PATH_MAX) return -1; char* p = dst; strcpy(p, path); p += path_len; if(path_len > 0 && p[-1] != '/') *p++ = '/'; strcpy(p, path2); return 0; } static int path_validate(const uint line, const char* const path) { size_t path_len = 0; const char* msg = 0; // error occurred <==> != 0 int err = -1; // pass error code to caller // disallow absolute path if(path[0] == '/') { msg = "absolute path"; goto fail; } // scan each char in path string; count length. for(;;) { const int c = path[path_len++]; // whole path is too long if(path_len >= VFS_MAX_PATH) { msg = "path too long"; goto fail; } // disallow ".." 
to prevent going above the VFS root dir static bool last_was_dot; if(c == '.') { if(last_was_dot) { msg = "contains \"..\""; goto fail; } last_was_dot = true; } else last_was_dot = false; // disallow OS-specific dir separators if(c == '\\' || c == ':') { msg = "contains OS-specific dir separator (e.g. '\\', ':')"; goto fail; } // end of string, all is well. if(c == '\0') goto ok; } // failed somewhere - err is the error code, // or -1 if not set specifically above. fail: debug_out("path_validate at line %d failed: %s", err); assert(0 && "path_validate failed"); return err; ok: return 0; } #define CHECK_PATH(p_path) CHECK_ERR(path_validate(__LINE__, p_path)) /////////////////////////////////////////////////////////////////////////////// // // file location // /////////////////////////////////////////////////////////////////////////////// // the VFS stores the location (archive or directory) of each file; // this allows multiple search paths without having to check each one // when opening a file (slow). // // one Loc is allocated for each archive or directory mounted, and all // real subdirectories; all files in an archive share the same location. // therefore, files only /point/ to a (possibly shared) Loc. // if a file's location changes (e.g. after mounting a higher-priority // directory), the VFS entry will point to the new Loc; the priority // of both locations is unchanged. // // allocate via loc_create, passing the location. do not free! // we keep track of all Locs allocated; they are freed at exit, // and by loc_free_all (useful when rebuilding the VFS). // this is much easier and safer than walking the VFS tree and // freeing every location we find. struct Loc; typedef std::vector Locs; // not many instances and allocated via new => // don't worry about struct size / alignment. 
struct Loc { Handle ha; std::string path; uint pri; Loc(Handle _ha, const char* _path, uint _pri) : ha(_ha), path(_path), pri(_pri) { ONCE(atexit2(loc_free_all)); // add to list for later deletion locs.push_back(this); } friend int loc_free_all(); private: static Locs locs; }; Locs Loc::locs; static inline void loc_free(Loc* const loc) { delete loc; } static int loc_free_all() { // we don't expect many Locs to be added (one per loose-file dir // or archive), so don't worry about reallocating memory. // return value is also irrelevant - can't fail as currently implemented. Locs& locs = Loc::locs; std::for_each(locs.begin(), locs.end(), loc_free); // could use smart ptr (boost or loki) instead, but this'll do locs.clear(); return 0; } // wrapper on top of new + ctor to emphasize that // the caller must not free the Loc pointer. // (if they do, VFS entries point to freed memory => disaster) static Loc* loc_create(const Handle ha, const char* const p_path, const uint pri) { return new Loc(ha, p_path, pri); } /////////////////////////////////////////////////////////////////////////////// // // "file system" (tree structure; stores location of each file) // /////////////////////////////////////////////////////////////////////////////// struct VDir; typedef std::map SubDirs; typedef SubDirs::iterator SubDirIt; typedef std::map Files; typedef Files::iterator FileIt; // note: priority is accessed by following the Loc pointer. // keeping a copy in the map would lead to better cache coherency, // but it's a bit more clumsy (map filename to struct {pri, Loc*}). // revisit if file lookup open is too slow (unlikely). 
struct VDir { std::string v_name; void* watch; int file_add(const char* const fn, const uint pri, Loc* const loc) { std::string _fn(fn); typedef std::pair Ent; Ent ent = std::make_pair(_fn, loc); std::pair ret; ret = files.insert(ent); // file already in dir if(!ret.second) { FileIt it = ret.first; Loc*& old_loc = it->second; // new Loc is of higher priority; replace pointer if(old_loc->pri <= loc->pri) { old_loc = loc; return 0; } // new Loc is of lower priority; keep old pointer else return 1; } return 0; } Loc* file_find(const char* fn) { std::string _fn(fn); FileIt it = files.find(_fn); if(it == files.end()) return 0; return it->second; } VDir* subdir_add(const char* fn, VDir& _dir) { std::string _fn(fn); _dir.v_name = _fn; std::pair item = std::make_pair(_fn, _dir); std::pair res; res = subdirs.insert(item); // already in container if(!res.second) assert(0 && "already in subdir"); SubDirIt it = res.first; return &it->second; } VDir* subdir_find(const char* fn) { std::string _fn(fn); SubDirIt it = subdirs.find(_fn); if(it == subdirs.end()) return 0; return &it->second; } void clear_tree() { SubDirIt it; for(it = subdirs.begin(); it != subdirs.end(); ++it) { VDir& dir = it->second; dir.clear_tree(); } files.clear(); subdirs.clear(); } SubDirs subdirs; // can't make private; needed for iterator private: Files files; }; VDir vfs_root; enum LookupFlags { PF_DEFAULT, LF_CREATE_MISSING_COMPONENTS = 1 }; static int tree_lookup(const char* vfs_path, Loc** loc = 0, VDir** dir = 0, LookupFlags flags = PF_DEFAULT) { CHECK_PATH(vfs_path); // copy into (writeable) buffer so we can 'tokenize' path components // by replacing '/' with '\0'. // note: CHECK_PATH does length checking char buf[VFS_MAX_PATH]; strcpy(buf, vfs_path); const char* cur_component = buf; const bool create_missing_components = flags & LF_CREATE_MISSING_COMPONENTS; VDir* cur_dir = &vfs_root; for(;;) { char* slash = strchr(cur_component, '/'); // we have followed all path components. 
// cur_component is the filename or "" if(!slash) { // caller wants pointer to file location returned if(loc) { const char* fn = cur_component; *loc = cur_dir->file_find(fn); // user wanted its loc, but it's not found - fail if(!*loc) return ERR_FILE_NOT_FOUND; } // caller wants pointer to this dir returned if(dir) *dir = cur_dir; return 0; } // cur_component is a subdirectory name; change to it else { const char* subdir_name = cur_component; *slash = 0; VDir* subdir = cur_dir->subdir_find(subdir_name); if(!subdir) { if(create_missing_components) { VDir _dir; subdir = cur_dir->subdir_add(subdir_name, _dir); } else return ERR_PATH_NOT_FOUND; } cur_dir = subdir; cur_component = slash+1; } } } typedef std::vector Archives; typedef Archives::iterator ArchiveIt; struct FileCBParams { VDir* dir; Loc* loc; // somewhat of a hack. which archives are mounted into the VFS is stored // in an Archives list in the Mount struct; they don't have anything to // do with a VFS dir. we want to enumerate the archives in a dir via the // normal populate(), though, so have to pass this to its callback. Archives* archives; }; // called for each OS dir ent. // add each file and directory to the VFS dir. // // note: // we don't mount archives here for performance reasons. // that means archives in subdirectories of mount points aren't added! // rationale: can't determine if file is an archive via extension - // they might be called .pk3 or whatnot. for every file in the tree, we'd have // to try to open it as an archive - not good. // this restriction also simplifies the code a bit, but if it's a problem, // just generate a list of archives here and mount them from the caller. 
static int file_cb(const char* fn, uint flags, ssize_t size, uintptr_t user) { FileCBParams* params = (FileCBParams*)user; VDir* cur_dir = params->dir; Loc* cur_loc = params->loc; Archives* archives = params->archives; // directory if(flags & LOC_DIR) { VDir _dir; cur_dir->subdir_add(fn, _dir); } // file else { // only add to list; don't enumerate its files yet for easier debugging // (we see which files are in a dir / archives) // also somewhat faster, due to better locality. // // don't check filename extension - archives won't necessarily // be called .zip (example: Quake III .pk3). // just try to open the file. if(archives) { const Handle ha = zip_archive_open(fn); if(ha > 0) archives->push_back(ha); } cur_dir->file_add(fn, cur_loc->pri, cur_loc); } return 0; } static int vdir_add_from_archive(VDir* dir, const Handle ha, const uint pri, Archives* archives) { // all files in the archive share this location Loc* loc = loc_create(ha, 0, pri-1); // add all files in archive to the VFS dir FileCBParams params = { dir, loc, archives }; CHECK_ERR(zip_enum_files(ha, file_cb, (uintptr_t)¶ms)); return 0; } static int addR(VDir* vdir, const char* path, Loc* loc, const uint pri, Archives* archives) { // add watch if(!vdir->watch) vdir->watch = 0; // add files and subdirs to dir; gather list of all archives FileCBParams params = { vdir, loc, archives }; file_enum_dirents(path, file_cb, (uintptr_t)¶ms); // loc will now be used for files in Zip archives loc->pri--; // loc->path = 0; for(SubDirIt it = vdir->subdirs.begin(); it != vdir->subdirs.end(); ++it) { VDir* subdir = &it->second; char v_subdir_path[PATH_MAX]; const char* v_subdir_name_c = subdir->v_name.c_str(); CHECK_ERR(path_append(v_subdir_path, path, v_subdir_name_c)); addR(subdir, v_subdir_path, loc, pri, archives); } // for each archive: // (already sorted due to file_enum_dirents) /* { for(ArchiveIt it = archives->begin(); it != archives->end(); ++it) { const Handle ha = *it; CHECK_ERR(vdir_add_from_archive(vdir, 
ha, pri, archives)); } } */ return 0; } // parent param not reference to allow passing 0 root node? static int vdir_add_from_dir(VDir* vdir, const char* path, const uint pri, Archives* archives) { // all loose files in the new dir and its subdirs share this location Loc* loc = loc_create(0, path, pri+1); return addR(vdir, path, loc, pri, archives); } /////////////////////////////////////////////////////////////////////////////// // // mount archives and directories into the VFS // /////////////////////////////////////////////////////////////////////////////// // need a list of all mounted dirs or archives so we can vfs_reload at // any time. it's also nice, but not necessary, to unmount at exit // (so resources aren't reported as leaked). struct Mount { std::string vfs_path; std::string name; // of OS dir or archive being mounted uint pri; Archives archives; Mount(const char* _vfs_path, const char* _name, uint _pri) : vfs_path(_vfs_path), name(_name), pri(_pri), archives() {} }; typedef std::list Mounts; typedef Mounts::iterator MountIt; static Mounts mounts; // actually mount the specified entry (either Zip archive or dir). // split out of vfs_mount because we need to mount without changing the // mount list, when invalidating (reloading) the VFS. 
static int remount(Mount& m) { const char* vfs_path = m.vfs_path.c_str(); const char* name = m.name.c_str(); const uint pri = m.pri; CHECK_PATH(name); VDir* vdir; CHECK_ERR(tree_lookup(vfs_path, 0, &vdir, LF_CREATE_MISSING_COMPONENTS)); int err; // add files and subdirectories to the VFS dir err = vdir_add_from_dir(vdir, name, pri, &m.archives); // success // if(err == 0) return 0; const Handle ha = zip_archive_open(name); err = vdir_add_from_archive(vdir, ha, pri, &m.archives); if(err == 0) return 0; return ERR_PATH_NOT_FOUND; } static int unmount(Mount& m) { std::for_each(m.archives.begin(), m.archives.end(), zip_archive_close); return 0; } static void unmount_all(void) { std::for_each(mounts.begin(), mounts.end(), unmount); } int vfs_mount(const char* const vfs_path, const char* const name, const uint pri) { ONCE(atexit(unmount_all)); // make sure it's not already mounted, i.e. in mounts { for(MountIt it = mounts.begin(); it != mounts.end(); ++it) if(it->name == name) { assert(0 && "vfs_mount: already mounted"); return -1; } } mounts.push_back(Mount(vfs_path, name, pri)); // actually mount the entry MountIt it = mounts.end(); Mount& m = *(--it); return remount(m); } // eventually returns the first error that occurred; does not abort. int vfs_rebuild() { int err = 0; vfs_root.clear_tree(); loc_free_all(); // need error return. manual loop is easier than functor + for_each. for(MountIt it = mounts.begin(); it != mounts.end(); ++it) { int ret = remount(*it); if(err == 0) err = ret; } return err; } int vfs_unmount(const char* name) { for(MountIt it = mounts.begin(); it != mounts.end(); ++it) // found the corresponding entry if(it->name == name) { int unmount_err = unmount(*it); mounts.erase(it); int rebuild_err = vfs_rebuild(); return (unmount_err < 0)? 
unmount_err : rebuild_err; } return ERR_PATH_NOT_FOUND; } /////////////////////////////////////////////////////////////////////////////// // // // /////////////////////////////////////////////////////////////////////////////// // OLD // rationale for n-archives per PATH entry: // We need to be able to unmount specific paths (e.g. when switching mods). // Don't want to remount everything (slow), or specify a mod tag when mounting // (not this module's job). Instead, we include all archives in one path entry; // the game keeps track of what path(s) it mounted for a mod, // and unmounts those when needed. int vfs_realpath(const char* fn, char* full_path) { Loc* loc; CHECK_ERR(tree_lookup(fn, &loc)); if(loc->ha <= 0) { strncpy(full_path, loc->path.c_str(), PATH_MAX); } else { const char* archive_fn = h_filename(loc->ha); if(!archive_fn) return -1; strncpy(full_path, archive_fn, PATH_MAX); } return 0; } int vfs_stat(const char* fn, struct stat* s) { Loc* loc; CHECK_ERR(tree_lookup(fn, &loc)); if(loc->ha <= 0) return stat(loc->path.c_str(), s); else return zip_stat(loc->ha, fn, s); } /////////////////////////////////////////////////////////////////////////////// // // file // /////////////////////////////////////////////////////////////////////////////// enum { // internal file state flags // make sure these don't conflict with vfs.h flags VF_OPEN = 0x100, VF_ZIP = 0x200, }; struct VFile { // cached contents of file from vfs_load // (can't just use pointer - may be freed behind our back) Handle hm; union { File f; ZFile zf; }; // be aware when adding fields that we're already pushing the size limit // (especially in PARANOIA builds, which add a member!) }; H_TYPE_DEFINE(VFile) // with #define PARANOIA, File and ZFile get an additional member, // and VFile was exceeding HDATA_USER_SIZE. flags and size (required // in File as well as VFile) are now moved into the union. // use the functions below to insulate against change a bit. 
static size_t& vf_size(VFile* vf) { assert(offsetof(struct File, size) == offsetof(struct ZFile, ucsize)); return vf->f.size; } static int& vf_flags(VFile* vf) { assert(offsetof(struct File, flags) == offsetof(struct ZFile, flags)); return vf->f.flags; } static void VFile_init(VFile* vf, va_list args) { int flags = va_arg(args, int); vf_flags(vf) = flags; } static void VFile_dtor(VFile* vf) { int& flags = vf_flags(vf); if(flags & VF_OPEN) { if(flags & VF_ZIP) zip_close(&vf->zf); else file_close(&vf->f); flags &= ~(VF_OPEN); } mem_free_h(vf->hm); } static int VFile_reload(VFile* vf, const char* fn) { int& flags = vf_flags(vf); // we're done if file is already open. need to check this because reload order // (e.g. if resource opens a file) is unspecified. if(flags & VF_OPEN) return 0; int err = -1; Loc* loc; CHECK_ERR(tree_lookup(fn, &loc)); if(loc->ha <= 0) { const char* path; char buf[PATH_MAX+1]; // (it's in the VFS root dir) if(loc->path[0] == '\0') path = fn; else { const char* loc_path = loc->path.c_str(); CHECK_ERR(path_append(buf, loc_path, fn)); path = buf; } CHECK_ERR(file_open(path, vf_flags(vf), &vf->f)); } else { if(flags & VFS_WRITE) { assert(0 && "requesting write access to file in archive"); return -1; } CHECK_ERR(zip_open(loc->ha, fn, &vf->zf)); flags |= VF_ZIP; } // success flags |= VF_OPEN; return 0; } Handle vfs_open(const char* fn, int flags /* = 0 */) { Handle h = h_alloc(H_VFile, fn, 0, flags); // pass file flags to init debug_out("vfs_open fn=%s %I64x\n", fn, h); return h; } inline int vfs_close(Handle& h) { debug_out("vfs_close %I64x\n", h); return h_free(h, H_VFile); } ssize_t vfs_io(Handle hf, size_t ofs, size_t size, void*& p) { debug_out("vfs_io ofs=%d size=%d\n", ofs, size); H_DEREF(hf, VFile, vf); // (vfs_open makes sure it's not opened for writing if zip) if(vf_flags(vf) & VF_ZIP) return zip_read(&vf->zf, ofs, size, p); // normal file: // let file_io alloc the buffer if the caller didn't (i.e. 
p = 0), // because it knows about alignment / padding requirements return file_io(&vf->f, ofs, size, &p); } Handle vfs_load(const char* fn, void*& p, size_t& size) { debug_out("vfs_load fn=%s\n", fn); p = 0; // vfs_io needs initial 0 value size = 0; // in case open or deref fails Handle hf = vfs_open(fn); if(hf <= 0) return hf; // error code H_DEREF(hf, VFile, vf); Handle hm = 0; size = vf_size(vf); // already read into mem - return existing mem handle // TODO: what if mapped? if(vf->hm > 0) { p = mem_get_ptr(vf->hm, &size); if(p) { assert(vf_size(vf) == size && "vfs_load: mismatch between File and Mem size"); hm = vf->hm; goto skip_read; } else assert(0 && "vfs_load: invalid MEM attached to vfile (0 pointer)"); // happens if someone frees the pointer. not an error! } { // VC6 goto fix ssize_t nread = vfs_io(hf, 0, size, p); if(nread > 0) hm = mem_assign(p, size); } skip_read: vfs_close(hf); // if we fail, make sure these are set to 0 // (they may have been assigned values above) if(hm <= 0) p = 0, size = 0; return hm; } int vfs_store(const char* fn, void* p, size_t size) { Handle hf = vfs_open(fn, VFS_WRITE); if(hf <= 0) return (int)hf; // error code H_DEREF(hf, VFile, vf); int ret = vfs_io(hf, 0, size, p); vfs_close(hf); return ret; } Handle vfs_map(const char* fn, int flags, void*& p, size_t& size) { Handle hf = vfs_open(fn, flags); H_DEREF(hf, VFile, vf); CHECK_ERR(file_map(&vf->f, p, size)); MEM_DTOR dtor = 0; uintptr_t ctx = 0; return mem_assign(p, size, 0, dtor, ctx); } int vfs_unmap(Handle& hm) { return -1; // return h_free(hm, H_MMap); }