/** * ========================================================================= * File : vfs_tree.cpp * Project : 0 A.D. * Description : the actual 'filesystem' and its tree of directories. * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "vfs_tree.h" #include #include #include #include #include #include "lib/posix/posix_pthread.h" #include "lib/allocators.h" #include "lib/adts.h" #include "file_internal.h" ERROR_ASSOCIATE(ERR::TNODE_NOT_FOUND, "File/directory not found", ENOENT); ERROR_ASSOCIATE(ERR::TNODE_WRONG_TYPE, "Using a directory as file or vice versa", -1); // Mount = location of a file in the tree. // TFile = all information about a file stored in the tree. // TDir = container holding TFile-s representing a dir. in the tree. static void* node_alloc(); // remembers which VFS file is the most recently modified. static time_t most_recent_mtime; static void set_most_recent_if_newer(time_t mtime) { most_recent_mtime = std::max(most_recent_mtime, mtime); } time_t tree_most_recent_mtime() { return most_recent_mtime; } //----------------------------------------------------------------------------- // locking // these are exported to protect the vfs_mount list; apart from that, it is // sufficient for VFS thread-safety to lock all of this module's APIs. static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; void tree_lock() { pthread_mutex_lock(&mutex); } void tree_unlock() { pthread_mutex_unlock(&mutex); } //----------------------------------------------------------------------------- enum TNodeType { NT_DIR, NT_FILE }; class TNode { public: TNodeType type; // allocated and owned by vfs_mount const Mount* m; // rationale: we store both entire path and name component. // this increases size of VFS (2 pointers needed here) and // filename storage, but allows getting path without having to // iterate over all dir name components. // we could retrieve name via strrchr(path, '/'), but that is slow. const char* V_path; // this is compared as a normal string (not pointer comparison), but // the pointer passed must obviously remain valid, so it is // usually an atom_fn. const char* name; TNode(TNodeType type_, const char* V_path_, const char* name_, const Mount* m_) : type(type_), V_path(V_path_), name(name_), m(m_) { } }; class TFile : public TNode { public: off_t size; time_t mtime; uintptr_t memento; TFile(const char* V_path, const char* name, const Mount* m) : TNode(NT_FILE, V_path, name, m) { size = 0; mtime = 0; memento = 0; } }; template<> class DHT_Traits { public: static const size_t initial_entries = 32; size_t hash(const char* key) const { return (size_t)fnv_lc_hash(key); } bool equal(const char* k1, const char* k2) const { // note: in theory, we could take advantage of the atom_fn // mechanism to only compare string pointers. however, we're // dealing with path *components* here. adding these as atoms would // about double the memory used (to ~1 MB) and require a bit of // care in the implementation of file_make_unique_path_copy // (must not early-out before checking the hash table). // // given that path components are rather short, string comparisons // are not expensive and we'll just go with that for simplicity. if(!strcmp(k1, k2)) return true; #ifndef NDEBUG // matched except for case: this can have 2 causes: // - intentional. that would be legitimate but doesn't make much // sense and isn't expected. // - bug, e.g. discarding filename case in a filelist. // this risks not being able to find the file (since VFS and // possibly OS are case-sensitive) and wastes memory here. // what we'll do is warn and treat as separate filename // (least surprise). // if(!strcasecmp(k1, k2)) // debug_warn("filenames differ only in case: bug?"); #endif return false; } const char* get_key(TNode* t) const { return t->name; } }; typedef DynHashTbl > TChildren; typedef TChildren::iterator TChildrenIt; enum TDirFlags { TD_POPULATED = 1 }; class TDir : public TNode { uint flags; // enum TDirFlags TChildren children; public: RealDir rd; // HACK; removeme TDir(const char* V_path, const char* name, const Mount* m_) : TNode(NT_DIR, V_path, name, 0), children() { flags = 0; rd.m = m_; rd.watch = 0; mount_create_real_dir(V_path, rd.m); } TChildrenIt begin() const { return children.begin(); } TChildrenIt end() const { return children.end(); } // non-const - caller may change e.g. rd.watch RealDir& get_rd() { return rd; } void populate() { // the caller may potentially access this directory. // make sure it has been populated with loose files/directories. if(!(flags & TD_POPULATED)) { WARN_ERR(mount_populate(this, &rd)); flags |= TD_POPULATED; } } TNode* find(const char* name) const { return children.find(name); } // must not be called if already exists! use find() first or // find_and_add instead. LibError add(const char* name_tmp, TNodeType type, TNode** pnode, const Mount* m_override = 0) { // note: must be done before path_append for security // (otherwise, '/' in wouldn't be caught) RETURN_ERR(path_component_validate(name_tmp)); char V_new_path_tmp[PATH_MAX]; const uint flags = (type == NT_DIR)? PATH_APPEND_SLASH : 0; RETURN_ERR(path_append(V_new_path_tmp, V_path, name_tmp, flags)); const char* V_new_path = file_make_unique_fn_copy(V_new_path_tmp); const char* name = path_name_only(V_new_path); // for directory nodes, V_path ends in slash, so name cannot be // derived via path_last_component. instead, we have to make an // atom_fn out of name_tmp. // this effectively doubles the amount of directory path text, // but it's not that bad. if(type == NT_DIR) name = file_make_unique_fn_copy(name_tmp); const Mount* m = rd.m; if(m_override) m = m_override; // note: if anything below fails, this mem remains allocated in the // pool, but that "can't happen" and is OK because pool is big enough. void* mem = node_alloc(); if(!mem) WARN_RETURN(ERR::NO_MEM); TNode* node; #include "lib/nommgr.h" if(type == NT_FILE) node = new(mem) TFile(V_new_path, name, m); else node = new(mem) TDir (V_new_path, name, m); #include "lib/mmgr.h" children.insert(name, node); *pnode = node; return INFO::OK; } LibError find_and_add(const char* name, TNodeType type, TNode** pnode, const Mount* m = 0) { TNode* node = children.find(name); if(node) { // wrong type (dir vs. file) if(node->type != type) WARN_RETURN(ERR::TNODE_WRONG_TYPE); *pnode = node; return INFO::ALREADY_EXISTS; } return add(name, type, pnode, m); } // empty this directory and all subdirectories; used when rebuilding VFS. void clearR() { // recurse for all subdirs // (preorder traversal - need to do this before clearing the list) for(TChildrenIt it = children.begin(); it != children.end(); ++it) { TNode* node = *it; if(node->type == NT_DIR) { ((TDir*)node)->clearR(); ((TDir*)node)->~TDir(); } } // wipe out this directory children.clear(); // the watch is restored when this directory is repopulated; we must // remove it in case the real directory backing this one was deleted. mount_detach_real_dir(&rd); } }; static Pool node_pool; static inline void node_init() { const size_t el_size = std::max(sizeof(TDir), sizeof(TFile)); (void)pool_create(&node_pool, VFS_MAX_FILES*el_size, el_size); } static inline void node_shutdown() { (void)pool_destroy(&node_pool); } static void* node_alloc() { return pool_alloc(&node_pool, 0); } static inline void node_free_all() { pool_free_all(&node_pool); } ////////////////////////////////////////////////////////////////////////////// // // // ////////////////////////////////////////////////////////////////////////////// static void displayR(TDir* td, int indent_level) { const char indent[] = " "; TChildrenIt it; // list all files in this dir for(it = td->begin(); it != td->end(); ++it) { TNode* node = (*it); if(node->type != NT_FILE) continue; const char* name = node->name; TFile& file = *((TFile*)node); char file_location = mount_get_type(file.m); char* timestamp = ctime(&file.mtime); timestamp[24] = '\0'; // remove '\n' const off_t size = file.size; // build format string: tell it how long the filename may be, // so that it takes up all space before file info column. char fmt[25]; int chars = 80 - indent_level*(sizeof(indent)-1); sprintf(fmt, "%%-%d.%ds (%%c; %%6d; %%s)\n", chars, chars); for(int i = 0; i < indent_level; i++) printf(indent); printf(fmt, name, file_location, size, timestamp); } // recurse over all subdirs for(it = td->begin(); it != td->end(); ++it) { TNode* node = (*it); if(node->type != NT_DIR) continue; const char* subdir_name = node->name; // write subdir's name // note: do it now, instead of in recursive call so that: // - we don't have to pass dir_name parameter; // - the VFS root node isn't displayed. for(int i = 0; i < indent_level; i++) printf(indent); printf("[%s/]\n", subdir_name); TDir* subdir = ((TDir*)node); displayR(subdir, indent_level+1); } } struct LookupCbParams : boost::noncopyable { const bool create_missing; TDir* td; // current dir; assigned from node TNode* node; // latest node returned (dir or file) LookupCbParams(uint flags, TDir* td_) : create_missing((flags & LF_CREATE_MISSING) != 0), td(td_) { // init in case lookup's is "". // this works because TDir is derived from TNode. node = (TNode*)td; } }; static LibError lookup_cb(const char* component, bool is_dir, uintptr_t cbData) { LookupCbParams* p = (LookupCbParams*)cbData; const TNodeType type = is_dir? NT_DIR : NT_FILE; p->td->populate(); p->node = p->td->find(component); if(!p->node) { if(p->create_missing) RETURN_ERR(p->td->add(component, type, &p->node)); else // complaining is left to callers; vfs_exists must be // able to fail quietly. return ERR::TNODE_NOT_FOUND; // NOWARN } if(p->node->type != type) WARN_RETURN(ERR::TNODE_WRONG_TYPE); if(is_dir) p->td = (TDir*)p->node; return INFO::CB_CONTINUE; } static LibError lookup(TDir* td, const char* path, uint flags, TNode** pnode) { // no undefined bits set debug_assert( (flags & ~(LF_CREATE_MISSING|LF_START_DIR)) == 0 ); LookupCbParams p(flags, td); RETURN_ERR(path_foreach_component(path, lookup_cb, (uintptr_t)&p)); // success. *pnode = p.node; return INFO::OK; } ////////////////////////////////////////////////////////////////////////////// // // // ////////////////////////////////////////////////////////////////////////////// // this is a pointer to node_alloc-ed memory instead of a static TDir for // 2 reasons: // - no NLSO shutdown order issues; validity is well defined // (namely between tree_init and tree_shutdown) // - bonus: tree_init can use it when checking if called twice. // // this means we'll have to be extremely careful during tree_clear // whether its memory remains valid. static TDir* tree_root; // make tree_root valid. static void tree_root_init() { // must not be called more than once without intervening tree_shutdown. debug_assert(!tree_root); #include "lib/nommgr.h" // placement new void* mem = node_alloc(); if(mem) tree_root = new(mem) TDir("", "", 0); #include "lib/mmgr.h" } // destroy the tree root node and free any extra memory held by it. // note that its node memory still remains allocated. static void tree_root_shutdown() { // must not be called without previous tree_root_init. debug_assert(tree_root); // this frees the root node's hash table, which would otherwise leak. tree_root->~TDir(); tree_root = 0; } // establish a root node and prepare node_allocator for use. // // rationale: calling this from every tree_add* is ugly, so require // manual init. void tree_init() { node_init(); tree_root_init(); } // empty all directories and free their memory. // however, node_allocator's DynArray still remains initialized and // the root directory is usable (albeit empty). // use when remounting. void tree_clear() { tree_root->clearR(); tree_root_shutdown(); // must come before tree_root_init node_free_all(); // note: this is necessary because node_free_all // pulls the rug out from under tree_root. tree_root_init(); } // shut down entirely; destroys node_allocator. any further use after this // requires another tree_init. void tree_shutdown() { // note: can't use tree_clear because that restores a root node // ready for use, which allocates memory. // wipe out all dirs (including root node), thus // freeing memory they hold. tree_root->clearR(); tree_root_shutdown(); // free memory underlying the nodes themselves. node_shutdown(); } // write a representation of the VFS tree to stdout. void tree_display() { displayR(tree_root, 0); } LibError tree_add_file(TDir* td, const char* name, const Mount* m, off_t size, time_t mtime, uintptr_t memento) { TNode* node; LibError ret = td->find_and_add(name, NT_FILE, &node); RETURN_ERR(ret); if(ret == INFO::ALREADY_EXISTS) { TFile* tf = (TFile*)node; if(!mount_should_replace(tf->m, m, tf->size, size, tf->mtime, mtime)) return INFO::ALREADY_EXISTS; stats_vfs_file_remove(tf->size); } TFile* tf = (TFile*)node; tf->m = m; tf->mtime = mtime; tf->size = size; tf->memento = memento; stats_vfs_file_add(size); set_most_recent_if_newer(mtime); return INFO::OK; } LibError tree_add_dir(TDir* td, const char* name, TDir** ptd) { TNode* node; RETURN_ERR(td->find_and_add(name, NT_DIR, &node)); *ptd = (TDir*)node; return INFO::OK; } LibError tree_lookup_dir(const char* V_path, TDir** ptd, uint flags) { // path is not a directory; TDir::lookup might return a file node if(!VFS_PATH_IS_DIR(V_path)) WARN_RETURN(ERR::TNODE_WRONG_TYPE); TDir* td = (flags & LF_START_DIR)? *ptd : tree_root; TNode* node = NULL; CHECK_ERR(lookup(td, V_path, flags, &node)); // directories should exist, so warn if this fails *ptd = (TDir*)node; return INFO::OK; } LibError tree_lookup(const char* V_path, TFile** pfile, uint flags) { // path is not a file; TDir::lookup might return a directory node if(VFS_PATH_IS_DIR(V_path)) WARN_RETURN(ERR::TNODE_WRONG_TYPE); TNode* node = NULL; LibError ret = lookup(tree_root, V_path, flags, &node); RETURN_ERR(ret); *pfile = (TFile*)node; return INFO::OK; } struct AddPathCbParams : boost::noncopyable { const Mount* const m; TDir* td; AddPathCbParams(const Mount* m_) : m(m_), td(tree_root) {} }; static LibError add_path_cb(const char* component, bool is_dir, uintptr_t cbData) { AddPathCbParams* p = (AddPathCbParams*)cbData; // should only be called for directory paths, so complain if not dir. if(!is_dir) WARN_RETURN(ERR::TNODE_WRONG_TYPE); TNode* node; RETURN_ERR(p->td->find_and_add(component, NT_DIR, &node, p->m)); p->td = (TDir*)node; return INFO::CB_CONTINUE; } // iterate over all components in V_dir_path (must reference a directory, // i.e. end in slash). for any that are missing, add them with the // specified mount point. this is useful for mounting directories. // // passes back the last directory encountered. LibError tree_add_path(const char* V_dir_path, const Mount* m, TDir** ptd) { debug_assert(VFS_PATH_IS_DIR(V_dir_path)); AddPathCbParams p(m); RETURN_ERR(path_foreach_component(V_dir_path, add_path_cb, (uintptr_t)&p)); *ptd = p.td; return INFO::OK; } ////////////////////////////////////////////////////////////////////////////// // rationale: see DirIterator definition in file.h. struct TreeDirIterator { TChildren::iterator it; // cache end() to avoid needless copies TChildren::iterator end; // the directory we're iterating over; this is used to lock/unlock it, // i.e. prevent modifications that would invalidate the iterator. TDir* td; }; cassert(sizeof(TreeDirIterator) <= DIR_ITERATOR_OPAQUE_SIZE); LibError tree_dir_open(const char* V_dir_path, DirIterator* di) { debug_assert(VFS_PATH_IS_DIR(V_dir_path)); TreeDirIterator* tdi = (TreeDirIterator*)di->opaque; TDir* td; CHECK_ERR(tree_lookup_dir(V_dir_path, &td)); // we need to prevent modifications to this directory while an iterator is // active, otherwise entries may be skipped or no longer valid addresses // accessed. blocking other threads is much more convenient for callers // than having to check for ERR::AGAIN on every call, so we use a mutex // instead of a simple refcount. we don't bother with fine-grained locking // (e.g. per directory or read/write locks) because it would result in // more overhead (we have hundreds of directories) and is unnecessary. tree_lock(); tdi->it = td->begin(); tdi->end = td->end(); tdi->td = td; return INFO::OK; } LibError tree_dir_next_ent(DirIterator* di, DirEnt* ent) { TreeDirIterator* tdi = (TreeDirIterator*)di->opaque; if(tdi->it == tdi->end) return ERR::DIR_END; // NOWARN const TNode* node = *(tdi->it++); ent->name = node->name; // set size and mtime fields depending on node type: switch(node->type) { case NT_DIR: ent->size = -1; ent->mtime = 0; // not currently supported for dirs ent->tf = 0; break; case NT_FILE: { TFile* tf = (TFile*)node; ent->size = tf->size; ent->mtime = tf->mtime; ent->tf = tf; break; } default: debug_warn("invalid TNode type"); } return INFO::OK; } LibError tree_dir_close(DirIterator* UNUSED(d)) { tree_unlock(); // no further cleanup needed. we could zero out d but that might // hide bugs; the iterator is safe (will not go beyond end) anyway. return INFO::OK; } //----------------------------------------------------------------------------- // get/set const Mount* tfile_get_mount(const TFile* tf) { return tf->m; } uintptr_t tfile_get_memento(const TFile* tf) { return tf->memento; } const char* tfile_get_atom_fn(const TFile* tf) { return ((TNode*)tf)->V_path; } void tfile_set_mount(TFile* tf, const Mount* m) { tf->m = m; } void tree_update_file(TFile* tf, off_t size, time_t mtime) { tf->size = size; tf->mtime = mtime; } // get file status (mode, size, mtime). output param is undefined on error. LibError tree_stat(const TFile* tf, struct stat* s) { // all stat members currently supported are stored in TFile, so we // can return them directly without having to call file|zip_stat. s->st_mode = S_IFREG; s->st_size = tf->size; s->st_mtime = tf->mtime; return INFO::OK; } RealDir* tree_get_real_dir(TDir* td) { return &td->get_rd(); }