From 07a4f809d884a5250ed52f5c8f173eaa66c75a16 Mon Sep 17 00:00:00 2001 From: Ykkrosh Date: Tue, 24 Mar 2009 00:55:35 +0000 Subject: [PATCH] # Replace Xerces with Libxml2 (in Xeromyces, whose name now doesn't make much sense but oh well) This was SVN commit r6770. --- build/premake/premake.lua | 12 +- source/main.cpp | 8 + source/ps/GameSetup/GameSetup.cpp | 3 - source/ps/XML/XML.h | 105 ------ source/ps/XML/XMLUtils.cpp | 158 ---------- source/ps/XML/XercesErrorHandler.cpp | 64 ---- source/ps/XML/XercesErrorHandler.h | 96 ------ source/ps/XML/Xeromyces.cpp | 456 ++++++++------------------- source/ps/XML/Xeromyces.h | 24 +- source/ps/XML/tests/test_XMLWriter.h | 1 - source/ps/XML/tests/test_XeroXMB.h | 46 ++- source/ps/XML/tests/test_Xeromyces.h | 3 + 12 files changed, 211 insertions(+), 765 deletions(-) delete mode 100644 source/ps/XML/XML.h delete mode 100644 source/ps/XML/XMLUtils.cpp delete mode 100644 source/ps/XML/XercesErrorHandler.cpp delete mode 100644 source/ps/XML/XercesErrorHandler.h diff --git a/build/premake/premake.lua b/build/premake/premake.lua index 8976b1ccab..433b2ca251 100755 --- a/build/premake/premake.lua +++ b/build/premake/premake.lua @@ -333,7 +333,6 @@ function setup_all_libs () } extern_libs = { "spidermonkey", - "xerces", "enet", "boost", -- dragged in via server->simulation.h->random } @@ -356,7 +355,7 @@ function setup_all_libs () extern_libs = { "spidermonkey", "sdl", -- key definitions - "xerces", + "libxml2", "opengl", "zlib", "boost", @@ -495,7 +494,6 @@ used_extern_libs = { "zlib", "spidermonkey", - "xerces", "libxml2", "openal", @@ -565,7 +563,9 @@ function setup_main_exe () -- Utilities "pthread", "rt", -- Debugging - "bfd", "iberty" + "bfd", "iberty", + -- Dynamic libraries (needed for linking for gold) + "dl", }) -- For debug_resolve_symbol @@ -947,7 +947,9 @@ function setup_tests() -- Utilities "pthread", "rt", -- Debugging - "bfd", "iberty" + "bfd", "iberty", + -- Dynamic libraries (needed for linking for gold) + "dl", }) -- For debug_resolve_symbol diff --git a/source/main.cpp b/source/main.cpp index d6db78d36c..240c1dea92 100644 --- a/source/main.cpp +++ b/source/main.cpp @@ -362,6 +362,11 @@ static void RunGameOrAtlas(int argc, const char* argv[]) { CmdLineArgs args(argc, argv); + // We need to initialise libxml2 in the main thread before + // any thread uses it. So initialise it here before we + // might run Atlas. + CXeromyces::Startup(); + // run Atlas (if requested via args) bool ran_atlas = ATLAS_RunIfOnCmdLine(args); // Atlas handles the whole init/shutdown/etc sequence by itself; @@ -380,6 +385,9 @@ static void RunGameOrAtlas(int argc, const char* argv[]) Shutdown(0); ScriptingHost::FinalShutdown(); // this can't go in Shutdown() because that could be called multiple times per process, so stick it here instead MainControllerShutdown(); + + // Shut down libxml2 (done here to match the Startup call) + CXeromyces::Terminate(); } int main(int argc, char* argv[]) diff --git a/source/ps/GameSetup/GameSetup.cpp b/source/ps/GameSetup/GameSetup.cpp index b7fad08eb5..629450626d 100644 --- a/source/ps/GameSetup/GameSetup.cpp +++ b/source/ps/GameSetup/GameSetup.cpp @@ -655,9 +655,6 @@ static void ShutdownPs() // disable the special Windows cursor, or free textures for OGL cursors cursor_draw(0, g_mouse_x, g_mouse_y); - // close down Xerces if it was loaded - CXeromyces::Terminate(); - // Unload the real language (since it depends on the scripting engine, // which is going to be killed later) and use the English fallback messages I18n::LoadLanguage(NULL); diff --git a/source/ps/XML/XML.h b/source/ps/XML/XML.h deleted file mode 100644 index 396e2a4dd7..0000000000 --- a/source/ps/XML/XML.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - XML.h - Xerces wrappers & convenience functions - EXAMPLE : - Simple usage: - - CVFSEntityResolver *entRes=new CVFSEntityResolver(filename); - parser->setEntityResolver(entRes); - - CVFSInputSource src; - if (src.OpenFile("this/is/a/vfs/path.xml")==0) - parser->parse(src); - - delete entRes; - - The input source object should be kept alive as long as the parser is - using its input stream (i.e. until the parse is complete). The same - goes for the entity resolver. -*/ - -#ifndef INCLUDED_XML -#define INCLUDED_XML - -// temporarily go down to W3 because Xerces (in addition to all its other -// failings) isn't W4-clean. -#if MSC_VERSION -#pragma warning(push, 3) -#pragma warning(disable: 4267) // disable Wp64 warnings -#endif - -#include -#include - -#include -#include -#include -#include - -#include -#include - -// for Xeromyces.cpp (moved here so we only have to #undef new and -// revert to W3 once) -// The converter uses SAX2, so it should [theoretically] -// be fairly easy to swap Xerces for something else (if desired) -#include -#include -#include - - -#if MSC_VERSION -#pragma warning(pop) // back to W4 -#endif - -#include "XercesErrorHandler.h" -#include "ps/CStr.h" -#include "lib/file/vfs/vfs_path.h" - -XERCES_CPP_NAMESPACE_USE - -CStr XMLTranscode(const XMLCh *); -XMLCh *XMLTranscode(const char *); - -/* - CLASS : CVFSInputSource - DESCRIPTION : - Use instead of LocalFileInputSource to read XML files from VFS -*/ -class CVFSInputSource: public InputSource -{ - shared_ptr m_pBuffer; - size_t m_BufferSize; - - CVFSInputSource(const CVFSInputSource &); - CVFSInputSource &operator = (const CVFSInputSource &); - -public: - CVFSInputSource() - : m_BufferSize(0) - { - } - - virtual ~CVFSInputSource(); - - // Open a VFS path for XML parsing - // returns 0 if successful, -1 on failure - int OpenFile(const VfsPath& path); - - virtual BinInputStream *makeStream() const; -}; - -class CVFSEntityResolver: public EntityResolver -{ - const char *m_DocName; - -public: - virtual InputSource *resolveEntity( - const XMLCh *const publicId, - const XMLCh *const systemId); - - inline CVFSEntityResolver(const char *docName): - m_DocName(docName) - {} -}; - -#endif // INCLUDED_XML diff --git a/source/ps/XML/XMLUtils.cpp b/source/ps/XML/XMLUtils.cpp deleted file mode 100644 index 2560a09a54..0000000000 --- a/source/ps/XML/XMLUtils.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include "precompiled.h" - -#include "XML.h" -#include "ps/Filesystem.h" -#include "ps/CStr.h" -#include "ps/CLogger.h" - -#define LOG_CATEGORY "xml" - -/* -// but static Xerces => tons of warnings due to missing debug info, -// and warnings about invalid pointers (conflicting CRT heaps?) in parser => allow for now -#ifndef XERCES_STATIC_LIB -#error "need to define XERCES_STATIC_LIB in project options (so that Xerces uses the same CRT as the other libs)" -#endif -*/ - -#if MSC_VERSION -# ifdef XERCES_STATIC_LIB -# ifndef NDEBUG -# pragma comment(lib, "xerces-c_2D-static.lib") -# else -# pragma comment(lib, "xerces-c_2D-static.lib") -# endif // NDEBUG -# else // XERCES_STATIC_LIB -# ifndef NDEBUG -# pragma comment(lib, "xerces-c_2D.lib") -# else -# pragma comment(lib, "xerces-c_2.lib") -# endif // NDEBUG -# endif // XERCES_STATIC_LIB -#endif // MSC_VERSION - -XERCES_CPP_NAMESPACE_USE - -CStr XMLTranscode(const XMLCh* xmltext) -{ - char* str=XMLString::transcode((const XMLCh *)xmltext); - CStr result(str); - XMLString::release(&str); - return result; -} - -XMLCh *XMLTranscode(const char *str) -{ - return XMLString::transcode(str); -} - -int CVFSInputSource::OpenFile(const VfsPath& path) -{ - LibError ret = g_VFS->LoadFile(path, m_pBuffer, m_BufferSize); - if(ret != INFO::OK) - { - LOG(CLogger::Error, LOG_CATEGORY, "CVFSInputSource: file %s couldn't be loaded (LoadFile: %d)", path.string().c_str(), ret); - return -1; - } - - XMLCh *sysId=XMLString::transcode(path.string().c_str()); - setSystemId(sysId); - XMLString::release(&sysId); - - return 0; -} - -CVFSInputSource::~CVFSInputSource() -{ -} - -BinInputStream *CVFSInputSource::makeStream() const -{ - if(!m_pBuffer) - return 0; - - return new BinMemInputStream((XMLByte *)m_pBuffer.get(), (unsigned int)m_BufferSize, BinMemInputStream::BufOpt_Reference); -} - -#define IS_PATH_SEP(_chr) (_chr == '/' || _chr == '\\') - -// Return a pointer to the last path separator preceding *end, while not -// going further back than *beginning -const char *prevpathcomp(const char *end, const char *beginning) -{ - do - end--; - while (end > beginning && !IS_PATH_SEP(*end)); - return end; -} - -InputSource *CVFSEntityResolver::resolveEntity(const XMLCh *const UNUSED(publicId), - const XMLCh *const systemId) -{ - CVFSInputSource *ret=new CVFSInputSource(); - char *path=XMLString::transcode(systemId); - char *orgpath=path; - - char abspath[PATH_MAX]; - const char *end=strchr(m_DocName, '\0'); - - if (IS_PATH_SEP(*path)) - path++; - else - { - // We know that we have a relative path here: - // - Remove the file name - // - If we have a ../ components - remove them and remove one component - // off the end of the document path for each ../ component - // - prefix of document path + suffix of input path => the VFS path - - // Remove the file name - end=prevpathcomp(end, m_DocName); - - // Remove one path component for each opening ../ (or ..\) - // Note that this loop will stop when all path components from the - // document name have been stripped - the resulting path will be invalid, but - // so was the input path. - // Also note that this will not handle ../ path components in the middle of - // the input path. - while (strncmp(path, "..", 2) == 0 && IS_PATH_SEP(path[2]) && end > m_DocName) - { - end=prevpathcomp(end, m_DocName); - path += 3; - } - - // include one slash from prefix - end++; - - const ptrdiff_t prefixlen=end-m_DocName; - - cpu_memcpy(abspath, m_DocName, prefixlen); - strncpy(abspath+prefixlen, path, PATH_MAX-prefixlen); - // strncpy might not have terminated, if path was too long - abspath[PATH_MAX-1]=0; - - path=abspath; - } - - // janwas: removed for less spew -// LOG(CLogger::Normal, LOG_CATEGORY, "EntityResolver: path \"%s\" translated to \"%s\"", orgpath, path); - - - char *pos=path; - if ((pos=strchr(pos, '\\')) != NULL) - { - LOG(CLogger::Warning, LOG_CATEGORY, "While resolving XML entities for %s: path %s [%s] contains non-portable path separator \\", m_DocName, orgpath, path); - do - *pos='/'; - while ((pos=strchr(pos+1, '\\')) != NULL); - } - - if (ret->OpenFile(path)!=0) - { - delete ret; - ret=NULL; - } - - XMLString::release(&orgpath); - return ret; -} diff --git a/source/ps/XML/XercesErrorHandler.cpp b/source/ps/XML/XercesErrorHandler.cpp deleted file mode 100644 index 6605ba34db..0000000000 --- a/source/ps/XML/XercesErrorHandler.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* -Xerces Error Handler for Pyrogenesis (and the GUI) -*/ - -// --------------------------------------------------------------------------- -// Includes -// --------------------------------------------------------------------------- - -#include "precompiled.h" - -#include "XercesErrorHandler.h" -#include -#include -#include -#include "ps/Pyrogenesis.h" -#include "ps/CLogger.h" - -#define LOG_CATEGORY "xml" - -// Use namespace -XERCES_CPP_NAMESPACE_USE - -void CXercesErrorHandler::warning(const SAXParseException &toCatch) -{ - CStr systemId=XMLTranscode(toCatch.getSystemId()); - CStr message=XMLTranscode(toCatch.getMessage()); - - LOG(CLogger::Warning, LOG_CATEGORY, "XML Parse Warning: %s:%d:%d: %s", - systemId.c_str(), - toCatch.getLineNumber(), - toCatch.getColumnNumber(), - message.c_str()); -} - -void CXercesErrorHandler::error(const SAXParseException& toCatch) -{ - CStr systemId=XMLTranscode(toCatch.getSystemId()); - CStr message=XMLTranscode(toCatch.getMessage()); - fSawErrors = true; - - LOG(CLogger::Error, LOG_CATEGORY, "XML Parse Error: %s:%d:%d: %s", - systemId.c_str(), - toCatch.getLineNumber(), - toCatch.getColumnNumber(), - message.c_str()); -} - -void CXercesErrorHandler::fatalError(const SAXParseException& toCatch) -{ - CStr systemId=XMLTranscode(toCatch.getSystemId()); - CStr message=XMLTranscode(toCatch.getMessage()); - fSawErrors = true; - - LOG(CLogger::Error, LOG_CATEGORY, "XML Parse Error (Fatal): %s:%d:%d: %s", - systemId.c_str(), - toCatch.getLineNumber(), - toCatch.getColumnNumber(), - message.c_str()); -} - -void CXercesErrorHandler::resetErrors() -{ - fSawErrors = false; -} diff --git a/source/ps/XML/XercesErrorHandler.h b/source/ps/XML/XercesErrorHandler.h deleted file mode 100644 index ccdf83df37..0000000000 --- a/source/ps/XML/XercesErrorHandler.h +++ /dev/null @@ -1,96 +0,0 @@ -/* -Xerces Error Handler for Pyrogenesis (and the GUI) - ---Overview-- - - This is a class that that will let us output - Xerces C++ Parser errors in our own Log - or whatever, fit to Pyrogenesis and foremost - the GUI. - ---More info-- - - http://xml.apache.org/xerces-c/apiDocs/classErrorHandler.html - -*/ - - -#ifndef INCLUDED_XERCESERRORHANDLER -#define INCLUDED_XERCESERRORHANDLER - -#include "XML.h" - -#include - -/** - * Adapter function that catches Xerces Reading Exceptions - * and lets us output them in Pyrogenesis CLogFile. - * - * Used for all Xerces C++ Parser reading. - * - * @see http://xml.apache.org/xerces-c/apiDocs/classErrorHandler.html - */ -class CXercesErrorHandler : public XERCES_CPP_NAMESPACE::ErrorHandler -{ -public: - CXercesErrorHandler() : - fSawErrors(false) - {} - - ~CXercesErrorHandler() - {} - - // ----------------------------------------------------------------------- - /** @name Implementation of the error handler interface */ - // ----------------------------------------------------------------------- - //@{ - /** - * Sends warning exceptions here. - */ - void warning(const XERCES_CPP_NAMESPACE::SAXParseException& toCatch); - - /** - * Sends error exceptions here. - */ - void error(const XERCES_CPP_NAMESPACE::SAXParseException& toCatch); - - /** - * Sends fatal error exceptions here. - */ - void fatalError(const XERCES_CPP_NAMESPACE::SAXParseException& toCatch); - - /** - * Sets fSawError to false. - */ - void resetErrors(); - - //@} - // ----------------------------------------------------------------------- - /** @name Access Functions */ - // ----------------------------------------------------------------------- - //@{ - /** - * @return true if Errors Occured - */ - bool GetSawErrors() const { return fSawErrors; } - - //@} -private: - // ----------------------------------------------------------------------- - /** @name Private data members */ - // ----------------------------------------------------------------------- - //@{ - - /** - * This is set if we get any errors, and is queryable via an access - * function. Its used by the main code to suppress output if there are - * errors. - * - * @see getSawErrors() - */ - bool fSawErrors; - - //@} -}; - -#endif diff --git a/source/ps/XML/Xeromyces.cpp b/source/ps/XML/Xeromyces.cpp index ea162cd085..0132bdc982 100644 --- a/source/ps/XML/Xeromyces.cpp +++ b/source/ps/XML/Xeromyces.cpp @@ -10,90 +10,23 @@ #include "ps/Filesystem.h" #include "Xeromyces.h" +#include + #define LOG_CATEGORY "xml" -#include "XML.h" - - -int CXeromyces::XercesLoaded = 0; // for once-only initialisation - -// Convenient storage for the internal tree -typedef struct { - std::string name; - utf16string value; -} XMLAttribute; - -typedef struct XMLElement { - std::string name; - int linenum; - utf16string text; - std::vector childs; - std::vector attrs; -} XMLElement; - -class XeroHandler : public DefaultHandler -{ -public: - XeroHandler() : m_locator(NULL), Root(NULL) {} - ~XeroHandler() - { - if (Root) - DeallocateElement(Root); - } - - // SAX2 event handlers: - virtual void startDocument(); - virtual void endDocument(); - virtual void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs); - virtual void endElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname); - virtual void characters(const XMLCh* const chars, const unsigned int length); - - const Locator* m_locator; - - virtual void setDocumentLocator(const Locator* const locator) - { - m_locator = locator; - } - - // Non-SAX2 stuff, used for storing the - // parsed data and constructing the XMB: - - void CreateXMB(); - WriteBuffer writeBuffer; - -private: - std::set ElementNames; - std::set AttributeNames; - XMLElement* Root; - XMLElement* CurrentElement; - std::stack ElementStack; - - std::map ElementID; - std::map AttributeID; - - void OutputElement(XMLElement* el); - - // Recursively frees memory - void DeallocateElement(XMLElement* el); -}; - - - -CXeromyces::CXeromyces() -{ -} - -CXeromyces::~CXeromyces() +static bool g_XeromycesStarted = false; +void CXeromyces::Startup() { + debug_assert(!g_XeromycesStarted); + xmlInitParser(); + g_XeromycesStarted = true; } void CXeromyces::Terminate() { - if (XercesLoaded) - { - XMLPlatformUtils::Terminate(); - XercesLoaded = 0; - } + debug_assert(g_XeromycesStarted); + xmlCleanupParser(); + g_XeromycesStarted = false; } @@ -128,6 +61,8 @@ void CXeromyces::GetXMBPath(const PIVFS& vfs, const VfsPath& xmlFilename, const PSRETURN CXeromyces::Load(const VfsPath& filename) { + debug_assert(g_XeromycesStarted); + // Make sure the .xml actually exists if (! FileExists(filename)) { @@ -184,93 +119,43 @@ PSRETURN CXeromyces::Load(const VfsPath& filename) // XMB isn't up to date with the XML, so rebuild it: - // Load Xerces if necessary - if (! XercesLoaded) - { - XMLPlatformUtils::Initialize(); - XercesLoaded = 1; - } - - // Open the .xml file - CVFSInputSource source; - if (source.OpenFile(filename) < 0) + CVFSFile input; + if (input.Load(filename)) { LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to open XML file %s", filename.string().c_str()); return PSRETURN_Xeromyces_XMLOpenFailed; } + xmlDocPtr doc = xmlReadMemory((const char*)input.GetBuffer(), input.GetBufferSize(), "", NULL, + XML_PARSE_NONET|XML_PARSE_NOCDATA); + // TODO: handle parse errors + WriteBuffer writeBuffer; - PSRETURN ret = ConvertXMLtoXMB(filename.string().c_str(), source, writeBuffer); - if (ret) - { - if (ret == PSRETURN_Xeromyces_XMLParseError) - LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Errors in XML file '%s'", filename.string().c_str()); - return ret; - } - + CreateXMB(doc, writeBuffer); + + xmlFreeDoc(doc); + // Save the file to disk, so it can be loaded quickly next time g_VFS->CreateFile(xmbPath, writeBuffer.Data(), writeBuffer.Size()); - XMBBuffer = writeBuffer.Data(); // add a reference + m_XMBBuffer = writeBuffer.Data(); // add a reference // Set up the XMBFile - const bool ok = Initialise((const char*)XMBBuffer.get()); + const bool ok = Initialise((const char*)m_XMBBuffer.get()); debug_assert(ok); return PSRETURN_OK; } -// Reads from source, returns output in writeBuffer -PSRETURN CXeromyces::ConvertXMLtoXMB(const char* filename, InputSource& source, WriteBuffer& writeBuffer) -{ - // Set up the Xerces parser - SAX2XMLReader* Parser = XMLReaderFactory::createXMLReader(); - - // Disable DTDs - Parser->setFeature(XMLUni::fgXercesLoadExternalDTD, false); - - XeroHandler handler; - Parser->setContentHandler(&handler); - - CXercesErrorHandler errorHandler; - Parser->setErrorHandler(&errorHandler); - - CVFSEntityResolver entityResolver(filename); - Parser->setEntityResolver(&entityResolver); - - // Build a tree inside handler - Parser->parse(source); - - // (It's horribly inefficient doing SAX2->tree then tree->XMB, - // but the XML->XMB conversion should be done very rarely - // anyway. If it's ever needed, the XMB writing can be done - // directly from inside the SAX2 event handlers, although that's - // a little more complex) - - delete Parser; - - if (errorHandler.GetSawErrors()) - return PSRETURN_Xeromyces_XMLParseError; - // The internal tree of the XeroHandler will be cleaned up automatically - - // Convert the data structures into the XMB format - handler.CreateXMB(); - - // Copy the (refcounted) buffer into the output parameter - writeBuffer = handler.writeBuffer; - - return PSRETURN_OK; -} - bool CXeromyces::ReadXMBFile(const VfsPath& filename) { size_t size; - if(g_VFS->LoadFile(filename, XMBBuffer, size) < 0) + if(g_VFS->LoadFile(filename, m_XMBBuffer, size) < 0) return false; - debug_assert(size >= 42); // else: invalid XMB file size. (42 bytes is the smallest possible XMB. (Well, maybe not quite, but it's a nice number.)) + debug_assert(size >= 4); // make sure it's at least got the initial header // Set up the XMBFile - if(!Initialise((const char*)XMBBuffer.get())) + if(!Initialise((const char*)m_XMBBuffer.get())) return false; return true; @@ -278,152 +163,42 @@ bool CXeromyces::ReadXMBFile(const VfsPath& filename) -void XeroHandler::startDocument() +static void FindNames(const xmlNodePtr node, std::set& elementNames, std::set& attributeNames) { - Root = new XMLElement; - ElementStack.push(Root); + elementNames.insert((const char*)node->name); + + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) + attributeNames.insert((const char*)attr->name); + + for (xmlNodePtr child = node->children; child; child = child->next) + if (child->type == XML_ELEMENT_NODE) + FindNames(child, elementNames, attributeNames); } -void XeroHandler::endDocument() -{ -} - -/* -// Silently clobbers non-ASCII characters -std::string lowercase_ascii(const XMLCh *a) -{ - std::string b; - size_t len=XMLString::stringLen(a); - b.resize(len); - for (size_t i = 0; i < len; ++i) - b[i] = (char)towlower(a[i]); - return b; -} -*/ - -/** - * Return an ASCII version of the given 16-bit string, ignoring - * any non-ASCII characters. - * - * @param const XMLCh * a Input string. - * @return std::string 8-bit ASCII version of a. - **/ -std::string toAscii( const XMLCh* a ) -{ - std::string b; - size_t len=XMLString::stringLen(a); - b.reserve(len); - for (size_t i = 0; i < len; ++i) - { - if(a[i] < 0x80) - b += (char) a[i]; - } - return b; -} - -void XeroHandler::startElement(const XMLCh* const UNUSED(uri), const XMLCh* const localname, const XMLCh* const UNUSED(qname), const Attributes& attrs) -{ - std::string elementName = toAscii(localname); - ElementNames.insert(elementName); - - // Create a new element - XMLElement* e = new XMLElement; - e->name = elementName; - e->linenum = m_locator->getLineNumber(); - - // Store all the attributes in the new element - for (unsigned int i = 0; i < attrs.getLength(); ++i) - { - std::string attrName = toAscii(attrs.getLocalName(i)); - AttributeNames.insert(attrName); - XMLAttribute* a = new XMLAttribute; - a->name = attrName; - const XMLCh *tmp = attrs.getValue(i); - a->value = utf16string(tmp, tmp+XMLString::stringLen(tmp)); - e->attrs.push_back(a); - } - - // Add the element to its parent - ElementStack.top()->childs.push_back(e); - - // Set as parent of following elements - ElementStack.push(e); -} - -void XeroHandler::endElement(const XMLCh* const UNUSED(uri), const XMLCh* const UNUSED(localname), const XMLCh* const UNUSED(qname)) -{ - ElementStack.pop(); -} - -void XeroHandler::characters(const XMLCh* const chars, const unsigned int UNUSED(length)) -{ - ElementStack.top()->text += utf16string(chars, chars+XMLString::stringLen(chars)); -} - - -void XeroHandler::CreateXMB() -{ - // Header - writeBuffer.Append(UnfinishedHeaderMagicStr, 4); - - std::set::iterator it; - int i; - - // Element names - i = 0; - int ElementCount = (int)ElementNames.size(); - writeBuffer.Append(&ElementCount, 4); - for (it = ElementNames.begin(); it != ElementNames.end(); ++it) - { - int TextLen = (int)it->length()+1; - writeBuffer.Append(&TextLen, 4); - writeBuffer.Append((void*)it->c_str(), TextLen); - ElementID[*it] = i++; - } - - // Attribute names - i = 0; - int AttributeCount = (int)AttributeNames.size(); - writeBuffer.Append(&AttributeCount, 4); - for (it = AttributeNames.begin(); it != AttributeNames.end(); ++it) - { - int TextLen = (int)it->length()+1; - writeBuffer.Append(&TextLen, 4); - writeBuffer.Append((void*)it->c_str(), TextLen); - AttributeID[*it] = i++; - } - - // All the XML contents must be surrounded by a single element - debug_assert(Root->childs.size() == 1); - - OutputElement(Root->childs[0]); - - delete Root; - Root = NULL; - - // file is now valid, so insert correct magic string - writeBuffer.Overwrite(HeaderMagicStr, 4, 0); -} - -// Writes a whole element (recursively if it has children) into the buffer, -// and also frees all the memory that has been allocated for that element. -void XeroHandler::OutputElement(XMLElement* el) +static void OutputElement(const xmlNodePtr node, WriteBuffer& writeBuffer, + std::map& elementIDs, + std::map& attributeIDs +) { // Filled in later with the length of the element - int Pos_Length = (int)writeBuffer.Size(); + size_t posLength = writeBuffer.Size(); writeBuffer.Append("????", 4); - int NameID = ElementID[el->name]; - writeBuffer.Append(&NameID, 4); + writeBuffer.Append(&elementIDs[(const char*)node->name], 4); - int AttrCount = (int)el->attrs.size(); - writeBuffer.Append(&AttrCount, 4); + u32 attrCount = 0; + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) + ++attrCount; + writeBuffer.Append(&attrCount, 4); - int ChildCount = (int)el->childs.size(); - writeBuffer.Append(&ChildCount, 4); + u32 childCount = 0; + for (xmlNodePtr child = node->children; child; child = child->next) + if (child->type == XML_ELEMENT_NODE) + ++childCount; + writeBuffer.Append(&childCount, 4); // Filled in later with the offset to the list of child elements - int Pos_ChildrenOffset = (int)writeBuffer.Size(); + size_t posChildrenOffset = writeBuffer.Size(); writeBuffer.Append("????", 4); @@ -431,31 +206,42 @@ void XeroHandler::OutputElement(XMLElement* el) // the number of newlines trimmed (so that JS error reporting // can give the correct line number) - std::string whitespaceA = " \t\r\n"; - utf16string whitespace (whitespaceA.begin(), whitespaceA.end()); + std::string whitespace = " \t\r\n"; + std::string text; + for (xmlNodePtr child = node->children; child; child = child->next) + { + if (child->type == XML_TEXT_NODE) + { + xmlChar* content = xmlNodeGetContent(child); + text += std::string((const char*)content); + xmlFree(content); + } + } + + u32 linenum = XML_GET_LINE(node); // Find the start of the non-whitespace section - size_t first = el->text.find_first_not_of(whitespace); + size_t first = text.find_first_not_of(whitespace); - if (first == el->text.npos) + if (first == text.npos) // Entirely whitespace - easy to handle - el->text = utf16string(); + text = ""; else { // Count the number of \n being cut off, // and add them to the line number - utf16string trimmed (el->text.begin(), el->text.begin()+first); - el->linenum += (int)std::count(trimmed.begin(), trimmed.end(), (utf16_t)'\n'); + std::string trimmed (text.begin(), text.begin()+first); + linenum += std::count(trimmed.begin(), trimmed.end(), '\n'); // Find the end of the non-whitespace section, // and trim off everything else - size_t last = el->text.find_last_not_of(whitespace); - el->text = el->text.substr(first, 1+last-first); + size_t last = text.find_last_not_of(whitespace); + text = text.substr(first, 1+last-first); } // Output text, prefixed by length in bytes - if (el->text.length() == 0) + if (text.length() == 0) { // No text; don't write much writeBuffer.Append("\0\0\0\0", 4); @@ -463,54 +249,84 @@ void XeroHandler::OutputElement(XMLElement* el) else { // Write length and line number and null-terminated text - int NodeLen = 4 + 2*((int)el->text.length()+1); - writeBuffer.Append(&NodeLen, 4); - writeBuffer.Append(&el->linenum, 4); - writeBuffer.Append((void*)el->text.c_str(), NodeLen-4); + utf16string textW = CStr8(text).FromUTF8().utf16(); + u32 nodeLen = 4 + 2*(textW.length()+1); + writeBuffer.Append(&nodeLen, 4); + writeBuffer.Append(&linenum, 4); + writeBuffer.Append((void*)textW.c_str(), nodeLen-4); } // Output attributes - - int i; - - for (i = 0; i < AttrCount; ++i) + for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) { - int AttrName = AttributeID[el->attrs[i]->name]; - writeBuffer.Append(&AttrName, 4); + writeBuffer.Append(&attributeIDs[(const char*)attr->name], 4); - int AttrLen = 2*((int)el->attrs[i]->value.length()+1); - writeBuffer.Append(&AttrLen, 4); - writeBuffer.Append((void*)el->attrs[i]->value.c_str(), AttrLen); - - // Free each attribute as soon as it's been dealt with - delete el->attrs[i]; + xmlChar* value = xmlNodeGetContent(attr->children); + utf16string textW = CStr8((const char*)value).FromUTF8().utf16(); + xmlFree(value); + u32 attrLen = 2*(textW.length()+1); + writeBuffer.Append(&attrLen, 4); + writeBuffer.Append((void*)textW.c_str(), attrLen); } // Go back and fill in the child-element offset - int ChildrenOffset = (int)writeBuffer.Size() - (Pos_ChildrenOffset+4); - writeBuffer.Overwrite(&ChildrenOffset, 4, Pos_ChildrenOffset); + u32 childrenOffset = (u32)(writeBuffer.Size() - (posChildrenOffset+4)); + writeBuffer.Overwrite(&childrenOffset, 4, posChildrenOffset); - // Output all child nodes - for (i = 0; i < ChildCount; ++i) - OutputElement(el->childs[i]); + // Output all child elements + for (xmlNodePtr child = node->children; child; child = child->next) + if (child->type == XML_ELEMENT_NODE) + OutputElement(child, writeBuffer, elementIDs, attributeIDs); // Go back and fill in the length - int Length = (int)writeBuffer.Size() - Pos_Length; - writeBuffer.Overwrite(&Length, 4, Pos_Length); - - // Tidy up the parser's mess - delete el; + u32 length = (u32)(writeBuffer.Size() - posLength); + writeBuffer.Overwrite(&length, 4, posLength); } -void XeroHandler::DeallocateElement(XMLElement* el) +PSRETURN CXeromyces::CreateXMB(const xmlDocPtr doc, WriteBuffer& writeBuffer) { - size_t i; + // Header + writeBuffer.Append(UnfinishedHeaderMagicStr, 4); - for (i = 0; i < el->attrs.size(); ++i) - delete el->attrs[i]; + std::set::iterator it; + u32 i; - for (i = 0; i < el->childs.size(); ++i) - DeallocateElement(el->childs[i]); + // Find the unique element/attribute names + std::set elementNames; + std::set attributeNames; + FindNames(xmlDocGetRootElement(doc), elementNames, attributeNames); - delete el; + std::map elementIDs; + std::map attributeIDs; + + // Output element names + i = 0; + u32 elementCount = (u32)elementNames.size(); + writeBuffer.Append(&elementCount, 4); + for (it = elementNames.begin(); it != elementNames.end(); ++it) + { + u32 textLen = (u32)it->length()+1; + writeBuffer.Append(&textLen, 4); + writeBuffer.Append((void*)it->c_str(), textLen); + elementIDs[*it] = i++; + } + + // Output attribute names + i = 0; + u32 attributeCount = (u32)attributeNames.size(); + writeBuffer.Append(&attributeCount, 4); + for (it = attributeNames.begin(); it != attributeNames.end(); ++it) + { + u32 textLen = (u32)it->length()+1; + writeBuffer.Append(&textLen, 4); + writeBuffer.Append((void*)it->c_str(), textLen); + attributeIDs[*it] = i++; + } + + OutputElement(xmlDocGetRootElement(doc), writeBuffer, elementIDs, attributeIDs); + + // file is now valid, so insert correct magic string + writeBuffer.Overwrite(HeaderMagicStr, 4, 0); + + return PSRETURN_OK; } diff --git a/source/ps/XML/Xeromyces.h b/source/ps/XML/Xeromyces.h index 3cac81d12b..c4d600b827 100644 --- a/source/ps/XML/Xeromyces.h +++ b/source/ps/XML/Xeromyces.h @@ -13,22 +13,28 @@ ERROR_TYPE(Xeromyces, XMLOpenFailed); ERROR_TYPE(Xeromyces, XMLParseError); #include "XeroXMB.h" -#include "ps/Filesystem.h" -#include "XML.h" // XXX remove this +#include "lib/file/vfs/vfs.h" + +class WriteBuffer; + + +typedef struct _xmlDoc xmlDoc; +typedef xmlDoc* xmlDocPtr; class CXeromyces : public XMBFile { friend class TestXeromyces; friend class TestXeroXMB; public: - CXeromyces(); - ~CXeromyces(); - // Load from an XML file (with invisible XMB caching). PSRETURN Load(const VfsPath& filename); - // Call once when shutting down the program, to unload Xerces. + // Call once when initialising the program, to load libxml2. + // This should be run in the main thread, before any thread + // uses libxml2. + static void Startup(); + // Call once when shutting down the program, to unload libxml2. static void Terminate(); private: @@ -38,11 +44,9 @@ private: bool ReadXMBFile(const VfsPath& filename); - static PSRETURN ConvertXMLtoXMB(const char* filename, InputSource& source, WriteBuffer& writeBuffer); // XXX remove filename + static PSRETURN CreateXMB(const xmlDocPtr doc, WriteBuffer& writeBuffer); - shared_ptr XMBBuffer; - - static int XercesLoaded; // for once-only initialisation + shared_ptr m_XMBBuffer; }; diff --git a/source/ps/XML/tests/test_XMLWriter.h b/source/ps/XML/tests/test_XMLWriter.h index ef48c62662..f210f41cf8 100644 --- a/source/ps/XML/tests/test_XMLWriter.h +++ b/source/ps/XML/tests/test_XMLWriter.h @@ -1,6 +1,5 @@ #include "lib/self_test.h" -#include "ps/XML/XML.h" #include "ps/XML/XMLWriter.h" class TestXmlWriter : public CxxTest::TestSuite diff --git a/source/ps/XML/tests/test_XeroXMB.h b/source/ps/XML/tests/test_XeroXMB.h index 34fb0e48ba..0346a1cca7 100644 --- a/source/ps/XML/tests/test_XeroXMB.h +++ b/source/ps/XML/tests/test_XeroXMB.h @@ -2,7 +2,10 @@ #include "ps/XML/Xeromyces.h" +#include "lib/file/io/write_buffer.h" + #include +#include XERCES_CPP_NAMESPACE_USE @@ -13,10 +16,12 @@ private: XMBFile parse(const char* doc) { - XMLPlatformUtils::Initialize(); - MemBufInputSource source((const XMLByte*)doc, strlen(doc), "null"); + xmlDocPtr xmlDoc = xmlReadMemory(doc, strlen(doc), "", NULL, + XML_PARSE_NONET|XML_PARSE_NOCDATA); WriteBuffer buffer; - PSRETURN ret = CXeromyces::ConvertXMLtoXMB("/dev/null", source, buffer); + PSRETURN ret = CXeromyces::CreateXMB(xmlDoc, buffer); + xmlFreeDoc(xmlDoc); + TS_ASSERT_EQUALS(ret, PSRETURN_OK); XMBFile xmb; @@ -73,4 +78,39 @@ public: TS_ASSERT_EQUALS(CStr(xmb.GetRoot().GetText()), "x <>&\"'foobar\n\nbazqux"); } + void test_unicode() + { + XMBFile xmb (parse("ሴ\xE1\x88\xB4")); + CStrW text; + + text = xmb.GetRoot().GetText(); + TS_ASSERT_EQUALS(text.length(), 2); + TS_ASSERT_EQUALS(text[0], 0x1234); + TS_ASSERT_EQUALS(text[1], 0x1234); + + text = xmb.GetRoot().GetAttributes().Item(0).Value; + TS_ASSERT_EQUALS(text.length(), 2); + TS_ASSERT_EQUALS(text[0], 0x1234); + TS_ASSERT_EQUALS(text[1], 0x1234); + } + + void test_iso88591() + { + XMBFile xmb (parse("ሴ\xE1\x88\xB4")); + CStrW text; + + text = xmb.GetRoot().GetText(); + TS_ASSERT_EQUALS(text.length(), 4); + TS_ASSERT_EQUALS(text[0], 0x1234); + TS_ASSERT_EQUALS(text[1], 0x00E1); + TS_ASSERT_EQUALS(text[2], 0x0088); + TS_ASSERT_EQUALS(text[3], 0x00B4); + + text = xmb.GetRoot().GetAttributes().Item(0).Value; + TS_ASSERT_EQUALS(text.length(), 4); + TS_ASSERT_EQUALS(text[0], 0x1234); + TS_ASSERT_EQUALS(text[1], 0x00E1); + TS_ASSERT_EQUALS(text[2], 0x0088); + TS_ASSERT_EQUALS(text[3], 0x00B4); + } }; diff --git a/source/ps/XML/tests/test_Xeromyces.h b/source/ps/XML/tests/test_Xeromyces.h index c6fd8678ae..c9899d72c4 100644 --- a/source/ps/XML/tests/test_Xeromyces.h +++ b/source/ps/XML/tests/test_Xeromyces.h @@ -24,4 +24,7 @@ public: path_ResetRootDir(); } + + // TODO: Should test the reading/parsing/writing code, + // and parse error handling };