1
0
forked from 0ad/0ad

# Replace Xerces with Libxml2

(in Xeromyces, whose name now doesn't make much sense but oh well)

This was SVN commit r6770.
This commit is contained in:
Ykkrosh 2009-03-24 00:55:35 +00:00
parent 079d9032ac
commit 07a4f809d8
12 changed files with 211 additions and 765 deletions

View File

@ -333,7 +333,6 @@ function setup_all_libs ()
}
extern_libs = {
"spidermonkey",
"xerces",
"enet",
"boost", -- dragged in via server->simulation.h->random
}
@ -356,7 +355,7 @@ function setup_all_libs ()
extern_libs = {
"spidermonkey",
"sdl", -- key definitions
"xerces",
"libxml2",
"opengl",
"zlib",
"boost",
@ -495,7 +494,6 @@ used_extern_libs = {
"zlib",
"spidermonkey",
"xerces",
"libxml2",
"openal",
@ -565,7 +563,9 @@ function setup_main_exe ()
-- Utilities
"pthread", "rt",
-- Debugging
"bfd", "iberty"
"bfd", "iberty",
-- Dynamic libraries (needed for linking for gold)
"dl",
})
-- For debug_resolve_symbol
@ -947,7 +947,9 @@ function setup_tests()
-- Utilities
"pthread", "rt",
-- Debugging
"bfd", "iberty"
"bfd", "iberty",
-- Dynamic libraries (needed for linking for gold)
"dl",
})
-- For debug_resolve_symbol

View File

@ -362,6 +362,11 @@ static void RunGameOrAtlas(int argc, const char* argv[])
{
CmdLineArgs args(argc, argv);
// We need to initialise libxml2 in the main thread before
// any thread uses it. So initialise it here before we
// might run Atlas.
CXeromyces::Startup();
// run Atlas (if requested via args)
bool ran_atlas = ATLAS_RunIfOnCmdLine(args);
// Atlas handles the whole init/shutdown/etc sequence by itself;
@ -380,6 +385,9 @@ static void RunGameOrAtlas(int argc, const char* argv[])
Shutdown(0);
ScriptingHost::FinalShutdown(); // this can't go in Shutdown() because that could be called multiple times per process, so stick it here instead
MainControllerShutdown();
// Shut down libxml2 (done here to match the Startup call)
CXeromyces::Terminate();
}
int main(int argc, char* argv[])

View File

@ -655,9 +655,6 @@ static void ShutdownPs()
// disable the special Windows cursor, or free textures for OGL cursors
cursor_draw(0, g_mouse_x, g_mouse_y);
// close down Xerces if it was loaded
CXeromyces::Terminate();
// Unload the real language (since it depends on the scripting engine,
// which is going to be killed later) and use the English fallback messages
I18n::LoadLanguage(NULL);

View File

@ -1,105 +0,0 @@
/*
XML.h - Xerces wrappers & convenience functions
EXAMPLE :
Simple usage:
CVFSEntityResolver *entRes=new CVFSEntityResolver(filename);
parser->setEntityResolver(entRes);
CVFSInputSource src;
if (src.OpenFile("this/is/a/vfs/path.xml")==0)
parser->parse(src);
delete entRes;
The input source object should be kept alive as long as the parser is
using its input stream (i.e. until the parse is complete). The same
goes for the entity resolver.
*/
#ifndef INCLUDED_XML
#define INCLUDED_XML
// temporarily go down to W3 because Xerces (in addition to all its other
// failings) isn't W4-clean.
#if MSC_VERSION
#pragma warning(push, 3)
#pragma warning(disable: 4267) // disable Wp64 warnings
#endif
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/sax/InputSource.hpp>
#include <xercesc/sax/EntityResolver.hpp>
#include <xercesc/sax/Locator.hpp>
#include <xercesc/util/BinMemInputStream.hpp>
#include <xercesc/sax/SAXParseException.hpp>
#include <xercesc/sax/ErrorHandler.hpp>
// for Xeromyces.cpp (moved here so we only have to #undef new and
// revert to W3 once)
// The converter uses SAX2, so it should [theoretically]
// be fairly easy to swap Xerces for something else (if desired)
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>
#include <xercesc/sax2/Attributes.hpp>
#if MSC_VERSION
#pragma warning(pop) // back to W4
#endif
#include "XercesErrorHandler.h"
#include "ps/CStr.h"
#include "lib/file/vfs/vfs_path.h"
XERCES_CPP_NAMESPACE_USE
CStr XMLTranscode(const XMLCh *);
XMLCh *XMLTranscode(const char *);
/*
CLASS : CVFSInputSource
DESCRIPTION :
Use instead of LocalFileInputSource to read XML files from VFS
*/
// Xerces input source backed by a file loaded from the VFS.
// Use this in place of LocalFileInputSource when parsing VFS files.
class CVFSInputSource: public InputSource
{
public:
	// Starts out empty; call OpenFile before handing it to a parser.
	CVFSInputSource()
		: m_BufferSize(0)
	{
	}

	virtual ~CVFSInputSource();

	// Load the given VFS path so that it can be parsed.
	// Returns 0 on success and -1 on failure.
	int OpenFile(const VfsPath& path);

	// Create a stream over the loaded file contents.
	virtual BinInputStream *makeStream() const;

private:
	// Not copyable: declared but intentionally never defined.
	CVFSInputSource(const CVFSInputSource &);
	CVFSInputSource &operator = (const CVFSInputSource &);

	// Contents of the loaded file and their size in bytes.
	shared_ptr<u8> m_pBuffer;
	size_t m_BufferSize;
};
// Entity resolver that loads external entities from the VFS, interpreting
// relative system IDs relative to the document being parsed.
class CVFSEntityResolver: public EntityResolver
{
public:
	// docName is the path of the document being parsed. Only the pointer is
	// stored (the string is not copied), so it must stay valid for as long
	// as this resolver is in use.
	CVFSEntityResolver(const char *docName)
		: m_DocName(docName)
	{
	}

	virtual InputSource *resolveEntity(
		const XMLCh *const publicId,
		const XMLCh *const systemId);

private:
	// Path of the referencing document (not owned).
	const char *m_DocName;
};
#endif // INCLUDED_XML

View File

@ -1,158 +0,0 @@
#include "precompiled.h"
#include "XML.h"
#include "ps/Filesystem.h"
#include "ps/CStr.h"
#include "ps/CLogger.h"
#define LOG_CATEGORY "xml"
/*
// but static Xerces => tons of warnings due to missing debug info,
// and warnings about invalid pointers (conflicting CRT heaps?) in parser => allow for now
#ifndef XERCES_STATIC_LIB
#error "need to define XERCES_STATIC_LIB in project options (so that Xerces uses the same CRT as the other libs)"
#endif
*/
// On MSVC, pick the Xerces import library to link against based on whether
// a static build was requested (XERCES_STATIC_LIB) and whether this is a
// debug build (NDEBUG not defined).
#if MSC_VERSION
# ifdef XERCES_STATIC_LIB
# ifndef NDEBUG
# pragma comment(lib, "xerces-c_2D-static.lib")
# else
// NOTE(review): this branch names the same library as the debug branch above,
// whereas the non-static case below switches from "2D" to "2" for release
// builds. Presumably a non-debug static library was intended here - confirm.
# pragma comment(lib, "xerces-c_2D-static.lib")
# endif // NDEBUG
# else // XERCES_STATIC_LIB
# ifndef NDEBUG
# pragma comment(lib, "xerces-c_2D.lib")
# else
# pragma comment(lib, "xerces-c_2.lib")
# endif // NDEBUG
# endif // XERCES_STATIC_LIB
#endif // MSC_VERSION
XERCES_CPP_NAMESPACE_USE
// Convert a Xerces string into a CStr.
// The temporary buffer produced by Xerces is released before returning.
CStr XMLTranscode(const XMLCh* xmltext)
{
	char* narrow = XMLString::transcode(xmltext);
	CStr converted(narrow);
	XMLString::release(&narrow);
	return converted;
}

// Convert a C string into a Xerces string.
// The result comes straight from XMLString::transcode; other code in this
// file frees such buffers with XMLString::release.
XMLCh *XMLTranscode(const char *str)
{
	XMLCh* wide = XMLString::transcode(str);
	return wide;
}
int CVFSInputSource::OpenFile(const VfsPath& path)
{
LibError ret = g_VFS->LoadFile(path, m_pBuffer, m_BufferSize);
if(ret != INFO::OK)
{
LOG(CLogger::Error, LOG_CATEGORY, "CVFSInputSource: file %s couldn't be loaded (LoadFile: %d)", path.string().c_str(), ret);
return -1;
}
XMLCh *sysId=XMLString::transcode(path.string().c_str());
setSystemId(sysId);
XMLString::release(&sysId);
return 0;
}
CVFSInputSource::~CVFSInputSource()
{
}
// Create a stream over the loaded buffer, or return 0 if nothing has been
// loaded. The stream is created with BufOpt_Reference, i.e. it refers to
// m_pBuffer rather than owning a copy.
BinInputStream *CVFSInputSource::makeStream() const
{
	if (m_pBuffer)
	{
		XMLByte* const bytes = (XMLByte *)m_pBuffer.get();
		const unsigned int byteCount = (unsigned int)m_BufferSize;
		return new BinMemInputStream(bytes, byteCount, BinMemInputStream::BufOpt_Reference);
	}
	return 0;
}
#define IS_PATH_SEP(_chr) (_chr == '/' || _chr == '\\')

// Walk backwards from 'end' to the nearest preceding path separator and
// return a pointer to it. The search stops at 'beginning', which is returned
// if no separator is found before it.
// Note that this always steps back at least one character, so 'end' must
// point past 'beginning'.
const char *prevpathcomp(const char *end, const char *beginning)
{
	for (--end; end > beginning; --end)
	{
		if (IS_PATH_SEP(*end))
			break;
	}
	return end;
}
// Resolve an external entity referenced by the document m_DocName by loading
// it from the VFS.
//
// systemId is interpreted as follows:
//  - if it starts with a path separator, the separator is dropped and the
//    remainder is used as the VFS path;
//  - otherwise it is taken relative to the directory of m_DocName, with each
//    leading "../" removing one component from that directory.
// Backslashes in the resulting path are converted to '/' (with a warning).
//
// Returns a newly allocated input source for the entity, or NULL if the path
// cannot be built or the file cannot be opened.
InputSource *CVFSEntityResolver::resolveEntity(const XMLCh *const UNUSED(publicId),
	const XMLCh *const systemId)
{
	char *path=XMLString::transcode(systemId);
	// Remember the start of the transcoded buffer: 'path' is advanced or
	// redirected below, but the original pointer is what must be released.
	char *orgpath=path;
	char abspath[PATH_MAX];

	if (IS_PATH_SEP(*path))
		path++;
	else
	{
		// We know that we have a relative path here:
		// - Remove the file name
		// - If we have a ../ components - remove them and remove one component
		//   off the end of the document path for each ../ component
		// - prefix of document path + suffix of input path => the VFS path

		const char *end=strchr(m_DocName, '\0');

		// Remove the file name. (prevpathcomp always steps back at least one
		// character, so it must not be called on an empty document name.)
		if (end > m_DocName)
			end=prevpathcomp(end, m_DocName);

		// Remove one path component for each opening ../ (or ..\)
		// Note that this loop will stop when all path components from the
		// document name have been stripped - the resulting path will be invalid, but
		// so was the input path.
		// Also note that this will not handle ../ path components in the middle of
		// the input path.
		while (strncmp(path, "..", 2) == 0 && IS_PATH_SEP(path[2]) && end > m_DocName)
		{
			end=prevpathcomp(end, m_DocName);
			path += 3;
		}

		// Include one slash from the prefix - but only if 'end' really is on a
		// separator. If the document name has no directory part, 'end' is at
		// its first character and the prefix must be empty rather than that
		// character.
		if (IS_PATH_SEP(*end))
			end++;

		const ptrdiff_t prefixlen=end-m_DocName;

		// The prefix is copied into a fixed-size buffer, so refuse document
		// paths that would not fit instead of writing past the end of abspath.
		if (prefixlen >= (ptrdiff_t)PATH_MAX)
		{
			LOG(CLogger::Error, LOG_CATEGORY, "While resolving XML entities for %s: document path is too long to resolve %s", m_DocName, orgpath);
			XMLString::release(&orgpath);
			return NULL;
		}

		cpu_memcpy(abspath, m_DocName, prefixlen);
		strncpy(abspath+prefixlen, path, PATH_MAX-prefixlen);
		// strncpy might not have terminated, if path was too long
		abspath[PATH_MAX-1]=0;

		path=abspath;
	}

	// janwas: removed for less spew
	// LOG(CLogger::Normal, LOG_CATEGORY, "EntityResolver: path \"%s\" translated to \"%s\"", orgpath, path);

	// Normalise any backslashes to forward slashes, warning once per entity.
	char *pos=path;
	if ((pos=strchr(pos, '\\')) != NULL)
	{
		LOG(CLogger::Warning, LOG_CATEGORY, "While resolving XML entities for %s: path %s [%s] contains non-portable path separator \\", m_DocName, orgpath, path);
		do
			*pos='/';
		while ((pos=strchr(pos+1, '\\')) != NULL);
	}

	CVFSInputSource *ret=new CVFSInputSource();
	if (ret->OpenFile(path)!=0)
	{
		delete ret;
		ret=NULL;
	}

	XMLString::release(&orgpath);

	return ret;
}

View File

@ -1,64 +0,0 @@
/*
Xerces Error Handler for Pyrogenesis (and the GUI)
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include "precompiled.h"
#include "XercesErrorHandler.h"
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include "ps/Pyrogenesis.h"
#include "ps/CLogger.h"
#define LOG_CATEGORY "xml"
// Use namespace
XERCES_CPP_NAMESPACE_USE
// Log a parser warning as "<systemId>:<line>:<column>: <message>".
// Warnings do not mark the parse as having failed.
void CXercesErrorHandler::warning(const SAXParseException &toCatch)
{
	CStr systemId=XMLTranscode(toCatch.getSystemId());
	CStr message=XMLTranscode(toCatch.getMessage());

	// The line/column values are passed through varargs to "%d", so convert
	// them explicitly: the Xerces return type is not guaranteed to be int
	// (XMLSSize_t in Xerces-C 2.x), and a wider type would be misread.
	LOG(CLogger::Warning, LOG_CATEGORY, "XML Parse Warning: %s:%d:%d: %s",
		systemId.c_str(),
		static_cast<int>(toCatch.getLineNumber()),
		static_cast<int>(toCatch.getColumnNumber()),
		message.c_str());
}
// Log a parser error as "<systemId>:<line>:<column>: <message>" and record
// that an error was seen (queryable via GetSawErrors).
void CXercesErrorHandler::error(const SAXParseException& toCatch)
{
	CStr systemId=XMLTranscode(toCatch.getSystemId());
	CStr message=XMLTranscode(toCatch.getMessage());
	fSawErrors = true;

	// The line/column values are passed through varargs to "%d", so convert
	// them explicitly: the Xerces return type is not guaranteed to be int
	// (XMLSSize_t in Xerces-C 2.x), and a wider type would be misread.
	LOG(CLogger::Error, LOG_CATEGORY, "XML Parse Error: %s:%d:%d: %s",
		systemId.c_str(),
		static_cast<int>(toCatch.getLineNumber()),
		static_cast<int>(toCatch.getColumnNumber()),
		message.c_str());
}
// Log a fatal parser error as "<systemId>:<line>:<column>: <message>" and
// record that an error was seen (queryable via GetSawErrors).
void CXercesErrorHandler::fatalError(const SAXParseException& toCatch)
{
	CStr systemId=XMLTranscode(toCatch.getSystemId());
	CStr message=XMLTranscode(toCatch.getMessage());
	fSawErrors = true;

	// The line/column values are passed through varargs to "%d", so convert
	// them explicitly: the Xerces return type is not guaranteed to be int
	// (XMLSSize_t in Xerces-C 2.x), and a wider type would be misread.
	LOG(CLogger::Error, LOG_CATEGORY, "XML Parse Error (Fatal): %s:%d:%d: %s",
		systemId.c_str(),
		static_cast<int>(toCatch.getLineNumber()),
		static_cast<int>(toCatch.getColumnNumber()),
		message.c_str());
}
// Clear the "saw errors" flag, so that GetSawErrors() reports false until
// error() or fatalError() is called again.
void CXercesErrorHandler::resetErrors()
{
fSawErrors = false;
}

View File

@ -1,96 +0,0 @@
/*
Xerces Error Handler for Pyrogenesis (and the GUI)
--Overview--
This is a class that will let us output
Xerces C++ Parser errors in our own Log
or whatever, fit to Pyrogenesis and foremost
the GUI.
--More info--
http://xml.apache.org/xerces-c/apiDocs/classErrorHandler.html
*/
#ifndef INCLUDED_XERCESERRORHANDLER
#define INCLUDED_XERCESERRORHANDLER
#include "XML.h"
#include <iostream>
/**
* Adapter function that catches Xerces Reading Exceptions
* and lets us output them in Pyrogenesis CLogFile.
*
* Used for all Xerces C++ Parser reading.
*
* @see http://xml.apache.org/xerces-c/apiDocs/classErrorHandler.html
*/
class CXercesErrorHandler : public XERCES_CPP_NAMESPACE::ErrorHandler
{
public:
	/// Starts out with no errors recorded.
	CXercesErrorHandler() : fSawErrors(false) {}

	~CXercesErrorHandler() {}

	// -----------------------------------------------------------------------
	/** @name Implementation of the error handler interface */
	// -----------------------------------------------------------------------
	//@{

	/** Receives warning exceptions from the parser. */
	void warning(const XERCES_CPP_NAMESPACE::SAXParseException& toCatch);

	/** Receives error exceptions from the parser. */
	void error(const XERCES_CPP_NAMESPACE::SAXParseException& toCatch);

	/** Receives fatal error exceptions from the parser. */
	void fatalError(const XERCES_CPP_NAMESPACE::SAXParseException& toCatch);

	/** Sets fSawErrors back to false. */
	void resetErrors();

	//@}

	// -----------------------------------------------------------------------
	/** @name Access Functions */
	// -----------------------------------------------------------------------
	//@{

	/** @return true if any errors occurred */
	bool GetSawErrors() const { return fSawErrors; }

	//@}

private:
	// -----------------------------------------------------------------------
	/** @name Private data members */
	// -----------------------------------------------------------------------
	//@{

	/**
	 * Set once any error has been reported, and readable through an access
	 * function. It's used by the main code to suppress output if there are
	 * errors.
	 *
	 * @see GetSawErrors()
	 */
	bool fSawErrors;

	//@}
};
#endif

View File

@ -10,90 +10,23 @@
#include "ps/Filesystem.h"
#include "Xeromyces.h"
#include <libxml/parser.h>
#define LOG_CATEGORY "xml"
#include "XML.h"
int CXeromyces::XercesLoaded = 0; // for once-only initialisation
// Convenient storage for the internal tree
typedef struct {
std::string name;
utf16string value;
} XMLAttribute;
typedef struct XMLElement {
std::string name;
int linenum;
utf16string text;
std::vector<XMLElement*> childs;
std::vector<XMLAttribute*> attrs;
} XMLElement;
class XeroHandler : public DefaultHandler
{
public:
XeroHandler() : m_locator(NULL), Root(NULL) {}
~XeroHandler()
{
if (Root)
DeallocateElement(Root);
}
// SAX2 event handlers:
virtual void startDocument();
virtual void endDocument();
virtual void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
virtual void endElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname);
virtual void characters(const XMLCh* const chars, const unsigned int length);
const Locator* m_locator;
virtual void setDocumentLocator(const Locator* const locator)
{
m_locator = locator;
}
// Non-SAX2 stuff, used for storing the
// parsed data and constructing the XMB:
void CreateXMB();
WriteBuffer writeBuffer;
private:
std::set<std::string> ElementNames;
std::set<std::string> AttributeNames;
XMLElement* Root;
XMLElement* CurrentElement;
std::stack<XMLElement*> ElementStack;
std::map<std::string, int> ElementID;
std::map<std::string, int> AttributeID;
void OutputElement(XMLElement* el);
// Recursively frees memory
void DeallocateElement(XMLElement* el);
};
CXeromyces::CXeromyces()
{
}
CXeromyces::~CXeromyces()
static bool g_XeromycesStarted = false;
// Initialise libxml2 and mark Xeromyces as started.
// Must not be called while already started (asserted below).
void CXeromyces::Startup()
{
// Guard against double initialisation
debug_assert(!g_XeromycesStarted);
xmlInitParser();
// Record the started state only after the parser has been initialised
g_XeromycesStarted = true;
}
void CXeromyces::Terminate()
{
if (XercesLoaded)
{
XMLPlatformUtils::Terminate();
XercesLoaded = 0;
}
debug_assert(g_XeromycesStarted);
xmlCleanupParser();
g_XeromycesStarted = false;
}
@ -128,6 +61,8 @@ void CXeromyces::GetXMBPath(const PIVFS& vfs, const VfsPath& xmlFilename, const
PSRETURN CXeromyces::Load(const VfsPath& filename)
{
debug_assert(g_XeromycesStarted);
// Make sure the .xml actually exists
if (! FileExists(filename))
{
@ -184,93 +119,43 @@ PSRETURN CXeromyces::Load(const VfsPath& filename)
// XMB isn't up to date with the XML, so rebuild it:
// Load Xerces if necessary
if (! XercesLoaded)
{
XMLPlatformUtils::Initialize();
XercesLoaded = 1;
}
// Open the .xml file
CVFSInputSource source;
if (source.OpenFile(filename) < 0)
CVFSFile input;
if (input.Load(filename))
{
LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to open XML file %s", filename.string().c_str());
return PSRETURN_Xeromyces_XMLOpenFailed;
}
xmlDocPtr doc = xmlReadMemory((const char*)input.GetBuffer(), input.GetBufferSize(), "", NULL,
XML_PARSE_NONET|XML_PARSE_NOCDATA);
// TODO: handle parse errors
WriteBuffer writeBuffer;
PSRETURN ret = ConvertXMLtoXMB(filename.string().c_str(), source, writeBuffer);
if (ret)
{
if (ret == PSRETURN_Xeromyces_XMLParseError)
LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Errors in XML file '%s'", filename.string().c_str());
return ret;
}
CreateXMB(doc, writeBuffer);
xmlFreeDoc(doc);
// Save the file to disk, so it can be loaded quickly next time
g_VFS->CreateFile(xmbPath, writeBuffer.Data(), writeBuffer.Size());
XMBBuffer = writeBuffer.Data(); // add a reference
m_XMBBuffer = writeBuffer.Data(); // add a reference
// Set up the XMBFile
const bool ok = Initialise((const char*)XMBBuffer.get());
const bool ok = Initialise((const char*)m_XMBBuffer.get());
debug_assert(ok);
return PSRETURN_OK;
}
// Reads from source, returns output in writeBuffer
PSRETURN CXeromyces::ConvertXMLtoXMB(const char* filename, InputSource& source, WriteBuffer& writeBuffer)
{
// Set up the Xerces parser
SAX2XMLReader* Parser = XMLReaderFactory::createXMLReader();
// Disable DTDs
Parser->setFeature(XMLUni::fgXercesLoadExternalDTD, false);
XeroHandler handler;
Parser->setContentHandler(&handler);
CXercesErrorHandler errorHandler;
Parser->setErrorHandler(&errorHandler);
CVFSEntityResolver entityResolver(filename);
Parser->setEntityResolver(&entityResolver);
// Build a tree inside handler
Parser->parse(source);
// (It's horribly inefficient doing SAX2->tree then tree->XMB,
// but the XML->XMB conversion should be done very rarely
// anyway. If it's ever needed, the XMB writing can be done
// directly from inside the SAX2 event handlers, although that's
// a little more complex)
delete Parser;
if (errorHandler.GetSawErrors())
return PSRETURN_Xeromyces_XMLParseError;
// The internal tree of the XeroHandler will be cleaned up automatically
// Convert the data structures into the XMB format
handler.CreateXMB();
// Copy the (refcounted) buffer into the output parameter
writeBuffer = handler.writeBuffer;
return PSRETURN_OK;
}
bool CXeromyces::ReadXMBFile(const VfsPath& filename)
{
size_t size;
if(g_VFS->LoadFile(filename, XMBBuffer, size) < 0)
if(g_VFS->LoadFile(filename, m_XMBBuffer, size) < 0)
return false;
debug_assert(size >= 42); // else: invalid XMB file size. (42 bytes is the smallest possible XMB. (Well, maybe not quite, but it's a nice number.))
debug_assert(size >= 4); // make sure it's at least got the initial header
// Set up the XMBFile
if(!Initialise((const char*)XMBBuffer.get()))
if(!Initialise((const char*)m_XMBBuffer.get()))
return false;
return true;
@ -278,152 +163,42 @@ bool CXeromyces::ReadXMBFile(const VfsPath& filename)
void XeroHandler::startDocument()
static void FindNames(const xmlNodePtr node, std::set<std::string>& elementNames, std::set<std::string>& attributeNames)
{
Root = new XMLElement;
ElementStack.push(Root);
elementNames.insert((const char*)node->name);
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next)
attributeNames.insert((const char*)attr->name);
for (xmlNodePtr child = node->children; child; child = child->next)
if (child->type == XML_ELEMENT_NODE)
FindNames(child, elementNames, attributeNames);
}
void XeroHandler::endDocument()
{
}
/*
// Silently clobbers non-ASCII characters
std::string lowercase_ascii(const XMLCh *a)
{
std::string b;
size_t len=XMLString::stringLen(a);
b.resize(len);
for (size_t i = 0; i < len; ++i)
b[i] = (char)towlower(a[i]);
return b;
}
*/
/**
* Return an ASCII version of the given 16-bit string, ignoring
* any non-ASCII characters.
*
* @param const XMLCh * a Input string.
* @return std::string 8-bit ASCII version of <code>a</code>.
**/
std::string toAscii( const XMLCh* a )
{
std::string b;
size_t len=XMLString::stringLen(a);
b.reserve(len);
for (size_t i = 0; i < len; ++i)
{
if(a[i] < 0x80)
b += (char) a[i];
}
return b;
}
void XeroHandler::startElement(const XMLCh* const UNUSED(uri), const XMLCh* const localname, const XMLCh* const UNUSED(qname), const Attributes& attrs)
{
std::string elementName = toAscii(localname);
ElementNames.insert(elementName);
// Create a new element
XMLElement* e = new XMLElement;
e->name = elementName;
e->linenum = m_locator->getLineNumber();
// Store all the attributes in the new element
for (unsigned int i = 0; i < attrs.getLength(); ++i)
{
std::string attrName = toAscii(attrs.getLocalName(i));
AttributeNames.insert(attrName);
XMLAttribute* a = new XMLAttribute;
a->name = attrName;
const XMLCh *tmp = attrs.getValue(i);
a->value = utf16string(tmp, tmp+XMLString::stringLen(tmp));
e->attrs.push_back(a);
}
// Add the element to its parent
ElementStack.top()->childs.push_back(e);
// Set as parent of following elements
ElementStack.push(e);
}
void XeroHandler::endElement(const XMLCh* const UNUSED(uri), const XMLCh* const UNUSED(localname), const XMLCh* const UNUSED(qname))
{
ElementStack.pop();
}
void XeroHandler::characters(const XMLCh* const chars, const unsigned int UNUSED(length))
{
ElementStack.top()->text += utf16string(chars, chars+XMLString::stringLen(chars));
}
void XeroHandler::CreateXMB()
{
// Header
writeBuffer.Append(UnfinishedHeaderMagicStr, 4);
std::set<std::string>::iterator it;
int i;
// Element names
i = 0;
int ElementCount = (int)ElementNames.size();
writeBuffer.Append(&ElementCount, 4);
for (it = ElementNames.begin(); it != ElementNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
writeBuffer.Append(&TextLen, 4);
writeBuffer.Append((void*)it->c_str(), TextLen);
ElementID[*it] = i++;
}
// Attribute names
i = 0;
int AttributeCount = (int)AttributeNames.size();
writeBuffer.Append(&AttributeCount, 4);
for (it = AttributeNames.begin(); it != AttributeNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
writeBuffer.Append(&TextLen, 4);
writeBuffer.Append((void*)it->c_str(), TextLen);
AttributeID[*it] = i++;
}
// All the XML contents must be surrounded by a single element
debug_assert(Root->childs.size() == 1);
OutputElement(Root->childs[0]);
delete Root;
Root = NULL;
// file is now valid, so insert correct magic string
writeBuffer.Overwrite(HeaderMagicStr, 4, 0);
}
// Writes a whole element (recursively if it has children) into the buffer,
// and also frees all the memory that has been allocated for that element.
void XeroHandler::OutputElement(XMLElement* el)
static void OutputElement(const xmlNodePtr node, WriteBuffer& writeBuffer,
std::map<std::string, u32>& elementIDs,
std::map<std::string, u32>& attributeIDs
)
{
// Filled in later with the length of the element
int Pos_Length = (int)writeBuffer.Size();
size_t posLength = writeBuffer.Size();
writeBuffer.Append("????", 4);
int NameID = ElementID[el->name];
writeBuffer.Append(&NameID, 4);
writeBuffer.Append(&elementIDs[(const char*)node->name], 4);
int AttrCount = (int)el->attrs.size();
writeBuffer.Append(&AttrCount, 4);
u32 attrCount = 0;
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next)
++attrCount;
writeBuffer.Append(&attrCount, 4);
int ChildCount = (int)el->childs.size();
writeBuffer.Append(&ChildCount, 4);
u32 childCount = 0;
for (xmlNodePtr child = node->children; child; child = child->next)
if (child->type == XML_ELEMENT_NODE)
++childCount;
writeBuffer.Append(&childCount, 4);
// Filled in later with the offset to the list of child elements
int Pos_ChildrenOffset = (int)writeBuffer.Size();
size_t posChildrenOffset = writeBuffer.Size();
writeBuffer.Append("????", 4);
@ -431,31 +206,42 @@ void XeroHandler::OutputElement(XMLElement* el)
// the number of newlines trimmed (so that JS error reporting
// can give the correct line number)
std::string whitespaceA = " \t\r\n";
utf16string whitespace (whitespaceA.begin(), whitespaceA.end());
std::string whitespace = " \t\r\n";
std::string text;
for (xmlNodePtr child = node->children; child; child = child->next)
{
if (child->type == XML_TEXT_NODE)
{
xmlChar* content = xmlNodeGetContent(child);
text += std::string((const char*)content);
xmlFree(content);
}
}
u32 linenum = XML_GET_LINE(node);
// Find the start of the non-whitespace section
size_t first = el->text.find_first_not_of(whitespace);
size_t first = text.find_first_not_of(whitespace);
if (first == el->text.npos)
if (first == text.npos)
// Entirely whitespace - easy to handle
el->text = utf16string();
text = "";
else
{
// Count the number of \n being cut off,
// and add them to the line number
utf16string trimmed (el->text.begin(), el->text.begin()+first);
el->linenum += (int)std::count(trimmed.begin(), trimmed.end(), (utf16_t)'\n');
std::string trimmed (text.begin(), text.begin()+first);
linenum += std::count(trimmed.begin(), trimmed.end(), '\n');
// Find the end of the non-whitespace section,
// and trim off everything else
size_t last = el->text.find_last_not_of(whitespace);
el->text = el->text.substr(first, 1+last-first);
size_t last = text.find_last_not_of(whitespace);
text = text.substr(first, 1+last-first);
}
// Output text, prefixed by length in bytes
if (el->text.length() == 0)
if (text.length() == 0)
{
// No text; don't write much
writeBuffer.Append("\0\0\0\0", 4);
@ -463,54 +249,84 @@ void XeroHandler::OutputElement(XMLElement* el)
else
{
// Write length and line number and null-terminated text
int NodeLen = 4 + 2*((int)el->text.length()+1);
writeBuffer.Append(&NodeLen, 4);
writeBuffer.Append(&el->linenum, 4);
writeBuffer.Append((void*)el->text.c_str(), NodeLen-4);
utf16string textW = CStr8(text).FromUTF8().utf16();
u32 nodeLen = 4 + 2*(textW.length()+1);
writeBuffer.Append(&nodeLen, 4);
writeBuffer.Append(&linenum, 4);
writeBuffer.Append((void*)textW.c_str(), nodeLen-4);
}
// Output attributes
int i;
for (i = 0; i < AttrCount; ++i)
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next)
{
int AttrName = AttributeID[el->attrs[i]->name];
writeBuffer.Append(&AttrName, 4);
writeBuffer.Append(&attributeIDs[(const char*)attr->name], 4);
int AttrLen = 2*((int)el->attrs[i]->value.length()+1);
writeBuffer.Append(&AttrLen, 4);
writeBuffer.Append((void*)el->attrs[i]->value.c_str(), AttrLen);
// Free each attribute as soon as it's been dealt with
delete el->attrs[i];
xmlChar* value = xmlNodeGetContent(attr->children);
utf16string textW = CStr8((const char*)value).FromUTF8().utf16();
xmlFree(value);
u32 attrLen = 2*(textW.length()+1);
writeBuffer.Append(&attrLen, 4);
writeBuffer.Append((void*)textW.c_str(), attrLen);
}
// Go back and fill in the child-element offset
int ChildrenOffset = (int)writeBuffer.Size() - (Pos_ChildrenOffset+4);
writeBuffer.Overwrite(&ChildrenOffset, 4, Pos_ChildrenOffset);
u32 childrenOffset = (u32)(writeBuffer.Size() - (posChildrenOffset+4));
writeBuffer.Overwrite(&childrenOffset, 4, posChildrenOffset);
// Output all child nodes
for (i = 0; i < ChildCount; ++i)
OutputElement(el->childs[i]);
// Output all child elements
for (xmlNodePtr child = node->children; child; child = child->next)
if (child->type == XML_ELEMENT_NODE)
OutputElement(child, writeBuffer, elementIDs, attributeIDs);
// Go back and fill in the length
int Length = (int)writeBuffer.Size() - Pos_Length;
writeBuffer.Overwrite(&Length, 4, Pos_Length);
// Tidy up the parser's mess
delete el;
u32 length = (u32)(writeBuffer.Size() - posLength);
writeBuffer.Overwrite(&length, 4, posLength);
}
void XeroHandler::DeallocateElement(XMLElement* el)
PSRETURN CXeromyces::CreateXMB(const xmlDocPtr doc, WriteBuffer& writeBuffer)
{
size_t i;
// Header
writeBuffer.Append(UnfinishedHeaderMagicStr, 4);
for (i = 0; i < el->attrs.size(); ++i)
delete el->attrs[i];
std::set<std::string>::iterator it;
u32 i;
for (i = 0; i < el->childs.size(); ++i)
DeallocateElement(el->childs[i]);
// Find the unique element/attribute names
std::set<std::string> elementNames;
std::set<std::string> attributeNames;
FindNames(xmlDocGetRootElement(doc), elementNames, attributeNames);
delete el;
std::map<std::string, u32> elementIDs;
std::map<std::string, u32> attributeIDs;
// Output element names
i = 0;
u32 elementCount = (u32)elementNames.size();
writeBuffer.Append(&elementCount, 4);
for (it = elementNames.begin(); it != elementNames.end(); ++it)
{
u32 textLen = (u32)it->length()+1;
writeBuffer.Append(&textLen, 4);
writeBuffer.Append((void*)it->c_str(), textLen);
elementIDs[*it] = i++;
}
// Output attribute names
i = 0;
u32 attributeCount = (u32)attributeNames.size();
writeBuffer.Append(&attributeCount, 4);
for (it = attributeNames.begin(); it != attributeNames.end(); ++it)
{
u32 textLen = (u32)it->length()+1;
writeBuffer.Append(&textLen, 4);
writeBuffer.Append((void*)it->c_str(), textLen);
attributeIDs[*it] = i++;
}
OutputElement(xmlDocGetRootElement(doc), writeBuffer, elementIDs, attributeIDs);
// file is now valid, so insert correct magic string
writeBuffer.Overwrite(HeaderMagicStr, 4, 0);
return PSRETURN_OK;
}

View File

@ -13,22 +13,28 @@ ERROR_TYPE(Xeromyces, XMLOpenFailed);
ERROR_TYPE(Xeromyces, XMLParseError);
#include "XeroXMB.h"
#include "ps/Filesystem.h"
#include "XML.h" // XXX remove this
#include "lib/file/vfs/vfs.h"
class WriteBuffer;
typedef struct _xmlDoc xmlDoc;
typedef xmlDoc* xmlDocPtr;
class CXeromyces : public XMBFile
{
friend class TestXeromyces;
friend class TestXeroXMB;
public:
CXeromyces();
~CXeromyces();
// Load from an XML file (with invisible XMB caching).
PSRETURN Load(const VfsPath& filename);
// Call once when shutting down the program, to unload Xerces.
// Call once when initialising the program, to load libxml2.
// This should be run in the main thread, before any thread
// uses libxml2.
static void Startup();
// Call once when shutting down the program, to unload libxml2.
static void Terminate();
private:
@ -38,11 +44,9 @@ private:
bool ReadXMBFile(const VfsPath& filename);
static PSRETURN ConvertXMLtoXMB(const char* filename, InputSource& source, WriteBuffer& writeBuffer); // XXX remove filename
static PSRETURN CreateXMB(const xmlDocPtr doc, WriteBuffer& writeBuffer);
shared_ptr<u8> XMBBuffer;
static int XercesLoaded; // for once-only initialisation
shared_ptr<u8> m_XMBBuffer;
};

View File

@ -1,6 +1,5 @@
#include "lib/self_test.h"
#include "ps/XML/XML.h"
#include "ps/XML/XMLWriter.h"
class TestXmlWriter : public CxxTest::TestSuite

View File

@ -2,7 +2,10 @@
#include "ps/XML/Xeromyces.h"
#include "lib/file/io/write_buffer.h"
#include <xercesc/framework/MemBufInputSource.hpp>
#include <libxml/parser.h>
XERCES_CPP_NAMESPACE_USE
@ -13,10 +16,12 @@ private:
XMBFile parse(const char* doc)
{
XMLPlatformUtils::Initialize();
MemBufInputSource source((const XMLByte*)doc, strlen(doc), "null");
xmlDocPtr xmlDoc = xmlReadMemory(doc, strlen(doc), "", NULL,
XML_PARSE_NONET|XML_PARSE_NOCDATA);
WriteBuffer buffer;
PSRETURN ret = CXeromyces::ConvertXMLtoXMB("/dev/null", source, buffer);
PSRETURN ret = CXeromyces::CreateXMB(xmlDoc, buffer);
xmlFreeDoc(xmlDoc);
TS_ASSERT_EQUALS(ret, PSRETURN_OK);
XMBFile xmb;
@ -73,4 +78,39 @@ public:
TS_ASSERT_EQUALS(CStr(xmb.GetRoot().GetText()), "x <>&\"'foobar\n\nbazqux");
}
// A UTF-8 document containing U+1234 twice in both the element text and the
// attribute value: once as the character reference &#x1234; and once as the
// raw bytes E1 88 B4 (the UTF-8 encoding of U+1234). Both forms are expected
// to come out of the XMB as the single code unit 0x1234.
void test_unicode()
{
XMBFile xmb (parse("<?xml version=\"1.0\" encoding=\"utf-8\"?><foo x='&#x1234;\xE1\x88\xB4'>&#x1234;\xE1\x88\xB4</foo>"));
CStrW text;
// Element text
text = xmb.GetRoot().GetText();
TS_ASSERT_EQUALS(text.length(), 2);
TS_ASSERT_EQUALS(text[0], 0x1234);
TS_ASSERT_EQUALS(text[1], 0x1234);
// Attribute value
text = xmb.GetRoot().GetAttributes().Item(0).Value;
TS_ASSERT_EQUALS(text.length(), 2);
TS_ASSERT_EQUALS(text[0], 0x1234);
TS_ASSERT_EQUALS(text[1], 0x1234);
}
// Same content as the UTF-8 test, but the document declares a single-byte
// encoding. The character reference is still expected to yield 0x1234, while
// the raw bytes E1 88 B4 are expected to be read as three separate
// characters (0x00E1, 0x0088, 0x00B4), giving four code units in total.
void test_iso88591()
{
XMBFile xmb (parse("<?xml version=\"1.0\" encoding=\"iso88591\"?><foo x='&#x1234;\xE1\x88\xB4'>&#x1234;\xE1\x88\xB4</foo>"));
CStrW text;
// Element text
text = xmb.GetRoot().GetText();
TS_ASSERT_EQUALS(text.length(), 4);
TS_ASSERT_EQUALS(text[0], 0x1234);
TS_ASSERT_EQUALS(text[1], 0x00E1);
TS_ASSERT_EQUALS(text[2], 0x0088);
TS_ASSERT_EQUALS(text[3], 0x00B4);
// Attribute value
text = xmb.GetRoot().GetAttributes().Item(0).Value;
TS_ASSERT_EQUALS(text.length(), 4);
TS_ASSERT_EQUALS(text[0], 0x1234);
TS_ASSERT_EQUALS(text[1], 0x00E1);
TS_ASSERT_EQUALS(text[2], 0x0088);
TS_ASSERT_EQUALS(text[3], 0x00B4);
}
};

View File

@ -24,4 +24,7 @@ public:
path_ResetRootDir();
}
// TODO: Should test the reading/parsing/writing code,
// and parse error handling
};