0ad/source/ps/XML/Xeromyces.cpp
2008-07-17 17:00:00 +00:00

504 lines
14 KiB
C++

#include "precompiled.h"
#include <vector>
#include <set>
#include <map>
#include <stack>
#include <algorithm>
#include "ps/CLogger.h"
#include "ps/Filesystem.h"
#include "Xeromyces.h"
#define LOG_CATEGORY "xml"
#include "XML.h"
int CXeromyces::XercesLoaded = 0; // for once-only initialisation
// Convenient storage for the internal tree
typedef struct {
std::string name;
utf16string value;
} XMLAttribute;
typedef struct XMLElement {
std::string name;
int linenum;
utf16string text;
std::vector<XMLElement*> childs;
std::vector<XMLAttribute*> attrs;
} XMLElement;
class XeroHandler : public DefaultHandler
{
public:
XeroHandler() : m_locator(NULL), Root(NULL) {}
~XeroHandler()
{
if (Root)
DeallocateElement(Root);
}
// SAX2 event handlers:
virtual void startDocument();
virtual void endDocument();
virtual void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
virtual void endElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname);
virtual void characters(const XMLCh* const chars, const unsigned int length);
const Locator* m_locator;
virtual void setDocumentLocator(const Locator* const locator)
{
m_locator = locator;
}
// Non-SAX2 stuff, used for storing the
// parsed data and constructing the XMB:
void CreateXMB();
WriteBuffer writeBuffer;
private:
std::set<std::string> ElementNames;
std::set<std::string> AttributeNames;
XMLElement* Root;
XMLElement* CurrentElement;
std::stack<XMLElement*> ElementStack;
std::map<std::string, int> ElementID;
std::map<std::string, int> AttributeID;
void OutputElement(XMLElement* el);
// Recursively frees memory
void DeallocateElement(XMLElement* el);
};
CXeromyces::CXeromyces()
{
}
CXeromyces::~CXeromyces()
{
}
void CXeromyces::Terminate()
{
if (XercesLoaded)
{
XMLPlatformUtils::Terminate();
XercesLoaded = 0;
}
}
// Find out write location of the XMB file corresponding to xmlFilename
void CXeromyces::GetXMBPath(const PIVFS& vfs, const VfsPath& xmlFilename, const VfsPath& xmbFilename, VfsPath& xmbActualPath)
{
// rationale:
// - it is necessary to write out XMB files into a subdirectory
// corresponding to the mod from which the XML file is taken.
// this avoids confusion when multiple mods are active -
// their XMB files' VFS filename would otherwise be indistinguishable.
// - we group files in the cache/ mount point first by mod, and only
// then XMB. this is so that all output files for a given mod can
// easily be deleted. the operation of deleting all old/unused
// XMB files requires a program anyway (to find out which are no
// longer needed), so it's not a problem that XMB files reside in
// a subdirectory (which would make manually deleting all harder).
// get real path of XML file (e.g. mods/official/entities/...)
Path P_XMBRealPath;
vfs->GetRealPath(xmlFilename, P_XMBRealPath);
// extract mod name from that
char modName[PATH_MAX];
// .. NOTE: can't use %s, of course (keeps going beyond '/')
int matches = sscanf(P_XMBRealPath.string().c_str(), "mods/%[^/]", modName);
debug_assert(matches == 1);
// build full name: cache, then mod name, XMB subdir, original XMB path
xmbActualPath = VfsPath("cache/mods") / modName / "xmb" / xmbFilename;
}
PSRETURN CXeromyces::Load(const VfsPath& filename)
{
// Make sure the .xml actually exists
if (! FileExists(filename))
{
LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to find XML file %s", filename.string().c_str());
return PSRETURN_Xeromyces_XMLOpenFailed;
}
// Get some data about the .xml file
FileInfo fileInfo;
if (g_VFS->GetFileInfo(filename, &fileInfo) < 0)
{
LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to stat XML file %s", filename.string().c_str());
return PSRETURN_Xeromyces_XMLOpenFailed;
}
/*
XMBs are stored with a unique name, where the name is generated from
characteristics of the XML file. If a file already exists with the
generated name, it is assumed that that file is a valid conversion of
the XML, and so it's loaded. Otherwise, the XMB is created with that
filename.
This means it's never necessary to overwrite existing XMB files; since
the XMBs are often in archives, it's not easy to rewrite those files,
and it's not possible to switch to using a loose file because the VFS
has already decided that file is inside an archive. So each XMB is given
a unique name, and old ones are somehow purged.
*/
// Generate the filename for the xmb:
// <xml filename>_<mtime><size><format version>.xmb
// with mtime/size as 8-digit hex, where mtime's lowest bit is
// zeroed because zip files only have 2 second resolution.
const int suffixLength = 22;
char suffix[suffixLength+1];
int ret = sprintf(suffix, "_%08x%08xB.xmb", (int)(fileInfo.MTime() & ~1), (int)fileInfo.Size());
debug_assert(ret == suffixLength);
VfsPath xmbFilename = change_extension(filename, suffix);
VfsPath xmbPath;
GetXMBPath(g_VFS, filename, xmbFilename, xmbPath);
// If the file exists, use it
if (FileExists(xmbPath))
{
if (ReadXMBFile(xmbPath))
return PSRETURN_OK;
// (no longer return PSRETURN_Xeromyces_XMLOpenFailed here because
// failure legitimately happens due to partially-written XMB files.)
}
// XMB isn't up to date with the XML, so rebuild it:
// Load Xerces if necessary
if (! XercesLoaded)
{
XMLPlatformUtils::Initialize();
XercesLoaded = 1;
}
// Open the .xml file
CVFSInputSource source;
if (source.OpenFile(filename) < 0)
{
LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to open XML file %s", filename.string().c_str());
return PSRETURN_Xeromyces_XMLOpenFailed;
}
// Set up the Xerces parser
SAX2XMLReader* Parser = XMLReaderFactory::createXMLReader();
// Enable validation
Parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
Parser->setFeature(XMLUni::fgXercesDynamic, true);
XeroHandler handler;
Parser->setContentHandler(&handler);
CXercesErrorHandler errorHandler;
Parser->setErrorHandler(&errorHandler);
CVFSEntityResolver entityResolver(filename.string().c_str());
Parser->setEntityResolver(&entityResolver);
// Build a tree inside handler
Parser->parse(source);
// (It's horribly inefficient doing SAX2->tree then tree->XMB,
// but the XML->XMB conversion should be done very rarely
// anyway. If it's ever needed, the XMB writing can be done
// directly from inside the SAX2 event handlers, although that's
// a little more complex)
delete Parser;
if (errorHandler.GetSawErrors())
{
LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Errors in XML file '%s'", filename.string().c_str());
return PSRETURN_Xeromyces_XMLParseError;
// The internal tree of the XeroHandler will be cleaned up automatically
}
// Convert the data structures into the XMB format
handler.CreateXMB();
// Save the file to disk, so it can be loaded quickly next time
WriteBuffer& writeBuffer = handler.writeBuffer;
g_VFS->CreateFile(xmbPath, writeBuffer.Data(), writeBuffer.Size());
XMBBuffer = writeBuffer.Data(); // add a reference
// Set up the XMBFile
const bool ok = Initialise((const char*)XMBBuffer.get());
debug_assert(ok);
return PSRETURN_OK;
}
bool CXeromyces::ReadXMBFile(const VfsPath& filename)
{
size_t size;
if(g_VFS->LoadFile(filename, XMBBuffer, size) < 0)
return false;
debug_assert(size >= 42); // else: invalid XMB file size. (42 bytes is the smallest possible XMB. (Well, maybe not quite, but it's a nice number.))
// Set up the XMBFile
if(!Initialise((const char*)XMBBuffer.get()))
return false;
return true;
}
void XeroHandler::startDocument()
{
Root = new XMLElement;
ElementStack.push(Root);
}
void XeroHandler::endDocument()
{
}
/*
// Silently clobbers non-ASCII characters
std::string lowercase_ascii(const XMLCh *a)
{
std::string b;
size_t len=XMLString::stringLen(a);
b.resize(len);
for (size_t i = 0; i < len; ++i)
b[i] = (char)towlower(a[i]);
return b;
}
*/
/**
* Return an ASCII version of the given 16-bit string, ignoring
* any non-ASCII characters.
*
* @param const XMLCh * a Input string.
* @return std::string 8-bit ASCII version of <code>a</code>.
**/
std::string toAscii( const XMLCh* a )
{
std::string b;
size_t len=XMLString::stringLen(a);
b.reserve(len);
for (size_t i = 0; i < len; ++i)
{
if(a[i] < 0x80)
b += (char) a[i];
}
return b;
}
void XeroHandler::startElement(const XMLCh* const UNUSED(uri), const XMLCh* const localname, const XMLCh* const UNUSED(qname), const Attributes& attrs)
{
std::string elementName = toAscii(localname);
ElementNames.insert(elementName);
// Create a new element
XMLElement* e = new XMLElement;
e->name = elementName;
e->linenum = m_locator->getLineNumber();
// Store all the attributes in the new element
for (unsigned int i = 0; i < attrs.getLength(); ++i)
{
std::string attrName = toAscii(attrs.getLocalName(i));
AttributeNames.insert(attrName);
XMLAttribute* a = new XMLAttribute;
a->name = attrName;
const XMLCh *tmp = attrs.getValue(i);
a->value = utf16string(tmp, tmp+XMLString::stringLen(tmp));
e->attrs.push_back(a);
}
// Add the element to its parent
ElementStack.top()->childs.push_back(e);
// Set as parent of following elements
ElementStack.push(e);
}
void XeroHandler::endElement(const XMLCh* const UNUSED(uri), const XMLCh* const UNUSED(localname), const XMLCh* const UNUSED(qname))
{
ElementStack.pop();
}
void XeroHandler::characters(const XMLCh* const chars, const unsigned int UNUSED(length))
{
ElementStack.top()->text += utf16string(chars, chars+XMLString::stringLen(chars));
}
void XeroHandler::CreateXMB()
{
// Header
writeBuffer.Append(UnfinishedHeaderMagicStr, 4);
std::set<std::string>::iterator it;
int i;
// Element names
i = 0;
int ElementCount = (int)ElementNames.size();
writeBuffer.Append(&ElementCount, 4);
for (it = ElementNames.begin(); it != ElementNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
writeBuffer.Append(&TextLen, 4);
writeBuffer.Append((void*)it->c_str(), TextLen);
ElementID[*it] = i++;
}
// Attribute names
i = 0;
int AttributeCount = (int)AttributeNames.size();
writeBuffer.Append(&AttributeCount, 4);
for (it = AttributeNames.begin(); it != AttributeNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
writeBuffer.Append(&TextLen, 4);
writeBuffer.Append((void*)it->c_str(), TextLen);
AttributeID[*it] = i++;
}
// All the XML contents must be surrounded by a single element
debug_assert(Root->childs.size() == 1);
OutputElement(Root->childs[0]);
delete Root;
Root = NULL;
// file is now valid, so insert correct magic string
writeBuffer.Overwrite(HeaderMagicStr, 4, 0);
}
// Writes a whole element (recursively if it has children) into the buffer,
// and also frees all the memory that has been allocated for that element.
void XeroHandler::OutputElement(XMLElement* el)
{
// Filled in later with the length of the element
int Pos_Length = (int)writeBuffer.Size();
writeBuffer.Append("????", 4);
int NameID = ElementID[el->name];
writeBuffer.Append(&NameID, 4);
int AttrCount = (int)el->attrs.size();
writeBuffer.Append(&AttrCount, 4);
int ChildCount = (int)el->childs.size();
writeBuffer.Append(&ChildCount, 4);
// Filled in later with the offset to the list of child elements
int Pos_ChildrenOffset = (int)writeBuffer.Size();
writeBuffer.Append("????", 4);
// Trim excess whitespace in the entity's text, while counting
// the number of newlines trimmed (so that JS error reporting
// can give the correct line number)
std::string whitespaceA = " \t\r\n";
utf16string whitespace (whitespaceA.begin(), whitespaceA.end());
// Find the start of the non-whitespace section
size_t first = el->text.find_first_not_of(whitespace);
if (first == el->text.npos)
// Entirely whitespace - easy to handle
el->text = utf16string();
else
{
// Count the number of \n being cut off,
// and add them to the line number
utf16string trimmed (el->text.begin(), el->text.begin()+first);
el->linenum += (int)std::count(trimmed.begin(), trimmed.end(), (utf16_t)'\n');
// Find the end of the non-whitespace section,
// and trim off everything else
size_t last = el->text.find_last_not_of(whitespace);
el->text = el->text.substr(first, 1+last-first);
}
// Output text, prefixed by length in bytes
if (el->text.length() == 0)
{
// No text; don't write much
writeBuffer.Append("\0\0\0\0", 4);
}
else
{
// Write length and line number and null-terminated text
int NodeLen = 4 + 2*((int)el->text.length()+1);
writeBuffer.Append(&NodeLen, 4);
writeBuffer.Append(&el->linenum, 4);
writeBuffer.Append((void*)el->text.c_str(), NodeLen-4);
}
// Output attributes
int i;
for (i = 0; i < AttrCount; ++i)
{
int AttrName = AttributeID[el->attrs[i]->name];
writeBuffer.Append(&AttrName, 4);
int AttrLen = 2*((int)el->attrs[i]->value.length()+1);
writeBuffer.Append(&AttrLen, 4);
writeBuffer.Append((void*)el->attrs[i]->value.c_str(), AttrLen);
// Free each attribute as soon as it's been dealt with
delete el->attrs[i];
}
// Go back and fill in the child-element offset
int ChildrenOffset = (int)writeBuffer.Size() - (Pos_ChildrenOffset+4);
writeBuffer.Overwrite(&ChildrenOffset, 4, Pos_ChildrenOffset);
// Output all child nodes
for (i = 0; i < ChildCount; ++i)
OutputElement(el->childs[i]);
// Go back and fill in the length
int Length = (int)writeBuffer.Size() - Pos_Length;
writeBuffer.Overwrite(&Length, 4, Pos_Length);
// Tidy up the parser's mess
delete el;
}
void XeroHandler::DeallocateElement(XMLElement* el)
{
size_t i;
for (i = 0; i < el->attrs.size(); ++i)
delete el->attrs[i];
for (i = 0; i < el->childs.size(); ++i)
DeallocateElement(el->childs[i]);
delete el;
}