0ad/source/ps/XML/Xeromyces.cpp
Matei 44fe226dd2 # Housekeeping and pathfinder enhancements / optimization when dealing with ranged actions.
- Modified Xeromyces to no longer automatically convert element and
attribute names to lowercase, so that we can have camelCase names. We
should now be able to convert all the multi-word entity properties, like
pass_through_allies, to camelCase, like passThroughAllies, which is more
consistent with the rest of our JavaScript naming conventions. To
support the existing code that assumes lowercase element names, I made
the getElementID and getAttributeID methods (used in the EL and AT
macros) ignore case, and I changed any code that directly accessed
element names to use the right case. CEntityTemplate now converts
Names_LikeThis to names_likeThis (changing each separate "word" in the
name to camelCase). Changed the version letter in XMB filenames from A
to B to support this without requiring people to delete old XMBs.

- Enhanced the pathfinder's handling of contact paths, resulting in a
very large speedup for actions like attacking, construction, etc. The
problem was that the pathfinder used to not count a given state as the
goal unless it was exactly coincident with the target location. This is
fine when you order a unit to go exactly to a certain spot, but if
you're ordering a unit to build, gather or attack something, then the
target tile is impassable (because your target is there) and therefore
the pathfinder never declares a state final. As a result, the pathfinder
tries hundreds of extra tiles in case there is a long path that gets to
the goal, and after failing to find any path that reaches the goal, it
gives you one to the closest point it got to. To fix it, I made the
pathfinder take into account a radius around the goal in which it's OK
to be, which depends on the size of the target unit and the range of
your action.

This was SVN commit r4186.
2006-08-01 03:41:21 +00:00

599 lines
15 KiB
C++

#include "precompiled.h"
#include <vector>
#include <set>
#include <map>
#include <stack>
#include <algorithm>
#include "ps/CLogger.h"
#include "lib/res/file/vfs.h"
#include "Xeromyces.h"
#define ZLIB_DLL
#include <zlib.h> // for crc32
#define LOG_CATEGORY "xml"
#include "XML.h"
int CXeromyces::XercesLoaded = 0; // for once-only initialisation
// Slightly nasty fwrite/fseek/ftell style thing:
// a growable byte buffer that supports appending data and patching
// bytes that were written earlier. The storage comes from malloc/realloc,
// so a buffer handed out by steal_buffer() must be released with free().
class membuffer
{
public:
	// Allocates the initial block of bufferinc bytes.
	membuffer()
	{
		buffer = (char*)malloc(bufferinc);
		debug_assert(buffer);
		allocated = bufferinc;
		// If the initial allocation failed, record a capacity of zero so
		// the first write() retries the allocation instead of copying
		// through a NULL pointer.
		if (! buffer)
			allocated = 0;
		length = 0;
	}

	~membuffer()
	{
		// free(NULL) is a no-op, so this is fine after steal_buffer()
		free(buffer);
	}

	// Appends 'size' bytes from 'data' to the end of the buffer, growing
	// the storage if necessary. If the storage cannot be grown, a warning
	// is raised and nothing is written.
	void write(const void* data, int size)
	{
		// Always keep at least one spare byte: allocated > length + size
		if (! reserve(length + size + 1))
			return;
		memcpy2(&buffer[length], data, size);
		length += size;
	}

	// Overwrites 'size' bytes starting at 'offset'. The target range must
	// lie entirely inside the data written so far; the buffer never grows.
	void write(const void* data, int size, int offset)
	{
		debug_assert(offset >= 0 && offset+size <= length);
		memcpy2(&buffer[offset], data, size);
	}

	// Returns the current write position (= number of bytes written).
	int tell()
	{
		return length;
	}

	// Hands ownership of the storage to the caller, who must free() it.
	// 'length' is left untouched so it can still be queried afterwards.
	char* steal_buffer()
	{
		char* ret = buffer;
		buffer = NULL;
		return ret;
	}

	char* buffer;
	int length;

private:
	// Not copyable: a copy would free() the same storage a second time.
	// (Declared but intentionally not defined.)
	membuffer(const membuffer&);
	membuffer& operator=(const membuffer&);

	int allocated;
	static const int bufferinc = 1024;

	// Makes sure at least 'needed' bytes are allocated. Capacity grows
	// geometrically so that a long series of small writes does not copy
	// the buffer a quadratic number of times. On failure the existing
	// storage and the recorded capacity are left unchanged.
	bool reserve(int needed)
	{
		if (needed <= allocated)
			return true;

		int newsize = allocated * 2;
		if (newsize < needed)
			newsize = needed;
		if (newsize < bufferinc)
			newsize = bufferinc;

		// realloc(NULL, n) behaves like malloc(n), which covers the case
		// where the constructor's allocation failed
		char* newbuffer = (char*)realloc(buffer, newsize);
		if (! newbuffer)
		{
			debug_warn("Xeromyces: realloc failed");
			return false;
		}
		buffer = newbuffer;
		allocated = newsize;
		return true;
	}
};
// Convenient storage for the internal tree:
// a single attribute, holding its name and its 16-bit string value.
struct XMLAttribute
{
	std::string name;
	utf16string value;
};
// One element of the internal tree. Child elements and attributes are
// held through raw pointers; they are released by
// XeroHandler::OutputElement or XeroHandler::DeallocateElement.
struct XMLElement
{
	std::string name;                  // element name
	int linenum;                       // line number reported by the parser
	utf16string text;                  // accumulated character data
	std::vector<XMLElement*> childs;   // child elements, in document order
	std::vector<XMLAttribute*> attrs;  // attributes of this element
};
class XeroHandler : public DefaultHandler
{
public:
XeroHandler() : m_locator(NULL), Root(NULL) {}
~XeroHandler()
{
if (Root)
DeallocateElement(Root);
}
// SAX2 event handlers:
virtual void startDocument();
virtual void endDocument();
virtual void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
virtual void endElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname);
virtual void characters(const XMLCh* const chars, const unsigned int length);
const Locator* m_locator;
virtual void setDocumentLocator(const Locator* const locator)
{
m_locator = locator;
}
// Non-SAX2 stuff, used for storing the
// parsed data and constructing the XMB:
void CreateXMB();
membuffer buffer;
private:
std::set<std::string> ElementNames;
std::set<std::string> AttributeNames;
XMLElement* Root;
XMLElement* CurrentElement;
std::stack<XMLElement*> ElementStack;
std::map<std::string, int> ElementID;
std::map<std::string, int> AttributeID;
void OutputElement(XMLElement* el);
// Recursively frees memory
void DeallocateElement(XMLElement* el);
};
// Starts out with neither an XMB file handle nor an in-memory XMB buffer.
CXeromyces::CXeromyces()
	: XMBFileHandle(NULL),
	  XMBBuffer(NULL)
{
}
// Releases whichever backing store the XMB data lives in.
CXeromyces::~CXeromyces()
{
	if (! XMBFileHandle)
	{
		// The XMB was converted from XML directly into memory
		// (or nothing was ever loaded): free that memory buffer
		free(XMBBuffer);
		return;
	}

	// The XMB was read from a file: close it
	delete XMBFileHandle;
}
// Shuts Xerces down again, but only if Load() actually initialised it.
void CXeromyces::Terminate()
{
	if (! XercesLoaded)
		return;

	XMLPlatformUtils::Terminate();
	XercesLoaded = 0;
}
// Find out write location of the XMB file corresponding to xmlFilename
//
// xmlFilename: VFS path of the source XML file.
// xmbFilename: name of the XMB file, appended below the mod's xmb directory.
// xmbPath:     output buffer, which must hold at least PATH_MAX characters;
//              receives "cache/mods/<mod name>/xmb/<xmbFilename>".
void CXeromyces::getXMBPath(const char* xmlFilename, const char* xmbFilename,
	char* xmbPath)
{
	// rationale:
	// - it is necessary to write out XMB files into a subdirectory
	// corresponding to the mod from which the XML file is taken.
	// this avoids confusion when multiple mods are active -
	// their XMB files' VFS filename would otherwise be indistinguishable.
	// - we group files in the cache/ mount point first by mod, and only
	// then XMB. this is so that all output files for a given mod can
	// easily be deleted. the operation of deleting all old/unused
	// XMB files requires a program anyway (to find out which are no
	// longer needed), so it's not a problem that XMB files reside in
	// a subdirectory (which would make manually deleting all harder).

	// get real path of XML file (e.g. mods/official/entities/...)
	// (start with an empty string so that a failed lookup never leaves
	// uninitialised memory to be scanned below)
	char P_XMBRealPath[PATH_MAX];
	P_XMBRealPath[0] = '\0';
	vfs_realpath(xmlFilename, P_XMBRealPath);

	// extract mod name from that
	// (also starts out empty: if the pattern does not match, the path is
	// built with an empty mod name instead of from garbage)
	char modName[PATH_MAX];
	modName[0] = '\0';
	// .. NOTE: can't use %s, of course (keeps going beyond '/')
	int matches = sscanf(P_XMBRealPath, "mods/%[^/]", modName);
	debug_assert(matches == 1);

	// build full name: cache, then mod name, XMB subdir, original XMB path
	snprintf(xmbPath, PATH_MAX, "cache/mods/%s/xmb/%s", modName, xmbFilename);
	// guarantee termination even if the snprintf in use does not
	// terminate a truncated result
	xmbPath[PATH_MAX-1] = '\0';
}
// Loads the given XML file, going through its cached binary (XMB) form.
//
// If an XMB matching the XML file's modification time and size already
// exists it is loaded directly; otherwise the XML is parsed with Xerces,
// converted to XMB in memory, stored for next time and then used.
//
// Returns PSRETURN_OK on success, PSRETURN_Xeromyces_XMLOpenFailed if the
// XML (or an existing XMB) could not be opened, and
// PSRETURN_Xeromyces_XMLParseError if the parser reported errors.
PSRETURN CXeromyces::Load(const char* filename)
{
	// Make sure the .xml actually exists
	if (! vfs_exists(filename))
	{
		LOG(ERROR, LOG_CATEGORY, "CXeromyces: Failed to find XML file %s", filename);
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// Get some data about the .xml file
	struct stat xmlStat;
	if (vfs_stat(filename, &xmlStat) < 0)
	{
		LOG(ERROR, LOG_CATEGORY, "CXeromyces: Failed to stat XML file %s", filename);
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	/*
	XMBs are stored with a unique name, where the name is generated from
	characteristics of the XML file. If a file already exists with the
	generated name, it is assumed that that file is a valid conversion of
	the XML, and so it's loaded. Otherwise, the XMB is created with that
	filename.
	This means it's never necessary to overwrite existing XMB files; since
	the XMBs are often in archives, it's not easy to rewrite those files,
	and it's not possible to switch to using a loose file because the VFS
	has already decided that file is inside an archive. So each XMB is given
	a unique name, and old ones are somehow purged.
	*/

	// Generate the filename for the xmb:
	// <xml filename>_<mtime><size><format version>.xmb
	// with mtime/size as 8-digit hex, where mtime's lowest bit is
	// zeroed because zip files only have 2 second resolution.
	CStr xmbFilename = filename;

	// Strip the .xml suffix
	const int pos = xmbFilename.FindInsensitive(".xml");
	if (pos != -1)
		xmbFilename = xmbFilename.Left(pos);

	// '_' + 8 hex digits + 8 hex digits + 'B' + ".xmb" = 22 characters.
	// The values are passed as unsigned (which is what %x expects) and
	// the write is bounded by the size of the buffer.
	const int bufLen = 22;
	char buf[bufLen+1];
	if (snprintf(buf, sizeof(buf), "_%08x%08xB.xmb", (unsigned int)xmlStat.st_mtime & ~1u, (unsigned int)xmlStat.st_size) != bufLen)
	{
		debug_warn("Failed to create filename (?!)");
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}
	xmbFilename += buf;

	char xmbPath[PATH_MAX];
	getXMBPath(filename, xmbFilename, xmbPath);

	// If the file exists, use it
	if (vfs_exists(xmbPath))
	{
		if (ReadXMBFile(xmbPath))
			return PSRETURN_OK;
		else
			return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// XMB isn't up to date with the XML, so rebuild it:

	// Load Xerces if necessary
	if (! XercesLoaded)
	{
		XMLPlatformUtils::Initialize();
		XercesLoaded = 1;
	}

	// Open the .xml file
	// note: FILE_LONG_LIVED is necessary because we load XML, load DTD,
	// and only then free XML.
	CVFSInputSource source;
	if (source.OpenFile(filename, FILE_LONG_LIVED) < 0)
	{
		LOG(ERROR, LOG_CATEGORY, "CXeromyces: Failed to open XML file %s", filename);
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// The handlers are created before the parser so that they can be
	// used both inside and after the guarded section below
	XeroHandler handler;
	CXercesErrorHandler errorHandler;
	CVFSEntityResolver entityResolver(filename);

	// Set up the Xerces parser
	SAX2XMLReader* Parser = XMLReaderFactory::createXMLReader();
	try
	{
		// Enable validation
		Parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
		Parser->setFeature(XMLUni::fgXercesDynamic, true);

		Parser->setContentHandler(&handler);
		Parser->setErrorHandler(&errorHandler);
		Parser->setEntityResolver(&entityResolver);

		// Build a tree inside handler
		Parser->parse(source);
		// (It's horribly inefficient doing SAX2->tree then tree->XMB,
		// but the XML->XMB conversion should be done very rarely
		// anyway. If it's ever needed, the XMB writing can be done
		// directly from inside the SAX2 event handlers, although that's
		// a little more complex)
	}
	catch (...)
	{
		// Don't leak the parser if configuring or running it throws;
		// the exception itself is passed on to the caller unchanged
		delete Parser;
		throw;
	}
	delete Parser;

	if (errorHandler.getSawErrors())
	{
		LOG(ERROR, LOG_CATEGORY, "CXeromyces: Errors in XML file '%s'", filename);
		// The internal tree of the XeroHandler will be cleaned up automatically
		return PSRETURN_Xeromyces_XMLParseError;
	}

	// Convert the data structures into the XMB format
	handler.CreateXMB();

	// Save the file to disk, so it can be loaded quickly next time
	vfs_store(xmbPath, handler.buffer.buffer, handler.buffer.length, FILE_NO_AIO);

	// Store the buffer so it can be freed later
	XMBBuffer = handler.buffer.steal_buffer();

	// Set up the XMBFile
	Initialise(XMBBuffer);

	return PSRETURN_OK;
}
bool CXeromyces::ReadXMBFile(const char* filename)
{
CVFSFile* file = new CVFSFile;
// note: an XMB file's buffer is held in memory across all load/free
// sequences of dependent files it references. that hurts the
// file cache allocator and incurs a warning unless we
// inform the file manager of this behavior via FILE_LONG_LIVED.
if (file->Load(filename, FILE_LONG_LIVED) != PSRETURN_OK)
return false;
const void* buffer = file->GetBuffer();
debug_assert(file->GetBufferSize() >= 42 && "Invalid XMB file"); // 42 bytes is the smallest possible XMB. (Well, maybe not quite, but it's a nice number.)
debug_assert(*(u32*)buffer == HeaderMagic && "Invalid XMB file header");
// Store the Handle so it can be closed later
XMBFileHandle = file;
// Set up the XMBFile
Initialise((const char*)buffer);
return true;
}
void XeroHandler::startDocument()
{
Root = new XMLElement;
ElementStack.push(Root);
}
// SAX2 handler: intentionally does nothing. The collected tree is only
// converted when CreateXMB() is called explicitly.
void XeroHandler::endDocument()
{
}
/*
// Silently clobbers non-ASCII characters
std::string lowercase_ascii(const XMLCh *a)
{
std::string b;
uint len=XMLString::stringLen(a);
b.resize(len);
for (uint i = 0; i < len; ++i)
b[i] = (char)towlower(a[i]);
return b;
}
*/
/**
* Return an ASCII version of the given 16-bit string, ignoring
* any non-ASCII characters.
*
* @param const XMLCh * a Input string.
* @return std::string 8-bit ASCII version of <code>a</code>.
**/
std::string toAscii( const XMLCh* a )
{
std::string b;
uint len=XMLString::stringLen(a);
b.reserve(len);
for (uint i = 0; i < len; ++i)
{
if(iswascii(a[i]))
b += (char) a[i];
}
return b;
}
void XeroHandler::startElement(const XMLCh* const UNUSED(uri), const XMLCh* const localname, const XMLCh* const UNUSED(qname), const Attributes& attrs)
{
std::string elementName = toAscii(localname);
ElementNames.insert(elementName);
// Create a new element
XMLElement* e = new XMLElement;
e->name = elementName;
e->linenum = m_locator->getLineNumber();
// Store all the attributes in the new element
for (unsigned int i = 0; i < attrs.getLength(); ++i)
{
std::string attrName = toAscii(attrs.getLocalName(i));
AttributeNames.insert(attrName);
XMLAttribute* a = new XMLAttribute;
a->name = attrName;
const XMLCh *tmp = attrs.getValue(i);
a->value = utf16string(tmp, tmp+XMLString::stringLen(tmp));
e->attrs.push_back(a);
}
// Add the element to its parent
ElementStack.top()->childs.push_back(e);
// Set as parent of following elements
ElementStack.push(e);
}
// SAX2 handler: the innermost open element has been closed, so its parent
// becomes the target for following child elements and text again.
void XeroHandler::endElement(const XMLCh* const UNUSED(uri), const XMLCh* const UNUSED(localname), const XMLCh* const UNUSED(qname))
{
// Drop the element that startElement pushed
ElementStack.pop();
}
// SAX2 handler: appends a run of character data to the text of the
// innermost open element. May be called several times per element; the
// runs are simply concatenated.
//
// chars:  start of the character data.
// length: number of characters in this run.
void XeroHandler::characters(const XMLCh* const chars, const unsigned int length)
{
	// Take exactly the number of characters reported by the parser instead
	// of scanning for a terminator: the run is defined by 'length'.
	ElementStack.top()->text += utf16string(chars, chars+length);
}
void XeroHandler::CreateXMB()
{
// Header
buffer.write((void*)HeaderMagicStr, 4);
std::set<std::string>::iterator it;
int i;
// Element names
i = 0;
int ElementCount = (int)ElementNames.size();
buffer.write(&ElementCount, 4);
for (it = ElementNames.begin(); it != ElementNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
buffer.write(&TextLen, 4);
buffer.write((void*)it->c_str(), TextLen);
ElementID[*it] = i++;
}
// Attribute names
i = 0;
int AttributeCount = (int)AttributeNames.size();
buffer.write(&AttributeCount, 4);
for (it = AttributeNames.begin(); it != AttributeNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
buffer.write(&TextLen, 4);
buffer.write((void*)it->c_str(), TextLen);
AttributeID[*it] = i++;
}
// All the XML contents must be surrounded by a single element
debug_assert(Root->childs.size() == 1);
OutputElement(Root->childs[0]);
delete Root;
Root = NULL;
}
// Writes a whole element (recursively if it has children) into the buffer,
// and also frees all the memory that has been allocated for that element.
//
// Layout written for each element, in 4-byte fields unless noted:
// total length, name ID, attribute count, child count, offset to the
// children, the text block, the attributes, and finally the children.
void XeroHandler::OutputElement(XMLElement* el)
{
	// Placeholder for the element's length, patched at the very end
	const int lengthPos = buffer.tell();
	buffer.write("????", 4);

	int nameId = ElementID[el->name];
	buffer.write(&nameId, 4);

	int attrCount = (int)el->attrs.size();
	buffer.write(&attrCount, 4);

	int childCount = (int)el->childs.size();
	buffer.write(&childCount, 4);

	// Placeholder for the offset to the list of child elements,
	// patched once the text and attributes have been written
	const int childrenOffsetPos = buffer.tell();
	buffer.write("????", 4);

	// Trim excess whitespace in the entity's text, while counting
	// the number of newlines trimmed (so that JS error reporting
	// can give the correct line number)
	const std::string whitespaceA = " \t\r\n";
	const utf16string whitespace (whitespaceA.begin(), whitespaceA.end());

	const size_t first = el->text.find_first_not_of(whitespace);
	if (first != el->text.npos)
	{
		// Newlines in the leading whitespace that is about to be cut
		// off push the text's line number down
		el->linenum += (int)std::count(el->text.begin(), el->text.begin()+first, (utf16_t)'\n');

		// Keep only the span from the first to the last
		// non-whitespace character
		const size_t last = el->text.find_last_not_of(whitespace);
		el->text = el->text.substr(first, 1+last-first);
	}
	else
	{
		// Entirely whitespace (or empty) - nothing worth keeping
		el->text = utf16string();
	}

	// Output text, prefixed by length in bytes
	if (el->text.empty())
	{
		// No text; just a zero length
		buffer.write("\0\0\0\0", 4);
	}
	else
	{
		// Length covers the line number plus the null-terminated text
		int nodeLen = 4 + 2*((int)el->text.length()+1);
		buffer.write(&nodeLen, 4);
		buffer.write(&el->linenum, 4);
		buffer.write((void*)el->text.c_str(), nodeLen-4);
	}

	// Output attributes: name ID, value length in bytes, then the
	// null-terminated value
	for (std::vector<XMLAttribute*>::iterator attrIt = el->attrs.begin(); attrIt != el->attrs.end(); ++attrIt)
	{
		XMLAttribute* const attr = *attrIt;

		int attrNameId = AttributeID[attr->name];
		buffer.write(&attrNameId, 4);

		int valueBytes = 2*((int)attr->value.length()+1);
		buffer.write(&valueBytes, 4);
		buffer.write((void*)attr->value.c_str(), valueBytes);

		// Free each attribute as soon as it's been dealt with
		delete attr;
	}

	// Go back and fill in the child-element offset, measured from just
	// after the offset field itself
	int childrenOffset = buffer.tell() - (childrenOffsetPos+4);
	buffer.write(&childrenOffset, 4, childrenOffsetPos);

	// Output all child nodes (each call frees the child it writes)
	for (std::vector<XMLElement*>::iterator childIt = el->childs.begin(); childIt != el->childs.end(); ++childIt)
		OutputElement(*childIt);

	// Go back and fill in the length
	int totalLength = buffer.tell() - lengthPos;
	buffer.write(&totalLength, 4, lengthPos);

	// Tidy up the parser's mess
	delete el;
}
// Frees an element together with its attributes and, recursively,
// all of its descendants.
void XeroHandler::DeallocateElement(XMLElement* el)
{
	for (std::vector<XMLAttribute*>::iterator attr = el->attrs.begin(); attr != el->attrs.end(); ++attr)
		delete *attr;

	for (std::vector<XMLElement*>::iterator child = el->childs.begin(); child != el->childs.end(); ++child)
		DeallocateElement(*child);

	delete el;
}