Add interned string class, for fast comparisons and reduced memory usage.

This was SVN commit r11422.
This commit is contained in:
Ykkrosh 2012-04-03 18:26:52 +00:00
parent 8f59cb7b90
commit a56169ff8c
2 changed files with 218 additions and 0 deletions

137
source/ps/CStrIntern.cpp Normal file
View File

@ -0,0 +1,137 @@
/* Copyright (C) 2012 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#include "precompiled.h"
#include "CStrIntern.h"
#include "lib/fnv_hash.h"
#include "ps/CLogger.h"
#include <boost/unordered_map.hpp>
/**
 * Backing record for one interned string: the text itself together with its
 * precomputed hash. Both members are const, so a record never changes after
 * it has been constructed.
 */
class CStrInternInternals
{
public:
	/**
	 * Copies the @p len bytes starting at @p str into @c data and stores
	 * fnv_hash of those same bytes in @c hash.
	 */
	CStrInternInternals(const char* str, size_t len) :
		data(str, str + len),
		hash(fnv_hash(str, len))
	{
		// Debugging aid, disabled by default:
		// LOGWARNING(L"New interned string '%hs'", data.c_str());
	}

	/**
	 * Content equality between two records.
	 */
	bool operator==(const CStrInternInternals& b) const
	{
		// Differing hashes prove the strings are unequal, so the character
		// comparison only runs when the hashes agree.
		if (hash != b.hash)
			return false;
		return data == b.data;
	}

	const std::string data; // the interned text
	const u32 hash; // fnv_hash of data

private:
	// Private and not defined in this file: records cannot be assigned
	// (the const members would not allow it anyway).
	CStrInternInternals& operator=(const CStrInternInternals&);
};
// Interned strings are stored in a hash table, indexed by string.
// StringsKey is the key type of that table; it holds its own copy of the
// string's characters.
typedef std::string StringsKey;
// Hash functor for StringsKey: applies fnv_hash to the key's characters.
struct StringsKeyHash
{
	size_t operator()(const StringsKey& key) const
	{
		const char* const bytes = key.c_str();
		const size_t count = key.length();
		return fnv_hash(bytes, count);
	}
};
// To avoid std::string memory allocations when GetString does lookups in the
// hash table of interned strings, we make use of boost::unordered_map's ability
// to do lookups with a functionally equivalent proxy object.
//
// The proxy only points at the caller's characters; it does not copy them.
// It is a plain aggregate, so it can be brace-initialised as { str, len }.
struct StringsKeyProxy
{
// First character of the string being looked up.
const char* str;
// Number of characters at str that form the string.
size_t len;
};
// Hash functor for StringsKeyProxy: applies fnv_hash to the proxy's len
// characters starting at str, i.e. the same hash function StringsKeyHash
// applies to a stored key.
struct StringsKeyProxyHash
{
	size_t operator()(const StringsKeyProxy& key) const
	{
		const char* const bytes = key.str;
		const size_t count = key.len;
		return fnv_hash(bytes, count);
	}
};
// Equality functor comparing a StringsKeyProxy with a stored StringsKey:
// they match when they have the same length and the same bytes.
struct StringsKeyProxyEq
{
	bool operator()(const StringsKeyProxy& proxy, const StringsKey& key) const
	{
		// Different lengths can never match; this also guarantees memcmp
		// below reads exactly proxy.len bytes from both sides.
		if (proxy.len != key.length())
			return false;

		return memcmp(proxy.str, key.c_str(), proxy.len) == 0;
	}
};
static boost::unordered_map<StringsKey, shared_ptr<CStrInternInternals>, StringsKeyHash> g_Strings;
static CStrInternInternals* GetString(const char* str, size_t len)
{
// g_Strings is not thread-safe, so complain if anyone is using this
// type in non-main threads. (If that's desired, g_Strings should be changed
// to be thread-safe, preferably without sacrificing performance.)
ENSURE(ThreadUtil::IsMainThread());
StringsKeyProxy proxy = { str, len };
boost::unordered_map<StringsKey, shared_ptr<CStrInternInternals> >::iterator it =
g_Strings.find(proxy, StringsKeyProxyHash(), StringsKeyProxyEq());
if (it != g_Strings.end())
return it->second.get();
shared_ptr<CStrInternInternals> internals(new CStrInternInternals(str, len));
g_Strings.insert(std::make_pair(internals->data, internals));
return internals.get();
}
// Default construction refers to the interned empty string.
CStrIntern::CStrIntern() :
	m(GetString("", 0))
{
}
// Interns a null-terminated C string. The length is taken with strlen, so
// str must point at a valid null-terminated string.
CStrIntern::CStrIntern(const char* str) :
	m(GetString(str, strlen(str)))
{
}
// Interns the contents of a std::string, using its stored length rather
// than scanning for a terminator.
CStrIntern::CStrIntern(const std::string& str) :
	m(GetString(str.c_str(), str.length()))
{
}
// Returns the hash that was computed when the string was first interned;
// nothing is rehashed here.
u32 CStrIntern::GetHash() const
{
	const CStrInternInternals& internals = *m;
	return internals.hash;
}
// Returns the interned text as a null-terminated C string. The pointer refers
// to the shared record's storage, not to a copy.
const char* CStrIntern::c_str() const
{
	const std::string& text = m->data;
	return text.c_str();
}
const std::string& CStrIntern::string() const
{
return m->data;
}

81
source/ps/CStrIntern.h Normal file
View File

@ -0,0 +1,81 @@
/* Copyright (C) 2012 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_CSTRINTERN
#define INCLUDED_CSTRINTERN
class CStrInternInternals;
/**
 * Interned 8-bit strings.
 * Each instance with the same string content is a pointer to the same piece of
 * memory, allowing very fast string comparisons.
 *
 * Since a CStrIntern is just a dumb pointer, copying is very fast,
 * and pass-by-value should be preferred over pass-by-reference.
 *
 * Memory allocated for strings will never be freed, so don't use this for
 * unbounded numbers of strings (e.g. text rendered by gameplay scripts) -
 * it's intended for a small number of short frequently-used strings.
 *
 * Not thread-safe - only allocate these strings from the main thread.
 */
class CStrIntern
{
public:
	/**
	 * Constructs the interned empty string.
	 */
	CStrIntern();

	/**
	 * Interns a null-terminated C string.
	 * Explicit, so that interning never happens through an implicit conversion.
	 */
	explicit CStrIntern(const char* str);

	/**
	 * Interns the contents of a std::string.
	 * Explicit, so that interning never happens through an implicit conversion.
	 */
	explicit CStrIntern(const std::string& str);

	/**
	 * Returns cached FNV1-A hash of the string.
	 */
	u32 GetHash() const;

	/**
	 * Returns null-terminated string.
	 */
	const char* c_str() const;

	/**
	 * Returns as std::string.
	 */
	const std::string& string() const;

	/**
	 * String equality.
	 * Compares the internal pointers only; instances with the same content
	 * share the same pointer.
	 */
	bool operator==(const CStrIntern& b) const
	{
		return m == b.m;
	}

	/**
	 * String inequality; the exact negation of operator==.
	 */
	bool operator!=(const CStrIntern& b) const
	{
		return m != b.m;
	}

	/**
	 * Compare with some arbitrary total order.
	 * (In particular, this is not alphabetic order,
	 * and is not consistent between runs of the game.)
	 */
	bool operator<(const CStrIntern& b) const
	{
		return m < b.m;
	}

private:
	// Pointer to the shared record for this string's content.
	CStrInternInternals* m;
};
#endif // INCLUDED_CSTRINTERN