0ad/source/i18n/CLocale.cpp

#include "precompiled.h"

#include "CLocale.h"
#include "TSComponent.h"
#include "ps/StringConvert.h"


#include <algorithm>

#include "ps/CLogger.h"
#define LOG_CATEGORY "i18n"

using namespace I18n;

// Vaguely useful utility function for deleting stuff
template<typename T> void delete_fn(T* v) { delete v; }

// These could be optimised for little-endian sizeof(wchar_t)==2 systems:
static inline void ReadWString8_(const char*& data, Str& str)
{
	u8 length = *(u8*)data;
	data += 1;
	StringConvert::ucs2le_to_wstring(data, data+length, str);
	data += length;
}
static inline void ReadWString16_(const char*& data, Str& str)
{
	u16 length = *(u16*)data;
	data += 2;
	StringConvert::ucs2le_to_wstring(data, data+length, str);
	data += length;
}

#define ReadWString8(s) Str s; ReadWString8_(data, s);
#define ReadWString16(s) Str s; ReadWString16_(data, s);


bool CLocale::LoadStrings(const char* data)
{
	// TODO: More robust file format (so errors can be detected in a
	// nicer way than watching for access violations)

	u16 PhraseCount = *(u16*)data;
	data += 2;

	for (int i = 0; i < PhraseCount; ++i)
	{
		ReadWString16(Key);

		u8 VarCount = *(u8*)data;
		data += 1;

		// Get the relevant entry in the string hash, creating it if it doesn't exist
		TranslatedString* String = Strings[Key];
		if (! String)
			String = Strings[Key] = new TranslatedString;
		// If this is a redefined string, make sure it's empty
		String->Parts.clear();
		// Store the number of variables, so translate(x)<<y<<z can check for validity
		String->VarCount = VarCount;

		u8 SectionCount = *(u8*)data;
		data += 1;

		for (int j = 0; j < SectionCount; ++j)
		{
			u8 SectionType = *(u8*)data;
			data += 1;

			switch (SectionType)
			{
			case 0: // Constant string
				{
					ReadWString16(StringText);

					String->Parts.push_back(new TSComponentString(StringText.c_str()));
					break;
				}

			case 1: // Variable
				{
					u8 VarID = *(u8*)data;
					data += 1;

					String->Parts.push_back(new TSComponentVariable(VarID));

					break;
				}

			case 2: // Function
				{
					u8 NameLength = *(u8*)data;
					data += 1;

					std::string NameText ((char*)data, (char*)(data + NameLength));
					data += NameLength;

					u8 ParamCount = *(u8*)data;
					data += 1;

					TSComponentFunction* Func = new TSComponentFunction(NameText.c_str());

					for (int k = 0; k < ParamCount; ++k)
					{
						u8 ParamType = *(u8*)data;
						data += 1;

						switch (ParamType)
						{
						case 0: // String
							{
								ReadWString8(StrText);

								Func->AddParam(new ScriptValueString(Script, StrText.c_str()));

								break;
							}

						case 1: // Variable
							{
								u8 ID = *(u8*)data;
								data += 1;

								Func->AddParam(new ScriptValueVariable(Script, ID));

								break;
							}

						case 2: // Integer
							{
								u32 Num = *(u32*)data;
								data += 4;

								Func->AddParam(new ScriptValueInteger(Script, Num));

								break;
							}

						default: // Argh!
							debug_warn("Invalid function parameter type");
						}
					}

					String->Parts.push_back(Func);

					break;
				}

			default: // Argh!
				debug_warn("Invalid translation string section type");
			}
		}
	}

	return true;
}


bool CLocale::LoadFunctions(const char* data, size_t len, const char* filename)
{
	// Insist on little-endian UTF16 files containing a BOM (e.g. as generated
	// by Notepad when saving in Unicode format)

	// TODO: Support more Unicode file formats

	if (len < 2)
	{
		LOG(ERROR, LOG_CATEGORY, "I18n: Functions file '%s' is too short", filename);
		return false;
	}

	if (*(jschar*)data != 0xFEFF)
	{
		LOG(ERROR, LOG_CATEGORY, "I18n: Functions file '%s' has invalid Unicode format (lacking little-endian BOM)", filename);
		return false;
	}

	if (! Script.ExecuteCode((jschar*)(data+2), len/2, filename))
	{
		LOG(ERROR, LOG_CATEGORY, "I18n: JS errors in functions file '%s'", filename);
		return false;
	}

	return true;
}


bool CLocale::LoadDictionary(const char* data)
{
	ReadWString8(DictName);

	u8 PropertyCount = *(u8*)data;
	data += 1;

	DictData& dict = Dictionaries[DictName];

	if (dict.DictProperties.size() && PropertyCount != dict.DictProperties.size())
	{
		LOG(ERROR, LOG_CATEGORY, "I18n: Multiple dictionary files loaded with the name ('%ls') and different properties", DictName.c_str());
		return false;
		// TODO: Check headings to make sure they're identical (or handle them more cleverly)
	}

	// Read the names of the properties

	int i;

	for (i = 0; i < PropertyCount; ++i)
	{
		ReadWString8(Property);
		dict.DictProperties[Property] = i;
	}

	u16 ValueCount = *(u16*)data;
	data += 2;

	// Read each 'value' (word + properties)

	for (i = 0; i < ValueCount; ++i)
	{
		ReadWString8(Word);

		std::vector<std::wstring>& props = dict.DictWords[Word];

		for (int j = 0; j < PropertyCount; ++j)
		{
			ReadWString8(Value);
			props.push_back(Value);
		}
	}

	return true;
}

void CLocale::UnloadDictionaries()
{
	Dictionaries.clear();
}


const CLocale::LookupType* CLocale::LookupWord(const Str& dictname, const Str& word)
{
	std::map<Str, DictData>::const_iterator dictit = Dictionaries.find(dictname);
	if (dictit == Dictionaries.end())
	{
		LOG(WARNING, LOG_CATEGORY, "I18n: Non-loaded dictionary '%ls' accessed", dictname.c_str());
		return NULL;
	}
	std::map<Str, std::vector<Str> >::const_iterator wordit = dictit->second.DictWords.find(word);
	if (wordit == dictit->second.DictWords.end())
	{
		// Word not found. Respond quietly, so JS code can handle missing
		// words in a more appropriate way.
		return NULL;
	}

	// Return some data that can later be passed to LookupProperty
	return new LookupType(&dictit->second, &wordit->second);
}

bool CLocale::LookupProperty(const LookupType* data, const Str& property, Str& result)
{
	std::map<Str, int>::const_iterator propit = data->first->DictProperties.find(property);
	if (propit == data->first->DictProperties.end())
		return false;

	// Return the appropriate string
	result = (*data->second)[propit->second];
	return true;
}


const StrImW CLocale::CallFunction(const char* name, const std::vector<BufferVariable*>& vars, const std::vector<ScriptValue*>& params)
{
	return Script.CallFunction(name, vars, params);
}


StringBuffer CLocale::Translate(const wchar_t* id)
{
	if (++CacheAge > CacheAgeLimit)
	{
		CacheAge = 0;
		ClearCache();
	}

	StringsType::iterator TransStr = Strings.find(Str(id));
	if (TransStr == Strings.end())
	{
		LOG(WARNING, LOG_CATEGORY, "I18n: No translation found for string '%ls'", id);

		// Just use the ID string directly, and remember it for the future
		return StringBuffer(&AddDefaultString(id), this);
	}

	return StringBuffer((*TransStr).second, this);
}


void CLocale::AddToCache(StringBuffer* sb, Str& str)
{
	CacheData& d = TranslationCache[sb->String];

	// Clean up any earlier cache entry
	std::for_each(d.vars.begin(), d.vars.end(), delete_fn<BufferVariable>);

	// Set the data for the new cache entry
	d.hash = sb->Hash();
	d.vars = sb->Variables;
	d.output = str;
}

bool CLocale::ReadCached(StringBuffer* sb, Str& str)
{
	// Look for a string with the right key in the cache
	std::map<TranslatedString*, CacheData>::iterator it =
		TranslationCache.find(sb->String);

	// See if it actually exists
	if (it == TranslationCache.end())
		return false;

	// Check quickly whether the hashes match
	if (sb->Hash() != (*it).second.hash)
		return false;

	// Check every variable to see whether they're identical
	debug_assert(sb->Variables.size() == (*it).second.vars.size()); // this should always be true
	size_t count = sb->Variables.size();
	for (size_t i = 0; i < count; ++i)
		if (! ( *sb->Variables[i] == *(*it).second.vars[i] ) )
			return false;

	str = (*it).second.output;
	return true;
}

void CLocale::ClearCache()
{
	// Deallocate cached data
	for (std::map<TranslatedString*, CacheData>::iterator it = TranslationCache.begin(); it != TranslationCache.end(); ++it)
		std::for_each((*it).second.vars.begin(), (*it).second.vars.end(), delete_fn<BufferVariable>);

	TranslationCache.clear();
}


bool is_valid_variable_char(wchar_t c)
{
	// c =~ /[a-zA-Z0-9_]/
	// (Hurrah for internationalisation.)
	return
		(c >= 'a' && c <= 'z')
	||	(c >= 'A' && c <= 'Z')
	||	(c >= '0' && c <= '9')
	||	(c == '_');
}

TranslatedString& CLocale::AddDefaultString(const wchar_t* id)
{
	// Parse a string involving $variables and $$ (=$)

	enum ParseState {
		st_default,
		st_afterdollar,
		st_variable
	};
	ParseState state = st_default;

	TranslatedString* str = new TranslatedString;
	str->VarCount = 0;

	std::wstring tempstr;

	for (const wchar_t* ch = id; *ch != '\0'; ++ch)
	{
		switch (state)
		{
		case st_default:
			if (*ch == '$')
			{
				state = st_afterdollar;
			}
			else
			{
				tempstr += *ch;
			}

			break;
		case st_afterdollar:
			if (*ch == '$')
			{
				tempstr += '$';
				state = st_default;
			}
			else
			{
				// Start of a variable name.
				// Push the old string onto the component stack
				if (tempstr.length())
				{
					str->Parts.push_back(new TSComponentString(tempstr.c_str()));
					tempstr.clear();
				}

				// Set the ID (starting at 0) and increment the count
				str->Parts.push_back(new TSComponentVariable(str->VarCount++));

				state = st_variable;
			}
			break;
		case st_variable:
			if (*ch == '$')
			{
				state = st_afterdollar;
			}
			else if (! is_valid_variable_char(*ch))
			{
				state = st_default;
				tempstr = *ch;
			}
			// We don't care about the actual name of the variable, so just ignore it.

			break;
		}
	}
	// Make sure the last string is added to the parts list
	if (tempstr.length())
		str->Parts.push_back(new TSComponentString(tempstr.c_str()));

	Strings[id] = str;

	return *str;
}


CLocale::~CLocale()
{
	// Clean up the list of strings
	for (StringsType::iterator it = Strings.begin(); it != Strings.end(); ++it)
		delete (*it).second;

	ClearCache();
}