1
0
forked from 0ad/0ad

Serialize JS strings as UTF-16(ish), to avoid the cost of UTF-8 conversion.

This was SVN commit r7576.
This commit is contained in:
Ykkrosh 2010-05-25 18:07:41 +00:00
parent fd1f864cde
commit a9963dee56
6 changed files with 45 additions and 53 deletions

View File

@ -254,16 +254,13 @@ void CBinarySerializer::ScriptString(const char* name, JSString* string)
jschar* chars = JS_GetStringChars(string);
size_t length = JS_GetStringLength(string);
// Use UTF-8, for storage efficiency
// TODO: Maybe we should have a utf8_from_utf16string
#if BYTE_ORDER != LITTLE_ENDIAN
#error TODO: probably need to convert JS strings to little-endian
#endif
utf16string str16(chars, chars + length);
std::wstring strw(str16.begin(), str16.end());
LibError err;
std::string str8 = utf8_from_wstring(strw, &err);
if (err != INFO::OK)
throw PSERROR_Serialize_InvalidCharInString();
PutString(name, str8);
// Serialize strings directly as UTF-16, to avoid expensive encoding conversions
NumberU32_Unbounded("string length", (uint32_t)length);
RawBytes(name, (const u8*)chars, length*2);
}
u32 CBinarySerializer::GetScriptBackrefTag(JSObject* obj)

View File

@ -126,22 +126,6 @@ void IDeserializer::String(std::wstring& out, uint32_t minlength, uint32_t maxle
throw PSERROR_Deserialize_OutOfBounds();
}
/**
 * Deserialize a length-prefixed UTF-8 byte sequence and store it in 'out'
 * as a utf16string.
 *
 * The stream holds a 32-bit byte count followed by that many bytes.
 * Throws PSERROR_Deserialize_InvalidCharInString if the UTF-8 to wide-string
 * conversion reports anything other than INFO::OK.
 */
void IDeserializer::StringUTF16(utf16string& out)
{
	// Length prefix: number of encoded bytes that follow
	uint32_t numBytes;
	NumberU32_Unbounded(numBytes);

	// Pull the encoded bytes straight into a temporary buffer
	std::string encoded;
	encoded.resize(numBytes); // TODO: should check numBytes <= bytes remaining in stream
	Get((u8*)encoded.data(), numBytes);

	// TODO: Maybe we should have a utf16string_from_utf8
	LibError status;
	std::wstring wide = wstring_from_utf8(encoded, &status);
	if (status != INFO::OK)
		throw PSERROR_Deserialize_InvalidCharInString();

	// Copy each wide character into the output string's element type
	out = utf16string(wide.begin(), wide.end());
}
void IDeserializer::RawBytes(u8* data, size_t len)
{
Get(data, len);

View File

@ -49,7 +49,6 @@ public:
virtual void Bool(bool& out);
virtual void StringASCII(std::string& out, uint32_t minlength, uint32_t maxlength);
virtual void String(std::wstring& out, uint32_t minlength, uint32_t maxlength);
virtual void StringUTF16(utf16string& out);
/// Deserialize a jsval, replacing 'out'
virtual void ScriptVal(jsval& out) = 0;

View File

@ -110,7 +110,7 @@ jsval CStdDeserializer::ReadScriptVal(JSObject* appendParent)
for (uint32_t i = 0; i < numProps; ++i)
{
utf16string propname;
StringUTF16(propname);
ReadStringUTF16(propname);
jsval propval = ReadScriptVal(NULL);
CScriptValRooted propvalRoot(cx, propval);
@ -162,17 +162,26 @@ jsval CStdDeserializer::ReadScriptVal(JSObject* appendParent)
}
}
/**
 * Read a length-prefixed string of raw 16-bit code units into 'str'.
 *
 * The stream holds a 32-bit count of code units followed by two bytes per
 * code unit, which are copied unmodified into the string's storage. No
 * encoding conversion or validation is performed here.
 * NOTE(review): the bytes are presumably little-endian, matching what the
 * serializer writes - confirm against the BYTE_ORDER guards at the call sites.
 */
void CStdDeserializer::ReadStringUTF16(utf16string& str)
{
	uint32_t len;
	NumberU32_Unbounded(len);

	// Compute the byte count in size_t: 'len*2' evaluated as uint32_t wraps
	// around for len >= 0x80000000, which would resize the string to 'len'
	// units but read far fewer bytes than that from the stream.
	const size_t numBytes = (size_t)len * 2;

	str.resize(len); // TODO: should check numBytes <= bytes remaining in stream, before resizing
	Get((u8*)str.data(), numBytes);
}
void CStdDeserializer::ScriptString(JSString*& out)
{
utf16string str;
StringUTF16(str);
ReadStringUTF16(str);
#if BYTE_ORDER != LITTLE_ENDIAN
#error TODO: probably need to convert JS strings from little-endian
#endif
out = JS_NewUCStringCopyN(m_ScriptInterface.GetContext(), (const jschar*)str.data(), str.length());
if (!out)
{
LOGERROR(L"JS_NewUCStringCopyN failed");
throw PSERROR_Deserialize_ScriptError();
}
throw PSERROR_Deserialize_ScriptError("JS_NewUCStringCopyN failed");
}
void CStdDeserializer::ScriptVal(jsval& out)

View File

@ -40,6 +40,7 @@ protected:
private:
jsval ReadScriptVal(JSObject* appendParent);
void ReadStringUTF16(utf16string& str);
virtual void AddScriptBackref(JSObject* obj);
virtual JSObject* GetScriptBackref(u32 tag);

View File

@ -267,30 +267,30 @@ public:
serialize.ScriptVal("script", obj);
TS_ASSERT_STREAM(stream, 100,
TS_ASSERT_STREAM(stream, 115,
"\x03" // SCRIPT_TYPE_OBJECT
"\x02\0\0\0" // num props
"\x01\0\0\0" "x" // "x"
"\x01\0\0\0" "x\0" // "x"
"\x05" // SCRIPT_TYPE_INT
"\x7b\0\0\0" // 123
"\x01\0\0\0" "y" // "y"
"\x01\0\0\0" "y\0" // "y"
"\x02" // SCRIPT_TYPE_ARRAY
"\x08\0\0\0" // num props
"\x01\0\0\0" "0" // "0"
"\x01\0\0\0" "0\0" // "0"
"\x05" "\x01\0\0\0" // SCRIPT_TYPE_INT 1
"\x01\0\0\0" "1" // "1"
"\x01\0\0\0" "1\0" // "1"
"\x06" "\0\0\0\0\0\0\xf8\x3f" // SCRIPT_TYPE_DOUBLE 1.5
"\x01\0\0\0" "2" // "2"
"\x04" "\x01\0\0\0" "2" // SCRIPT_TYPE_STRING "2"
"\x01\0\0\0" "3" // "3"
"\x04" "\x04\0\0\0" "test" // SCRIPT_TYPE_STRING "test"
"\x01\0\0\0" "4" // "4"
"\x01\0\0\0" "2\0" // "2"
"\x04" "\x01\0\0\0" "2\0" // SCRIPT_TYPE_STRING "2"
"\x01\0\0\0" "3\0" // "3"
"\x04" "\x04\0\0\0" "t\0e\0s\0t\0" // SCRIPT_TYPE_STRING "test"
"\x01\0\0\0" "4\0" // "4"
"\x00" // SCRIPT_TYPE_VOID
"\x01\0\0\0" "5" // "5"
"\x01\0\0\0" "5\0" // "5"
"\x01" // SCRIPT_TYPE_NULL
"\x01\0\0\0" "6" // "6"
"\x01\0\0\0" "6\0" // "6"
"\x07" "\x01" // SCRIPT_TYPE_BOOLEAN true
"\x01\0\0\0" "7" // "7"
"\x01\0\0\0" "7\0" // "7"
"\x07" "\x00" // SCRIPT_TYPE_BOOLEAN false
);
@ -347,10 +347,12 @@ public:
"y:\"\\uE000\\uFFFD\""
"})");
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 1", "(\"\\ud7ff\\ud800\")", "..."), PSERROR_Serialize_InvalidCharInString);
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 2", "(\"\\udfff\")", "..."), PSERROR_Serialize_InvalidCharInString);
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 3", "(\"\\uffff\")", "..."), PSERROR_Serialize_InvalidCharInString);
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 4", "(\"\\ud800\\udc00\")" /* U+10000 */, "..."), PSERROR_Serialize_InvalidCharInString);
// Disabled since we no longer do the UTF-8 conversion that rejects invalid characters
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 1", "(\"\\ud7ff\\ud800\")", "..."), PSERROR_Serialize_InvalidCharInString);
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 2", "(\"\\udfff\")", "..."), PSERROR_Serialize_InvalidCharInString);
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 3", "(\"\\uffff\")", "..."), PSERROR_Serialize_InvalidCharInString);
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 4", "(\"\\ud800\\udc00\")" /* U+10000 */, "..."), PSERROR_Serialize_InvalidCharInString);
helper_script_roundtrip("unicode", "\"\\ud800\\uffff\"", "(new String(\"\\uD800\\uFFFF\"))");
}
void TODO_test_script_objects()
@ -369,13 +371,13 @@ public:
{
const char stream[] = "\x02" // SCRIPT_TYPE_ARRAY
"\x04\0\0\0" // num props
"\x01\0\0\0" "0" // "0"
"\x01\0\0\0" "0\0" // "0"
"\x05" "\x00\0\0\xC0" // SCRIPT_TYPE_INT -1073741824 (JS_INT_MIN)
"\x01\0\0\0" "1" // "1"
"\x01\0\0\0" "1\0" // "1"
"\x06" "\0\0\x40\0\0\0\xD0\xC1" // SCRIPT_TYPE_DOUBLE -1073741825 (JS_INT_MIN-1)
"\x01\0\0\0" "2" // "2"
"\x01\0\0\0" "2\0" // "2"
"\x05" "\xFF\xFF\xFF\x3F" // SCRIPT_TYPE_INT 1073741823
"\x01\0\0\0" "3" // "3"
"\x01\0\0\0" "3\0" // "3"
"\x06" "\0\0\0\0\0\0\xD0\x41" // SCRIPT_TYPE_DOUBLE 1073741824
;