Serialize JS strings as UTF-16(ish), to avoid the cost of UTF-8 conversion.
This was SVN commit r7576.
This commit is contained in:
parent
fd1f864cde
commit
a9963dee56
@ -254,16 +254,13 @@ void CBinarySerializer::ScriptString(const char* name, JSString* string)
|
||||
jschar* chars = JS_GetStringChars(string);
|
||||
size_t length = JS_GetStringLength(string);
|
||||
|
||||
// Use UTF-8, for storage efficiency
|
||||
// TODO: Maybe we should have a utf8_from_utf16string
|
||||
#if BYTE_ORDER != LITTLE_ENDIAN
|
||||
#error TODO: probably need to convert JS strings to little-endian
|
||||
#endif
|
||||
|
||||
utf16string str16(chars, chars + length);
|
||||
std::wstring strw(str16.begin(), str16.end());
|
||||
LibError err;
|
||||
std::string str8 = utf8_from_wstring(strw, &err);
|
||||
if (err != INFO::OK)
|
||||
throw PSERROR_Serialize_InvalidCharInString();
|
||||
PutString(name, str8);
|
||||
// Serialize strings directly as UTF-16, to avoid expensive encoding conversions
|
||||
NumberU32_Unbounded("string length", (uint32_t)length);
|
||||
RawBytes(name, (const u8*)chars, length*2);
|
||||
}
|
||||
|
||||
u32 CBinarySerializer::GetScriptBackrefTag(JSObject* obj)
|
||||
|
@ -126,22 +126,6 @@ void IDeserializer::String(std::wstring& out, uint32_t minlength, uint32_t maxle
|
||||
throw PSERROR_Deserialize_OutOfBounds();
|
||||
}
|
||||
|
||||
// Reads a string stored as a 32-bit byte count followed by that many bytes of
// UTF-8, decodes it, and stores the result in 'out' as a utf16string.
// Throws PSERROR_Deserialize_InvalidCharInString if the UTF-8 decoder
// reports anything other than INFO::OK.
void IDeserializer::StringUTF16(utf16string& out)
{
	// The encoded payload is prefixed with its size in bytes
	uint32_t numBytes;
	NumberU32_Unbounded(numBytes);

	std::string utf8;
	utf8.resize(numBytes); // TODO: should check len <= bytes remaining in stream
	Get((u8*)utf8.data(), numBytes);

	// TODO: Maybe we should have a utf16string_from_utf8
	LibError status;
	std::wstring wide = wstring_from_utf8(utf8, &status);
	if (status != INFO::OK)
	{
		throw PSERROR_Deserialize_InvalidCharInString();
	}

	// Copy element by element from the wide string into the UTF-16 string type
	out = utf16string(wide.begin(), wide.end());
}
|
||||
|
||||
void IDeserializer::RawBytes(u8* data, size_t len)
|
||||
{
|
||||
Get(data, len);
|
||||
|
@ -49,7 +49,6 @@ public:
|
||||
virtual void Bool(bool& out);
|
||||
virtual void StringASCII(std::string& out, uint32_t minlength, uint32_t maxlength);
|
||||
virtual void String(std::wstring& out, uint32_t minlength, uint32_t maxlength);
|
||||
virtual void StringUTF16(utf16string& out);
|
||||
|
||||
/// Deserialize a jsval, replacing 'out'
|
||||
virtual void ScriptVal(jsval& out) = 0;
|
||||
|
@ -110,7 +110,7 @@ jsval CStdDeserializer::ReadScriptVal(JSObject* appendParent)
|
||||
for (uint32_t i = 0; i < numProps; ++i)
|
||||
{
|
||||
utf16string propname;
|
||||
StringUTF16(propname);
|
||||
ReadStringUTF16(propname);
|
||||
|
||||
jsval propval = ReadScriptVal(NULL);
|
||||
CScriptValRooted propvalRoot(cx, propval);
|
||||
@ -162,17 +162,26 @@ jsval CStdDeserializer::ReadScriptVal(JSObject* appendParent)
|
||||
}
|
||||
}
|
||||
|
||||
// Reads a string stored as a 32-bit count of UTF-16 code units followed by
// that many 2-byte code units. The bytes are copied into 'str' verbatim;
// no encoding conversion or validation is performed here.
void CStdDeserializer::ReadStringUTF16(utf16string& str)
{
	uint32_t len;
	NumberU32_Unbounded(len);
	str.resize(len); // TODO: should check len*2 <= bytes remaining in stream, before resizing

	// Widen before multiplying: 'len*2' in 32-bit arithmetic wraps around for
	// len >= 2^31, which would request fewer bytes than the buffer was sized
	// for and leave the stream position out of step with the stored length.
	const size_t numBytes = (size_t)len * 2;
	Get((u8*)str.data(), numBytes);
}
|
||||
|
||||
// Deserializes a JS string: reads the raw UTF-16 code units from the stream
// and copies them into a newly created JSString, returned through 'out'.
// Throws PSERROR_Deserialize_ScriptError if JS_NewUCStringCopyN returns null.
void CStdDeserializer::ScriptString(JSString*& out)
{
	// The string is read exactly once, as raw UTF-16 (length prefix + code units).
	// Reading it a second time through another decoder would consume the data
	// that follows it in the stream.
	utf16string str;
	ReadStringUTF16(str);

#if BYTE_ORDER != LITTLE_ENDIAN
#error TODO: probably need to convert JS strings from little-endian
#endif

	out = JS_NewUCStringCopyN(m_ScriptInterface.GetContext(), (const jschar*)str.data(), str.length());

	// Only a failed string creation is an error; success must return normally
	if (!out)
		throw PSERROR_Deserialize_ScriptError("JS_NewUCStringCopyN failed");
}
|
||||
|
||||
void CStdDeserializer::ScriptVal(jsval& out)
|
||||
|
@ -40,6 +40,7 @@ protected:
|
||||
|
||||
private:
|
||||
jsval ReadScriptVal(JSObject* appendParent);
|
||||
void ReadStringUTF16(utf16string& str);
|
||||
|
||||
virtual void AddScriptBackref(JSObject* obj);
|
||||
virtual JSObject* GetScriptBackref(u32 tag);
|
||||
|
@ -267,30 +267,30 @@ public:
|
||||
|
||||
serialize.ScriptVal("script", obj);
|
||||
|
||||
TS_ASSERT_STREAM(stream, 100,
|
||||
TS_ASSERT_STREAM(stream, 115,
|
||||
"\x03" // SCRIPT_TYPE_OBJECT
|
||||
"\x02\0\0\0" // num props
|
||||
"\x01\0\0\0" "x" // "x"
|
||||
"\x01\0\0\0" "x\0" // "x"
|
||||
"\x05" // SCRIPT_TYPE_INT
|
||||
"\x7b\0\0\0" // 123
|
||||
"\x01\0\0\0" "y" // "y"
|
||||
"\x01\0\0\0" "y\0" // "y"
|
||||
"\x02" // SCRIPT_TYPE_ARRAY
|
||||
"\x08\0\0\0" // num props
|
||||
"\x01\0\0\0" "0" // "0"
|
||||
"\x01\0\0\0" "0\0" // "0"
|
||||
"\x05" "\x01\0\0\0" // SCRIPT_TYPE_INT 1
|
||||
"\x01\0\0\0" "1" // "1"
|
||||
"\x01\0\0\0" "1\0" // "1"
|
||||
"\x06" "\0\0\0\0\0\0\xf8\x3f" // SCRIPT_TYPE_DOUBLE 1.5
|
||||
"\x01\0\0\0" "2" // "2"
|
||||
"\x04" "\x01\0\0\0" "2" // SCRIPT_TYPE_STRING "2"
|
||||
"\x01\0\0\0" "3" // "3"
|
||||
"\x04" "\x04\0\0\0" "test" // SCRIPT_TYPE_STRING "test"
|
||||
"\x01\0\0\0" "4" // "4"
|
||||
"\x01\0\0\0" "2\0" // "2"
|
||||
"\x04" "\x01\0\0\0" "2\0" // SCRIPT_TYPE_STRING "2"
|
||||
"\x01\0\0\0" "3\0" // "3"
|
||||
"\x04" "\x04\0\0\0" "t\0e\0s\0t\0" // SCRIPT_TYPE_STRING "test"
|
||||
"\x01\0\0\0" "4\0" // "4"
|
||||
"\x00" // SCRIPT_TYPE_VOID
|
||||
"\x01\0\0\0" "5" // "5"
|
||||
"\x01\0\0\0" "5\0" // "5"
|
||||
"\x01" // SCRIPT_TYPE_NULL
|
||||
"\x01\0\0\0" "6" // "6"
|
||||
"\x01\0\0\0" "6\0" // "6"
|
||||
"\x07" "\x01" // SCRIPT_TYPE_BOOLEAN true
|
||||
"\x01\0\0\0" "7" // "7"
|
||||
"\x01\0\0\0" "7\0" // "7"
|
||||
"\x07" "\x00" // SCRIPT_TYPE_BOOLEAN false
|
||||
);
|
||||
|
||||
@ -347,10 +347,12 @@ public:
|
||||
"y:\"\\uE000\\uFFFD\""
|
||||
"})");
|
||||
|
||||
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 1", "(\"\\ud7ff\\ud800\")", "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 2", "(\"\\udfff\")", "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 3", "(\"\\uffff\")", "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 4", "(\"\\ud800\\udc00\")" /* U+10000 */, "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
// Disabled since we no longer do the UTF-8 conversion that rejects invalid characters
|
||||
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 1", "(\"\\ud7ff\\ud800\")", "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 2", "(\"\\udfff\")", "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 3", "(\"\\uffff\")", "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
// TS_ASSERT_THROWS(helper_script_roundtrip("invalid chars 4", "(\"\\ud800\\udc00\")" /* U+10000 */, "..."), PSERROR_Serialize_InvalidCharInString);
|
||||
helper_script_roundtrip("unicode", "\"\\ud800\\uffff\"", "(new String(\"\\uD800\\uFFFF\"))");
|
||||
}
|
||||
|
||||
void TODO_test_script_objects()
|
||||
@ -369,13 +371,13 @@ public:
|
||||
{
|
||||
const char stream[] = "\x02" // SCRIPT_TYPE_ARRAY
|
||||
"\x04\0\0\0" // num props
|
||||
"\x01\0\0\0" "0" // "0"
|
||||
"\x01\0\0\0" "0\0" // "0"
|
||||
"\x05" "\x00\0\0\xC0" // SCRIPT_TYPE_INT -1073741824 (JS_INT_MIN)
|
||||
"\x01\0\0\0" "1" // "1"
|
||||
"\x01\0\0\0" "1\0" // "1"
|
||||
"\x06" "\0\0\x40\0\0\0\xD0\xC1" // SCRIPT_TYPE_DOUBLE -1073741825 (JS_INT_MIN-1)
|
||||
"\x01\0\0\0" "2" // "2"
|
||||
"\x01\0\0\0" "2\0" // "2"
|
||||
"\x05" "\xFF\xFF\xFF\x3F" // SCRIPT_TYPE_INT 1073741823
|
||||
"\x01\0\0\0" "3" // "3"
|
||||
"\x01\0\0\0" "3\0" // "3"
|
||||
"\x06" "\0\0\0\0\0\0\xD0\x41" // SCRIPT_TYPE_DOUBLE 1073741824
|
||||
;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user