forked from 0ad/0ad
vladislavbelov
b90066a69b
Differential Revision: https://code.wildfiregames.com/D4251 This was SVN commit r25900.
573 lines
12 KiB
C++
573 lines
12 KiB
C++
/* Copyright (C) 2021 Wildfire Games.
|
|
* This file is part of 0 A.D.
|
|
*
|
|
* 0 A.D. is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* 0 A.D. is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "precompiled.h"
|
|
|
|
#ifndef CStr_CPP_FIRST
|
|
#define CStr_CPP_FIRST
|
|
|
|
#include "lib/fnv_hash.h"
|
|
#include "lib/utf8.h"
|
|
#include "lib/byte_order.h"
|
|
#include "network/Serialization.h"
|
|
|
|
#include <cctype>
|
|
#include <cwctype>
|
|
#include <iomanip>
|
|
#include <sstream>
|
|
#include <type_traits>
|
|
|
|
namespace
|
|
{
|
|
// Use a knowingly false expression, as we can't use
|
|
// static_assert(false, ...) directly, because it's an ill-formed program
|
|
// with a value (false) which doesn't depend on any input parameter.
|
|
// We don't use constexpr bool AlwaysFalse = false, because some compilers
|
|
// complain about an unused constant.
|
|
template<typename T>
|
|
struct AlwaysFalse : std::false_type
|
|
{};
|
|
|
|
template<typename StrBase>
|
|
using tstringstream = std::basic_stringstream<typename StrBase::value_type>;
|
|
|
|
template<typename Char>
|
|
bool istspace(const Char chr)
|
|
{
|
|
if constexpr (std::is_same_v<Char, char>)
|
|
return static_cast<bool>(std::isspace(chr));
|
|
else
|
|
return static_cast<bool>(std::iswspace(chr));
|
|
}
|
|
|
|
template<typename Char>
|
|
Char totlower(const Char chr)
|
|
{
|
|
if constexpr (std::is_same_v<Char, char>)
|
|
return std::tolower(chr);
|
|
else
|
|
return std::towlower(chr);
|
|
}
|
|
|
|
template<typename Char>
|
|
Char totupper(const Char chr)
|
|
{
|
|
if constexpr (std::is_same_v<Char, char>)
|
|
return std::toupper(chr);
|
|
else
|
|
return std::towupper(chr);
|
|
}
|
|
|
|
template<typename StrBase>
|
|
u8* SerializeImpl(const StrBase& str, u8* buffer)
|
|
{
|
|
using Char = typename StrBase::value_type;
|
|
ENSURE(buffer);
|
|
if constexpr (std::is_same_v<Char, char>)
|
|
{
|
|
// CStr8 is always serialized to / from ASCII(or whatever 8 - bit codepage stored
|
|
// in the CStr).
|
|
size_t len = str.length();
|
|
Serialize_int_4(buffer, (u32)len);
|
|
size_t i = 0;
|
|
for (i = 0; i < len; i++)
|
|
buffer[i] = str[i];
|
|
return buffer + len;
|
|
}
|
|
else if constexpr (std::is_same_v<Char, wchar_t>)
|
|
{
|
|
// CStrW is always serialized to / from UTF - 16.
|
|
size_t len = str.length();
|
|
size_t i = 0;
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
const u16 bigEndian = to_be16(str[i]);
|
|
*(u16 *)(buffer + i * 2) = bigEndian;
|
|
}
|
|
*(u16 *)(buffer + i * 2) = 0;
|
|
return buffer + len * 2 + 2;
|
|
}
|
|
else
|
|
static_assert(AlwaysFalse<Char>::value, "Not implemented.");
|
|
}
|
|
|
|
template<typename StrBase>
|
|
const u8* DeserializeImpl(const u8* buffer, const u8* bufferend, StrBase& str)
|
|
{
|
|
using Char = typename StrBase::value_type;
|
|
ENSURE(buffer);
|
|
ENSURE(bufferend);
|
|
if constexpr (std::is_same_v<Char, char>)
|
|
{
|
|
u32 len;
|
|
Deserialize_int_4(buffer, len);
|
|
if (buffer + len > bufferend)
|
|
return NULL;
|
|
str = StrBase(buffer, buffer + len);
|
|
return buffer + len;
|
|
}
|
|
else if constexpr (std::is_same_v<Char, wchar_t>)
|
|
{
|
|
const u16 *strend = (const u16 *)buffer;
|
|
while ((const u8 *)strend < bufferend && *strend)
|
|
strend++;
|
|
if ((const u8 *)strend >= bufferend)
|
|
return nullptr;
|
|
|
|
str.resize(strend - (const u16 *)buffer);
|
|
const u16 *ptr = (const u16 *)buffer;
|
|
|
|
typename StrBase::iterator it = str.begin();
|
|
while (ptr < strend)
|
|
{
|
|
const u16 native = to_be16(*(ptr++)); // we want from_be16, but that's the same
|
|
*(it++) = (Char)native;
|
|
}
|
|
|
|
return (const u8 *)(strend + 1);
|
|
}
|
|
else
|
|
static_assert(AlwaysFalse<Char>::value, "Not implemented.");
|
|
}
|
|
|
|
template<typename StrBase>
|
|
size_t GetSerializedLengthImpl(const StrBase& str)
|
|
{
|
|
using Char = typename StrBase::value_type;
|
|
if constexpr (std::is_same_v<Char, char>)
|
|
return str.length() + 4;
|
|
else if constexpr (std::is_same_v<Char, wchar_t>)
|
|
return str.length() * 2 + 2;
|
|
else
|
|
static_assert(AlwaysFalse<Char>::value, "Not implemented.");
|
|
}
|
|
|
|
} // anonymous namespace
|
|
|
|
#define UNIDOUBLER_HEADER "CStr.cpp"
|
|
#include "UniDoubler.h"
|
|
|
|
|
|
// Only include these function definitions in the first instance of CStr.cpp:
|
|
|
|
/**
|
|
* Convert CStr to UTF-8
|
|
*
|
|
* @return CStr8 converted string
|
|
**/
|
|
CStr8 CStrW::ToUTF8() const
|
|
{
|
|
Status err;
|
|
return utf8_from_wstring(*this, &err);
|
|
}
|
|
|
|
/**
|
|
* Convert UTF-8 to CStr
|
|
*
|
|
* @return CStrW converted string
|
|
**/
|
|
CStrW CStr8::FromUTF8() const
|
|
{
|
|
Status err;
|
|
return wstring_from_utf8(*this, &err);
|
|
}
|
|
|
|
#else
|
|
|
|
// The following code is compiled twice, as CStrW then as CStr8:
|
|
|
|
#include "CStr.h"
|
|
|
|
CStr CStr::Repeat(const CStr& str, size_t reps)
|
|
{
|
|
CStr ret;
|
|
ret.reserve(str.length() * reps);
|
|
while (reps--) ret += str;
|
|
return ret;
|
|
}
|
|
|
|
// Construction from numbers:
|
|
|
|
CStr CStr::FromInt(int n)
|
|
{
|
|
tstringstream<StrBase> ss;
|
|
ss << n;
|
|
return ss.str();
|
|
}
|
|
|
|
CStr CStr::FromUInt(unsigned int n)
|
|
{
|
|
tstringstream<StrBase> ss;
|
|
ss << n;
|
|
return ss.str();
|
|
}
|
|
|
|
CStr CStr::FromInt64(i64 n)
|
|
{
|
|
tstringstream<StrBase> ss;
|
|
ss << n;
|
|
return ss.str();
|
|
}
|
|
|
|
CStr CStr::FromDouble(double n)
|
|
{
|
|
tstringstream<StrBase> ss;
|
|
ss << n;
|
|
return ss.str();
|
|
}
|
|
|
|
// Conversion to numbers:
|
|
|
|
int CStr::ToInt() const
|
|
{
|
|
int ret = 0;
|
|
tstringstream<StrBase> str(*this);
|
|
str >> ret;
|
|
return ret;
|
|
}
|
|
|
|
unsigned int CStr::ToUInt() const
|
|
{
|
|
unsigned int ret = 0;
|
|
tstringstream<StrBase> str(*this);
|
|
str >> ret;
|
|
return ret;
|
|
}
|
|
|
|
long CStr::ToLong() const
|
|
{
|
|
long ret = 0;
|
|
tstringstream<StrBase> str(*this);
|
|
str >> ret;
|
|
return ret;
|
|
}
|
|
|
|
unsigned long CStr::ToULong() const
|
|
{
|
|
unsigned long ret = 0;
|
|
tstringstream<StrBase> str(*this);
|
|
str >> ret;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* libc++ and libstd++ differ on how they handle string-to-number parsing for floating-points numbers.
|
|
* See https://trac.wildfiregames.com/ticket/2780#comment:4 for details.
|
|
* To prevent this, only consider [0-9.-+], replace the others in-place with a neutral character.
|
|
*/
|
|
CStr ParseableAsNumber(CStr cleaned_copy)
|
|
{
|
|
for (CStr::Char& c : cleaned_copy)
|
|
if (!std::isdigit(c) && c != '.' && c != '-' && c != '+')
|
|
c = ' ';
|
|
|
|
return cleaned_copy;
|
|
}
|
|
|
|
float CStr::ToFloat() const
|
|
{
|
|
float ret = 0;
|
|
tstringstream<StrBase> str(ParseableAsNumber(*this));
|
|
str >> ret;
|
|
return ret;
|
|
}
|
|
|
|
double CStr::ToDouble() const
|
|
{
|
|
double ret = 0;
|
|
tstringstream<StrBase> str(ParseableAsNumber(*this));
|
|
str >> ret;
|
|
return ret;
|
|
}
|
|
|
|
// Search the string for another string
|
|
long CStr::Find(const CStr& str) const
|
|
{
|
|
size_t pos = find(str, 0);
|
|
|
|
if (pos != npos)
|
|
return static_cast<long>(pos);
|
|
|
|
return -1;
|
|
}
|
|
|
|
// Search the string for another string
|
|
long CStr::Find(const Char chr) const
|
|
{
|
|
size_t pos = find(chr, 0);
|
|
|
|
if (pos != npos)
|
|
return static_cast<long>(pos);
|
|
|
|
return -1;
|
|
}
|
|
|
|
// Search the string for another string
|
|
long CStr::Find(const int start, const Char chr) const
|
|
{
|
|
size_t pos = find(chr, start);
|
|
|
|
if (pos != npos)
|
|
return static_cast<long>(pos);
|
|
|
|
return -1;
|
|
}
|
|
|
|
long CStr::FindInsensitive(const int start, const Char chr) const { return LowerCase().Find(start, totlower(chr)); }
|
|
long CStr::FindInsensitive(const Char chr) const { return LowerCase().Find(totlower(chr)); }
|
|
long CStr::FindInsensitive(const CStr& str) const { return LowerCase().Find(str.LowerCase()); }
|
|
|
|
long CStr::ReverseFind(const CStr& str) const
|
|
{
|
|
size_t pos = rfind(str, length() );
|
|
|
|
if (pos != npos)
|
|
return static_cast<long>(pos);
|
|
|
|
return -1;
|
|
}
|
|
|
|
// Lowercase and uppercase
|
|
CStr CStr::LowerCase() const
|
|
{
|
|
StrBase newStr = *this;
|
|
for (size_t i = 0; i < length(); i++)
|
|
newStr[i] = (Char)totlower((*this)[i]);
|
|
|
|
return newStr;
|
|
}
|
|
|
|
CStr CStr::UpperCase() const
|
|
{
|
|
StrBase newStr = *this;
|
|
for (size_t i = 0; i < length(); i++)
|
|
newStr[i] = (Char)totupper((*this)[i]);
|
|
|
|
return newStr;
|
|
}
|
|
|
|
|
|
// Retrieve the substring of the first n characters
|
|
CStr CStr::Left(size_t len) const
|
|
{
|
|
ENSURE(len <= length());
|
|
return substr(0, len);
|
|
}
|
|
|
|
// Retrieve the substring of the last n characters
|
|
CStr CStr::Right(size_t len) const
|
|
{
|
|
ENSURE(len <= length());
|
|
return substr(length()-len, len);
|
|
}
|
|
|
|
// Retrieve the substring following the last occurrence of Str
|
|
// (or the whole string if it doesn't contain Str)
|
|
CStr CStr::AfterLast(const CStr& str, size_t startPos) const
|
|
{
|
|
size_t pos = rfind(str, startPos);
|
|
if (pos == npos)
|
|
return *this;
|
|
else
|
|
return substr(pos + str.length());
|
|
}
|
|
|
|
// Retrieve the substring preceding the last occurrence of Str
|
|
// (or the whole string if it doesn't contain Str)
|
|
CStr CStr::BeforeLast(const CStr& str, size_t startPos) const
|
|
{
|
|
size_t pos = rfind(str, startPos);
|
|
if (pos == npos)
|
|
return *this;
|
|
else
|
|
return substr(0, pos);
|
|
}
|
|
|
|
// Retrieve the substring following the first occurrence of Str
|
|
// (or the whole string if it doesn't contain Str)
|
|
CStr CStr::AfterFirst(const CStr& str, size_t startPos) const
|
|
{
|
|
size_t pos = find(str, startPos);
|
|
if (pos == npos)
|
|
return *this;
|
|
else
|
|
return substr(pos + str.length());
|
|
}
|
|
|
|
// Retrieve the substring preceding the first occurrence of Str
|
|
// (or the whole string if it doesn't contain Str)
|
|
CStr CStr::BeforeFirst(const CStr& str, size_t startPos) const
|
|
{
|
|
size_t pos = find(str, startPos);
|
|
if (pos == npos)
|
|
return *this;
|
|
else
|
|
return substr(0, pos);
|
|
}
|
|
|
|
// Remove all occurrences of some character or substring
|
|
void CStr::Remove(const CStr& str)
|
|
{
|
|
size_t foundAt = 0;
|
|
while (foundAt != npos)
|
|
{
|
|
foundAt = find(str, 0);
|
|
|
|
if (foundAt != npos)
|
|
erase(foundAt, str.length());
|
|
}
|
|
}
|
|
|
|
// Replace all occurrences of some substring by another
|
|
void CStr::Replace(const CStr& toReplace, const CStr& replaceWith)
|
|
{
|
|
size_t pos = 0;
|
|
while (pos != npos)
|
|
{
|
|
pos = find(toReplace, pos);
|
|
if (pos != npos)
|
|
{
|
|
erase(pos, toReplace.length());
|
|
insert(pos, replaceWith);
|
|
pos += replaceWith.length();
|
|
}
|
|
}
|
|
}
|
|
|
|
std::string CStr::EscapeToPrintableASCII() const
|
|
{
|
|
std::string newStr;
|
|
for (size_t i = 0; i < length(); i++)
|
|
{
|
|
Char ch = (*this)[i];
|
|
|
|
if (ch == '"') newStr += "\\\"";
|
|
else if (ch == '\\') newStr += "\\\\";
|
|
else if (ch == '\b') newStr += "\\b";
|
|
else if (ch == '\f') newStr += "\\f";
|
|
else if (ch == '\n') newStr += "\\n";
|
|
else if (ch == '\r') newStr += "\\r";
|
|
else if (ch == '\t') newStr += "\\t";
|
|
else if (ch >= 32 && ch <= 126)
|
|
newStr += ch;
|
|
else
|
|
{
|
|
std::stringstream ss;
|
|
ss << "\\u" << std::hex << std::setfill('0') << std::setw(4) << (int)(unsigned char)ch;
|
|
newStr += ss.str();
|
|
}
|
|
}
|
|
return newStr;
|
|
}
|
|
|
|
// Returns a trimmed string, removes whitespace from the left/right/both
|
|
CStr CStr::Trim(PS_TRIM_MODE mode) const
|
|
{
|
|
size_t left = 0, right = 0;
|
|
|
|
switch (mode)
|
|
{
|
|
case PS_TRIM_LEFT:
|
|
{
|
|
for (left = 0; left < length(); left++)
|
|
if (istspace((*this)[left]) == false)
|
|
break; // end found, trim 0 to Left-1 inclusive
|
|
} break;
|
|
|
|
case PS_TRIM_RIGHT:
|
|
{
|
|
right = length();
|
|
while (right--)
|
|
if (istspace((*this)[right]) == false)
|
|
break; // end found, trim len-1 to Right+1 inclusive
|
|
} break;
|
|
|
|
case PS_TRIM_BOTH:
|
|
{
|
|
for (left = 0; left < length(); left++)
|
|
if (istspace((*this)[left]) == false)
|
|
break; // end found, trim 0 to Left-1 inclusive
|
|
|
|
right = length();
|
|
while (right--)
|
|
if (istspace((*this)[right]) == false)
|
|
break; // end found, trim len-1 to Right+1 inclusive
|
|
} break;
|
|
|
|
default:
|
|
debug_warn(L"CStr::Trim: invalid Mode");
|
|
}
|
|
|
|
|
|
return substr(left, right - left + 1);
|
|
}
|
|
|
|
CStr CStr::Pad(PS_TRIM_MODE mode, size_t len) const
|
|
{
|
|
size_t left = 0, right = 0;
|
|
|
|
if (len <= length())
|
|
return *this;
|
|
|
|
// From here: Length-length() >= 1
|
|
|
|
switch (mode)
|
|
{
|
|
case PS_TRIM_LEFT:
|
|
left = len - length();
|
|
break;
|
|
|
|
case PS_TRIM_RIGHT:
|
|
right = len - length();
|
|
break;
|
|
|
|
case PS_TRIM_BOTH:
|
|
left = (len - length() + 1) / 2;
|
|
right = (len - length() - 1) / 2; // cannot be negative
|
|
break;
|
|
|
|
default:
|
|
debug_warn(L"CStr::Trim: invalid Mode");
|
|
}
|
|
|
|
return StrBase(left, ' ') + *this + StrBase(right, ' ');
|
|
}
|
|
|
|
size_t CStr::GetHashCode() const
|
|
{
|
|
return (size_t)fnv_hash(data(), length()*sizeof(value_type));
|
|
// janwas 2005-03-18: now use 32-bit version; 64 is slower and
|
|
// the result was truncated down to 32 anyway.
|
|
}
|
|
|
|
u8* CStr::Serialize(u8* buffer) const
|
|
{
|
|
return SerializeImpl(*this, buffer);
|
|
}
|
|
|
|
const u8* CStr::Deserialize(const u8* buffer, const u8* bufferend)
|
|
{
|
|
return DeserializeImpl(buffer, bufferend, *this);
|
|
}
|
|
|
|
size_t CStr::GetSerializedLength() const
|
|
{
|
|
return GetSerializedLengthImpl(*this);
|
|
}
|
|
|
|
#endif // CStr_CPP_FIRST
|