1
0
forked from 0ad/0ad
0ad/source/ps/CStr.cpp

573 lines
12 KiB
C++

/* Copyright (C) 2021 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#include "precompiled.h"
#ifndef CStr_CPP_FIRST
#define CStr_CPP_FIRST
#include "lib/fnv_hash.h"
#include "lib/utf8.h"
#include "lib/byte_order.h"
#include "network/Serialization.h"
#include <cctype>
#include <cwctype>
#include <iomanip>
#include <sstream>
#include <type_traits>
namespace
{
// Type-dependent stand-in for `false`, meant for static_assert in a branch
// that must never be instantiated. A plain static_assert(false, ...) makes
// the program ill-formed, because its condition does not depend on any
// template parameter. A `constexpr bool` variable is deliberately not used,
// because some compilers complain about an unused constant.
template<typename T>
struct AlwaysFalse : std::bool_constant<false>
{
};
// String stream whose character type matches that of the string type StringT.
template<typename StringT>
using tstringstream = std::basic_stringstream<typename StringT::value_type>;
// Returns whether chr is a whitespace character. Dispatches to std::isspace
// for char and to std::iswspace for every other character type.
template<typename Char>
bool istspace(const Char chr)
{
	if constexpr (std::is_same_v<Char, char>)
	{
		// std::isspace has undefined behavior for negative arguments other
		// than EOF, so convert through unsigned char first.
		return std::isspace(static_cast<unsigned char>(chr)) != 0;
	}
	else
		return std::iswspace(static_cast<std::wint_t>(chr)) != 0;
}
// Returns the lowercase form of chr. Dispatches to std::tolower for char and
// to std::towlower for every other character type.
template<typename Char>
Char totlower(const Char chr)
{
	if constexpr (std::is_same_v<Char, char>)
	{
		// std::tolower has undefined behavior for negative arguments other
		// than EOF, so convert through unsigned char first.
		return static_cast<Char>(std::tolower(static_cast<unsigned char>(chr)));
	}
	else
		return static_cast<Char>(std::towlower(static_cast<std::wint_t>(chr)));
}
// Returns the uppercase form of chr. Dispatches to std::toupper for char and
// to std::towupper for every other character type.
template<typename Char>
Char totupper(const Char chr)
{
	if constexpr (std::is_same_v<Char, char>)
	{
		// std::toupper has undefined behavior for negative arguments other
		// than EOF, so convert through unsigned char first.
		return static_cast<Char>(std::toupper(static_cast<unsigned char>(chr)));
	}
	else
		return static_cast<Char>(std::towupper(static_cast<std::wint_t>(chr)));
}
/**
 * Write str into buffer.
 *
 * - char strings: a length written with Serialize_int_4, followed by the raw
 *   bytes (no terminator).
 * - wchar_t strings: every character truncated to 16 bits and stored
 *   big-endian, followed by a 16-bit zero terminator.
 *
 * @param str string to write.
 * @param buffer destination; must not be null. The caller has to provide
 *        enough room (see GetSerializedLengthImpl).
 * @return pointer just past the last byte written.
 */
template<typename StrBase>
u8* SerializeImpl(const StrBase& str, u8* buffer)
{
	using Char = typename StrBase::value_type;

	ENSURE(buffer);
	if constexpr (std::is_same_v<Char, char>)
	{
		// CStr8 is always serialized to / from ASCII (or whatever 8-bit
		// codepage is stored in the CStr).
		const size_t len = str.length();
		const u32 lenPrefix = static_cast<u32>(len);
		// The code below relies on Serialize_int_4 advancing buffer past
		// the length prefix.
		Serialize_int_4(buffer, lenPrefix);
		for (size_t i = 0; i < len; ++i)
			buffer[i] = static_cast<u8>(str[i]);
		return buffer + len;
	}
	else if constexpr (std::is_same_v<Char, wchar_t>)
	{
		// CStrW is always serialized to / from UTF-16, as big-endian 16-bit
		// units. The bytes are stored one at a time because buffer is not
		// guaranteed to be suitably aligned for a u16 store.
		const size_t len = str.length();
		u8* out = buffer;
		for (size_t i = 0; i < len; ++i)
		{
			const u16 unit = static_cast<u16>(str[i]);
			*out++ = static_cast<u8>(unit >> 8);
			*out++ = static_cast<u8>(unit & 0xFF);
		}
		// 16-bit zero terminator.
		*out++ = 0;
		*out++ = 0;
		return out;
	}
	else
		static_assert(AlwaysFalse<Char>::value, "Not implemented.");
}
/**
 * Read a string from [buffer, bufferend) into str.
 *
 * - char strings: a length read with Deserialize_int_4, followed by that
 *   many raw bytes.
 * - wchar_t strings: big-endian 16-bit units up to a 16-bit zero terminator.
 *
 * The input may come from an untrusted source, so every read is checked
 * against bufferend before it happens.
 *
 * @param buffer start of the serialized data; must not be null.
 * @param bufferend one past the last readable byte; must not be null.
 * @param str receives the decoded string on success.
 * @return pointer just past the consumed data, or nullptr if the data is
 *         truncated or malformed.
 */
template<typename StrBase>
const u8* DeserializeImpl(const u8* buffer, const u8* bufferend, StrBase& str)
{
	using Char = typename StrBase::value_type;

	ENSURE(buffer);
	ENSURE(bufferend);
	if (bufferend < buffer)
		return nullptr;
	const size_t available = static_cast<size_t>(bufferend - buffer);

	if constexpr (std::is_same_v<Char, char>)
	{
		// The length prefix itself has to fit before it can be read.
		if (available < 4)
			return nullptr;
		u32 len;
		Deserialize_int_4(buffer, len);
		// Compare sizes instead of pointers, so that a huge len cannot
		// overflow the pointer arithmetic.
		if (len > static_cast<size_t>(bufferend - buffer))
			return nullptr;
		str = StrBase(buffer, buffer + len);
		return buffer + len;
	}
	else if constexpr (std::is_same_v<Char, wchar_t>)
	{
		// Locate the terminator, looking only at complete 2-byte units that
		// lie entirely inside the buffer. Bytes are read one at a time
		// because buffer is not guaranteed to be aligned for a u16 load.
		size_t units = 0;
		for (;;)
		{
			if (available - units * 2 < 2)
				return nullptr; // ran out of data before the terminator
			const u8* unit = buffer + units * 2;
			if (unit[0] == 0 && unit[1] == 0)
				break;
			++units;
		}

		str.resize(units);
		for (size_t i = 0; i < units; ++i)
		{
			const u8* unit = buffer + i * 2;
			// Units are stored big-endian.
			str[i] = static_cast<Char>(static_cast<u16>((unit[0] << 8) | unit[1]));
		}
		// Skip the terminator as well.
		return buffer + (units + 1) * 2;
	}
	else
		static_assert(AlwaysFalse<Char>::value, "Not implemented.");
}
// Number of bytes SerializeImpl writes for str.
template<typename StrBase>
size_t GetSerializedLengthImpl(const StrBase& str)
{
	using Char = typename StrBase::value_type;

	if constexpr (std::is_same_v<Char, wchar_t>)
	{
		// Two bytes per character plus the two-byte terminator.
		return 2 * (str.length() + 1);
	}
	else if constexpr (std::is_same_v<Char, char>)
	{
		// Four-byte length prefix followed by the raw bytes.
		return 4 + str.length();
	}
	else
		static_assert(AlwaysFalse<Char>::value, "Not implemented.");
}
} // anonymous namespace
#define UNIDOUBLER_HEADER "CStr.cpp"
#include "UniDoubler.h"
// Only include these function definitions in the first instance of CStr.cpp:
/**
 * Convert this wide string to UTF-8.
 *
 * @return CStr8 holding the result of utf8_from_wstring.
 **/
CStr8 CStrW::ToUTF8() const
{
	// NOTE(review): the status is never inspected here; presumably passing a
	// non-null pointer changes how the converter reports errors - confirm
	// against lib/utf8.h.
	Status conversionStatus;
	CStr8 utf8 = utf8_from_wstring(*this, &conversionStatus);
	return utf8;
}
/**
 * Interpret this string as UTF-8 and convert it to a wide string.
 *
 * @return CStrW holding the result of wstring_from_utf8.
 **/
CStrW CStr8::FromUTF8() const
{
	// NOTE(review): the status is never inspected here; presumably passing a
	// non-null pointer changes how the converter reports errors - confirm
	// against lib/utf8.h.
	Status conversionStatus;
	CStrW wide = wstring_from_utf8(*this, &conversionStatus);
	return wide;
}
#else
// The following code is compiled twice, as CStrW then as CStr8:
#include "CStr.h"
// Returns str concatenated with itself reps times.
CStr CStr::Repeat(const CStr& str, size_t reps)
{
	CStr repeated;
	// One allocation up front instead of growing on every append.
	repeated.reserve(str.length() * reps);
	for (size_t i = 0; i < reps; ++i)
		repeated += str;
	return repeated;
}
// Construction from numbers:
// Formats n by streaming it into a string stream of the matching character type.
CStr CStr::FromInt(int n)
{
	tstringstream<StrBase> formatter;
	formatter << n;
	return formatter.str();
}
// Formats n by streaming it into a string stream of the matching character type.
CStr CStr::FromUInt(unsigned int n)
{
	tstringstream<StrBase> formatter;
	formatter << n;
	return formatter.str();
}
// Formats n by streaming it into a string stream of the matching character type.
CStr CStr::FromInt64(i64 n)
{
	tstringstream<StrBase> formatter;
	formatter << n;
	return formatter.str();
}
// Formats n by streaming it into a string stream of the matching character
// type, using the stream's default floating-point formatting.
CStr CStr::FromDouble(double n)
{
	tstringstream<StrBase> formatter;
	formatter << n;
	return formatter.str();
}
// Conversion to numbers:
int CStr::ToInt() const
{
int ret = 0;
tstringstream<StrBase> str(*this);
str >> ret;
return ret;
}
unsigned int CStr::ToUInt() const
{
unsigned int ret = 0;
tstringstream<StrBase> str(*this);
str >> ret;
return ret;
}
long CStr::ToLong() const
{
long ret = 0;
tstringstream<StrBase> str(*this);
str >> ret;
return ret;
}
unsigned long CStr::ToULong() const
{
unsigned long ret = 0;
tstringstream<StrBase> str(*this);
str >> ret;
return ret;
}
/**
 * libc++ and libstdc++ differ on how they handle string-to-number parsing for floating-point numbers.
 * See https://trac.wildfiregames.com/ticket/2780#comment:4 for details.
 * To prevent this, only keep [0-9.-+] and replace every other character with a space.
 *
 * @param cleaned_copy string to clean; taken by value, so the caller's string is untouched.
 * @return the cleaned copy.
 */
CStr ParseableAsNumber(CStr cleaned_copy)
{
	for (CStr::Char& c : cleaned_copy)
	{
		// Explicit range check instead of std::isdigit: passing it a negative
		// char, or a wchar_t that does not fit in unsigned char, is undefined
		// behavior.
		const bool isDigit = c >= '0' && c <= '9';
		if (!isDigit && c != '.' && c != '-' && c != '+')
			c = ' ';
	}
	return cleaned_copy;
}
float CStr::ToFloat() const
{
float ret = 0;
tstringstream<StrBase> str(ParseableAsNumber(*this));
str >> ret;
return ret;
}
double CStr::ToDouble() const
{
double ret = 0;
tstringstream<StrBase> str(ParseableAsNumber(*this));
str >> ret;
return ret;
}
// Returns the index of the first occurrence of str, or -1 if there is none.
long CStr::Find(const CStr& str) const
{
	const size_t pos = find(str, 0);
	return pos == npos ? -1 : static_cast<long>(pos);
}
// Returns the index of the first occurrence of the character chr, or -1 if
// there is none.
long CStr::Find(const Char chr) const
{
	const size_t pos = find(chr, 0);
	return pos == npos ? -1 : static_cast<long>(pos);
}
// Returns the index of the first occurrence of the character chr at or after
// position start, or -1 if there is none.
long CStr::Find(const int start, const Char chr) const
{
	const size_t pos = find(chr, start);
	return pos == npos ? -1 : static_cast<long>(pos);
}
long CStr::FindInsensitive(const int start, const Char chr) const { return LowerCase().Find(start, totlower(chr)); }
long CStr::FindInsensitive(const Char chr) const { return LowerCase().Find(totlower(chr)); }
long CStr::FindInsensitive(const CStr& str) const { return LowerCase().Find(str.LowerCase()); }
// Returns the index of the last occurrence of str, or -1 if there is none.
long CStr::ReverseFind(const CStr& str) const
{
	const size_t pos = rfind(str, length());
	return pos == npos ? -1 : static_cast<long>(pos);
}
// Lowercase and uppercase
// Returns a copy of this string with every character passed through totlower.
CStr CStr::LowerCase() const
{
	StrBase lowered = *this;
	for (Char& c : lowered)
		c = totlower(c);
	return lowered;
}
// Returns a copy of this string with every character passed through totupper.
CStr CStr::UpperCase() const
{
	StrBase raised = *this;
	for (Char& c : raised)
		c = totupper(c);
	return raised;
}
// Retrieve the substring made of the first len characters.
// len must not exceed length(); this is checked with ENSURE.
CStr CStr::Left(size_t len) const
{
ENSURE(len <= length());
return substr(0, len);
}
// Retrieve the substring made of the last len characters.
// len must not exceed length(); this is checked with ENSURE.
CStr CStr::Right(size_t len) const
{
	ENSURE(len <= length());
	const size_t start = length() - len;
	return substr(start, len);
}
// Returns everything after the last occurrence of str, where the backwards
// search starts at startPos. If str does not occur, the whole string is
// returned.
CStr CStr::AfterLast(const CStr& str, size_t startPos) const
{
	const size_t pos = rfind(str, startPos);
	if (pos != npos)
		return substr(pos + str.length());
	return *this;
}
// Returns everything before the last occurrence of str, where the backwards
// search starts at startPos. If str does not occur, the whole string is
// returned.
CStr CStr::BeforeLast(const CStr& str, size_t startPos) const
{
	const size_t pos = rfind(str, startPos);
	if (pos != npos)
		return substr(0, pos);
	return *this;
}
// Returns everything after the first occurrence of str found at or after
// startPos. If str does not occur, the whole string is returned.
CStr CStr::AfterFirst(const CStr& str, size_t startPos) const
{
	const size_t pos = find(str, startPos);
	if (pos != npos)
		return substr(pos + str.length());
	return *this;
}
// Returns everything before the first occurrence of str found at or after
// startPos. If str does not occur, the whole string is returned.
CStr CStr::BeforeFirst(const CStr& str, size_t startPos) const
{
	const size_t pos = find(str, startPos);
	if (pos != npos)
		return substr(0, pos);
	return *this;
}
// Remove all occurrences of some character or substring
void CStr::Remove(const CStr& str)
{
size_t foundAt = 0;
while (foundAt != npos)
{
foundAt = find(str, 0);
if (foundAt != npos)
erase(foundAt, str.length());
}
}
// Replace all occurrences of some substring by another
void CStr::Replace(const CStr& toReplace, const CStr& replaceWith)
{
size_t pos = 0;
while (pos != npos)
{
pos = find(toReplace, pos);
if (pos != npos)
{
erase(pos, toReplace.length());
insert(pos, replaceWith);
pos += replaceWith.length();
}
}
}
std::string CStr::EscapeToPrintableASCII() const
{
std::string newStr;
for (size_t i = 0; i < length(); i++)
{
Char ch = (*this)[i];
if (ch == '"') newStr += "\\\"";
else if (ch == '\\') newStr += "\\\\";
else if (ch == '\b') newStr += "\\b";
else if (ch == '\f') newStr += "\\f";
else if (ch == '\n') newStr += "\\n";
else if (ch == '\r') newStr += "\\r";
else if (ch == '\t') newStr += "\\t";
else if (ch >= 32 && ch <= 126)
newStr += ch;
else
{
std::stringstream ss;
ss << "\\u" << std::hex << std::setfill('0') << std::setw(4) << (int)(unsigned char)ch;
newStr += ss.str();
}
}
return newStr;
}
// Returns a copy of this string with whitespace removed from the left
// (PS_TRIM_LEFT), from the right (PS_TRIM_RIGHT) or from both ends
// (PS_TRIM_BOTH). Any other mode triggers a debug warning and returns the
// string unchanged.
CStr CStr::Trim(PS_TRIM_MODE mode) const
{
	bool trimLeft = false;
	bool trimRight = false;
	switch (mode)
	{
	case PS_TRIM_LEFT:
		trimLeft = true;
		break;
	case PS_TRIM_RIGHT:
		trimRight = true;
		break;
	case PS_TRIM_BOTH:
		trimLeft = true;
		trimRight = true;
		break;
	default:
		debug_warn(L"CStr::Trim: invalid Mode");
	}

	// Half-open range [first, last) of the characters to keep. Starting from
	// the whole string makes every mode, including PS_TRIM_LEFT, end up with
	// a correct right boundary.
	size_t first = 0;
	size_t last = length();
	if (trimLeft)
	{
		while (first < last && istspace((*this)[first]))
			++first;
	}
	if (trimRight)
	{
		while (last > first && istspace((*this)[last - 1]))
			--last;
	}
	return substr(first, last - first);
}
// Returns a copy of this string padded with spaces to a total of len
// characters. PS_TRIM_LEFT adds the spaces on the left, PS_TRIM_RIGHT on the
// right and PS_TRIM_BOTH on both sides, the left side getting the extra
// space if the amount is odd. Strings that already have len or more
// characters are returned unchanged. Any other mode triggers a debug warning
// and returns the string unchanged.
CStr CStr::Pad(PS_TRIM_MODE mode, size_t len) const
{
	if (len <= length())
		return *this;

	// From here on: missing >= 1.
	const size_t missing = len - length();
	size_t left = 0;
	size_t right = 0;
	switch (mode)
	{
	case PS_TRIM_LEFT:
		left = missing;
		break;
	case PS_TRIM_RIGHT:
		right = missing;
		break;
	case PS_TRIM_BOTH:
		// Split so that left + right == missing for even and odd amounts.
		right = missing / 2;
		left = missing - right;
		break;
	default:
		debug_warn(L"CStr::Pad: invalid Mode");
	}
	return StrBase(left, ' ') + *this + StrBase(right, ' ');
}
// Hashes the raw character data of this string with fnv_hash.
size_t CStr::GetHashCode() const
{
	// janwas 2005-03-18: now use 32-bit version; 64 is slower and
	// the result was truncated down to 32 anyway.
	const size_t byteCount = length() * sizeof(value_type);
	return static_cast<size_t>(fnv_hash(data(), byteCount));
}
// Writes this string to buffer by forwarding to SerializeImpl and returns
// the pointer that SerializeImpl returns.
u8* CStr::Serialize(u8* buffer) const
{
return SerializeImpl(*this, buffer);
}
// Replaces this string with the one read from [buffer, bufferend) by
// forwarding to DeserializeImpl, and returns the pointer that
// DeserializeImpl returns.
const u8* CStr::Deserialize(const u8* buffer, const u8* bufferend)
{
return DeserializeImpl(buffer, bufferend, *this);
}
// Returns the value computed by GetSerializedLengthImpl for this string.
size_t CStr::GetSerializedLength() const
{
return GetSerializedLengthImpl(*this);
}
#endif // CStr_CPP_FIRST