Force some byte-order functions to be inline.

Allow aggressive inlining of MD5 buffer updates (particularly for
fixed-size updates).
Make UTF-8 conversion a little faster.

This was SVN commit r7581.
This commit is contained in:
Ykkrosh 2010-05-25 18:47:11 +00:00
parent 5ce9acc263
commit 9090d25ef8
5 changed files with 63 additions and 91 deletions

View File

@ -60,61 +60,6 @@ u64 swap64(const u64 x)
//-----------------------------------------------------------------------------
/// Convert a u16 between little-endian and native byte order.
u16 to_le16(u16 x)
{
#if BYTE_ORDER != BIG_ENDIAN
    // Not a big-endian build: the value is returned unchanged.
    return x;
#else
    // Big-endian build: delegate to swap16.
    return swap16(x);
#endif
}
/// Convert a u32 between little-endian and native byte order.
u32 to_le32(u32 x)
{
#if BYTE_ORDER != BIG_ENDIAN
    // Not a big-endian build: the value is returned unchanged.
    return x;
#else
    // Big-endian build: delegate to swap32.
    return swap32(x);
#endif
}
/// Convert a u64 between little-endian and native byte order.
u64 to_le64(u64 x)
{
#if BYTE_ORDER != BIG_ENDIAN
    // Not a big-endian build: the value is returned unchanged.
    return x;
#else
    // Big-endian build: delegate to swap64.
    return swap64(x);
#endif
}
/// Convert a u16 between big-endian and native byte order.
u16 to_be16(u16 x)
{
#if BYTE_ORDER != BIG_ENDIAN
    // Not a big-endian build: delegate to swap16.
    return swap16(x);
#else
    // Big-endian build: the value is returned unchanged.
    return x;
#endif
}
/// Convert a u32 between big-endian and native byte order.
u32 to_be32(u32 x)
{
#if BYTE_ORDER != BIG_ENDIAN
    // Not a big-endian build: delegate to swap32.
    return swap32(x);
#else
    // Big-endian build: the value is returned unchanged.
    return x;
#endif
}
/// Convert a u64 between big-endian and native byte order.
u64 to_be64(u64 x)
{
#if BYTE_ORDER != BIG_ENDIAN
    // Not a big-endian build: delegate to swap64.
    return swap64(x);
#else
    // Big-endian build: the value is returned unchanged.
    return x;
#endif
}
u16 read_le16(const void* p)
{

View File

@ -72,15 +72,25 @@
#endif
/// Convert a u16 between little-endian and native byte order
/// (the same call is used for both directions).
LIB_API u16 to_le16(u16 x);
LIB_API u32 to_le32(u32 x); ///< u32 counterpart of to_le16
LIB_API u64 to_le64(u64 x); ///< u64 counterpart of to_le16
/// Convert a u16 between big-endian and native byte order
/// (the same call is used for both directions).
LIB_API u16 to_be16(u16 x);
LIB_API u32 to_be32(u32 x); ///< u32 counterpart of to_be16
LIB_API u64 to_be64(u64 x); ///< u64 counterpart of to_be16
// Byte-order conversion macros, chosen at preprocessing time.
// to_leNN converts a little-endian number to/from native byte order;
// to_beNN does the same for a big-endian number. The family that matches
// the build's own byte order expands to its bare argument, the other one
// goes through swapNN.
#if BYTE_ORDER != BIG_ENDIAN
// not big-endian, handled as LITTLE_ENDIAN
# define to_le16(x) (x)
# define to_be16(x) swap16(x)
# define to_le32(x) (x)
# define to_be32(x) swap32(x)
# define to_le64(x) (x)
# define to_be64(x) swap64(x)
#else // BIG_ENDIAN
# define to_le16(x) swap16(x)
# define to_be16(x) (x)
# define to_le32(x) swap32(x)
# define to_be32(x) (x)
# define to_le64(x) swap64(x)
# define to_be64(x) (x)
#endif
/// Read a little-endian number from the memory at p and return it as a u16
/// in native byte order.
LIB_API u16 read_le16(const void* p);

View File

@ -104,15 +104,21 @@ class UTF8Codec
public:
/**
 * Write the UTF-8 encoding of u at dstPos and advance dstPos past it.
 *
 * The sequence length is whatever Size(u) reports. Only 1-, 2- and 3-byte
 * sequences are produced; for any other size nothing is written and dstPos
 * is left untouched. No bounds checking is done here, so the caller has to
 * provide enough room behind dstPos.
 *
 * @param u value to encode.
 * @param dstPos in/out write cursor into the destination buffer.
 */
static void Encode(UTF32 u, UTF8*& dstPos)
{
    switch (Size(u))
    {
    case 1:
        // single byte: the value itself
        *dstPos++ = UTF8(u);
        break;
    case 2:
        // lead byte 110xxxxx, then one continuation byte 10xxxxxx
        *dstPos++ = UTF8((u >> 6) | 0xC0);
        *dstPos++ = UTF8((u | 0x80u) & 0xBFu);
        break;
    case 3:
        // lead byte 1110xxxx, then two continuation bytes 10xxxxxx
        *dstPos++ = UTF8((u >> 12) | 0xE0);
        *dstPos++ = UTF8(((u >> 6) | 0x80u) & 0xBFu);
        *dstPos++ = UTF8((u | 0x80u) & 0xBFu);
        break;
    }
}
// @return decoded scalar, or replacementCharacter on error

View File

@ -39,24 +39,13 @@ void MD5::InitState()
memset(m_Buf, 0xcc, sizeof(m_Buf));
}
void MD5::Update(const u8* data, size_t len)
void MD5::UpdateRest(const u8* data, size_t len)
{
const size_t CHUNK_SIZE = sizeof(m_Buf);
debug_assert(m_BufLen < CHUNK_SIZE);
m_InputLen += len;
// If we have enough space in m_Buf and won't flush, simply append the input
if (m_BufLen + len < CHUNK_SIZE)
{
memcpy(m_Buf + m_BufLen, data, len);
m_BufLen += len;
return;
}
// Add as much data as possible to the buffer
size_t n = CHUNK_SIZE - m_BufLen;
debug_assert(len >= n);
// debug_assert(len >= n);
memcpy(m_Buf + m_BufLen, data, n);
data += n;
len -= n;
@ -103,11 +92,8 @@ void MD5::Final(u8* digest)
InitState();
}
/**
 * Rotate x left by y bit positions.
 *
 * @param x value to rotate.
 * @param y rotation count; must be smaller than the bit width of T
 *        (checked by debug_assert). y == 0 returns x unchanged.
 * @return the rotated value, converted back to T.
 */
template <class T> inline T rotlFixed(T x, unsigned int y)
{
    debug_assert(y < sizeof(T)*8);
    const unsigned int bits = sizeof(T)*8;
    // The complementary shift count is masked so that y == 0 shifts right by 0
    // instead of by the full width, which is undefined for int-sized and wider T.
    // For 1 <= y < bits the mask leaves (bits - y) unchanged.
    return T((x << y) | (x >> ((bits - y) & (bits - 1))));
}
// Rotate the 32-bit value x left by y bits (0 <= y < 32).
// Use macro rather than inline function for significantly better debug-mode performance.
// Note that both arguments are evaluated twice, so pass plain variables or constants.
// The right-shift count is masked so that y == 0 yields x instead of an undefined
// shift by 32; for 1 <= y <= 31 the mask changes nothing.
#define rotlFixed(x, y) (((x) << (y)) | ((x) >> ((32 - (y)) & 31)))
// TODO: Crypto++ has an overload using _lrotl on MSVC - is that worthwhile?
void MD5::Transform(const u32* in)
@ -118,11 +104,12 @@ void MD5::Transform(const u32* in)
#define F4(x, y, z) (y ^ (x | ~z))
// One MD5 step: w = rotl(w + f(x, y, z) + data, s) + x.
// The sum is stored in the temporary t first because rotlFixed is a macro that
// evaluates its first argument twice. A variable named t must be in scope where
// the macro is used. The expansion is two statements, so do not use it as the
// unbraced body of an if/for.
#define MD5STEP(f, w, x, y, z, data, s) \
    t = w + f(x, y, z) + data; w = rotlFixed(t, s) + x
u32* digest = m_Digest;
u32 a, b, c, d;
u32 t;
a = digest[0];
b = digest[1];

View File

@ -18,6 +18,8 @@
#ifndef INCLUDED_MD5
#define INCLUDED_MD5
#include <cstring>
/**
* MD5 hashing algorithm. Note that MD5 is broken and must not be used for
* anything that requires security.
@ -28,10 +30,32 @@ public:
static const size_t DIGESTSIZE = 16;
MD5();
/**
 * Add len bytes starting at data to the input being hashed.
 *
 * Defined in the class body so that the common case (a short, often
 * fixed-length input that fits into the free part of m_Buf) can be inlined;
 * everything else is handed to the out-of-line UpdateRest().
 *
 * @param data pointer to the input bytes.
 * @param len number of bytes to read from data.
 */
void Update(const u8* data, size_t len)
{
    const size_t CHUNK_SIZE = sizeof(m_Buf);
    m_InputLen += len;
    // Fast path: the input fits in m_Buf without filling it completely,
    // so just append it; nothing has to be flushed yet.
    if (m_BufLen + len < CHUNK_SIZE)
    {
        memcpy(m_Buf + m_BufLen, data, len);
        m_BufLen += len;
        return;
    }
    // Fall back to the non-inline function if we have to do more work
    UpdateRest(data, len);
}
void Final(u8* digest);
private:
void InitState();
// Out-of-line part of Update(): called with the same arguments when the new
// input does not fit into the free part of m_Buf.
void UpdateRest(const u8* data, size_t len);
void Transform(const u32* in);
u32 m_Digest[4]; // internal state
u8 m_Buf[64]; // buffered input bytes