1
0
forked from 0ad/0ad

Implement a CFixedVector2D::CompareLengthSquared function to provide a more performant alternative to CompareLength.

Avoids recomputing the square of the compared value on every call.

Differential Revision: https://code.wildfiregames.com/D2058
Reviewed By: Vladislav
This was SVN commit r23489.
This commit is contained in:
elexis 2020-02-09 21:00:43 +00:00
parent 49710ee5e8
commit 0dff6940f7
5 changed files with 89 additions and 61 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2013 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -31,9 +31,15 @@ class CStrW;
#if MSC_VERSION
// i32*i32 -> i64 multiply: MSVC x86 doesn't optimise i64 multiplies automatically, so use the intrinsic
#include <intrin.h>
#define FIXED_MUL_I64_I32_I32(a, b) (__emul((a), (b)))
// Multiplies two 32-bit operands and yields the 64-bit product.
#define MUL_I64_I32_I32(a, b)\
(__emul((a), (b)))
// Squares the internal value of a fixed-point object and yields it as u64.
// The argument expression appears twice in the expansion, so it must not have side effects.
#define SQUARE_U64_FIXED(a)\
static_cast<u64>(__emul((a).GetInternalValue(), (a).GetInternalValue()))
#else
#define FIXED_MUL_I64_I32_I32(a, b) ((i64)(a) * (i64)(b))
// Multiplies two 32-bit operands and yields the 64-bit product.
// The whole expansion is parenthesized so the product stays one operand wherever the macro
// is used (e.g. as the right-hand side of a division).
#define MUL_I64_I32_I32(a, b)\
(static_cast<i64>(a) * static_cast<i64>(b))
// Squares the internal value of a fixed-point object and yields it as u64.
// The argument expression appears twice in the expansion, so it must not have side effects.
#define SQUARE_U64_FIXED(a)\
static_cast<u64>(static_cast<i64>((a).GetInternalValue()) * static_cast<i64>((a).GetInternalValue()))
#endif
//define overflow macros
@ -310,7 +316,7 @@ public:
*/
CFixed Multiply(CFixed n) const
{
i64 t = FIXED_MUL_I64_I32_I32(value, n.value);
i64 t = MUL_I64_I32_I32(value, n.value);
t >>= fract_bits;
CheckCastOverflow(t, T, L"Overflow in CFixed::Multiply(CFixed n)", L"Underflow in CFixed::Multiply(CFixed n)")
@ -330,7 +336,7 @@ public:
*/
CFixed MulDiv(CFixed m, CFixed d) const
{
// Computes (value * m.value) / d.value with a 64-bit intermediate, then narrows back to T.
// No check for d.value == 0 is made here.
// NOTE(review): the two declarations of t below look like the removed and the added form of
// one statement (this view has no diff markers) — confirm before compiling.
i64 t = FIXED_MUL_I64_I32_I32(value, m.value) / (i64)d.value;
i64 t = MUL_I64_I32_I32(value, m.value) / static_cast<i64>(d.value);
// The messages name MulDiv so that a reported range failure points at this function
// rather than at Multiply.
CheckCastOverflow(t, T, L"Overflow in CFixed::MulDiv(CFixed m, CFixed d)", L"Underflow in CFixed::MulDiv(CFixed m, CFixed d)")
return CFixed(static_cast<T>(t));
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2017 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -101,10 +101,8 @@ public:
fixed Length() const
{
// Do intermediate calculations with 64-bit ints to avoid overflows
i32 x = X.GetInternalValue();
i32 y = Y.GetInternalValue();
u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x);
u64 yy = (u64)FIXED_MUL_I64_I32_I32(y, y);
u64 xx = SQUARE_U64_FIXED(X);
u64 yy = SQUARE_U64_FIXED(Y);
u64 d2 = xx + yy;
CheckUnsignedAdditionOverflow(d2, xx, L"Overflow in CFixedVector2D::Length() part 1")
@ -112,7 +110,7 @@ public:
CheckU32CastOverflow(d, i32, L"Overflow in CFixedVector2D::Length() part 2")
fixed r;
r.SetInternalValue((i32)d);
r.SetInternalValue(static_cast<i32>(d));
return r;
}
@ -123,20 +121,33 @@ public:
*/
int CompareLength(fixed cmp) const
{
// Compares squared magnitudes in 64-bit integers: returns -1, 0 or +1 when the sum of the
// squared components is less than, equal to or greater than the square of cmp.
// NOTE(review): this hunk appears to show removed and added lines together (no diff markers
// in this view), which would explain the duplicated d2 declaration and the paired
// conditions below — confirm against the real file.
i32 x = X.GetInternalValue(); // abs(x) <= 2^31
i32 y = Y.GetInternalValue();
u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x); // xx <= 2^62
u64 yy = (u64)FIXED_MUL_I64_I32_I32(y, y);
u64 d2 = xx + yy; // d2 <= 2^63 (no overflow)
u64 d2 = SQUARE_U64_FIXED(X) + SQUARE_U64_FIXED(Y); // d2 <= 2^63 (no overflow)
// Square of the compared value, computed from its internal representation.
u64 cmpSquared = SQUARE_U64_FIXED(cmp);
i32 c = cmp.GetInternalValue();
u64 c2 = (u64)FIXED_MUL_I64_I32_I32(c, c);
// Three-way result: below, above, otherwise equal.
if (d2 < c2)
if (d2 < cmpSquared)
return -1;
else if (d2 > c2)
if (d2 > cmpSquared)
return +1;
else
return 0;
return 0;
}
/**
* Compares the squared length of this vector against a threshold that is already squared,
* so that repeated comparisons against one value do not square it on every call.
* The argument must be the result of a SQUARE_U64_FIXED operation.
* Returns -1, 0 or +1 when the squared length is below, equal to or above cmpSquared.
*/
int CompareLengthSquared(u64 cmpSquared) const
{
// lengthSquared <= 2^63, so the addition cannot overflow a u64
const u64 lengthSquared = SQUARE_U64_FIXED(X) + SQUARE_U64_FIXED(Y);
if (lengthSquared == cmpSquared)
return 0;
return lengthSquared < cmpSquared ? -1 : +1;
}
/**
@ -146,25 +157,21 @@ public:
*/
int CompareLength(const CFixedVector2D& other) const
{
// Compares the squared length of this vector with the squared length of other, using
// 64-bit integers; returns -1, 0 or +1 for shorter, equal or longer.
// NOTE(review): this hunk appears to show removed and added lines together (no diff markers
// in this view), which would explain the duplicated d2/od2 declarations — confirm.
i32 x = X.GetInternalValue();
i32 y = Y.GetInternalValue();
u64 d2 = (u64)FIXED_MUL_I64_I32_I32(x, x) + (u64)FIXED_MUL_I64_I32_I32(y, y);
i32 ox = other.X.GetInternalValue();
i32 oy = other.Y.GetInternalValue();
u64 od2 = (u64)FIXED_MUL_I64_I32_I32(ox, ox) + (u64)FIXED_MUL_I64_I32_I32(oy, oy);
// Same sums expressed through SQUARE_U64_FIXED on the components directly.
u64 d2 = SQUARE_U64_FIXED(X) + SQUARE_U64_FIXED(Y);
u64 od2 = SQUARE_U64_FIXED(other.X) + SQUARE_U64_FIXED(other.Y);
// Three-way result: below, above, otherwise equal.
if (d2 < od2)
return -1;
else if (d2 > od2)
if (d2 > od2)
return +1;
else
return 0;
return 0;
}
bool IsZero() const
{
// True only when both the X and the Y component report IsZero().
// NOTE(review): the two return statements look like the removed and the added form of the
// same line (no diff markers in this view); they differ only in the outer parentheses.
return (X.IsZero() && Y.IsZero());
return X.IsZero() && Y.IsZero();
}
/**
@ -200,15 +207,15 @@ public:
*/
fixed Dot(const CFixedVector2D& v) const
{
// Dot product of this vector with v: the per-component products are formed and summed in
// 64 bits, shifted right by fixed::fract_bits and stored as the internal value of the result.
// NOTE(review): this hunk appears to show removed and added lines together (no diff markers
// in this view), which would explain the duplicated x/y declarations and the two
// SetInternalValue calls — confirm against the real file.
i64 x = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
i64 y = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
i64 x = MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
i64 y = MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
// The addition is checked before it is performed.
CheckSignedAdditionOverflow(i64, x, y, L"Overflow in CFixedVector2D::Dot() part 1", L"Underflow in CFixedVector2D::Dot() part 1")
i64 sum = x + y;
// Presumably rescales the product of two fixed-point values back to a single fixed-point scale.
sum >>= fixed::fract_bits;
// The narrowing to i32 is checked before the cast below.
CheckCastOverflow(sum, i32, L"Overflow in CFixedVector2D::Dot() part 2", L"Underflow in CFixedVector2D::Dot() part 2")
fixed ret;
ret.SetInternalValue((i32)sum);
ret.SetInternalValue(static_cast<i32>(sum));
return ret;
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2013 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -82,12 +82,9 @@ public:
fixed Length() const
{
// Do intermediate calculations with 64-bit ints to avoid overflows
i32 x = X.GetInternalValue();
i32 y = Y.GetInternalValue();
i32 z = Z.GetInternalValue();
u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x);
u64 yy = (u64)FIXED_MUL_I64_I32_I32(y, y);
u64 zz = (u64)FIXED_MUL_I64_I32_I32(z, z);
u64 xx = SQUARE_U64_FIXED(X);
u64 yy = SQUARE_U64_FIXED(Y);
u64 zz = SQUARE_U64_FIXED(Z);
u64 t = xx + yy;
CheckUnsignedAdditionOverflow(t, xx, L"Overflow in CFixedVector3D::Length() part 1")
@ -137,20 +134,20 @@ public:
*/
CFixedVector3D Cross(const CFixedVector3D& v)
{
i64 y_vz = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.Z.GetInternalValue());
i64 z_vy = FIXED_MUL_I64_I32_I32(Z.GetInternalValue(), v.Y.GetInternalValue());
i64 y_vz = MUL_I64_I32_I32(Y.GetInternalValue(), v.Z.GetInternalValue());
i64 z_vy = MUL_I64_I32_I32(Z.GetInternalValue(), v.Y.GetInternalValue());
CheckSignedSubtractionOverflow(i64, y_vz, z_vy, L"Overflow in CFixedVector3D::Cross() part 1", L"Underflow in CFixedVector3D::Cross() part 1")
i64 x = y_vz - z_vy;
x >>= fixed::fract_bits;
i64 z_vx = FIXED_MUL_I64_I32_I32(Z.GetInternalValue(), v.X.GetInternalValue());
i64 x_vz = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.Z.GetInternalValue());
i64 z_vx = MUL_I64_I32_I32(Z.GetInternalValue(), v.X.GetInternalValue());
i64 x_vz = MUL_I64_I32_I32(X.GetInternalValue(), v.Z.GetInternalValue());
CheckSignedSubtractionOverflow(i64, z_vx, x_vz, L"Overflow in CFixedVector3D::Cross() part 2", L"Underflow in CFixedVector3D::Cross() part 2")
i64 y = z_vx - x_vz;
y >>= fixed::fract_bits;
i64 x_vy = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.Y.GetInternalValue());
i64 y_vx = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.X.GetInternalValue());
i64 x_vy = MUL_I64_I32_I32(X.GetInternalValue(), v.Y.GetInternalValue());
i64 y_vx = MUL_I64_I32_I32(Y.GetInternalValue(), v.X.GetInternalValue());
CheckSignedSubtractionOverflow(i64, x_vy, y_vx, L"Overflow in CFixedVector3D::Cross() part 3", L"Underflow in CFixedVector3D::Cross() part 3")
i64 z = x_vy - y_vx;
z >>= fixed::fract_bits;
@ -170,9 +167,9 @@ public:
*/
fixed Dot(const CFixedVector3D& v)
{
i64 x = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
i64 y = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
i64 z = FIXED_MUL_I64_I32_I32(Z.GetInternalValue(), v.Z.GetInternalValue());
i64 x = MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
i64 y = MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
i64 z = MUL_I64_I32_I32(Z.GetInternalValue(), v.Z.GetInternalValue());
CheckSignedAdditionOverflow(i64, x, y, L"Overflow in CFixedVector3D::Dot() part 1", L"Underflow in CFixedVector3D::Dot() part 1")
i64 t = x + y;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2010 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -66,6 +66,26 @@ public:
TS_ASSERT_DELTA(v3.Length().ToDouble(), sqrt(2.0)*large.ToDouble(), 0.01);
}
void test_CompareLength()
{
CFixedVector2D v1(fixed::FromInt(3), fixed::FromInt(4));
TS_ASSERT_EQUALS(v1.CompareLength(fixed::FromInt(4)), 1);
TS_ASSERT_EQUALS(v1.CompareLength(fixed::FromInt(5)), 0);
TS_ASSERT_EQUALS(v1.CompareLength(fixed::FromInt(6)), -1);
CFixedVector2D v2(fixed::FromInt(2), fixed::FromInt(3));
CFixedVector2D v3(fixed::FromInt(4), fixed::FromInt(5));
TS_ASSERT_EQUALS(v1.CompareLength(v2), 1);
TS_ASSERT_EQUALS(v1.CompareLength(v1), 0);
TS_ASSERT_EQUALS(v1.CompareLength(v3), -1);
TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(4.00))), 1);
TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(4.99))), 1);
TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(5.00))), 0);
TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(5.01))), -1);
TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(6.00))), -1);
}
void test_Normalize()
{
CFixedVector2D v0 (fixed::FromInt(0), fixed::FromInt(0));

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2019 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -161,17 +161,15 @@ static inline u16 CalcVisionSharingMask(player_id_t player)
*/
static bool InParabolicRange(CFixedVector3D v, fixed range)
{
i32 x = v.X.GetInternalValue(); // abs(x) <= 2^31
i32 z = v.Z.GetInternalValue();
u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x); // xx <= 2^62
u64 zz = (u64)FIXED_MUL_I64_I32_I32(z, z);
u64 xx = SQUARE_U64_FIXED(v.X); // xx <= 2^62
u64 zz = SQUARE_U64_FIXED(v.Z);
i64 d2 = (xx + zz) >> 1; // d2 <= 2^62 (no overflow)
i32 y = v.Y.GetInternalValue();
i32 c = range.GetInternalValue();
i32 c_2 = c >> 1;
i64 c2 = FIXED_MUL_I64_I32_I32(c_2 - y, c);
i64 c2 = MUL_I64_I32_I32(c_2 - y, c);
if (d2 <= c2)
return true;
@ -1295,7 +1293,7 @@ public:
return r;
// angle = 0 goes in the positive Z direction
entity_pos_t precision = entity_pos_t::FromInt((int)TERRAIN_TILE_SIZE)/8;
u64 precisionSquared = SQUARE_U64_FIXED(entity_pos_t::FromInt(static_cast<int>(TERRAIN_TILE_SIZE)) / 8);
CmpPtr<ICmpWaterManager> cmpWaterManager(GetSystemEntity());
entity_pos_t waterLevel = cmpWaterManager ? cmpWaterManager->GetWaterLevel(pos.X, pos.Z) : entity_pos_t::Zero();
@ -1324,7 +1322,7 @@ public:
}
// Loop until vectors come close enough
while ((maxVector - minVector).CompareLength(precision) > 0)
while ((maxVector - minVector).CompareLengthSquared(precisionSquared) > 0)
{
// difference still bigger than precision, bisect to get smaller difference
entity_pos_t newDistance = (minDistance+maxDistance)/entity_pos_t::FromInt(2);