Implement a CFixedVector2D::CompareLengthSquared function to provide a more performant alternative to CompareLength.

Avoids recomputing the square of the compared value on every call.
Differential Revision: https://code.wildfiregames.com/D2058
Reviewed By: Vladislav
This was SVN commit r23489.
2020-02-09 21:00:43 +00:00 · 2020-02-09 21:00:43 +00:00 · 0dff6940f7
commit 0dff6940f7
parent 49710ee5e8
5 changed files with 89 additions and 61 deletions
--- a/source/maths/Fixed.h
+++ b/source/maths/Fixed.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2013 Wildfire Games.
+/* Copyright (C) 2020 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
@ -31,9 +31,15 @@ class CStrW;
 #if MSC_VERSION
 // i32*i32 -> i64 multiply: MSVC x86 doesn't optimise i64 multiplies automatically, so use the intrinsic
 #include <intrin.h>
-#define FIXED_MUL_I64_I32_I32(a, b) (__emul((a), (b)))
+#define MUL_I64_I32_I32(a, b)\
+	(__emul((a), (b)))
+#define SQUARE_U64_FIXED(a)\
+	static_cast<u64>(__emul((a).GetInternalValue(), (a).GetInternalValue()))
 #else
-#define FIXED_MUL_I64_I32_I32(a, b) ((i64)(a) * (i64)(b))
+#define MUL_I64_I32_I32(a, b)\
+	(static_cast<i64>(a) * static_cast<i64>(b))
+#define SQUARE_U64_FIXED(a)\
+	static_cast<u64>(static_cast<i64>((a).GetInternalValue()) * static_cast<i64>((a).GetInternalValue()))
 #endif

 //define overflow macros
@ -310,7 +316,7 @@ public:
 	 */
 	CFixed Multiply(CFixed n) const
 	{
-		i64 t = FIXED_MUL_I64_I32_I32(value, n.value);
+		i64 t = MUL_I64_I32_I32(value, n.value);
 		t >>= fract_bits;

 		CheckCastOverflow(t, T, L"Overflow in CFixed::Multiply(CFixed n)", L"Underflow in CFixed::Multiply(CFixed n)")
@ -330,7 +336,7 @@ public:
 	 */
 	CFixed MulDiv(CFixed m, CFixed d) const
 	{
-		i64 t = FIXED_MUL_I64_I32_I32(value, m.value) / (i64)d.value;
+		i64 t = MUL_I64_I32_I32(value, m.value) / static_cast<i64>(d.value);
 		CheckCastOverflow(t, T, L"Overflow in CFixed::Multiply(CFixed n)", L"Underflow in CFixed::Multiply(CFixed n)")
 		return CFixed((T)t);
 	}
--- a/source/maths/FixedVector2D.h
+++ b/source/maths/FixedVector2D.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2017 Wildfire Games.
+/* Copyright (C) 2020 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
@ -101,10 +101,8 @@ public:
 	fixed Length() const
 	{
 		// Do intermediate calculations with 64-bit ints to avoid overflows
-		i32 x = X.GetInternalValue();
-		i32 y = Y.GetInternalValue();
-		u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x);
-		u64 yy = (u64)FIXED_MUL_I64_I32_I32(y, y);
+		u64 xx = SQUARE_U64_FIXED(X);
+		u64 yy = SQUARE_U64_FIXED(Y);
 		u64 d2 = xx + yy;
 		CheckUnsignedAdditionOverflow(d2, xx, L"Overflow in CFixedVector2D::Length() part 1")

@ -112,7 +110,7 @@ public:

 		CheckU32CastOverflow(d, i32, L"Overflow in CFixedVector2D::Length() part 2")
 		fixed r;
-		r.SetInternalValue((i32)d);
+		r.SetInternalValue(static_cast<i32>(d));
 		return r;
 	}

@ -123,20 +121,33 @@ public:
 	 */
 	int CompareLength(fixed cmp) const
 	{
-		i32 x = X.GetInternalValue(); // abs(x) <= 2^31
-		i32 y = Y.GetInternalValue();
-		u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x); // xx <= 2^62
-		u64 yy = (u64)FIXED_MUL_I64_I32_I32(y, y);
-		u64 d2 = xx + yy; // d2 <= 2^63 (no overflow)
+		u64 d2 = SQUARE_U64_FIXED(X) + SQUARE_U64_FIXED(Y); // d2 <= 2^63 (no overflow)
+		u64 cmpSquared = SQUARE_U64_FIXED(cmp);

-		i32 c = cmp.GetInternalValue();
-		u64 c2 = (u64)FIXED_MUL_I64_I32_I32(c, c);
-		if (d2 < c2)
+		if (d2 < cmpSquared)
 			return -1;
-		else if (d2 > c2)
+
+		if (d2 > cmpSquared)
 			return +1;
-		else
-			return 0;
+
+		return 0;
+	}
+
+	/**
+	 * Same as CompareLength(fixed), but takes the already-squared comparison value so it is not squared again on every call.
+	 * The argument must be the result of a SQUARE_U64_FIXED operation.
+	 */
+	int CompareLengthSquared(u64 cmpSquared) const
+	{
+		u64 d2 = SQUARE_U64_FIXED(X) + SQUARE_U64_FIXED(Y); // d2 <= 2^63 (no overflow)
+
+		if (d2 < cmpSquared)
+			return -1;
+
+		if (d2 > cmpSquared)
+			return +1;
+
+		return 0;
 	}

 	/**
@ -146,25 +157,21 @@ public:
 	 */
 	int CompareLength(const CFixedVector2D& other) const
 	{
-		i32 x = X.GetInternalValue();
-		i32 y = Y.GetInternalValue();
-		u64 d2 = (u64)FIXED_MUL_I64_I32_I32(x, x) + (u64)FIXED_MUL_I64_I32_I32(y, y);
-
-		i32 ox = other.X.GetInternalValue();
-		i32 oy = other.Y.GetInternalValue();
-		u64 od2 = (u64)FIXED_MUL_I64_I32_I32(ox, ox) + (u64)FIXED_MUL_I64_I32_I32(oy, oy);
+		u64 d2 = SQUARE_U64_FIXED(X) + SQUARE_U64_FIXED(Y);
+		u64 od2 = SQUARE_U64_FIXED(other.X) + SQUARE_U64_FIXED(other.Y);

 		if (d2 < od2)
 			return -1;
-		else if (d2 > od2)
+
+		if (d2 > od2)
 			return +1;
-		else
-			return 0;
+
+		return 0;
 	}

 	bool IsZero() const
 	{
-		return (X.IsZero() && Y.IsZero());
+		return X.IsZero() && Y.IsZero();
 	}

 	/**
@ -200,15 +207,15 @@ public:
 	 */
 	fixed Dot(const CFixedVector2D& v) const
 	{
-		i64 x = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
-		i64 y = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
+		i64 x = MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
+		i64 y = MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
 		CheckSignedAdditionOverflow(i64, x, y, L"Overflow in CFixedVector2D::Dot() part 1", L"Underflow in CFixedVector2D::Dot() part 1")
 		i64 sum = x + y;
 		sum >>= fixed::fract_bits;

 		CheckCastOverflow(sum, i32, L"Overflow in CFixedVector2D::Dot() part 2", L"Underflow in CFixedVector2D::Dot() part 2")
 		fixed ret;
-		ret.SetInternalValue((i32)sum);
+		ret.SetInternalValue(static_cast<i32>(sum));
 		return ret;
 	}

--- a/source/maths/FixedVector3D.h
+++ b/source/maths/FixedVector3D.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2013 Wildfire Games.
+/* Copyright (C) 2020 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
@ -82,12 +82,9 @@ public:
 	fixed Length() const
 	{
 		// Do intermediate calculations with 64-bit ints to avoid overflows
-		i32 x = X.GetInternalValue();
-		i32 y = Y.GetInternalValue();
-		i32 z = Z.GetInternalValue();
-		u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x);
-		u64 yy = (u64)FIXED_MUL_I64_I32_I32(y, y);
-		u64 zz = (u64)FIXED_MUL_I64_I32_I32(z, z);
+		u64 xx = SQUARE_U64_FIXED(X);
+		u64 yy = SQUARE_U64_FIXED(Y);
+		u64 zz = SQUARE_U64_FIXED(Z);
 		u64 t = xx + yy;
 		CheckUnsignedAdditionOverflow(t, xx, L"Overflow in CFixedVector3D::Length() part 1")

@ -137,20 +134,20 @@ public:
 	 */
 	CFixedVector3D Cross(const CFixedVector3D& v)
 	{
-		i64 y_vz = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.Z.GetInternalValue());
-		i64 z_vy = FIXED_MUL_I64_I32_I32(Z.GetInternalValue(), v.Y.GetInternalValue());
+		i64 y_vz = MUL_I64_I32_I32(Y.GetInternalValue(), v.Z.GetInternalValue());
+		i64 z_vy = MUL_I64_I32_I32(Z.GetInternalValue(), v.Y.GetInternalValue());
 		CheckSignedSubtractionOverflow(i64, y_vz, z_vy, L"Overflow in CFixedVector3D::Cross() part 1", L"Underflow in CFixedVector3D::Cross() part 1")
 		i64 x = y_vz - z_vy;
 		x >>= fixed::fract_bits;

-		i64 z_vx = FIXED_MUL_I64_I32_I32(Z.GetInternalValue(), v.X.GetInternalValue());
-		i64 x_vz = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.Z.GetInternalValue());
+		i64 z_vx = MUL_I64_I32_I32(Z.GetInternalValue(), v.X.GetInternalValue());
+		i64 x_vz = MUL_I64_I32_I32(X.GetInternalValue(), v.Z.GetInternalValue());
 		CheckSignedSubtractionOverflow(i64, z_vx, x_vz, L"Overflow in CFixedVector3D::Cross() part 2", L"Underflow in CFixedVector3D::Cross() part 2")
 		i64 y = z_vx - x_vz;
 		y >>= fixed::fract_bits;

-		i64 x_vy = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.Y.GetInternalValue());
-		i64 y_vx = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.X.GetInternalValue());
+		i64 x_vy = MUL_I64_I32_I32(X.GetInternalValue(), v.Y.GetInternalValue());
+		i64 y_vx = MUL_I64_I32_I32(Y.GetInternalValue(), v.X.GetInternalValue());
 		CheckSignedSubtractionOverflow(i64, x_vy, y_vx, L"Overflow in CFixedVector3D::Cross() part 3", L"Underflow in CFixedVector3D::Cross() part 3")
 		i64 z = x_vy - y_vx;
 		z >>= fixed::fract_bits;
@ -170,9 +167,9 @@ public:
 	 */
 	fixed Dot(const CFixedVector3D& v)
 	{
-		i64 x = FIXED_MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
-		i64 y = FIXED_MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
-		i64 z = FIXED_MUL_I64_I32_I32(Z.GetInternalValue(), v.Z.GetInternalValue());
+		i64 x = MUL_I64_I32_I32(X.GetInternalValue(), v.X.GetInternalValue());
+		i64 y = MUL_I64_I32_I32(Y.GetInternalValue(), v.Y.GetInternalValue());
+		i64 z = MUL_I64_I32_I32(Z.GetInternalValue(), v.Z.GetInternalValue());
 		CheckSignedAdditionOverflow(i64, x, y, L"Overflow in CFixedVector3D::Dot() part 1", L"Underflow in CFixedVector3D::Dot() part 1")
 		i64 t = x + y;

--- a/source/maths/tests/test_FixedVector2D.h
+++ b/source/maths/tests/test_FixedVector2D.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2010 Wildfire Games.
+/* Copyright (C) 2020 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
@ -66,6 +66,26 @@ public:
 		TS_ASSERT_DELTA(v3.Length().ToDouble(), sqrt(2.0)*large.ToDouble(), 0.01);
 	}

+	void test_CompareLength()
+	{
+		CFixedVector2D v1(fixed::FromInt(3), fixed::FromInt(4));
+		TS_ASSERT_EQUALS(v1.CompareLength(fixed::FromInt(4)), 1);
+		TS_ASSERT_EQUALS(v1.CompareLength(fixed::FromInt(5)), 0);
+		TS_ASSERT_EQUALS(v1.CompareLength(fixed::FromInt(6)), -1);
+
+		CFixedVector2D v2(fixed::FromInt(2), fixed::FromInt(3));
+		CFixedVector2D v3(fixed::FromInt(4), fixed::FromInt(5));
+		TS_ASSERT_EQUALS(v1.CompareLength(v2), 1);
+		TS_ASSERT_EQUALS(v1.CompareLength(v1), 0);
+		TS_ASSERT_EQUALS(v1.CompareLength(v3), -1);
+
+		TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(4.00))), 1);
+		TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(4.99))), 1);
+		TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(5.00))), 0);
+		TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(5.01))), -1);
+		TS_ASSERT_EQUALS(v1.CompareLengthSquared(SQUARE_U64_FIXED(fixed::FromDouble(6.00))), -1);
+	}
+
 	void test_Normalize()
 	{
 		CFixedVector2D v0 (fixed::FromInt(0), fixed::FromInt(0));
--- a/source/simulation2/components/CCmpRangeManager.cpp
+++ b/source/simulation2/components/CCmpRangeManager.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2019 Wildfire Games.
+/* Copyright (C) 2020 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
@ -161,17 +161,15 @@ static inline u16 CalcVisionSharingMask(player_id_t player)
 */
 static bool InParabolicRange(CFixedVector3D v, fixed range)
 {
-	i32 x = v.X.GetInternalValue(); // abs(x) <= 2^31
-	i32 z = v.Z.GetInternalValue();
-	u64 xx = (u64)FIXED_MUL_I64_I32_I32(x, x); // xx <= 2^62
-	u64 zz = (u64)FIXED_MUL_I64_I32_I32(z, z);
+	u64 xx = SQUARE_U64_FIXED(v.X); // xx <= 2^62
+	u64 zz = SQUARE_U64_FIXED(v.Z);
 	i64 d2 = (xx + zz) >> 1; // d2 <= 2^62 (no overflow)

 	i32 y = v.Y.GetInternalValue();
 	i32 c = range.GetInternalValue();
 	i32 c_2 = c >> 1;

-	i64 c2 = FIXED_MUL_I64_I32_I32(c_2 - y, c);
+	i64 c2 = MUL_I64_I32_I32(c_2 - y, c);

 	if (d2 <= c2)
 		return true;
@ -1295,7 +1293,7 @@ public:
 			return r;

 		// angle = 0 goes in the positive Z direction
-		entity_pos_t precision = entity_pos_t::FromInt((int)TERRAIN_TILE_SIZE)/8;
+		u64 precisionSquared = SQUARE_U64_FIXED(entity_pos_t::FromInt(static_cast<int>(TERRAIN_TILE_SIZE)) / 8);

 		CmpPtr<ICmpWaterManager> cmpWaterManager(GetSystemEntity());
 		entity_pos_t waterLevel = cmpWaterManager ? cmpWaterManager->GetWaterLevel(pos.X, pos.Z) : entity_pos_t::Zero();
@ -1324,7 +1322,7 @@ public:
 			}

 			// Loop until vectors come close enough
-			while ((maxVector - minVector).CompareLength(precision) > 0)
+			while ((maxVector - minVector).CompareLengthSquared(precisionSquared) > 0)
 			{
 				// difference still bigger than precision, bisect to get smaller difference
 				entity_pos_t newDistance = (minDistance+maxDistance)/entity_pos_t::FromInt(2);