Remove SSE detection duplication in Colors and ModelRenderer

- Rename macros to be more explicit
 - Move detection code to a separate file
 - Remove a lot of checks in ARB mode (ModelDef.cpp would check for SSE
multiple times per frame)
 - Make explicit the SSE2 dependency for Windows

Comments by: @vladislavbelov @wraitii @OptimusShepard
Differential Revision: https://code.wildfiregames.com/D3212
This was SVN commit r24489.
This commit is contained in:
Stan 2020-12-31 15:37:28 +00:00
parent e009d322cc
commit d28d17e96c
10 changed files with 143 additions and 66 deletions

View File

@ -210,6 +210,11 @@ function project_set_build_flags()
flags { "MultiProcessorCompile" }
-- Since KB4088875, Windows 7 has a soft requirement for SSE2.
-- Windows 8+ and Firefox ESR52 make it a hard requirement.
-- Finally, since VS2012 it is enabled implicitly when not set.
vectorextensions "SSE2"
-- use native wchar_t type (not typedef to unsigned short)
nativewchar "on"

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2019 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -21,15 +21,15 @@
#include "graphics/SColor.h"
#include "maths/MathUtil.h"
#include "lib/sse.h"
#include "ps/CLogger.h"
#include "ps/CStr.h"
#if HAVE_SSE
# include <xmmintrin.h>
# include "lib/sysdep/arch/x86_x64/x86_x64.h"
#if COMPILER_HAS_SSE
#include <xmmintrin.h>
#endif
static SColor4ub fallback_ConvertRGBColorTo4ub(const RGBColor& src)
static SColor4ub ConvertRGBColorTo4ubFallback(const RGBColor& src)
{
SColor4ub result;
result.R = Clamp(static_cast<int>(src.X * 255), 0, 255);
@ -40,12 +40,12 @@ static SColor4ub fallback_ConvertRGBColorTo4ub(const RGBColor& src)
}
// defaults to the portable fallback; ColorActivateFastImpl() replaces it
// with the SSE version when the host CPU supports it
SColor4ub (*ConvertRGBColorTo4ub)(const RGBColor& src) = fallback_ConvertRGBColorTo4ub;
SColor4ub (*ConvertRGBColorTo4ub)(const RGBColor& src) = ConvertRGBColorTo4ubFallback;
// Assembler-optimized function for color conversion
#if HAVE_SSE
static SColor4ub sse_ConvertRGBColorTo4ub(const RGBColor& src)
#if COMPILER_HAS_SSE
static SColor4ub ConvertRGBColorTo4ubSSE(const RGBColor& src)
{
const __m128 zero = _mm_setzero_ps();
const __m128 _255 = _mm_set_ss(255.0f);
@ -77,10 +77,10 @@ static SColor4ub sse_ConvertRGBColorTo4ub(const RGBColor& src)
void ColorActivateFastImpl()
{
#if HAVE_SSE
if (x86_x64::Cap(x86_x64::CAP_SSE))
#if COMPILER_HAS_SSE
if (HostHasSSE())
{
ConvertRGBColorTo4ub = sse_ConvertRGBColorTo4ub;
ConvertRGBColorTo4ub = ConvertRGBColorTo4ubSSE;
return;
}
#endif

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2019 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -32,8 +32,9 @@ typedef CVector4D RGBAColor;
// one of several implementations depending on CPU caps.
extern SColor4ub (*ConvertRGBColorTo4ub)(const RGBColor& src);
// call once ia32_Init has run; detects CPU caps and activates the best
// possible codepath.
/**
* Detects CPU caps and activates the best possible codepath.
*/
extern void ColorActivateFastImpl();
class CStr8;

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2015 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -23,10 +23,11 @@
#include "ModelDef.h"
#include "graphics/SkeletonAnimDef.h"
#include "lib/sse.h"
#include "ps/FileIo.h"
#include "maths/Vector4D.h"
#if HAVE_SSE
#if COMPILER_HAS_SSE
# include <xmmintrin.h>
#endif
@ -87,7 +88,16 @@ CVector3D CModelDef::SkinNormal(const SModelVertex& vtx,
return result;
}
void CModelDef::SkinPointsAndNormals(
// Dispatch pointer for the combined position/normal skinning routine.
// It starts out null; ModelDefActivateFastImpl() assigns the implementation
// (SSE or fallback) that callers end up invoking through this pointer.
void (*CModelDef::SkinPointsAndNormals)(
	size_t numVertices,
	const VertexArrayIterator<CVector3D>& Position,
	const VertexArrayIterator<CVector3D>& Normal,
	const SModelVertex* vertices,
	const size_t* blendIndices,
	const CMatrix3D newPoseMatrices[]) = nullptr;
static void SkinPointsAndNormalsFallback(
size_t numVertices,
const VertexArrayIterator<CVector3D>& Position,
const VertexArrayIterator<CVector3D>& Normal,
@ -121,8 +131,8 @@ void CModelDef::SkinPointsAndNormals(
}
}
#if HAVE_SSE
void CModelDef::SkinPointsAndNormals_SSE(
#if COMPILER_HAS_SSE
static void SkinPointsAndNormalsSSE(
size_t numVertices,
const VertexArrayIterator<CVector3D>& Position,
const VertexArrayIterator<CVector3D>& Normal,
@ -471,3 +481,15 @@ CModelDefRPrivate* CModelDef::GetRenderData(const void* key) const
return 0;
}
/**
 * Points CModelDef::SkinPointsAndNormals at the implementation to use:
 * the SSE version when the compiler supports SSE and HostHasSSE() reports
 * it at runtime, otherwise the portable fallback.
 */
void ModelDefActivateFastImpl()
{
#if COMPILER_HAS_SSE
	// Compiler can emit SSE; let the runtime CPU check decide.
	const bool useSSE = HostHasSSE();
	CModelDef::SkinPointsAndNormals = useSSE ? SkinPointsAndNormalsSSE : SkinPointsAndNormalsFallback;
#else
	CModelDef::SkinPointsAndNormals = SkinPointsAndNormalsFallback;
#endif
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Wildfire Games.
/* Copyright (C) 2020 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@ -209,7 +209,7 @@ public:
* (This is equivalent to looping over SkinPoint and SkinNormal,
* but slightly more efficient.)
*/
static void SkinPointsAndNormals(
static void(*SkinPointsAndNormals)(
size_t numVertices,
const VertexArrayIterator<CVector3D>& Position,
const VertexArrayIterator<CVector3D>& Normal,
@ -217,19 +217,6 @@ public:
const size_t* blendIndices,
const CMatrix3D newPoseMatrices[]);
#if HAVE_SSE
/**
* SSE-optimised version of SkinPointsAndNormals.
*/
static void SkinPointsAndNormals_SSE(
size_t numVertices,
const VertexArrayIterator<CVector3D>& Position,
const VertexArrayIterator<CVector3D>& Normal,
const SModelVertex* vertices,
const size_t* blendIndices,
const CMatrix3D newPoseMatrices[]);
#endif
/**
* Blend bone matrices together to fill bone palette.
*/
@ -285,5 +272,10 @@ private:
RenderDataMap m_RenderData;
};
/**
* Detects CPU caps and activates the best possible codepath.
*/
extern void ModelDefActivateFastImpl();
#endif

44
source/lib/sse.cpp Normal file
View File

@ -0,0 +1,44 @@
/* Copyright (C) 2020 Wildfire Games.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "precompiled.h"
#include "lib/sse.h"
#if COMPILER_HAS_SSE
#include "lib/code_generation.h"
#include "lib/debug.h"
#include "lib/sysdep/arch.h"
#if ARCH_X86_X64
#include "lib/sysdep/arch/x86_x64/x86_x64.h"
#endif
/**
 * Runtime check for SSE support on the host CPU.
 *
 * @return the result of the x86_x64 CAP_SSE capability query on x86/x64
 * builds; always false on any other architecture.
 */
bool HostHasSSE()
{
#if !ARCH_X86_X64
	// No x86/x64 capability query is compiled in for this architecture.
	return false;
#else
	const bool hasSSE = x86_x64::Cap(x86_x64::CAP_SSE);
	return hasSSE;
#endif
}
#endif

32
source/lib/sse.h Normal file
View File

@ -0,0 +1,32 @@
/* Copyright (C) 2020 Wildfire Games.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INCLUDED_SSE
#define INCLUDED_SSE
#include "lib/sysdep/compiler.h"
#if COMPILER_HAS_SSE
/**
 * Runtime check for SSE support on the host CPU.
 * Only declared when COMPILER_HAS_SSE is set.
 *
 * @return whether the CPU reports the SSE capability; always false on
 * architectures other than x86/x64.
 */
extern bool HostHasSSE();
#endif
#endif // INCLUDED_SSE

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019 Wildfire Games.
/* Copyright (c) 2020 Wildfire Games.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
@ -97,23 +97,23 @@
// Streaming SIMD Extensions (not supported by all GCC)
// this only ascertains compiler support; use x86_x64::Cap to
// check whether the instructions are supported by the CPU.
#ifndef HAVE_SSE
#ifndef COMPILER_HAS_SSE
# if GCC_VERSION && defined(__SSE__)
# define HAVE_SSE 1
# define COMPILER_HAS_SSE 1
# elif MSC_VERSION // also includes ICC
# define HAVE_SSE 1
# define COMPILER_HAS_SSE 1
# else
# define HAVE_SSE 0
# define COMPILER_HAS_SSE 0
# endif
#endif
#ifndef HAVE_SSE2
#ifndef COMPILER_HAS_SSE2
# if GCC_VERSION && defined(__SSE2__)
# define HAVE_SSE2 1
# define COMPILER_HAS_SSE2 1
# elif MSC_VERSION // also includes ICC
# define HAVE_SSE2 1
# define COMPILER_HAS_SSE2 1
# else
# define HAVE_SSE2 0
# define COMPILER_HAS_SSE2 0
# endif
#endif

View File

@ -28,10 +28,12 @@
#include "lib/res/graphics/cursor.h"
#include "graphics/CinemaManager.h"
#include "graphics/Color.h"
#include "graphics/FontMetrics.h"
#include "graphics/GameView.h"
#include "graphics/LightEnv.h"
#include "graphics/MapReader.h"
#include "graphics/ModelDef.h"
#include "graphics/MaterialManager.h"
#include "graphics/TerrainTextureManager.h"
#include "gui/CGUI.h"
@ -611,7 +613,7 @@ static void InitRenderer()
vp.m_Width = g_xres;
vp.m_Height = g_yres;
g_Renderer.SetViewport(vp);
ModelDefActivateFastImpl();
ColorActivateFastImpl();
ModelRenderer::Init();
}

View File

@ -41,23 +41,11 @@
#include "renderer/TimeManager.h"
#include "renderer/WaterManager.h"
#if ARCH_X86_X64
# include "lib/sysdep/arch/x86_x64/x86_x64.h"
#endif
///////////////////////////////////////////////////////////////////////////////////////////////
// ModelRenderer implementation
#if ARCH_X86_X64
static bool g_EnableSSE = false;
#endif
void ModelRenderer::Init()
{
#if ARCH_X86_X64
if (x86_x64::Cap(x86_x64::CAP_SSE))
g_EnableSSE = true;
#endif
}
// Helper function to copy object-space position and normal vectors into arrays.
@ -98,16 +86,7 @@ void ModelRenderer::BuildPositionAndNormals(
return;
}
#if HAVE_SSE
if (g_EnableSSE)
{
CModelDef::SkinPointsAndNormals_SSE(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices());
}
else
#endif
{
CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices());
}
CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices());
}
else
{