From 3916c25b840ccc66071dfa95b5a52d460a3ceed3 Mon Sep 17 00:00:00 2001 From: Ykkrosh Date: Wed, 9 Nov 2011 23:11:28 +0000 Subject: [PATCH] Optimise vertex skinning code with SSE, based on patch by gruby. Fixes #905. This was SVN commit r10499. --- source/graphics/Model.cpp | 5 +- source/graphics/Model.h | 3 +- source/graphics/ModelDef.cpp | 103 +++++++++++++++++- source/graphics/ModelDef.h | 15 ++- source/ps/GameSetup/GameSetup.cpp | 2 + .../renderer/FixedFunctionModelRenderer.cpp | 46 ++++++-- source/renderer/HWLightingModelRenderer.cpp | 58 +++++++--- source/renderer/ModelRenderer.cpp | 21 +++- source/renderer/ModelRenderer.h | 11 +- source/renderer/TransparencyRenderer.cpp | 46 ++++++-- source/renderer/VertexArray.cpp | 7 +- source/renderer/VertexArray.h | 12 +- 12 files changed, 276 insertions(+), 53 deletions(-) diff --git a/source/graphics/Model.cpp b/source/graphics/Model.cpp index 704e2b27ba..aa24a19bb3 100644 --- a/source/graphics/Model.cpp +++ b/source/graphics/Model.cpp @@ -32,6 +32,7 @@ #include "ObjectEntry.h" #include "lib/res/graphics/ogl_tex.h" #include "lib/res/h_mgr.h" +#include "lib/sysdep/rtl.h" #include "ps/Profile.h" #include "ps/CLogger.h" @@ -57,7 +58,7 @@ CModel::~CModel() // ReleaseData: delete anything allocated by the model void CModel::ReleaseData() { - delete[] m_BoneMatrices; + rtl_FreeAligned(m_BoneMatrices); delete[] m_InverseBindBoneMatrices; for (size_t i = 0; i < m_Props.size(); ++i) @@ -84,7 +85,7 @@ bool CModel::InitModel(const CModelDefPtr& modeldef) size_t numBlends = modeldef->GetNumBlends(); // allocate matrices for bone transformations - m_BoneMatrices = new CMatrix3D[numBones + numBlends]; + m_BoneMatrices = (CMatrix3D*)rtl_AllocateAligned(sizeof(CMatrix3D) * (numBones + numBlends), 16); for (size_t i = 0; i < numBones + numBlends; ++i) { m_BoneMatrices[i].SetIdentity(); diff --git a/source/graphics/Model.h b/source/graphics/Model.h index 84efeb65cb..202378f04d 100644 --- a/source/graphics/Model.h +++ 
b/source/graphics/Model.h @@ -154,11 +154,12 @@ public: */ bool IsSkinned() { return (m_BoneMatrices != NULL); } - // return the models bone matrices + // return the models bone matrices; 16-byte aligned for SSE reads const CMatrix3D* GetAnimatedBoneMatrices() { ENSURE(m_PositionValid); return m_BoneMatrices; } + const CMatrix3D* GetInverseBindBoneMatrices() { return m_InverseBindBoneMatrices; } diff --git a/source/graphics/ModelDef.cpp b/source/graphics/ModelDef.cpp index 2afd09cc8c..d528695170 100644 --- a/source/graphics/ModelDef.cpp +++ b/source/graphics/ModelDef.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2010 Wildfire Games. +/* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -26,6 +26,10 @@ #include "ps/FileIo.h" #include "maths/Vector4D.h" +#if ARCH_X86_X64 +# include +#endif + CVector3D CModelDef::SkinPoint(const SModelVertex& vtx, const CMatrix3D newPoseMatrices[]) { @@ -91,12 +95,18 @@ void CModelDef::SkinPointsAndNormals( const size_t* blendIndices, const CMatrix3D newPoseMatrices[]) { + // To avoid some performance overhead, get the raw vertex array pointers + char* PositionData = Position.GetData(); + size_t PositionStride = Position.GetStride(); + char* NormalData = Normal.GetData(); + size_t NormalStride = Normal.GetStride(); + for (size_t j = 0; j < numVertices; ++j) { const SModelVertex& vtx = vertices[j]; - Position[j] = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); - Normal[j] = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); + CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); + CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); // If there was more than one influence, the result is probably not going // to be of unit length (since it's a weighted sum of several independent @@ -104,10 +114,95 @@ void CModelDef::SkinPointsAndNormals( // (It's fairly common to only have one influence, so it seems sensible to 
// optimise that case a bit.) if (vtx.m_Blend.m_Bone[1] != 0xff) // if more than one influence - Normal[j].Normalize(); + norm.Normalize(); + + memcpy(PositionData + PositionStride*j, &pos.X, 3*sizeof(float)); + memcpy(NormalData + NormalStride*j, &norm.X, 3*sizeof(float)); } } +#if ARCH_X86_X64 +void CModelDef::SkinPointsAndNormals_SSE( + size_t numVertices, + const VertexArrayIterator& Position, + const VertexArrayIterator& Normal, + const SModelVertex* vertices, + const size_t* blendIndices, + const CMatrix3D newPoseMatrices[]) +{ + // To avoid some performance overhead, get the raw vertex array pointers + char* PositionData = Position.GetData(); + size_t PositionStride = Position.GetStride(); + char* NormalData = Normal.GetData(); + size_t NormalStride = Normal.GetStride(); + + // Must be aligned correctly for SSE + ASSERT((intptr_t)newPoseMatrices % 16 == 0); + ASSERT((intptr_t)PositionData % 16 == 0); + ASSERT((intptr_t)PositionStride % 16 == 0); + ASSERT((intptr_t)NormalData % 16 == 0); + ASSERT((intptr_t)NormalStride % 16 == 0); + + __m128 col0, col1, col2, col3, vec0, vec1, vec2; + + for (size_t j = 0; j < numVertices; ++j) + { + const SModelVertex& vtx = vertices[j]; + const CMatrix3D& mtx = newPoseMatrices[blendIndices[j]]; + + // Loads matrix to xmm registers. + col0 = _mm_load_ps(mtx._data); + col1 = _mm_load_ps(mtx._data + 4); + col2 = _mm_load_ps(mtx._data + 8); + col3 = _mm_load_ps(mtx._data + 12); + + // Loads and computes vertex coordinates. + vec0 = _mm_load1_ps(&vtx.m_Coords.X); + vec0 = _mm_mul_ps(col0, vec0); + vec1 = _mm_load1_ps(&vtx.m_Coords.Y); + vec1 = _mm_mul_ps(col1, vec1); + vec0 = _mm_add_ps(vec0, vec1); + vec1 = _mm_load1_ps(&vtx.m_Coords.Z); + vec1 = _mm_mul_ps(col2, vec1); + vec1 = _mm_add_ps(vec1, col3); + vec0 = _mm_add_ps(vec0, vec1); + _mm_store_ps((float*)(PositionData + PositionStride*j), vec0); + + // Loads and computes normal vectors. 
+ vec0 = _mm_load1_ps(&vtx.m_Norm.X); + vec0 = _mm_mul_ps(col0, vec0); + vec1 = _mm_load1_ps(&vtx.m_Norm.Y); + vec1 = _mm_mul_ps(col1, vec1); + vec0 = _mm_add_ps(vec0, vec1); + vec1 = _mm_load1_ps(&vtx.m_Norm.Z); + vec1 = _mm_mul_ps(col2, vec1); + vec0 = _mm_add_ps(vec0, vec1); + + // If there was more than one influence, the result is probably not going + // to be of unit length (since it's a weighted sum of several independent + // unit vectors), so we need to normalise it. + // (It's fairly common to only have one influence, so it seems sensible to + // optimise that case a bit.) + if (vtx.m_Blend.m_Bone[1] != 0xff) // if more than one influence + { + // Normalization. + // vec1 = [x*x, y*y, z*z, ?*?] + vec1 = _mm_mul_ps(vec0, vec0); + // vec2 = [y*y, z*z, x*x, ?*?] (_MM_SHUFFLE lists source lanes for result lanes 3, 2, 1, 0) + vec2 = _mm_shuffle_ps(vec1, vec1, _MM_SHUFFLE(3, 0, 2, 1)); + vec1 = _mm_add_ps(vec1, vec2); + // vec2 = [z*z, x*x, y*y, ?*?] + vec2 = _mm_shuffle_ps(vec2, vec2, _MM_SHUFFLE(3, 0, 2, 1)); + vec1 = _mm_add_ps(vec1, vec2); + // lanes 0-2 of vec1 now hold x*x + y*y + z*z; rsqrt(a) approximates 1 / sqrt(a) + vec1 = _mm_rsqrt_ps(vec1); + vec0 = _mm_mul_ps(vec0, vec1); + } + _mm_store_ps((float*)(NormalData + NormalStride*j), vec0); + } +} +#endif + void CModelDef::BlendBoneMatrices( CMatrix3D boneMatrices[]) { diff --git a/source/graphics/ModelDef.h b/source/graphics/ModelDef.h index e7ce494e83..9634277555 100644 --- a/source/graphics/ModelDef.h +++ b/source/graphics/ModelDef.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010 Wildfire Games. +/* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -179,6 +179,19 @@ public: const size_t* blendIndices, const CMatrix3D newPoseMatrices[]); +#if ARCH_X86_X64 + /** + * SSE-optimised version of SkinPointsAndNormals. 
+ */ + static void SkinPointsAndNormals_SSE( + size_t numVertices, + const VertexArrayIterator& Position, + const VertexArrayIterator& Normal, + const SModelVertex* vertices, + const size_t* blendIndices, + const CMatrix3D newPoseMatrices[]); +#endif + /** * Blend bone matrices together to fill bone palette. */ diff --git a/source/ps/GameSetup/GameSetup.cpp b/source/ps/GameSetup/GameSetup.cpp index 282158f018..e21f82a873 100644 --- a/source/ps/GameSetup/GameSetup.cpp +++ b/source/ps/GameSetup/GameSetup.cpp @@ -62,6 +62,7 @@ #include "renderer/Renderer.h" #include "renderer/VertexBufferManager.h" +#include "renderer/ModelRenderer.h" #include "maths/MathUtil.h" @@ -591,6 +592,7 @@ static void InitRenderer() g_Renderer.SetViewport(vp); ColorActivateFastImpl(); + ModelRenderer::Init(); } static void InitSDL() diff --git a/source/renderer/FixedFunctionModelRenderer.cpp b/source/renderer/FixedFunctionModelRenderer.cpp index 557b61c37d..f653cf672f 100644 --- a/source/renderer/FixedFunctionModelRenderer.cpp +++ b/source/renderer/FixedFunctionModelRenderer.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2009 Wildfire Games. +/* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -21,7 +21,9 @@ #include "precompiled.h" +#include "lib/bits.h" #include "lib/ogl.h" +#include "lib/sysdep/rtl.h" #include "maths/Vector3D.h" #include "maths/Vector4D.h" @@ -97,8 +99,13 @@ struct FFModel struct FixedFunctionModelRendererInternals { - /// Transformed vertex normals - required for recalculating lighting on skinned models - std::vector normals; + /** + * Scratch space for normal vector calculation. + * Space is reserved so we don't have to do frequent reallocations. + * Allocated with rtl_AllocateAligned(normalsNumVertices*16, 16) for SSE writes. 
+ */ + char* normals; + size_t normalsNumVertices; /// Previously prepared modeldef FFModelDef* ffmodeldef; @@ -110,10 +117,14 @@ FixedFunctionModelRenderer::FixedFunctionModelRenderer() { m = new FixedFunctionModelRendererInternals; m->ffmodeldef = 0; + m->normals = 0; + m->normalsNumVertices = 0; } FixedFunctionModelRenderer::~FixedFunctionModelRenderer() { + rtl_FreeAligned(m->normals); + delete m; } @@ -133,17 +144,26 @@ void* FixedFunctionModelRenderer::CreateModelData(CModel* model) // Build the per-model data FFModel* ffmodel = new FFModel; - ffmodel->m_Position.type = GL_FLOAT; - ffmodel->m_Position.elems = 3; - ffmodel->m_Array.AddAttribute(&ffmodel->m_Position); + // Positions must be 16-byte aligned for SSE writes. + // We can pack the color after the position; it will be corrupted by + // BuildPositionAndNormals, but that's okay since we'll recompute the + // colors afterwards. ffmodel->m_Color.type = GL_UNSIGNED_BYTE; ffmodel->m_Color.elems = 4; ffmodel->m_Array.AddAttribute(&ffmodel->m_Color); + ffmodel->m_Position.type = GL_FLOAT; + ffmodel->m_Position.elems = 3; + ffmodel->m_Array.AddAttribute(&ffmodel->m_Position); + ffmodel->m_Array.SetNumVertices(mdef->GetNumVertices()); ffmodel->m_Array.Layout(); + // Verify alignment + ENSURE(ffmodel->m_Position.offset % 16 == 0); + ENSURE(ffmodel->m_Array.GetStride() % 16 == 0); + return ffmodel; } @@ -159,11 +179,19 @@ void FixedFunctionModelRenderer::UpdateModelData(CModel* model, void* data, int size_t numVertices = mdef->GetNumVertices(); // build vertices - if (m->normals.size() < numVertices) - m->normals.resize(numVertices); + + // allocate working space for computing normals + if (numVertices > m->normalsNumVertices) + { + rtl_FreeAligned(m->normals); + + size_t newSize = round_up_to_pow2(numVertices); + m->normals = (char*)rtl_AllocateAligned(newSize*16, 16); + m->normalsNumVertices = newSize; + } VertexArrayIterator Position = ffmodel->m_Position.GetIterator(); - VertexArrayIterator Normal = 
VertexArrayIterator((char*)&m->normals[0], sizeof(CVector3D)); + VertexArrayIterator Normal = VertexArrayIterator(m->normals, 16); ModelRenderer::BuildPositionAndNormals(model, Position, Normal); diff --git a/source/renderer/HWLightingModelRenderer.cpp b/source/renderer/HWLightingModelRenderer.cpp index 31ab164202..bd3d1adb76 100644 --- a/source/renderer/HWLightingModelRenderer.cpp +++ b/source/renderer/HWLightingModelRenderer.cpp @@ -37,13 +37,35 @@ struct ShaderModelDef : public CModelDefRPrivate /// Indices are the same for all models, so share them VertexIndexArray m_IndexArray; + /// Static per-CModelDef vertex array + VertexArray m_Array; + + /// UV coordinates are stored in the static array + VertexArray::Attribute m_UV; + ShaderModelDef(const CModelDefPtr& mdef); }; ShaderModelDef::ShaderModelDef(const CModelDefPtr& mdef) - : m_IndexArray(GL_STATIC_DRAW) + : m_IndexArray(GL_STATIC_DRAW), m_Array(GL_STATIC_DRAW) { + size_t numVertices = mdef->GetNumVertices(); + + m_UV.type = GL_FLOAT; + m_UV.elems = 2; + m_Array.AddAttribute(&m_UV); + + m_Array.SetNumVertices(numVertices); + m_Array.Layout(); + + VertexArrayIterator UVit = m_UV.GetIterator(); + + ModelRenderer::BuildUV(mdef, UVit); + + m_Array.Upload(); + m_Array.FreeBackingStore(); + m_IndexArray.SetNumVertices(mdef->GetNumFaces()*3); m_IndexArray.Layout(); ModelRenderer::BuildIndices(mdef, m_IndexArray.GetIterator()); @@ -61,9 +83,6 @@ struct ShaderModel VertexArray::Attribute m_Position; VertexArray::Attribute m_Normal; - /// UV is stored per-CModel in order to avoid space wastage due to alignment - VertexArray::Attribute m_UV; - ShaderModel() : m_Array(GL_DYNAMIC_DRAW) { } }; @@ -103,25 +122,23 @@ void* ShaderModelRenderer::CreateModelData(CModel* model) // Build the per-model data ShaderModel* shadermodel = new ShaderModel; + // Positions and normals must be 16-byte aligned for SSE writes. 
+ shadermodel->m_Position.type = GL_FLOAT; - shadermodel->m_Position.elems = 3; + shadermodel->m_Position.elems = 4; shadermodel->m_Array.AddAttribute(&shadermodel->m_Position); - shadermodel->m_UV.type = GL_FLOAT; - shadermodel->m_UV.elems = 2; - shadermodel->m_Array.AddAttribute(&shadermodel->m_UV); - shadermodel->m_Normal.type = GL_FLOAT; - shadermodel->m_Normal.elems = 3; + shadermodel->m_Normal.elems = 4; shadermodel->m_Array.AddAttribute(&shadermodel->m_Normal); shadermodel->m_Array.SetNumVertices(mdef->GetNumVertices()); shadermodel->m_Array.Layout(); - // Fill in static UV coordinates - VertexArrayIterator UVit = shadermodel->m_UV.GetIterator(); - - ModelRenderer::BuildUV(mdef, UVit); + // Verify alignment + ENSURE(shadermodel->m_Position.offset % 16 == 0); + ENSURE(shadermodel->m_Normal.offset % 16 == 0); + ENSURE(shadermodel->m_Array.GetStride() % 16 == 0); return shadermodel; } @@ -188,11 +205,19 @@ void ShaderModelRenderer::EndPass(int streamflags) // Prepare UV coordinates for this modeldef -void ShaderModelRenderer::PrepareModelDef(int UNUSED(streamflags), const CModelDefPtr& def) +void ShaderModelRenderer::PrepareModelDef(int streamflags, const CModelDefPtr& def) { m->shadermodeldef = (ShaderModelDef*)def->GetRenderData(m); ENSURE(m->shadermodeldef); + + if (streamflags & STREAM_UV0) + { + u8* base = m->shadermodeldef->m_Array.Bind(); + GLsizei stride = (GLsizei)m->shadermodeldef->m_Array.GetStride(); + + glTexCoordPointer(2, GL_FLOAT, stride, base + m->shadermodeldef->m_UV.offset); + } } @@ -213,9 +238,6 @@ void ShaderModelRenderer::RenderModel(int streamflags, CModel* model, void* data if (streamflags & STREAM_NORMAL) glNormalPointer(GL_FLOAT, stride, base + shadermodel->m_Normal.offset); - if (streamflags & STREAM_UV0) - glTexCoordPointer(2, GL_FLOAT, stride, base + shadermodel->m_UV.offset); - // render the lot size_t numFaces = mdldef->GetNumFaces(); diff --git a/source/renderer/ModelRenderer.cpp b/source/renderer/ModelRenderer.cpp index 
0b048040a3..9ee9a4371d 100644 --- a/source/renderer/ModelRenderer.cpp +++ b/source/renderer/ModelRenderer.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2009 Wildfire Games. +/* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -41,10 +41,23 @@ #include +#if ARCH_X86_X64 +# include "lib/sysdep/arch/x86_x64/x86_x64.h" +#endif /////////////////////////////////////////////////////////////////////////////////////////////// // ModelRenderer implementation +static bool g_EnableSSE = false; + +void ModelRenderer::Init() +{ +#if ARCH_X86_X64 + if (x86_x64_cap(X86_X64_CAP_SSE)) + g_EnableSSE = true; +#endif +} + // Helper function to copy object-space position and normal vectors into arrays. void ModelRenderer::CopyPositionAndNormals( const CModelDefPtr& mdef, @@ -84,8 +97,13 @@ void ModelRenderer::BuildPositionAndNormals( return; } - CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices()); - +#if ARCH_X86_X64 + // SkinPointsAndNormals_SSE is only declared when ARCH_X86_X64 is set, so compile the call out elsewhere + if (g_EnableSSE) + CModelDef::SkinPointsAndNormals_SSE(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices()); + else +#endif + CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices()); } else { diff --git a/source/renderer/ModelRenderer.h b/source/renderer/ModelRenderer.h index 76786edda0..692546b993 100644 --- a/source/renderer/ModelRenderer.h +++ b/source/renderer/ModelRenderer.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2009 Wildfire Games. +/* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -123,6 +123,12 @@ public: ModelRenderer() { } virtual ~ModelRenderer() { } + /** + * Initialise global settings. + * Should be called before using the class. + */ + static void Init(); + /** * Submit: Submit a model for rendering this frame. 
* @@ -212,7 +218,8 @@ public: * @param Position Points to the array that will receive * transformed position vectors. The array behind the iterator * must be large enough to hold model->GetModelDef()->GetNumVertices() - * vertices. + * vertices. It must allow 16 bytes to be written to each element + * (i.e. provide 4 bytes of padding after each CVector3D). * @param Normal Points to the array that will receive transformed * normal vectors. The array behind the iterator must be as large as * the Position array. diff --git a/source/renderer/TransparencyRenderer.cpp b/source/renderer/TransparencyRenderer.cpp index aa0af878ad..7c5c119336 100644 --- a/source/renderer/TransparencyRenderer.cpp +++ b/source/renderer/TransparencyRenderer.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2009 Wildfire Games. +/* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -25,7 +25,9 @@ #include #include +#include "lib/bits.h" #include "lib/ogl.h" +#include "lib/sysdep/rtl.h" #include "maths/MathUtil.h" #include "maths/Vector3D.h" #include "maths/Vector4D.h" @@ -117,17 +119,26 @@ PSModel::PSModel(CModel* model) { CModelDefPtr mdef = m_Model->GetModelDef(); - m_Position.type = GL_FLOAT; - m_Position.elems = 3; - m_Array.AddAttribute(&m_Position); + // Positions and normals must be 16-byte aligned for SSE writes. + // We can pack the color after the position; it will be corrupted by + // BuildPositionAndNormals, but that's okay since we'll recompute the + // colors afterwards. 
m_Color.type = GL_UNSIGNED_BYTE; m_Color.elems = 4; m_Array.AddAttribute(&m_Color); + m_Position.type = GL_FLOAT; + m_Position.elems = 3; + m_Array.AddAttribute(&m_Position); + m_Array.SetNumVertices(mdef->GetNumVertices()); m_Array.Layout(); + // Verify alignment + ENSURE(m_Position.offset % 16 == 0); + ENSURE(m_Array.GetStride() % 16 == 0); + m_Indices = new u16[mdef->GetNumFaces()*3]; } @@ -194,8 +205,13 @@ float PSModel::BackToFrontIndexSort(const CMatrix3D& worldToCam) */ struct PolygonSortModelRendererInternals { - /// Scratch space for normal vector calculation - std::vector normals; + /** + * Scratch space for normal vector calculation. + * Space is reserved so we don't have to do frequent reallocations. + * Allocated with rtl_AllocateAligned(normalsNumVertices*16, 16) for SSE writes. + */ + char* normals; + size_t normalsNumVertices; }; @@ -203,10 +219,14 @@ struct PolygonSortModelRendererInternals PolygonSortModelRenderer::PolygonSortModelRenderer() { m = new PolygonSortModelRendererInternals; + m->normals = 0; + m->normalsNumVertices = 0; } PolygonSortModelRenderer::~PolygonSortModelRenderer() { + rtl_FreeAligned(m->normals); + delete m; } @@ -237,11 +257,19 @@ void PolygonSortModelRenderer::UpdateModelData(CModel* model, void* data, int up size_t numVertices = mdef->GetNumVertices(); // build vertices - if (m->normals.size() < numVertices) - m->normals.resize(numVertices); + + // allocate working space for computing normals + if (numVertices > m->normalsNumVertices) + { + rtl_FreeAligned(m->normals); + + size_t newSize = round_up_to_pow2(numVertices); + m->normals = (char*)rtl_AllocateAligned(newSize*16, 16); + m->normalsNumVertices = newSize; + } VertexArrayIterator Position = psmdl->m_Position.GetIterator(); - VertexArrayIterator Normal = VertexArrayIterator((char*)&m->normals[0], sizeof(CVector3D)); + VertexArrayIterator Normal = VertexArrayIterator(m->normals, 16); ModelRenderer::BuildPositionAndNormals(model, Position, Normal); diff --git 
a/source/renderer/VertexArray.cpp b/source/renderer/VertexArray.cpp index bfbf018679..69893dbbc0 100644 --- a/source/renderer/VertexArray.cpp +++ b/source/renderer/VertexArray.cpp @@ -19,6 +19,7 @@ #include "lib/alignment.h" #include "lib/ogl.h" +#include "lib/sysdep/rtl.h" #include "maths/Vector3D.h" #include "maths/Vector4D.h" #include "graphics/SColor.h" @@ -47,7 +48,7 @@ VertexArray::~VertexArray() // Free all resources on destruction or when a layout parameter changes void VertexArray::Free() { - delete[] m_BackingStore; + rtl_FreeAligned(m_BackingStore); m_BackingStore = 0; if (m_VB) @@ -214,7 +215,7 @@ void VertexArray::Layout() //debug_printf(L"Stride: %u\n", m_Stride); if (m_Stride) - m_BackingStore = new char[m_Stride * m_NumVertices]; + m_BackingStore = (char*)rtl_AllocateAligned(m_Stride * m_NumVertices, 16); } @@ -249,7 +250,7 @@ u8* VertexArray::Bind() // Free the backing store to save some memory void VertexArray::FreeBackingStore() { - delete[] m_BackingStore; + rtl_FreeAligned(m_BackingStore); m_BackingStore = 0; } diff --git a/source/renderer/VertexArray.h b/source/renderer/VertexArray.h index e66496d2d4..7cf6cfaadd 100644 --- a/source/renderer/VertexArray.h +++ b/source/renderer/VertexArray.h @@ -103,6 +103,16 @@ public: return tmp; } + // Accessors for raw buffer data, for performance-critical code + char* GetData() const + { + return m_Data; + } + size_t GetStride() const + { + return m_Stride; + } + private: char* m_Data; size_t m_Stride; @@ -187,7 +197,7 @@ private: CVertexBuffer::VBChunk* m_VB; size_t m_Stride; - char* m_BackingStore; + char* m_BackingStore; // 16-byte aligned, to allow fast SSE access }; /**