1
0
forked from 0ad/0ad

# Add GPU performance profiling support.

Add some profiler region attributes.
Render events in profile visualiser.
Change profiler from GPL to MIT.

This was SVN commit r10492.
This commit is contained in:
Ykkrosh 2011-11-09 13:09:01 +00:00
parent 8fba3f92b1
commit cfb79c3062
24 changed files with 1354 additions and 102 deletions

View File

@ -201,7 +201,13 @@ hotkey.text.move.right = "Ctrl+RightArrow" ; Move cursor to start of word to
; > PROFILER
hotkey.profile.toggle = "F11" ; Enable/disable real-time profiler
hotkey.profile.save = "Shift+F11" ; Save current profiler data to logs/profile.txt
hotkey.profile2.enable = "F11" ; Enable HTTP server for new profiler
hotkey.profile2.enable = "F11" ; Enable HTTP/GPU modes for new profiler
profiler2.http.autoenable = false ; Enable HTTP server output at startup (default off for security/performance)
profiler2.gpu.autoenable = false ; Enable GPU timing at startup (default off for performance/compatibility)
profiler2.gpu.arb.enable = true ; Allow GL_ARB_timer_query timing mode when available
profiler2.gpu.ext.enable = true ; Allow GL_EXT_timer_query timing mode when available
profiler2.gpu.intel.enable = true ; Allow GL_INTEL_performance_queries timing mode when available
; > QUICKSAVE
hotkey.quicksave = "Shift+F5"

View File

@ -272,7 +272,7 @@ void CGUIManager::TickObjects()
void CGUIManager::Draw()
{
PROFILE3("render gui");
PROFILE3_GPU("gui");
for (PageStackType::iterator it = m_PageStack.begin(); it != m_PageStack.end(); ++it)
it->gui->Draw();

View File

@ -495,6 +495,10 @@ void IGUIObject::SetScriptHandler(const CStr& Action, JSObject* Function)
InReaction IGUIObject::SendEvent(EGUIMessageType type, const CStr& EventName)
{
PROFILE2_EVENT("gui event");
PROFILE2_ATTR("type: %s", EventName.c_str());
PROFILE2_ATTR("object: %s", m_Name.c_str());
SGUIMessage msg(type);
HandleMessage(msg);

View File

@ -41,6 +41,9 @@ template<> jsval ScriptInterface::ToJSVal<SDL_Event_>(JSContext* cx, SDL_Event_
case SDL_MOUSEMOTION: typeName = "mousemotion"; break;
case SDL_MOUSEBUTTONDOWN: typeName = "mousebuttondown"; break;
case SDL_MOUSEBUTTONUP: typeName = "mousebuttonup"; break;
case SDL_QUIT: typeName = "quit"; break;
case SDL_VIDEOEXPOSE: typeName = "videoexpose"; break;
case SDL_VIDEORESIZE: typeName = "videoresize"; break;
case SDL_HOTKEYDOWN: typeName = "hotkeydown"; break;
case SDL_HOTKEYUP: typeName = "hotkeyup"; break;
default: typeName = "(unknown)"; break;

View File

@ -235,6 +235,40 @@ FUNC2(void, glGetVertexAttribfvARB, glGetVertexAttribfv, "2.0", (GLuint index, G
FUNC2(void, glGetVertexAttribivARB, glGetVertexAttribiv, "2.0", (GLuint index, GLenum pname, GLint *params))
FUNC2(void, glGetVertexAttribPointervARB, glGetVertexAttribPointerv, "2.0", (GLuint index, GLenum pname, void **pointer))
// GL_ARB_occlusion_query / GL1.5:
FUNC2(void, glGenQueriesARB, glGenQueries, "1.5", (GLsizei n, GLuint *ids))
FUNC2(void, glDeleteQueriesARB, glDeleteQueries, "1.5", (GLsizei n, const GLuint *ids))
FUNC2(GLboolean, glIsQueryARB, glIsQuery, "1.5", (GLuint id))
FUNC2(void, glBeginQueryARB, glBeginQuery, "1.5", (GLenum target, GLuint id))
FUNC2(void, glEndQueryARB, glEndQuery, "1.5", (GLenum target))
FUNC2(void, glGetQueryivARB, glGetQueryiv, "1.5", (GLenum target, GLenum pname, GLint *params))
FUNC2(void, glGetQueryObjectivARB, glGetQueryObjectiv, "1.5", (GLuint id, GLenum pname, GLint *params))
FUNC2(void, glGetQueryObjectuivARB, glGetQueryObjectuiv, "1.5", (GLuint id, GLenum pname, GLuint *params))
// GL_ARB_sync / GL3.2:
FUNC2(void, glGetInteger64v, glGetInteger64v, "3.2", (GLenum pname, GLint64 *params))
// GL_ARB_timer_query / GL3.3:
FUNC2(void, glQueryCounter, glQueryCounter, "3.3", (GLuint id, GLenum target))
FUNC2(void, glGetQueryObjecti64v, glGetQueryObjecti64v, "3.3", (GLuint id, GLenum pname, GLint64 *params))
FUNC2(void, glGetQueryObjectui64v, glGetQueryObjectui64v, "3.3", (GLuint id, GLenum pname, GLuint64 *params))
// GL_GREMEDY_string_marker (from gDEBugger)
FUNC(int, glStringMarkerGREMEDY, (GLsizei len, const GLvoid *string))
// GL_INTEL_performance_queries (undocumented, may be unstable, use at own risk;
// see http://zaynar.co.uk/docs/gl-intel-performance-queries.html)
FUNC(void, glGetFirstPerfQueryIdINTEL, (GLuint *queryId))
FUNC(void, glGetNextPerfQueryIdINTEL, (GLuint prevQueryId, GLuint *queryId))
FUNC(void, glGetPerfQueryInfoINTEL, (GLuint queryId, GLuint nameMaxLength, char *name, GLuint *counterBufferSize, GLuint *numCounters, GLuint *maxQueries, GLuint *))
FUNC(void, glGetPerfCounterInfoINTEL, (GLuint queryId, GLuint counterId, GLuint nameMaxLength, char *name, GLuint descMaxLength, char *desc, GLuint *offset, GLuint *size, GLuint *usage, GLuint *type, GLuint64 *))
FUNC(void, glCreatePerfQueryINTEL, (GLuint queryId, GLuint *id))
FUNC(void, glBeginPerfQueryINTEL, (GLuint id))
FUNC(void, glEndPerfQueryINTEL, (GLuint id))
FUNC(void, glDeletePerfQueryINTEL, (GLuint id))
FUNC(void, glGetPerfQueryDataINTEL, (GLuint id, GLenum requestType, GLuint maxLength, char *buffer, GLuint *length))
#if OS_WIN
// WGL_EXT_swap_control
FUNC(int, wglSwapIntervalEXT, (int))
@ -250,7 +284,4 @@ FUNC(int, wglQueryPbufferARB, (HPBUFFERARB, int, int*))
FUNC(int, wglGetPixelFormatAttribivARB, (HDC, int, int, unsigned int, const int*, int*))
FUNC(int, wglGetPixelFormatAttribfvARB, (HDC, int, int, unsigned int, const int*, float*))
FUNC(int, wglChoosePixelFormatARB, (HDC, const int *, const float*, unsigned int, int*, unsigned int*))
// GL_GREMEDY_string_marker (from gDEBugger)
FUNC(int, glStringMarkerGREMEDY, (GLsizei len, const GLvoid *string))
#endif // OS_WIN

View File

@ -135,6 +135,7 @@ static InReaction MainInputHandler(const SDL_Event_* ev)
}
else if (hotkey == "profile2.enable")
{
g_Profiler2.EnableGPU();
g_Profiler2.EnableHTTP();
return IN_HANDLED;
}
@ -151,8 +152,17 @@ static void PumpEvents()
PROFILE3("dispatch events");
SDL_Event_ ev;
while(SDL_PollEvent(&ev.ev))
while (SDL_PollEvent(&ev.ev))
{
PROFILE2("event");
if (g_GUI)
{
std::string data = g_GUI->GetScriptInterface().StringifyJSON(
ScriptInterface::ToJSVal(g_GUI->GetScriptInterface().GetContext(), ev));
PROFILE2_ATTR("%s", data.c_str());
}
in_dispatch_event(&ev);
}
}
@ -253,6 +263,8 @@ static void Frame()
{
g_Profiler2.RecordFrameStart();
PROFILE2("frame");
g_Profiler2.IncrementFrameNumber();
PROFILE2_ATTR("%d", g_Profiler2.GetFrameNumber());
ogl_WarnIfError();

View File

@ -175,7 +175,7 @@ void CConsole::Render()
{
if (! (m_bVisible || m_bToggle) ) return;
PROFILE3("render console");
PROFILE3_GPU("console");
CFont font(CONSOLE_FONT);
font.Bind();

View File

@ -277,7 +277,7 @@ void CLogger::LogError(const wchar_t* fmt, ...)
void CLogger::Render()
{
PROFILE3("render logger");
PROFILE3_GPU("logger");
CleanupRenderQueue();

View File

@ -275,7 +275,11 @@ bool CGame::Update(double deltaTime, bool doInterpolate)
if (m_TurnManager->Update(deltaTime, maxTurns))
{
g_GUI->SendEventToAll("SimulationUpdate");
{
PROFILE3("gui sim update");
g_GUI->SendEventToAll("SimulationUpdate");
}
GetView()->GetLOSTexture().MakeDirty();
}
}

View File

@ -198,6 +198,10 @@ void Render()
ogl_WarnIfError();
g_Profiler2.RecordGPUFrameStart();
ogl_WarnIfError();
CStr skystring = "255 0 255";
CFG_GET_USER_VAL("skycolor", String, skystring);
CColor skycol;
@ -272,7 +276,7 @@ void Render()
// Draw the cursor (or set the Windows cursor, on Windows)
if (g_DoRenderCursor)
{
PROFILE3("render cursor");
PROFILE3_GPU("cursor");
CStrW cursorName = g_CursorName;
if (cursorName.empty())
{
@ -303,6 +307,10 @@ void Render()
PROFILE2_ATTR("particles: %d", (int)g_Renderer.GetStats().m_Particles);
ogl_WarnIfError();
g_Profiler2.RecordGPUFrameEnd();
ogl_WarnIfError();
}
@ -647,6 +655,8 @@ void Shutdown(int UNUSED(flags))
tex_codec_unregister_all();
g_Profiler2.ShutdownGPU();
TIMER_BEGIN(L"shutdown SDL");
ShutdownSDL();
TIMER_END(L"shutdown SDL");
@ -850,6 +860,13 @@ void Init(const CmdLineArgs& args, int UNUSED(flags))
// g_ConfigDB, command line args, globals
CONFIG_Init(args);
// Optionally start profiler HTTP output automatically
// (By default it's only enabled by a hotkey, for security/performance)
bool profilerHTTPEnable = false;
CFG_GET_USER_VAL("profiler2.http.autoenable", Bool, profilerHTTPEnable);
if (profilerHTTPEnable)
g_Profiler2.EnableHTTP();
if (!g_Quickstart)
g_UserReporter.Initialize(); // after config
@ -879,6 +896,13 @@ void InitGraphics(const CmdLineArgs& args, int flags)
ogl_WarnIfError();
// Optionally start profiler GPU timings automatically
// (By default it's only enabled by a hotkey, for performance/compatibility)
bool profilerGPUEnable = false;
CFG_GET_USER_VAL("profiler2.gpu.autoenable", Bool, profilerGPUEnable);
if (profilerGPUEnable)
g_Profiler2.EnableGPU();
if(!g_Quickstart)
{
WriteSystemInfo();

View File

@ -198,4 +198,7 @@ public:
// Do both old and new profilers simultaneously (1+2=3), for convenience.
#define PROFILE3(name) PROFILE(name); PROFILE2(name)
// Also do GPU
#define PROFILE3_GPU(name) PROFILE(name); PROFILE2(name); PROFILE2_GPU(name)
#endif // INCLUDED_PROFILE

View File

@ -159,7 +159,7 @@ void CProfileViewer::RenderProfile()
return;
}
PROFILE3("render profiler");
PROFILE3_GPU("profile viewer");
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

View File

@ -1,18 +1,23 @@
/* Copyright (C) 2011 Wildfire Games.
* This file is part of 0 A.D.
/* Copyright (c) 2011 Wildfire Games
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "precompiled.h"
@ -21,6 +26,7 @@
#include "lib/allocators/shared_ptr.h"
#include "ps/CLogger.h"
#include "ps/Profiler2GPU.h"
#include "third_party/mongoose/mongoose.h"
CProfiler2 g_Profiler2;
@ -29,10 +35,15 @@ CProfiler2 g_Profiler2;
const u8 CProfiler2::RESYNC_MAGIC[8] = {0x11, 0x22, 0x33, 0x44, 0xf4, 0x93, 0xbe, 0x15};
CProfiler2::CProfiler2() :
m_Initialised(false), m_MgContext(NULL)
m_Initialised(false), m_FrameNumber(0), m_MgContext(NULL), m_GPU(NULL)
{
}
/**
 * Destructor. Checks that the profiler is no longer flagged as initialised
 * by the time it is destroyed.
 */
CProfiler2::~CProfiler2()
{
ENSURE(!m_Initialised); // should have called Shutdown() explicitly
}
/**
* Mongoose callback. Run in an arbitrary thread (possibly concurrently with other requests).
*/
@ -133,6 +144,12 @@ void CProfiler2::Initialise()
RegisterCurrentThread("main");
}
/**
 * Creates the GPU profiling helper object and stores it in m_GPU.
 * Must only be called while no GPU helper exists yet.
 */
void CProfiler2::InitialiseGPU()
{
// Creating a second helper would overwrite (and leak) the first one
ENSURE(!m_GPU);
m_GPU = new CProfiler2GPU(*this);
}
void CProfiler2::EnableHTTP()
{
ENSURE(m_Initialised);
@ -150,10 +167,24 @@ void CProfiler2::EnableHTTP()
ENSURE(m_MgContext);
}
/**
 * Turns on GPU profiling. Requires the profiler itself to be initialised.
 * Calling it again while GPU profiling is already active is a no-op.
 */
void CProfiler2::EnableGPU()
{
	ENSURE(m_Initialised);

	// Already enabled: nothing more to set up
	if (m_GPU)
		return;

	InitialiseGPU();
}
/**
 * Tears down GPU profiling by releasing the helper object held in m_GPU.
 * NOTE(review): SAFE_DELETE presumably also resets m_GPU to NULL (Shutdown()
 * checks !m_GPU afterwards) - confirm against the macro definition.
 */
void CProfiler2::ShutdownGPU()
{
SAFE_DELETE(m_GPU);
}
void CProfiler2::Shutdown()
{
ENSURE(m_Initialised);
ENSURE(!m_GPU); // must shutdown GPU before profiler
if (m_MgContext)
{
mg_stop(m_MgContext);
@ -167,6 +198,30 @@ void CProfiler2::Shutdown()
m_Initialised = false;
}
/**
 * Forwards the start of a rendered frame to the GPU profiling helper.
 * Does nothing while GPU profiling is not enabled (m_GPU is NULL).
 */
void CProfiler2::RecordGPUFrameStart()
{
	if (!m_GPU)
		return;

	m_GPU->FrameStart();
}
/**
 * Forwards the end of a rendered frame to the GPU profiling helper.
 * Does nothing while GPU profiling is not enabled (m_GPU is NULL).
 */
void CProfiler2::RecordGPUFrameEnd()
{
	if (!m_GPU)
		return;

	m_GPU->FrameEnd();
}
/**
 * Forwards a GPU region-enter event to the GPU profiling helper.
 * Does nothing while GPU profiling is not enabled (m_GPU is NULL).
 * @param id name of the region being entered
 */
void CProfiler2::RecordGPURegionEnter(const char* id)
{
	if (!m_GPU)
		return;

	m_GPU->RegionEnter(id);
}
/**
 * Forwards a GPU region-leave event to the GPU profiling helper.
 * Does nothing while GPU profiling is not enabled (m_GPU is NULL).
 * @param id name of the region being left
 */
void CProfiler2::RecordGPURegionLeave(const char* id)
{
	if (!m_GPU)
		return;

	m_GPU->RegionLeave(id);
}
/**
* Called by pthreads when a registered thread is destroyed.
*/
@ -174,12 +229,7 @@ void CProfiler2::TLSDtor(void* data)
{
ThreadStorage* storage = (ThreadStorage*)data;
CProfiler2& profiler = storage->GetProfiler();
{
CScopeLock lock(profiler.m_Mutex);
profiler.m_Threads.erase(std::find(profiler.m_Threads.begin(), profiler.m_Threads.end(), storage));
}
storage->GetProfiler().RemoveThreadStorage(storage);
delete (ThreadStorage*)data;
}
@ -197,10 +247,21 @@ void CProfiler2::RegisterCurrentThread(const std::string& name)
RecordSyncMarker();
RecordEvent("thread start");
AddThreadStorage(storage);
}
/**
 * Appends @p storage to the list of registered thread storages (m_Threads).
 * The list is modified while holding m_Mutex.
 * @param storage storage object to register; only the pointer is stored
 */
void CProfiler2::AddThreadStorage(ThreadStorage* storage)
{
CScopeLock lock(m_Mutex);
m_Threads.push_back(storage);
}
/**
 * Removes @p storage from the list of registered thread storages (m_Threads).
 * The list is modified while holding m_Mutex.
 * If @p storage is not currently registered, the list is left unchanged.
 * @param storage storage object to unregister; it is not deleted here
 */
void CProfiler2::RemoveThreadStorage(ThreadStorage* storage)
{
	CScopeLock lock(m_Mutex);

	std::vector<ThreadStorage*>::iterator it = std::find(m_Threads.begin(), m_Threads.end(), storage);

	// Passing end() to erase() is undefined behaviour, so only erase
	// an entry that was actually found
	if (it != m_Threads.end())
		m_Threads.erase(it);
}
CProfiler2::ThreadStorage::ThreadStorage(CProfiler2& profiler, const std::string& name) :
m_Profiler(profiler), m_Name(name), m_BufferPos0(0), m_BufferPos1(0), m_LastTime(timer_Time())
{
@ -256,6 +317,8 @@ void CProfiler2::ThreadStorage::RecordAttribute(const char* fmt, va_list argp)
void CProfiler2::ConstructJSONOverview(std::ostream& stream)
{
TIMER(L"profile2 overview");
CScopeLock lock(m_Mutex);
stream << "{\"threads\":[";
@ -275,6 +338,8 @@ void CProfiler2::ConstructJSONOverview(std::ostream& stream)
template<typename V>
void RunBufferVisitor(const std::string& buffer, V& visitor)
{
TIMER(L"profile2 visitor");
// The buffer doesn't necessarily start at the beginning of an item
// (we just grabbed it from some arbitrary point in the middle),
// so scan forwards until we find a sync marker.
@ -421,24 +486,33 @@ public:
const char* CProfiler2::ConstructJSONResponse(std::ostream& stream, const std::string& thread)
{
CScopeLock lock(m_Mutex);
TIMER(L"profile2 query");
std::string buffer;
ThreadStorage* storage = NULL;
for (size_t i = 0; i < m_Threads.size(); ++i)
{
if (m_Threads[i]->GetName() == thread)
TIMER(L"profile2 get buffer");
CScopeLock lock(m_Mutex); // lock against changes to m_Threads or deletions of ThreadStorage
ThreadStorage* storage = NULL;
for (size_t i = 0; i < m_Threads.size(); ++i)
{
storage = m_Threads[i];
break;
if (m_Threads[i]->GetName() == thread)
{
storage = m_Threads[i];
break;
}
}
if (!storage)
return "cannot find named thread";
stream << "{\"events\":[\n";
buffer = storage->GetBuffer();
}
if (!storage)
return "cannot find named thread";
stream << "{\"events\":[\n";
std::string buffer = storage->GetBuffer();
BufferVisitor_Dump visitor(stream);
RunBufferVisitor(buffer, visitor);

View File

@ -1,18 +1,23 @@
/* Copyright (C) 2011 Wildfire Games.
* This file is part of 0 A.D.
/* Copyright (c) 2011 Wildfire Games
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
@ -80,8 +85,12 @@ struct mg_context;
// Note: Lots of functions are defined inline, to hypothetically
// minimise performance overhead.
class CProfiler2GPU;
class CProfiler2
{
friend class CProfiler2GPU_base;
public:
// Items stored in the buffers:
@ -113,7 +122,7 @@ public:
private:
// TODO: what's a good size?
// TODO: different threads might want different sizes
static const size_t BUFFER_SIZE = 128*1024;
static const size_t BUFFER_SIZE = 1024*1024;
/**
* Class instantiated in every registered thread.
@ -153,6 +162,14 @@ private:
}
void RecordAttribute(const char* fmt, va_list argp);
/**
 * Variadic convenience wrapper around RecordAttribute(): packs the
 * arguments following @p fmt into a va_list and passes them on.
 * @param fmt format string handed unchanged to RecordAttribute()
 */
void RecordAttributePrintf(const char* fmt, ...)
{
va_list argp;
va_start(argp, fmt);
RecordAttribute(fmt, argp);
va_end(argp);
}
CProfiler2& GetProfiler()
{
@ -230,6 +247,7 @@ private:
public:
CProfiler2();
~CProfiler2();
/**
* Call in main thread to set up the profiler,
@ -244,6 +262,18 @@ public:
*/
void EnableHTTP();
/**
* Call in main thread to enable the GPU profiling support,
* after OpenGL has been initialised.
*/
void EnableGPU();
/**
* Call in main thread to shut down the GPU profiling support,
* before shutting down OpenGL.
*/
void ShutdownGPU();
/**
* Call in main thread to shut everything down.
* All other profiled threads should have been terminated already.
@ -272,6 +302,7 @@ public:
*/
void RecordFrameStart()
{
ENSURE(ThreadUtil::IsMainThread());
GetThreadStorage().RecordFrameStart(GetTime());
}
@ -298,6 +329,11 @@ public:
va_end(argp);
}
void RecordGPUFrameStart();
void RecordGPUFrameEnd();
void RecordGPURegionEnter(const char* id);
void RecordGPURegionLeave(const char* id);
/**
* Call in any thread to produce a JSON representation of the general
* state of the application.
@ -311,7 +347,27 @@ public:
*/
const char* ConstructJSONResponse(std::ostream& stream, const std::string& thread);
/**
 * Returns the current time as reported by timer_Time().
 */
double GetTime()
{
return timer_Time();
}
/**
 * Returns the current value of the frame counter.
 */
int GetFrameNumber()
{
return m_FrameNumber;
}
/**
 * Advances the frame counter by one.
 */
void IncrementFrameNumber()
{
++m_FrameNumber;
}
void AddThreadStorage(ThreadStorage* storage);
void RemoveThreadStorage(ThreadStorage* storage);
private:
void InitialiseGPU();
static void TLSDtor(void* data);
ThreadStorage& GetThreadStorage()
@ -321,17 +377,16 @@ private:
return *storage;
}
double GetTime()
{
return timer_Time();
}
bool m_Initialised;
int m_FrameNumber;
mg_context* m_MgContext;
pthread_key_t m_TLS;
CProfiler2GPU* m_GPU;
CMutex m_Mutex;
std::vector<ThreadStorage*> m_Threads; // thread-safe; protected by m_Mutex
};
@ -356,6 +411,24 @@ private:
const char* m_Name;
};
/**
* Scope-based GPU enter/leave helper.
*/
class CProfile2GPURegion
{
public:
// Records a GPU region-enter event for @p name on the global profiler.
// Only the pointer is kept (not a copy of the string), so it has to stay
// valid until this object is destroyed.
CProfile2GPURegion(const char* name) : m_Name(name)
{
g_Profiler2.RecordGPURegionEnter(m_Name);
}
// Records the matching GPU region-leave event under the same name.
~CProfile2GPURegion()
{
g_Profiler2.RecordGPURegionLeave(m_Name);
}
private:
const char* m_Name; // region name passed to both the enter and leave events
};
/**
* Starts timing from now until the end of the current scope.
* @p region is the name to associate with this region (should be
@ -365,6 +438,8 @@ private:
*/
#define PROFILE2(region) CProfile2Region profile2__(region)
#define PROFILE2_GPU(region) CProfile2GPURegion profile2gpu__(region)
/**
* Record the named event at the current time.
*/

841
source/ps/Profiler2GPU.cpp Normal file
View File

@ -0,0 +1,841 @@
/* Copyright (c) 2011 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "precompiled.h"
#include "Profiler2GPU.h"
#include "lib/ogl.h"
#include "lib/allocators/shared_ptr.h"
#include "ps/ConfigDB.h"
#include "ps/Profiler2.h"
/**
 * Common base for the GPU profiling backends.
 * Owns a ThreadStorage that is registered with the profiler for the
 * lifetime of the object, so GPU timings are exposed like those of a
 * regular thread under the given name.
 */
class CProfiler2GPU_base
{
	NONCOPYABLE(CProfiler2GPU_base);

protected:
	/**
	 * Sets up the storage, writes its initial sync marker and
	 * "thread start" event, then registers it with @p profiler.
	 * @param profiler profiler that will list this storage
	 * @param name name under which the storage is created
	 */
	CProfiler2GPU_base(CProfiler2& profiler, const char* name) :
		m_Profiler(profiler),
		m_Storage(profiler, name)
	{
		// Each record takes its own timestamp, in this order
		m_Storage.RecordSyncMarker(profiler.GetTime());
		m_Storage.Record(CProfiler2::ITEM_EVENT, profiler.GetTime(), "thread start");

		profiler.AddThreadStorage(&m_Storage);
	}

	/**
	 * Unregisters the storage from the profiler before it is destroyed.
	 */
	~CProfiler2GPU_base()
	{
		m_Profiler.RemoveThreadStorage(&m_Storage);
	}

	CProfiler2& m_Profiler;
	CProfiler2::ThreadStorage m_Storage;
};
//////////////////////////////////////////////////////////////////////////
// Base class for ARB_timer_query, EXT_timer_query
/**
 * Shared query-object pool for the timer-query based backends.
 * Query objects are generated in batches and handed out through
 * NewQuery(); derived classes push finished queries back onto
 * m_FreeQueries so that they get reused.
 */
class CProfiler2GPU_timer_query : public CProfiler2GPU_base
{
protected:
	CProfiler2GPU_timer_query(CProfiler2& profiler, const char* name) :
		CProfiler2GPU_base(profiler, name)
	{
	}

	/**
	 * Deletes every query object currently sitting in the free list.
	 */
	~CProfiler2GPU_timer_query()
	{
		pglDeleteQueriesARB(m_FreeQueries.size(), m_FreeQueries.data());
		ogl_WarnIfError();
	}

	/**
	 * Hands out a GL query object, taking it from the free list.
	 * When the free list is empty, a batch of 8 new query objects is
	 * generated first.
	 * @return a query object that is no longer in the free list
	 */
	GLuint NewQuery()
	{
		if (m_FreeQueries.empty())
		{
			// Pool ran dry: refill it with a batch of new query objects
			m_FreeQueries.resize(8);
			pglGenQueriesARB(m_FreeQueries.size(), m_FreeQueries.data());
			ogl_WarnIfError();
		}

		const GLuint recycled = m_FreeQueries.back();
		m_FreeQueries.pop_back();
		return recycled;
	}

	std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in use
};
//////////////////////////////////////////////////////////////////////////
/*
* GL_ARB_timer_query supports sync and async queries for absolute GPU
* timestamps, which lets us time regions of code relative to the CPU.
* At the start of a frame, we record the CPU time and sync GPU timestamp,
* giving the time-vs-timestamp offset.
* At each enter/leave-region event, we do an async GPU timestamp query.
* When all the queries for a frame have their results available,
* we convert their GPU timestamps into CPU times and record the data.
*/
class CProfiler2GPU_ARB_timer_query : public CProfiler2GPU_timer_query
{
struct SEvent
{
const char* id;
GLuint query;
bool isEnter; // true if entering region; false if leaving
};
struct SFrame
{
u32 num;
double syncTimeStart; // CPU time at start of maybe this frame or a recent one
GLint64 syncTimestampStart; // GL timestamp corresponding to timeStart
std::vector<SEvent> events;
};
std::deque<SFrame> m_Frames;
public:
static bool IsSupported()
{
return ogl_HaveExtension("GL_ARB_timer_query");
}
CProfiler2GPU_ARB_timer_query(CProfiler2& profiler) :
CProfiler2GPU_timer_query(profiler, "gpu_arb")
{
// TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
// high enough (but apparently it might trigger GL errors on ATI)
}
~CProfiler2GPU_ARB_timer_query()
{
// Pop frames to return queries to the free list
while (!m_Frames.empty())
PopFrontFrame();
}
void FrameStart()
{
ProcessFrames();
SFrame frame;
frame.num = m_Profiler.GetFrameNumber();
// On (at least) some NVIDIA Windows drivers, when GPU-bound, or when
// vsync enabled and not CPU-bound, the first glGet* call at the start
// of a frame appears to trigger a wait (to stop the GPU getting too
// far behind, or to wait for the vsync period).
// That will be this GL_TIMESTAMP get, which potentially distorts the
// reported results. So we'll only do it fairly rarely, and for most
// frames we'll just assume the clocks don't drift much
const double RESYNC_PERIOD = 1.0; // seconds
double now = m_Profiler.GetTime();
if (m_Frames.empty() || now > m_Frames.back().syncTimeStart + RESYNC_PERIOD)
{
PROFILE2("profile timestamp resync");
pglGetInteger64v(GL_TIMESTAMP, &frame.syncTimestampStart);
ogl_WarnIfError();
frame.syncTimeStart = m_Profiler.GetTime();
// (Have to do GetTime again after GL_TIMESTAMP, because GL_TIMESTAMP
// might wait a while before returning its now-current timestamp)
}
else
{
// Reuse the previous frame's sync data
frame.syncTimeStart = m_Frames[m_Frames.size()-1].syncTimeStart;
frame.syncTimestampStart = m_Frames[m_Frames.size()-1].syncTimestampStart;
}
m_Frames.push_back(frame);
RegionEnter("frame");
}
void FrameEnd()
{
RegionLeave("frame");
}
void RecordRegion(const char* id, bool isEnter)
{
ENSURE(!m_Frames.empty());
SFrame& frame = m_Frames.back();
SEvent event;
event.id = id;
event.query = NewQuery();
event.isEnter = isEnter;
pglQueryCounter(event.query, GL_TIMESTAMP);
ogl_WarnIfError();
frame.events.push_back(event);
}
void RegionEnter(const char* id)
{
RecordRegion(id, true);
}
void RegionLeave(const char* id)
{
RecordRegion(id, false);
}
private:
void ProcessFrames()
{
while (!m_Frames.empty())
{
SFrame& frame = m_Frames.front();
// Queries become available in order so we only need to check the last one
GLint available = 0;
pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
ogl_WarnIfError();
if (!available)
break;
// The frame's queries are now available, so retrieve and record all their results:
for (size_t i = 0; i < frame.events.size(); ++i)
{
GLuint64 queryTimestamp = 0;
pglGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryTimestamp);
ogl_WarnIfError();
// Convert to absolute CPU-clock time
double t = frame.syncTimeStart + (double)(queryTimestamp - frame.syncTimestampStart) / 1e9;
// Record a frame-start for syncing
if (i == 0)
m_Storage.RecordFrameStart(t);
if (frame.events[i].isEnter)
m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
else
m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id);
// Associate the frame number with the "frame" region
if (i == 0)
m_Storage.RecordAttributePrintf("%d", frame.num);
}
PopFrontFrame();
}
}
void PopFrontFrame()
{
ENSURE(!m_Frames.empty());
SFrame& frame = m_Frames.front();
for (size_t i = 0; i < frame.events.size(); ++i)
m_FreeQueries.push_back(frame.events[i].query);
m_Frames.pop_front();
}
};
//////////////////////////////////////////////////////////////////////////
/*
 * GL_EXT_timer_query only supports async queries for elapsed time,
 * and only a single simultaneous query.
 * We can't correctly convert it to absolute time, so we just pretend
 * each GPU frame starts the same time as the CPU for that frame.
 * We do a query for elapsed time between every adjacent enter/leave-region event.
 * When all the queries for a frame have their results available,
 * we sum the elapsed times to calculate when each event occurs within the
 * frame, and record the data.
 */
class CProfiler2GPU_EXT_timer_query : public CProfiler2GPU_timer_query
{
// One enter/leave event and the elapsed-time query started at that event
struct SEvent
{
const char* id;
GLuint query; // query for time elapsed from this event until the next one (for a frame's final event: until FrameEnd ends the query)
bool isEnter; // true if entering region; false if leaving
};
// All events recorded for one frame
struct SFrame
{
u32 num;
double timeStart; // CPU time at frame start
std::vector<SEvent> events;
};
// Frames whose query results have not been recorded yet, oldest first
std::deque<SFrame> m_Frames;
public:
static bool IsSupported()
{
return ogl_HaveExtension("GL_EXT_timer_query");
}
CProfiler2GPU_EXT_timer_query(CProfiler2& profiler) :
CProfiler2GPU_timer_query(profiler, "gpu_ext")
{
}
~CProfiler2GPU_EXT_timer_query()
{
// Pop frames to return queries to the free list
while (!m_Frames.empty())
PopFrontFrame();
}
// Records any finished frames, then opens a new frame stamped with the
// current CPU time and enters its top-level "frame" region.
void FrameStart()
{
ProcessFrames();
SFrame frame;
frame.num = m_Profiler.GetFrameNumber();
frame.timeStart = m_Profiler.GetTime();
m_Frames.push_back(frame);
RegionEnter("frame");
}
// Leaves the "frame" region, then ends the elapsed-time query that the
// leave event itself just started, so no query is left open between frames.
void FrameEnd()
{
RegionLeave("frame");
pglEndQueryARB(GL_TIME_ELAPSED);
ogl_WarnIfError();
}
// Appends an event to the newest frame and starts its elapsed-time query.
// Requires FrameStart() to have been called first.
void RecordRegion(const char* id, bool isEnter)
{
ENSURE(!m_Frames.empty());
SFrame& frame = m_Frames.back();
SEvent event;
event.id = id;
event.query = NewQuery();
event.isEnter = isEnter;
// Only one query may be active at a time, so close the previous event's
// query (if this frame already has one) before beginning the next
if (!frame.events.empty())
{
pglEndQueryARB(GL_TIME_ELAPSED);
ogl_WarnIfError();
}
pglBeginQueryARB(GL_TIME_ELAPSED, event.query);
ogl_WarnIfError();
frame.events.push_back(event);
}
void RegionEnter(const char* id)
{
RecordRegion(id, true);
}
void RegionLeave(const char* id)
{
RecordRegion(id, false);
}
private:
// Records the results of all frames, oldest first, whose queries have
// completed, and stops at the first frame that is still pending.
void ProcessFrames()
{
while (!m_Frames.empty())
{
SFrame& frame = m_Frames.front();
// Queries become available in order so we only need to check the last one
GLint available = 0;
pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
ogl_WarnIfError();
if (!available)
break;
// The frame's queries are now available, so retrieve and record all their results:
// t starts at the frame's CPU start time and is advanced by each query's elapsed time
double t = frame.timeStart;
m_Storage.RecordFrameStart(t);
for (size_t i = 0; i < frame.events.size(); ++i)
{
if (frame.events[i].isEnter)
m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
else
m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id);
// Associate the frame number with the "frame" region
if (i == 0)
m_Storage.RecordAttributePrintf("%d", frame.num);
// Advance by the elapsed time to the next event
// NOTE(review): pglGetQueryObjectui64v is listed with the GL_ARB_timer_query / GL3.3
// entry points; confirm it is loaded on drivers that expose only GL_EXT_timer_query
GLuint64 queryElapsed = 0;
pglGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryElapsed);
ogl_WarnIfError();
// The division by 1e9 treats the query result as nanoseconds
t += (double)queryElapsed / 1e9;
}
PopFrontFrame();
}
}
// Drops the oldest frame, returning its query objects to the free list.
void PopFrontFrame()
{
ENSURE(!m_Frames.empty());
SFrame& frame = m_Frames.front();
for (size_t i = 0; i < frame.events.size(); ++i)
m_FreeQueries.push_back(frame.events[i].query);
m_Frames.pop_front();
}
};
//////////////////////////////////////////////////////////////////////////
/*
* GL_INTEL_performance_queries is not officially documented
* (see http://zaynar.co.uk/docs/gl-intel-performance-queries.html)
* but it's potentially useful so we'll support it anyway.
* It supports async queries giving elapsed time plus a load of other
* counters that we'd like to use, and supports many simultaneous queries
* (unlike GL_EXT_timer_query).
* There are multiple query types (typically 2), each with its own set of
* multiple counters.
* On each enter-region event, we start a new set of queries.
* On each leave-region event, we end the corresponding set of queries.
* We can't tell the offsets between the enter events of nested regions,
* so we pretend they all got entered at the same time.
*/
class CProfiler2GPU_INTEL_performance_queries : public CProfiler2GPU_base
{
struct SEvent
{
const char* id;
bool isEnter;
std::vector<GLuint> queries; // if isEnter, one per SPerfQueryType; else empty
};
struct SFrame
{
u32 num;
double timeStart; // CPU time at frame start
std::vector<SEvent> events;
std::vector<size_t> activeRegions; // stack of indexes into events
};
std::deque<SFrame> m_Frames;
// Counters listed by the graphics driver for a particular query type
struct SPerfCounter
{
std::string name;
std::string desc;
GLuint offset;
GLuint size;
GLuint type;
};
// Query types listed by the graphics driver
struct SPerfQueryType
{
GLuint queryTypeId;
std::string name;
GLuint counterBufferSize;
std::vector<SPerfCounter> counters;
std::vector<GLuint> freeQueries; // query objects that are allocated but not currently in use
};
std::vector<SPerfQueryType> m_QueryTypes;
#define INTEL_PERFQUERIES_NONBLOCK 0x83FA
#define INTEL_PERFQUERIES_BLOCK 0x83FB
#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT 0x9402
#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64 0x9403
#define INTEL_PERFQUERIES_TYPE_FLOAT 0x9404
#define INTEL_PERFQUERIES_TYPE_BOOL 0x9406
public:
static bool IsSupported()
{
return ogl_HaveExtension("GL_INTEL_performance_queries");
}
CProfiler2GPU_INTEL_performance_queries(CProfiler2& profiler) :
CProfiler2GPU_base(profiler, "gpu_intel")
{
LoadPerfCounters();
}
~CProfiler2GPU_INTEL_performance_queries()
{
// Pop frames to return queries to the free list
while (!m_Frames.empty())
PopFrontFrame();
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
for (size_t j = 0; j < m_QueryTypes[i].freeQueries.size(); ++j)
pglDeletePerfQueryINTEL(m_QueryTypes[i].freeQueries[j]);
ogl_WarnIfError();
}
void FrameStart()
{
ProcessFrames();
SFrame frame;
frame.num = m_Profiler.GetFrameNumber();
frame.timeStart = m_Profiler.GetTime();
m_Frames.push_back(frame);
RegionEnter("frame");
}
void FrameEnd()
{
RegionLeave("frame");
}
void RegionEnter(const char* id)
{
ENSURE(!m_Frames.empty());
SFrame& frame = m_Frames.back();
SEvent event;
event.id = id;
event.isEnter = true;
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
{
GLuint id = NewQuery(i);
pglBeginPerfQueryINTEL(id);
ogl_WarnIfError();
event.queries.push_back(id);
}
frame.activeRegions.push_back(frame.events.size());
frame.events.push_back(event);
}
void RegionLeave(const char* id)
{
ENSURE(!m_Frames.empty());
SFrame& frame = m_Frames.back();
ENSURE(!frame.activeRegions.empty());
SEvent& activeEvent = frame.events[frame.activeRegions.back()];
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
{
pglEndPerfQueryINTEL(activeEvent.queries[i]);
ogl_WarnIfError();
}
frame.activeRegions.pop_back();
SEvent event;
event.id = id;
event.isEnter = false;
frame.events.push_back(event);
}
private:
GLuint NewQuery(size_t queryIdx)
{
ENSURE(queryIdx < m_QueryTypes.size());
if (m_QueryTypes[queryIdx].freeQueries.empty())
{
GLuint id;
pglCreatePerfQueryINTEL(m_QueryTypes[queryIdx].queryTypeId, &id);
ogl_WarnIfError();
return id;
}
GLuint id = m_QueryTypes[queryIdx].freeQueries.back();
m_QueryTypes[queryIdx].freeQueries.pop_back();
return id;
}
void ProcessFrames()
{
while (!m_Frames.empty())
{
SFrame& frame = m_Frames.front();
// Queries don't become available in order, so check them all before
// trying to read the results from any
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
{
size_t size = m_QueryTypes[j].counterBufferSize;
shared_ptr<char> buf(new char[size], ArrayDeleter());
for (size_t i = 0; i < frame.events.size(); ++i)
{
if (!frame.events[i].isEnter)
continue;
GLuint length = 0;
pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_NONBLOCK, size, buf.get(), &length);
ogl_WarnIfError();
if (length == 0)
return;
}
}
double lastTime = frame.timeStart;
std::stack<double> endTimes;
m_Storage.RecordFrameStart(frame.timeStart);
for (size_t i = 0; i < frame.events.size(); ++i)
{
if (frame.events[i].isEnter)
{
m_Storage.Record(CProfiler2::ITEM_ENTER, lastTime, frame.events[i].id);
if (i == 0)
m_Storage.RecordAttributePrintf("%d", frame.num);
double elapsed = 0.0;
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
{
GLuint length;
char* buf = new char[m_QueryTypes[j].counterBufferSize];
pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_BLOCK, m_QueryTypes[j].counterBufferSize, buf, &length);
ogl_WarnIfError();
ENSURE(length == m_QueryTypes[j].counterBufferSize);
m_Storage.RecordAttributePrintf("-- %hs --", m_QueryTypes[j].name.c_str());
for (size_t k = 0; k < m_QueryTypes[j].counters.size(); ++k)
{
SPerfCounter& counter = m_QueryTypes[j].counters[k];
if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT)
{
ENSURE(counter.size == 4);
GLuint value;
memcpy(&value, buf + counter.offset, counter.size);
m_Storage.RecordAttributePrintf("%hs: %d", counter.name.c_str(), value);
}
else if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64)
{
ENSURE(counter.size == 8);
GLuint64 value;
memcpy(&value, buf + counter.offset, counter.size);
m_Storage.RecordAttributePrintf("%hs: %.0f", counter.name.c_str(), (double)value);
if (counter.name == "TotalTime")
elapsed = (double)value / 1e6;
}
else if (counter.type == INTEL_PERFQUERIES_TYPE_FLOAT)
{
ENSURE(counter.size == 4);
GLfloat value;
memcpy(&value, buf + counter.offset, counter.size);
m_Storage.RecordAttributePrintf("%hs: %f", counter.name.c_str(), value);
}
else if (counter.type == INTEL_PERFQUERIES_TYPE_BOOL)
{
ENSURE(counter.size == 4);
GLuint value;
memcpy(&value, buf + counter.offset, counter.size);
ENSURE(value == 0 || value == 1);
m_Storage.RecordAttributePrintf("%hs: %d", counter.name.c_str(), value);
}
else
{
debug_warn(L"unrecognised Intel performance counter type");
}
}
delete[] buf;
}
endTimes.push(lastTime + elapsed);
}
else
{
lastTime = endTimes.top();
endTimes.pop();
m_Storage.Record(CProfiler2::ITEM_LEAVE, lastTime, frame.events[i].id);
}
}
PopFrontFrame();
}
}
void PopFrontFrame()
{
ENSURE(!m_Frames.empty());
SFrame& frame = m_Frames.front();
for (size_t i = 0; i < frame.events.size(); ++i)
if (frame.events[i].isEnter)
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
m_QueryTypes[j].freeQueries.push_back(frame.events[i].queries[j]);
m_Frames.pop_front();
}
void LoadPerfCounters()
{
GLuint queryTypeId;
pglGetFirstPerfQueryIdINTEL(&queryTypeId);
ogl_WarnIfError();
do
{
char queryName[256];
GLuint counterBufferSize, numCounters, maxQueries, unknown;
pglGetPerfQueryInfoINTEL(queryTypeId, ARRAY_SIZE(queryName), queryName, &counterBufferSize, &numCounters, &maxQueries, &unknown);
ogl_WarnIfError();
ENSURE(unknown == 1);
SPerfQueryType query;
query.queryTypeId = queryTypeId;
query.name = queryName;
query.counterBufferSize = counterBufferSize;
for (GLuint counterId = 1; counterId <= numCounters; ++counterId)
{
char counterName[256];
char counterDesc[2048];
GLuint counterOffset, counterSize, counterUsage, counterType;
GLuint64 unknown2;
pglGetPerfCounterInfoINTEL(queryTypeId, counterId, ARRAY_SIZE(counterName), counterName, ARRAY_SIZE(counterDesc), counterDesc, &counterOffset, &counterSize, &counterUsage, &counterType, &unknown2);
ogl_WarnIfError();
ENSURE(unknown2 == 0 || unknown2 == 1);
SPerfCounter counter;
counter.name = counterName;
counter.desc = counterDesc;
counter.offset = counterOffset;
counter.size = counterSize;
counter.type = counterType;
query.counters.push_back(counter);
}
m_QueryTypes.push_back(query);
pglGetNextPerfQueryIdINTEL(queryTypeId, &queryTypeId);
ogl_WarnIfError();
} while (queryTypeId);
}
};
//////////////////////////////////////////////////////////////////////////
// Creates one backend profiler for each timing mode that is both switched
// on via its "profiler2.gpu.*.enable" setting and reported as supported.
CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
	m_Profiler(profiler), m_ProfilerARB(NULL), m_ProfilerEXT(NULL), m_ProfilerINTEL(NULL)
{
	// Each mode starts out disallowed; the config lookups below may flip it
	bool allowARB = false;
	CFG_GET_USER_VAL("profiler2.gpu.arb.enable", Bool, allowARB);

	bool allowEXT = false;
	CFG_GET_USER_VAL("profiler2.gpu.ext.enable", Bool, allowEXT);

	bool allowINTEL = false;
	CFG_GET_USER_VAL("profiler2.gpu.intel.enable", Bool, allowINTEL);

	// Backends are independent of each other, so several may be active at once
	if (allowARB && CProfiler2GPU_ARB_timer_query::IsSupported())
		m_ProfilerARB = new CProfiler2GPU_ARB_timer_query(m_Profiler);

	if (allowEXT && CProfiler2GPU_EXT_timer_query::IsSupported())
		m_ProfilerEXT = new CProfiler2GPU_EXT_timer_query(m_Profiler);

	if (allowINTEL && CProfiler2GPU_INTEL_performance_queries::IsSupported())
		m_ProfilerINTEL = new CProfiler2GPU_INTEL_performance_queries(m_Profiler);
}
// Releases the three backend profilers through SAFE_DELETE.
// NOTE(review): SAFE_DELETE is defined elsewhere; presumably it tolerates the
// NULL pointers the constructor leaves for backends that were not created.
CProfiler2GPU::~CProfiler2GPU()
{
SAFE_DELETE(m_ProfilerARB);
SAFE_DELETE(m_ProfilerEXT);
SAFE_DELETE(m_ProfilerINTEL);
}
// Forwards the start-of-frame notification to every backend that was created
// (a NULL pointer means that backend is disabled or unsupported).
void CProfiler2GPU::FrameStart()
{
if (m_ProfilerARB)
m_ProfilerARB->FrameStart();
if (m_ProfilerEXT)
m_ProfilerEXT->FrameStart();
if (m_ProfilerINTEL)
m_ProfilerINTEL->FrameStart();
}
// Forwards the end-of-frame notification to every backend that was created.
void CProfiler2GPU::FrameEnd()
{
if (m_ProfilerARB)
m_ProfilerARB->FrameEnd();
if (m_ProfilerEXT)
m_ProfilerEXT->FrameEnd();
if (m_ProfilerINTEL)
m_ProfilerINTEL->FrameEnd();
}
// Tells every created backend that the region named 'id' has been entered.
// The pointer is passed through unchanged to each backend.
void CProfiler2GPU::RegionEnter(const char* id)
{
if (m_ProfilerARB)
m_ProfilerARB->RegionEnter(id);
if (m_ProfilerEXT)
m_ProfilerEXT->RegionEnter(id);
if (m_ProfilerINTEL)
m_ProfilerINTEL->RegionEnter(id);
}
// Tells every created backend that the region named 'id' has been left.
// The pointer is passed through unchanged to each backend.
void CProfiler2GPU::RegionLeave(const char* id)
{
if (m_ProfilerARB)
m_ProfilerARB->RegionLeave(id);
if (m_ProfilerEXT)
m_ProfilerEXT->RegionLeave(id);
if (m_ProfilerINTEL)
m_ProfilerINTEL->RegionLeave(id);
}

50
source/ps/Profiler2GPU.h Normal file
View File

@ -0,0 +1,50 @@
/* Copyright (c) 2011 Wildfire Games
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
class CProfiler2;
class CProfiler2GPU_ARB_timer_query;
class CProfiler2GPU_EXT_timer_query;
class CProfiler2GPU_INTEL_performance_queries;
/**
 * Used by CProfiler2 for GPU profiling support.
 *
 * Owns up to three backend profilers (ARB timer queries, EXT timer queries,
 * INTEL performance queries) and forwards every frame/region notification
 * to each backend that exists. The backend classes are only forward-declared
 * here, so this header does not depend on their definitions.
 */
class CProfiler2GPU
{
NONCOPYABLE(CProfiler2GPU);
public:
/**
 * @param profiler profiler this object reports to; stored by reference.
 */
CProfiler2GPU(CProfiler2& profiler);
~CProfiler2GPU();
/// Notifies the backends that a new frame is starting.
void FrameStart();
/// Notifies the backends that the current frame has ended.
void FrameEnd();
/// Notifies the backends that the region named @p id has been entered.
void RegionEnter(const char* id);
/// Notifies the backends that the region named @p id has been left.
void RegionLeave(const char* id);
private:
CProfiler2& m_Profiler;
// One pointer per timing backend
CProfiler2GPU_ARB_timer_query* m_ProfilerARB;
CProfiler2GPU_EXT_timer_query* m_ProfilerEXT;
CProfiler2GPU_INTEL_performance_queries* m_ProfilerINTEL;
};

View File

@ -28,6 +28,7 @@
#include "lib/sysdep/sysdep.h"
#include "ps/ConfigDB.h"
#include "ps/Filesystem.h"
#include "ps/Profiler2.h"
#include "ps/ThreadUtil.h"
#define DEBUG_UPLOADS 0
@ -230,6 +231,7 @@ private:
static void* RunThread(void* data)
{
debug_SetThreadName("CUserReportWorker");
g_Profiler2.RegisterCurrentThread("userreport");
static_cast<CUserReporterWorker*>(data)->Run();
@ -242,8 +244,12 @@ private:
// (This has to be done in the thread because it's potentially very slow)
SetStatus("proxy");
std::wstring proxy;
if (sys_get_proxy_config(wstring_from_utf8(m_URL), proxy) == INFO::OK)
curl_easy_setopt(m_Curl, CURLOPT_PROXY, utf8_from_wstring(proxy).c_str());
{
PROFILE2("get proxy config");
if (sys_get_proxy_config(wstring_from_utf8(m_URL), proxy) == INFO::OK)
curl_easy_setopt(m_Curl, CURLOPT_PROXY, utf8_from_wstring(proxy).c_str());
}
SetStatus("waiting");
@ -267,9 +273,13 @@ private:
* occasionally so it can check its timer.
*/
g_Profiler2.RecordRegionEnter("semaphore wait");
// Wait until the main thread wakes us up
while (SDL_SemWait(m_WorkerSem) == 0)
{
g_Profiler2.RecordRegionLeave("semaphore wait");
// Handle shutdown requests as soon as possible
if (GetShutdown())
return;
@ -291,6 +301,8 @@ private:
return;
}
}
g_Profiler2.RecordRegionLeave("semaphore wait");
}
bool GetEnabled()
@ -316,6 +328,8 @@ private:
bool ProcessReport()
{
PROFILE2("process report");
shared_ptr<CUserReport> report;
{

View File

@ -138,7 +138,7 @@ void OverlayRenderer::PrepareForRendering()
void OverlayRenderer::RenderOverlaysBeforeWater()
{
PROFILE3("render overlays (before)");
PROFILE3_GPU("overlays (before)");
glDisable(GL_TEXTURE_2D);
glEnable(GL_BLEND);
@ -166,7 +166,7 @@ void OverlayRenderer::RenderOverlaysBeforeWater()
void OverlayRenderer::RenderOverlaysAfterWater()
{
PROFILE3("render overlays (after)");
PROFILE3_GPU("overlays (after)");
if (!m->texlines.empty())
{
@ -262,7 +262,7 @@ void OverlayRenderer::RenderOverlaysAfterWater()
void OverlayRenderer::RenderForegroundOverlays(const CCamera& viewCamera)
{
PROFILE3("render overlays (fg)");
PROFILE3_GPU("overlays (fg)");
glEnable(GL_TEXTURE_2D);
glEnable(GL_BLEND);

View File

@ -933,7 +933,7 @@ void CRenderer::SetClearColor(SColor4ub color)
void CRenderer::RenderShadowMap()
{
PROFILE3("render shadow map");
PROFILE3_GPU("shadow map");
m->shadow->BeginRender();
@ -978,7 +978,7 @@ void CRenderer::RenderShadowMap()
void CRenderer::RenderPatches(const CFrustum* frustum)
{
PROFILE3("render patches");
PROFILE3_GPU("patches");
bool filtered = false;
if (frustum)
@ -1050,7 +1050,7 @@ private:
void CRenderer::RenderModels(const CFrustum* frustum)
{
PROFILE3("render models");
PROFILE3_GPU("models");
int flags = 0;
if (frustum)
@ -1087,7 +1087,7 @@ void CRenderer::RenderModels(const CFrustum* frustum)
void CRenderer::RenderTransparentModels(ETransparentMode transparentMode, const CFrustum* frustum)
{
PROFILE3("render transparent models");
PROFILE3_GPU("transparent models");
int flags = 0;
if (frustum)
@ -1165,7 +1165,10 @@ void CRenderer::SetObliqueFrustumClipping(const CVector4D& worldPlane)
CVector4D camPlane = normalMatrix.Transform(worldPlane);
// Grab the current projection matrix from OpenGL
{
PROFILE3("get proj matrix (oblique clipping)"); // sometimes the vsync delay gets accounted here
glGetFloatv(GL_PROJECTION_MATRIX, matrix);
}
// Calculate the clip-space corner point opposite the clipping plane
// as (sgn(camPlane.x), sgn(camPlane.y), 1, 1) and
@ -1197,7 +1200,7 @@ void CRenderer::SetObliqueFrustumClipping(const CVector4D& worldPlane)
// RenderReflections: render the water reflections to the reflection texture
SScreenRect CRenderer::RenderReflections(const CBound& scissor)
{
PROFILE("render reflections");
PROFILE3_GPU("water reflections");
WaterManager& wm = m->waterManager;
@ -1284,7 +1287,7 @@ SScreenRect CRenderer::RenderReflections(const CBound& scissor)
// RenderRefractions: render the water refractions to the refraction texture
SScreenRect CRenderer::RenderRefractions(const CBound &scissor)
{
PROFILE("render refractions");
PROFILE3_GPU("water refractions");
WaterManager& wm = m->waterManager;
@ -1360,7 +1363,7 @@ SScreenRect CRenderer::RenderRefractions(const CBound &scissor)
void CRenderer::RenderSilhouettes()
{
PROFILE3("render silhouettes");
PROFILE3_GPU("silhouettes");
// Render silhouettes of units hidden behind terrain or occluders.
// To avoid breaking the standard rendering of alpha-blended objects, this
@ -1473,7 +1476,7 @@ void CRenderer::RenderParticles()
if (GetRenderPath() != RP_SHADER)
return;
PROFILE3("render particles");
PROFILE3_GPU("particles");
m->particleRenderer.RenderParticles();
@ -1529,11 +1532,11 @@ void CRenderer::RenderSubmissions()
RenderShadowMap();
}
// clear buffers
PROFILE_START("clear buffers");
glClearColor(m_ClearColor[0],m_ClearColor[1],m_ClearColor[2],m_ClearColor[3]);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
PROFILE_END("clear buffers");
{
PROFILE3_GPU("clear buffers");
glClearColor(m_ClearColor[0], m_ClearColor[1], m_ClearColor[2], m_ClearColor[3]);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
}
ogl_WarnIfError();
@ -1545,6 +1548,8 @@ void CRenderer::RenderSubmissions()
{
SScreenRect reflectionScissor = RenderReflections(waterScissor);
SScreenRect refractionScissor = RenderRefractions(waterScissor);
PROFILE3_GPU("water scissor");
SScreenRect dirty;
dirty.x1 = std::min(reflectionScissor.x1, refractionScissor.x1);
dirty.y1 = std::min(reflectionScissor.y1, refractionScissor.y1);
@ -1690,7 +1695,7 @@ void CRenderer::DisplayFrustum()
// Text overlay rendering
void CRenderer::RenderTextOverlays()
{
PROFILE("render text overlays");
PROFILE3_GPU("text overlays");
if (m_DisplayTerrainPriorities)
m->terrainRenderer->RenderPriorities();

View File

@ -93,7 +93,7 @@ void TerrainOverlay::RenderOverlays()
if (g_TerrainOverlayList.size() == 0)
return;
PROFILE3("render terrain overlays");
PROFILE3_GPU("terrain overlays");
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

View File

@ -610,7 +610,7 @@ CBound TerrainRenderer::ScissorWater(const CMatrix3D &viewproj)
// Render fancy water
bool TerrainRenderer::RenderFancyWater()
{
PROFILE3("render fancy water");
PROFILE3_GPU("fancy water");
// If we're using fancy water, make sure its shader is loaded
if (!m->fancyWaterShader)
@ -739,7 +739,7 @@ bool TerrainRenderer::RenderFancyWater()
void TerrainRenderer::RenderSimpleWater()
{
PROFILE3("render simple water");
PROFILE3_GPU("simple water");
WaterManager* WaterMgr = g_Renderer.GetWaterManager();
CLOSTexture& losTexture = g_Game->GetView()->GetLOSTexture();
@ -849,7 +849,7 @@ void TerrainRenderer::RenderWater()
void TerrainRenderer::RenderPriorities()
{
PROFILE("render priorities");
PROFILE("priorities");
ENSURE(m->phase == Phase_Render);

View File

@ -85,6 +85,9 @@ public:
{
JSContext* cx = GetSimContext().GetScriptInterface().GetContext();
PROFILE2_EVENT("post net command");
PROFILE2_ATTR("command: %s", GetSimContext().GetScriptInterface().StringifyJSON(cmd.get(), false).c_str());
// TODO: would be nicer to not use globals
g_Game->GetTurnManager()->PostCommand(CScriptValRooted(cx, cmd));
}

View File

@ -7,7 +7,8 @@ $(refresh);
</script>
<style>
canvas { border: 1px #ddd solid; }
#tooltip { background: #ffd; padding: 4px; font: 16px sans-serif; }
#tooltip { background: #ffd; padding: 4px; font: 12px sans-serif; border: 1px #880 solid; }
#tooltip.long { -moz-column-count: 2; }
</style>
<button onclick="refresh()">Refresh</button>

View File

@ -130,6 +130,7 @@ function update_display(range)
// display_top_items(main_events, g_data.text_output);
display_frames(processed_main, g_data.canvas_frames);
display_events(processed_main, g_data.canvas_frames);
$(g_data.threads[0].canvas).unbind();
$(g_data.canvas_zoom).unbind();
@ -284,21 +285,66 @@ function compute_intervals(data, range)
var num_colours = 0;
var events = [];
var intervals = [];
var stack = [];
for (var i = start; i <= end; ++i)
// Read events for the entire data period (not just start..end)
var lastWasEvent = false;
for (var i = 0; i < data.length; ++i)
{
if (data[i][0] == ITEM_EVENT)
{
events.push({'t': data[i][1], 'id': data[i][2]});
lastWasEvent = true;
}
else if (data[i][0] == ITEM_ATTRIBUTE)
{
if (lastWasEvent)
{
if (!events[events.length-1].attrs)
events[events.length-1].attrs = [];
events[events.length-1].attrs.push(data[i][1]);
}
}
else
{
lastWasEvent = false;
}
}
var intervals = [];
// Read intervals from the focused data period (start..end)
var stack = [];
var lastT = 0;
var lastWasEvent = false;
for (var i = start; i <= end; ++i)
{
if (data[i][0] == ITEM_EVENT)
{
// if (data[i][1] < lastT)
// console.log('Time went backwards: ' + (data[i][1] - lastT));
lastT = data[i][1];
lastWasEvent = true;
}
else if (data[i][0] == ITEM_ENTER)
{
// if (data[i][1] < lastT)
// console.log('Time went backwards: ' + (data[i][1] - lastT));
stack.push({'t0': data[i][1], 'id': data[i][2]});
lastT = data[i][1];
lastWasEvent = false;
}
else if (data[i][0] == ITEM_LEAVE)
{
// if (data[i][1] < lastT)
// console.log('Time went backwards: ' + (data[i][1] - lastT));
lastT = data[i][1];
lastWasEvent = false;
if (!stack.length)
continue;
var interval = stack.pop();
@ -319,13 +365,7 @@ function compute_intervals(data, range)
}
else if (data[i][0] == ITEM_ATTRIBUTE)
{
if (i > 0 && data[i-1][0] == ITEM_EVENT)
{
if (!events[events.length-1].attrs)
events[events.length-1].attrs = [];
events[events.length-1].attrs.push(data[i][1]);
}
else if (stack.length)
if (!lastWasEvent && stack.length)
{
if (!stack[stack.length-1].attrs)
stack[stack.length-1].attrs = [];
@ -365,6 +405,9 @@ function display_frames(data, canvas)
canvas._zoomData = {
'x_to_t': function(x) {
return tmin + (x - xpadding) / dx;
},
't_to_x': function(t) {
return (t - tmin) * dx + xpadding;
}
};
@ -411,6 +454,56 @@ function display_frames(data, canvas)
ctx.restore();
}
// Draws a short red vertical tick on 'canvas' for every entry of data.events
// and registers a tooltip (event id followed by its attributes) for each tick.
// Frame-start markers and GUI mouse-move events are skipped.
// Requires canvas._zoomData.t_to_x and an existing canvas._tooltips array.
function display_events(data, canvas)
{
    var ctx = canvas.getContext('2d');
    ctx.save();

    // Every tick uses the same colour, so set it once rather than per event
    ctx.strokeStyle = 'rgb(255, 0, 0)';

    var time_to_x = canvas._zoomData.t_to_x;

    for (var i = 0; i < data.events.length; ++i)
    {
        var event = data.events[i];

        if (event.id == '__framestart')
            continue;

        if (event.id == 'gui event' && event.attrs && event.attrs[0] == 'type: mousemove')
            continue;

        // The tick is a vertical line 8px tall centred on y = 32
        var x = time_to_x(event.t);
        var y = 32;

        var x0 = x;
        var x1 = x;
        var y0 = y-4;
        var y1 = y+4;

        ctx.beginPath();
        ctx.moveTo(x0, y0);
        ctx.lineTo(x1, y1);
        ctx.stroke();

        canvas._tooltips.push({
            'x0': x0, 'x1': x1,
            'y0': y0, 'y1': y1,
            // Bind the current event now; 'event' is reassigned on each iteration
            // NOTE(review): id and attrs are concatenated as raw HTML - confirm
            // they can never contain markup before treating this as safe
            'text': function(event) { return function() {
                var t = '<b>' + event.id + '</b><br>';
                if (event.attrs)
                {
                    event.attrs.forEach(function(attr) {
                        t += attr + '<br>';
                    });
                }
                return t;
            }} (event)
        });
    }

    ctx.restore();
}
function display_hierarchy(main_data, data, canvas, range, zoom)
{
canvas._tooltips = [];
@ -483,7 +576,12 @@ function display_hierarchy(main_data, data, canvas, range, zoom)
var label = interval.id;
if (interval.attrs)
label += ' [...]';
{
if (/^\d+$/.exec(interval.attrs[0]))
label += ' ' + interval.attrs[0];
else
label += ' [...]';
}
var x0 = Math.floor(time_to_x(interval.t0));
var x1 = Math.floor(time_to_x(interval.t1));
var y0 = padding_top + interval.depth * BAR_SPACING;
@ -532,7 +630,7 @@ function display_hierarchy(main_data, data, canvas, range, zoom)
ctx.moveTo(x+0.5, 0);
ctx.lineTo(x+0.5, canvas.height);
ctx.stroke();
ctx.fillText('Frame [' + ((frame.t1 - frame.t0) * 1000).toFixed(0)+'ms]', x+2, padding_top - 24);
ctx.fillText(((frame.t1 - frame.t0) * 1000).toFixed(0)+'ms', x+2, padding_top - 24);
ctx.restore();
}
@ -636,7 +734,7 @@ function set_tooltip_handlers(canvas)
for (var i = 0; i < tooltips.length; ++i)
{
var t = tooltips[i];
if (t.x0 <= relativeX && relativeX <= t.x1 && t.y0 <= relativeY && relativeY <= t.y1)
if (t.x0-1 <= relativeX && relativeX <= t.x1+1 && t.y0 <= relativeY && relativeY <= t.y1)
{
text = t.text();
break;
@ -644,6 +742,10 @@ function set_tooltip_handlers(canvas)
}
if (text)
{
if (text.length > 512)
$('#tooltip').addClass('long');
else
$('#tooltip').removeClass('long');
$('#tooltip').css('left', (event.pageX+16)+'px');
$('#tooltip').css('top', (event.pageY+8)+'px');
$('#tooltip').html(text);