# Add GPU performance profiling support.
Add some profiler region attributes. Render events in profile visualiser. Change profiler from GPL to MIT. This was SVN commit r10492.
This commit is contained in:
parent
8fba3f92b1
commit
cfb79c3062
@ -201,7 +201,13 @@ hotkey.text.move.right = "Ctrl+RightArrow" ; Move cursor to start of word to
|
||||
; > PROFILER
|
||||
hotkey.profile.toggle = "F11" ; Enable/disable real-time profiler
|
||||
hotkey.profile.save = "Shift+F11" ; Save current profiler data to logs/profile.txt
|
||||
hotkey.profile2.enable = "F11" ; Enable HTTP server for new profiler
|
||||
hotkey.profile2.enable = "F11" ; Enable HTTP/GPU modes for new profiler
|
||||
|
||||
profiler2.http.autoenable = false ; Enable HTTP server output at startup (default off for security/performance)
|
||||
profiler2.gpu.autoenable = false ; Enable GPU timing at startup (default off for performance/compatibility)
|
||||
profiler2.gpu.arb.enable = true ; Allow GL_ARB_timer_query timing mode when available
|
||||
profiler2.gpu.ext.enable = true ; Allow GL_EXT_timer_query timing mode when available
|
||||
profiler2.gpu.intel.enable = true ; Allow GL_INTEL_performance_queries timing mode when available
|
||||
|
||||
; > QUICKSAVE
|
||||
hotkey.quicksave = "Shift+F5"
|
||||
|
@ -272,7 +272,7 @@ void CGUIManager::TickObjects()
|
||||
|
||||
void CGUIManager::Draw()
|
||||
{
|
||||
PROFILE3("render gui");
|
||||
PROFILE3_GPU("gui");
|
||||
|
||||
for (PageStackType::iterator it = m_PageStack.begin(); it != m_PageStack.end(); ++it)
|
||||
it->gui->Draw();
|
||||
|
@ -495,6 +495,10 @@ void IGUIObject::SetScriptHandler(const CStr& Action, JSObject* Function)
|
||||
|
||||
InReaction IGUIObject::SendEvent(EGUIMessageType type, const CStr& EventName)
|
||||
{
|
||||
PROFILE2_EVENT("gui event");
|
||||
PROFILE2_ATTR("type: %s", EventName.c_str());
|
||||
PROFILE2_ATTR("object: %s", m_Name.c_str());
|
||||
|
||||
SGUIMessage msg(type);
|
||||
HandleMessage(msg);
|
||||
|
||||
|
@ -41,6 +41,9 @@ template<> jsval ScriptInterface::ToJSVal<SDL_Event_>(JSContext* cx, SDL_Event_
|
||||
case SDL_MOUSEMOTION: typeName = "mousemotion"; break;
|
||||
case SDL_MOUSEBUTTONDOWN: typeName = "mousebuttondown"; break;
|
||||
case SDL_MOUSEBUTTONUP: typeName = "mousebuttonup"; break;
|
||||
case SDL_QUIT: typeName = "quit"; break;
|
||||
case SDL_VIDEOEXPOSE: typeName = "videoexpose"; break;
|
||||
case SDL_VIDEORESIZE: typeName = "videoresize"; break;
|
||||
case SDL_HOTKEYDOWN: typeName = "hotkeydown"; break;
|
||||
case SDL_HOTKEYUP: typeName = "hotkeyup"; break;
|
||||
default: typeName = "(unknown)"; break;
|
||||
|
@ -235,6 +235,40 @@ FUNC2(void, glGetVertexAttribfvARB, glGetVertexAttribfv, "2.0", (GLuint index, G
|
||||
FUNC2(void, glGetVertexAttribivARB, glGetVertexAttribiv, "2.0", (GLuint index, GLenum pname, GLint *params))
|
||||
FUNC2(void, glGetVertexAttribPointervARB, glGetVertexAttribPointerv, "2.0", (GLuint index, GLenum pname, void **pointer))
|
||||
|
||||
// GL_ARB_occlusion_query / GL1.5:
|
||||
FUNC2(void, glGenQueriesARB, glGenQueries, "1.5", (GLsizei n, GLuint *ids))
|
||||
FUNC2(void, glDeleteQueriesARB, glDeleteQueries, "1.5", (GLsizei n, const GLuint *ids))
|
||||
FUNC2(GLboolean, glIsQueryARB, glIsQuery, "1.5", (GLuint id))
|
||||
FUNC2(void, glBeginQueryARB, glBeginQuery, "1.5", (GLenum target, GLuint id))
|
||||
FUNC2(void, glEndQueryARB, glEndQuery, "1.5", (GLenum target))
|
||||
FUNC2(void, glGetQueryivARB, glGetQueryiv, "1.5", (GLenum target, GLenum pname, GLint *params))
|
||||
FUNC2(void, glGetQueryObjectivARB, glGetQueryObjectiv, "1.5", (GLuint id, GLenum pname, GLint *params))
|
||||
FUNC2(void, glGetQueryObjectuivARB, glGetQueryObjectuiv, "1.5", (GLuint id, GLenum pname, GLuint *params))
|
||||
|
||||
// GL_ARB_sync / GL3.2:
|
||||
FUNC2(void, glGetInteger64v, glGetInteger64v, "3.2", (GLenum pname, GLint64 *params))
|
||||
|
||||
// GL_ARB_timer_query / GL3.3:
|
||||
FUNC2(void, glQueryCounter, glQueryCounter, "3.3", (GLuint id, GLenum target))
|
||||
FUNC2(void, glGetQueryObjecti64v, glGetQueryObjecti64v, "3.3", (GLuint id, GLenum pname, GLint64 *params))
|
||||
FUNC2(void, glGetQueryObjectui64v, glGetQueryObjectui64v, "3.3", (GLuint id, GLenum pname, GLuint64 *params))
|
||||
|
||||
// GL_GREMEDY_string_marker (from gDEBugger)
|
||||
FUNC(int, glStringMarkerGREMEDY, (GLsizei len, const GLvoid *string))
|
||||
|
||||
// GL_INTEL_performance_queries (undocumented, may be unstable, use at own risk;
|
||||
// see http://zaynar.co.uk/docs/gl-intel-performance-queries.html)
|
||||
FUNC(void, glGetFirstPerfQueryIdINTEL, (GLuint *queryId))
|
||||
FUNC(void, glGetNextPerfQueryIdINTEL, (GLuint prevQueryId, GLuint *queryId))
|
||||
FUNC(void, glGetPerfQueryInfoINTEL, (GLuint queryId, GLuint nameMaxLength, char *name, GLuint *counterBufferSize, GLuint *numCounters, GLuint *maxQueries, GLuint *))
|
||||
FUNC(void, glGetPerfCounterInfoINTEL, (GLuint queryId, GLuint counterId, GLuint nameMaxLength, char *name, GLuint descMaxLength, char *desc, GLuint *offset, GLuint *size, GLuint *usage, GLuint *type, GLuint64 *))
|
||||
FUNC(void, glCreatePerfQueryINTEL, (GLuint queryId, GLuint *id))
|
||||
FUNC(void, glBeginPerfQueryINTEL, (GLuint id))
|
||||
FUNC(void, glEndPerfQueryINTEL, (GLuint id))
|
||||
FUNC(void, glDeletePerfQueryINTEL, (GLuint id))
|
||||
FUNC(void, glGetPerfQueryDataINTEL, (GLuint id, GLenum requestType, GLuint maxLength, char *buffer, GLuint *length))
|
||||
|
||||
|
||||
#if OS_WIN
|
||||
// WGL_EXT_swap_control
|
||||
FUNC(int, wglSwapIntervalEXT, (int))
|
||||
@ -250,7 +284,4 @@ FUNC(int, wglQueryPbufferARB, (HPBUFFERARB, int, int*))
|
||||
FUNC(int, wglGetPixelFormatAttribivARB, (HDC, int, int, unsigned int, const int*, int*))
|
||||
FUNC(int, wglGetPixelFormatAttribfvARB, (HDC, int, int, unsigned int, const int*, float*))
|
||||
FUNC(int, wglChoosePixelFormatARB, (HDC, const int *, const float*, unsigned int, int*, unsigned int*))
|
||||
|
||||
// GL_GREMEDY_string_marker (from gDEBugger)
|
||||
FUNC(int, glStringMarkerGREMEDY, (GLsizei len, const GLvoid *string))
|
||||
#endif // OS_WIN
|
||||
|
@ -135,6 +135,7 @@ static InReaction MainInputHandler(const SDL_Event_* ev)
|
||||
}
|
||||
else if (hotkey == "profile2.enable")
|
||||
{
|
||||
g_Profiler2.EnableGPU();
|
||||
g_Profiler2.EnableHTTP();
|
||||
return IN_HANDLED;
|
||||
}
|
||||
@ -151,8 +152,17 @@ static void PumpEvents()
|
||||
PROFILE3("dispatch events");
|
||||
|
||||
SDL_Event_ ev;
|
||||
while(SDL_PollEvent(&ev.ev))
|
||||
while (SDL_PollEvent(&ev.ev))
|
||||
{
|
||||
PROFILE2("event");
|
||||
if (g_GUI)
|
||||
{
|
||||
std::string data = g_GUI->GetScriptInterface().StringifyJSON(
|
||||
ScriptInterface::ToJSVal(g_GUI->GetScriptInterface().GetContext(), ev));
|
||||
PROFILE2_ATTR("%s", data.c_str());
|
||||
}
|
||||
in_dispatch_event(&ev);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -253,6 +263,8 @@ static void Frame()
|
||||
{
|
||||
g_Profiler2.RecordFrameStart();
|
||||
PROFILE2("frame");
|
||||
g_Profiler2.IncrementFrameNumber();
|
||||
PROFILE2_ATTR("%d", g_Profiler2.GetFrameNumber());
|
||||
|
||||
ogl_WarnIfError();
|
||||
|
||||
|
@ -175,7 +175,7 @@ void CConsole::Render()
|
||||
{
|
||||
if (! (m_bVisible || m_bToggle) ) return;
|
||||
|
||||
PROFILE3("render console");
|
||||
PROFILE3_GPU("console");
|
||||
|
||||
CFont font(CONSOLE_FONT);
|
||||
font.Bind();
|
||||
|
@ -277,7 +277,7 @@ void CLogger::LogError(const wchar_t* fmt, ...)
|
||||
|
||||
void CLogger::Render()
|
||||
{
|
||||
PROFILE3("render logger");
|
||||
PROFILE3_GPU("logger");
|
||||
|
||||
CleanupRenderQueue();
|
||||
|
||||
|
@ -275,7 +275,11 @@ bool CGame::Update(double deltaTime, bool doInterpolate)
|
||||
|
||||
if (m_TurnManager->Update(deltaTime, maxTurns))
|
||||
{
|
||||
g_GUI->SendEventToAll("SimulationUpdate");
|
||||
{
|
||||
PROFILE3("gui sim update");
|
||||
g_GUI->SendEventToAll("SimulationUpdate");
|
||||
}
|
||||
|
||||
GetView()->GetLOSTexture().MakeDirty();
|
||||
}
|
||||
}
|
||||
|
@ -198,6 +198,10 @@ void Render()
|
||||
|
||||
ogl_WarnIfError();
|
||||
|
||||
g_Profiler2.RecordGPUFrameStart();
|
||||
|
||||
ogl_WarnIfError();
|
||||
|
||||
CStr skystring = "255 0 255";
|
||||
CFG_GET_USER_VAL("skycolor", String, skystring);
|
||||
CColor skycol;
|
||||
@ -272,7 +276,7 @@ void Render()
|
||||
// Draw the cursor (or set the Windows cursor, on Windows)
|
||||
if (g_DoRenderCursor)
|
||||
{
|
||||
PROFILE3("render cursor");
|
||||
PROFILE3_GPU("cursor");
|
||||
CStrW cursorName = g_CursorName;
|
||||
if (cursorName.empty())
|
||||
{
|
||||
@ -303,6 +307,10 @@ void Render()
|
||||
PROFILE2_ATTR("particles: %d", (int)g_Renderer.GetStats().m_Particles);
|
||||
|
||||
ogl_WarnIfError();
|
||||
|
||||
g_Profiler2.RecordGPUFrameEnd();
|
||||
|
||||
ogl_WarnIfError();
|
||||
}
|
||||
|
||||
|
||||
@ -647,6 +655,8 @@ void Shutdown(int UNUSED(flags))
|
||||
|
||||
tex_codec_unregister_all();
|
||||
|
||||
g_Profiler2.ShutdownGPU();
|
||||
|
||||
TIMER_BEGIN(L"shutdown SDL");
|
||||
ShutdownSDL();
|
||||
TIMER_END(L"shutdown SDL");
|
||||
@ -850,6 +860,13 @@ void Init(const CmdLineArgs& args, int UNUSED(flags))
|
||||
// g_ConfigDB, command line args, globals
|
||||
CONFIG_Init(args);
|
||||
|
||||
// Optionally start profiler HTTP output automatically
|
||||
// (By default it's only enabled by a hotkey, for security/performance)
|
||||
bool profilerHTTPEnable = false;
|
||||
CFG_GET_USER_VAL("profiler2.http.autoenable", Bool, profilerHTTPEnable);
|
||||
if (profilerHTTPEnable)
|
||||
g_Profiler2.EnableHTTP();
|
||||
|
||||
if (!g_Quickstart)
|
||||
g_UserReporter.Initialize(); // after config
|
||||
|
||||
@ -879,6 +896,13 @@ void InitGraphics(const CmdLineArgs& args, int flags)
|
||||
|
||||
ogl_WarnIfError();
|
||||
|
||||
// Optionally start profiler GPU timings automatically
|
||||
// (By default it's only enabled by a hotkey, for performance/compatibility)
|
||||
bool profilerGPUEnable = false;
|
||||
CFG_GET_USER_VAL("profiler2.gpu.autoenable", Bool, profilerGPUEnable);
|
||||
if (profilerGPUEnable)
|
||||
g_Profiler2.EnableGPU();
|
||||
|
||||
if(!g_Quickstart)
|
||||
{
|
||||
WriteSystemInfo();
|
||||
|
@ -198,4 +198,7 @@ public:
|
||||
// Do both old and new profilers simultaneously (1+2=3), for convenience.
|
||||
#define PROFILE3(name) PROFILE(name); PROFILE2(name)
|
||||
|
||||
// Also do GPU
|
||||
#define PROFILE3_GPU(name) PROFILE(name); PROFILE2(name); PROFILE2_GPU(name)
|
||||
|
||||
#endif // INCLUDED_PROFILE
|
||||
|
@ -159,7 +159,7 @@ void CProfileViewer::RenderProfile()
|
||||
return;
|
||||
}
|
||||
|
||||
PROFILE3("render profiler");
|
||||
PROFILE3_GPU("profile viewer");
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
||||
|
@ -1,18 +1,23 @@
|
||||
/* Copyright (C) 2011 Wildfire Games.
|
||||
* This file is part of 0 A.D.
|
||||
/* Copyright (c) 2011 Wildfire Games
|
||||
*
|
||||
* 0 A.D. is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* 0 A.D. is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "precompiled.h"
|
||||
@ -21,6 +26,7 @@
|
||||
|
||||
#include "lib/allocators/shared_ptr.h"
|
||||
#include "ps/CLogger.h"
|
||||
#include "ps/Profiler2GPU.h"
|
||||
#include "third_party/mongoose/mongoose.h"
|
||||
|
||||
CProfiler2 g_Profiler2;
|
||||
@ -29,10 +35,15 @@ CProfiler2 g_Profiler2;
|
||||
const u8 CProfiler2::RESYNC_MAGIC[8] = {0x11, 0x22, 0x33, 0x44, 0xf4, 0x93, 0xbe, 0x15};
|
||||
|
||||
CProfiler2::CProfiler2() :
|
||||
m_Initialised(false), m_MgContext(NULL)
|
||||
m_Initialised(false), m_FrameNumber(0), m_MgContext(NULL), m_GPU(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
CProfiler2::~CProfiler2()
|
||||
{
|
||||
ENSURE(!m_Initialised); // should have called Shutdown() explicitly
|
||||
}
|
||||
|
||||
/**
|
||||
* Mongoose callback. Run in an arbitrary thread (possibly concurrently with other requests).
|
||||
*/
|
||||
@ -133,6 +144,12 @@ void CProfiler2::Initialise()
|
||||
RegisterCurrentThread("main");
|
||||
}
|
||||
|
||||
void CProfiler2::InitialiseGPU()
|
||||
{
|
||||
ENSURE(!m_GPU);
|
||||
m_GPU = new CProfiler2GPU(*this);
|
||||
}
|
||||
|
||||
void CProfiler2::EnableHTTP()
|
||||
{
|
||||
ENSURE(m_Initialised);
|
||||
@ -150,10 +167,24 @@ void CProfiler2::EnableHTTP()
|
||||
ENSURE(m_MgContext);
|
||||
}
|
||||
|
||||
void CProfiler2::EnableGPU()
|
||||
{
|
||||
ENSURE(m_Initialised);
|
||||
if (!m_GPU)
|
||||
InitialiseGPU();
|
||||
}
|
||||
|
||||
void CProfiler2::ShutdownGPU()
|
||||
{
|
||||
SAFE_DELETE(m_GPU);
|
||||
}
|
||||
|
||||
void CProfiler2::Shutdown()
|
||||
{
|
||||
ENSURE(m_Initialised);
|
||||
|
||||
ENSURE(!m_GPU); // must shutdown GPU before profiler
|
||||
|
||||
if (m_MgContext)
|
||||
{
|
||||
mg_stop(m_MgContext);
|
||||
@ -167,6 +198,30 @@ void CProfiler2::Shutdown()
|
||||
m_Initialised = false;
|
||||
}
|
||||
|
||||
void CProfiler2::RecordGPUFrameStart()
|
||||
{
|
||||
if (m_GPU)
|
||||
m_GPU->FrameStart();
|
||||
}
|
||||
|
||||
void CProfiler2::RecordGPUFrameEnd()
|
||||
{
|
||||
if (m_GPU)
|
||||
m_GPU->FrameEnd();
|
||||
}
|
||||
|
||||
void CProfiler2::RecordGPURegionEnter(const char* id)
|
||||
{
|
||||
if (m_GPU)
|
||||
m_GPU->RegionEnter(id);
|
||||
}
|
||||
|
||||
void CProfiler2::RecordGPURegionLeave(const char* id)
|
||||
{
|
||||
if (m_GPU)
|
||||
m_GPU->RegionLeave(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called by pthreads when a registered thread is destroyed.
|
||||
*/
|
||||
@ -174,12 +229,7 @@ void CProfiler2::TLSDtor(void* data)
|
||||
{
|
||||
ThreadStorage* storage = (ThreadStorage*)data;
|
||||
|
||||
CProfiler2& profiler = storage->GetProfiler();
|
||||
|
||||
{
|
||||
CScopeLock lock(profiler.m_Mutex);
|
||||
profiler.m_Threads.erase(std::find(profiler.m_Threads.begin(), profiler.m_Threads.end(), storage));
|
||||
}
|
||||
storage->GetProfiler().RemoveThreadStorage(storage);
|
||||
|
||||
delete (ThreadStorage*)data;
|
||||
}
|
||||
@ -197,10 +247,21 @@ void CProfiler2::RegisterCurrentThread(const std::string& name)
|
||||
RecordSyncMarker();
|
||||
RecordEvent("thread start");
|
||||
|
||||
AddThreadStorage(storage);
|
||||
}
|
||||
|
||||
void CProfiler2::AddThreadStorage(ThreadStorage* storage)
|
||||
{
|
||||
CScopeLock lock(m_Mutex);
|
||||
m_Threads.push_back(storage);
|
||||
}
|
||||
|
||||
void CProfiler2::RemoveThreadStorage(ThreadStorage* storage)
|
||||
{
|
||||
CScopeLock lock(m_Mutex);
|
||||
m_Threads.erase(std::find(m_Threads.begin(), m_Threads.end(), storage));
|
||||
}
|
||||
|
||||
CProfiler2::ThreadStorage::ThreadStorage(CProfiler2& profiler, const std::string& name) :
|
||||
m_Profiler(profiler), m_Name(name), m_BufferPos0(0), m_BufferPos1(0), m_LastTime(timer_Time())
|
||||
{
|
||||
@ -256,6 +317,8 @@ void CProfiler2::ThreadStorage::RecordAttribute(const char* fmt, va_list argp)
|
||||
|
||||
void CProfiler2::ConstructJSONOverview(std::ostream& stream)
|
||||
{
|
||||
TIMER(L"profile2 overview");
|
||||
|
||||
CScopeLock lock(m_Mutex);
|
||||
|
||||
stream << "{\"threads\":[";
|
||||
@ -275,6 +338,8 @@ void CProfiler2::ConstructJSONOverview(std::ostream& stream)
|
||||
template<typename V>
|
||||
void RunBufferVisitor(const std::string& buffer, V& visitor)
|
||||
{
|
||||
TIMER(L"profile2 visitor");
|
||||
|
||||
// The buffer doesn't necessarily start at the beginning of an item
|
||||
// (we just grabbed it from some arbitrary point in the middle),
|
||||
// so scan forwards until we find a sync marker.
|
||||
@ -421,24 +486,33 @@ public:
|
||||
|
||||
const char* CProfiler2::ConstructJSONResponse(std::ostream& stream, const std::string& thread)
|
||||
{
|
||||
CScopeLock lock(m_Mutex);
|
||||
TIMER(L"profile2 query");
|
||||
|
||||
std::string buffer;
|
||||
|
||||
ThreadStorage* storage = NULL;
|
||||
for (size_t i = 0; i < m_Threads.size(); ++i)
|
||||
{
|
||||
if (m_Threads[i]->GetName() == thread)
|
||||
TIMER(L"profile2 get buffer");
|
||||
|
||||
CScopeLock lock(m_Mutex); // lock against changes to m_Threads or deletions of ThreadStorage
|
||||
|
||||
ThreadStorage* storage = NULL;
|
||||
for (size_t i = 0; i < m_Threads.size(); ++i)
|
||||
{
|
||||
storage = m_Threads[i];
|
||||
break;
|
||||
if (m_Threads[i]->GetName() == thread)
|
||||
{
|
||||
storage = m_Threads[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!storage)
|
||||
return "cannot find named thread";
|
||||
|
||||
stream << "{\"events\":[\n";
|
||||
|
||||
buffer = storage->GetBuffer();
|
||||
}
|
||||
|
||||
if (!storage)
|
||||
return "cannot find named thread";
|
||||
|
||||
stream << "{\"events\":[\n";
|
||||
|
||||
std::string buffer = storage->GetBuffer();
|
||||
BufferVisitor_Dump visitor(stream);
|
||||
RunBufferVisitor(buffer, visitor);
|
||||
|
||||
|
@ -1,18 +1,23 @@
|
||||
/* Copyright (C) 2011 Wildfire Games.
|
||||
* This file is part of 0 A.D.
|
||||
/* Copyright (c) 2011 Wildfire Games
|
||||
*
|
||||
* 0 A.D. is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* 0 A.D. is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
@ -80,8 +85,12 @@ struct mg_context;
|
||||
// Note: Lots of functions are defined inline, to hypothetically
|
||||
// minimise performance overhead.
|
||||
|
||||
class CProfiler2GPU;
|
||||
|
||||
class CProfiler2
|
||||
{
|
||||
friend class CProfiler2GPU_base;
|
||||
|
||||
public:
|
||||
// Items stored in the buffers:
|
||||
|
||||
@ -113,7 +122,7 @@ public:
|
||||
private:
|
||||
// TODO: what's a good size?
|
||||
// TODO: different threads might want different sizes
|
||||
static const size_t BUFFER_SIZE = 128*1024;
|
||||
static const size_t BUFFER_SIZE = 1024*1024;
|
||||
|
||||
/**
|
||||
* Class instantiated in every registered thread.
|
||||
@ -153,6 +162,14 @@ private:
|
||||
}
|
||||
|
||||
void RecordAttribute(const char* fmt, va_list argp);
|
||||
|
||||
void RecordAttributePrintf(const char* fmt, ...)
|
||||
{
|
||||
va_list argp;
|
||||
va_start(argp, fmt);
|
||||
RecordAttribute(fmt, argp);
|
||||
va_end(argp);
|
||||
}
|
||||
|
||||
CProfiler2& GetProfiler()
|
||||
{
|
||||
@ -230,6 +247,7 @@ private:
|
||||
|
||||
public:
|
||||
CProfiler2();
|
||||
~CProfiler2();
|
||||
|
||||
/**
|
||||
* Call in main thread to set up the profiler,
|
||||
@ -244,6 +262,18 @@ public:
|
||||
*/
|
||||
void EnableHTTP();
|
||||
|
||||
/**
|
||||
* Call in main thread to enable the GPU profiling support,
|
||||
* after OpenGL has been initialised.
|
||||
*/
|
||||
void EnableGPU();
|
||||
|
||||
/**
|
||||
* Call in main thread to shut down the GPU profiling support,
|
||||
* before shutting down OpenGL.
|
||||
*/
|
||||
void ShutdownGPU();
|
||||
|
||||
/**
|
||||
* Call in main thread to shut everything down.
|
||||
* All other profiled threads should have been terminated already.
|
||||
@ -272,6 +302,7 @@ public:
|
||||
*/
|
||||
void RecordFrameStart()
|
||||
{
|
||||
ENSURE(ThreadUtil::IsMainThread());
|
||||
GetThreadStorage().RecordFrameStart(GetTime());
|
||||
}
|
||||
|
||||
@ -298,6 +329,11 @@ public:
|
||||
va_end(argp);
|
||||
}
|
||||
|
||||
void RecordGPUFrameStart();
|
||||
void RecordGPUFrameEnd();
|
||||
void RecordGPURegionEnter(const char* id);
|
||||
void RecordGPURegionLeave(const char* id);
|
||||
|
||||
/**
|
||||
* Call in any thread to produce a JSON representation of the general
|
||||
* state of the application.
|
||||
@ -311,7 +347,27 @@ public:
|
||||
*/
|
||||
const char* ConstructJSONResponse(std::ostream& stream, const std::string& thread);
|
||||
|
||||
double GetTime()
|
||||
{
|
||||
return timer_Time();
|
||||
}
|
||||
|
||||
int GetFrameNumber()
|
||||
{
|
||||
return m_FrameNumber;
|
||||
}
|
||||
|
||||
void IncrementFrameNumber()
|
||||
{
|
||||
++m_FrameNumber;
|
||||
}
|
||||
|
||||
void AddThreadStorage(ThreadStorage* storage);
|
||||
void RemoveThreadStorage(ThreadStorage* storage);
|
||||
|
||||
private:
|
||||
void InitialiseGPU();
|
||||
|
||||
static void TLSDtor(void* data);
|
||||
|
||||
ThreadStorage& GetThreadStorage()
|
||||
@ -321,17 +377,16 @@ private:
|
||||
return *storage;
|
||||
}
|
||||
|
||||
double GetTime()
|
||||
{
|
||||
return timer_Time();
|
||||
}
|
||||
|
||||
bool m_Initialised;
|
||||
|
||||
int m_FrameNumber;
|
||||
|
||||
mg_context* m_MgContext;
|
||||
|
||||
pthread_key_t m_TLS;
|
||||
|
||||
CProfiler2GPU* m_GPU;
|
||||
|
||||
CMutex m_Mutex;
|
||||
std::vector<ThreadStorage*> m_Threads; // thread-safe; protected by m_Mutex
|
||||
};
|
||||
@ -356,6 +411,24 @@ private:
|
||||
const char* m_Name;
|
||||
};
|
||||
|
||||
/**
|
||||
* Scope-based GPU enter/leave helper.
|
||||
*/
|
||||
class CProfile2GPURegion
|
||||
{
|
||||
public:
|
||||
CProfile2GPURegion(const char* name) : m_Name(name)
|
||||
{
|
||||
g_Profiler2.RecordGPURegionEnter(m_Name);
|
||||
}
|
||||
~CProfile2GPURegion()
|
||||
{
|
||||
g_Profiler2.RecordGPURegionLeave(m_Name);
|
||||
}
|
||||
private:
|
||||
const char* m_Name;
|
||||
};
|
||||
|
||||
/**
|
||||
* Starts timing from now until the end of the current scope.
|
||||
* @p region is the name to associate with this region (should be
|
||||
@ -365,6 +438,8 @@ private:
|
||||
*/
|
||||
#define PROFILE2(region) CProfile2Region profile2__(region)
|
||||
|
||||
#define PROFILE2_GPU(region) CProfile2GPURegion profile2gpu__(region)
|
||||
|
||||
/**
|
||||
* Record the named event at the current time.
|
||||
*/
|
||||
|
841
source/ps/Profiler2GPU.cpp
Normal file
841
source/ps/Profiler2GPU.cpp
Normal file
@ -0,0 +1,841 @@
|
||||
/* Copyright (c) 2011 Wildfire Games
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "precompiled.h"
|
||||
|
||||
#include "Profiler2GPU.h"
|
||||
|
||||
#include "lib/ogl.h"
|
||||
#include "lib/allocators/shared_ptr.h"
|
||||
#include "ps/ConfigDB.h"
|
||||
#include "ps/Profiler2.h"
|
||||
|
||||
class CProfiler2GPU_base
|
||||
{
|
||||
NONCOPYABLE(CProfiler2GPU_base);
|
||||
|
||||
protected:
|
||||
CProfiler2GPU_base(CProfiler2& profiler, const char* name) :
|
||||
m_Profiler(profiler), m_Storage(profiler, name)
|
||||
{
|
||||
m_Storage.RecordSyncMarker(m_Profiler.GetTime());
|
||||
m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");
|
||||
|
||||
m_Profiler.AddThreadStorage(&m_Storage);
|
||||
}
|
||||
|
||||
~CProfiler2GPU_base()
|
||||
{
|
||||
m_Profiler.RemoveThreadStorage(&m_Storage);
|
||||
}
|
||||
|
||||
CProfiler2& m_Profiler;
|
||||
CProfiler2::ThreadStorage m_Storage;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Base class for ARB_timer_query, EXT_timer_query
|
||||
class CProfiler2GPU_timer_query : public CProfiler2GPU_base
|
||||
{
|
||||
protected:
|
||||
CProfiler2GPU_timer_query(CProfiler2& profiler, const char* name) :
|
||||
CProfiler2GPU_base(profiler, name)
|
||||
{
|
||||
}
|
||||
|
||||
~CProfiler2GPU_timer_query()
|
||||
{
|
||||
pglDeleteQueriesARB(m_FreeQueries.size(), m_FreeQueries.data());
|
||||
ogl_WarnIfError();
|
||||
}
|
||||
|
||||
// Returns a new GL query object (or a recycled old one)
|
||||
GLuint NewQuery()
|
||||
{
|
||||
if (m_FreeQueries.empty())
|
||||
{
|
||||
// Generate a batch of new queries
|
||||
m_FreeQueries.resize(8);
|
||||
pglGenQueriesARB(m_FreeQueries.size(), m_FreeQueries.data());
|
||||
ogl_WarnIfError();
|
||||
}
|
||||
|
||||
GLuint query = m_FreeQueries.back();
|
||||
m_FreeQueries.pop_back();
|
||||
return query;
|
||||
}
|
||||
|
||||
std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in used
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
* GL_ARB_timer_query supports sync and async queries for absolute GPU
|
||||
* timestamps, which lets us time regions of code relative to the CPU.
|
||||
* At the start of a frame, we record the CPU time and sync GPU timestamp,
|
||||
* giving the time-vs-timestamp offset.
|
||||
* At each enter/leave-region event, we do an async GPU timestamp query.
|
||||
* When all the queries for a frame have their results available,
|
||||
* we convert their GPU timestamps into CPU times and record the data.
|
||||
*/
|
||||
class CProfiler2GPU_ARB_timer_query : public CProfiler2GPU_timer_query
|
||||
{
|
||||
struct SEvent
|
||||
{
|
||||
const char* id;
|
||||
GLuint query;
|
||||
bool isEnter; // true if entering region; false if leaving
|
||||
};
|
||||
|
||||
struct SFrame
|
||||
{
|
||||
u32 num;
|
||||
|
||||
double syncTimeStart; // CPU time at start of maybe this frame or a recent one
|
||||
GLint64 syncTimestampStart; // GL timestamp corresponding to timeStart
|
||||
|
||||
std::vector<SEvent> events;
|
||||
};
|
||||
|
||||
std::deque<SFrame> m_Frames;
|
||||
|
||||
public:
|
||||
static bool IsSupported()
|
||||
{
|
||||
return ogl_HaveExtension("GL_ARB_timer_query");
|
||||
}
|
||||
|
||||
CProfiler2GPU_ARB_timer_query(CProfiler2& profiler) :
|
||||
CProfiler2GPU_timer_query(profiler, "gpu_arb")
|
||||
{
|
||||
// TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
|
||||
// high enough (but apparently it might trigger GL errors on ATI)
|
||||
}
|
||||
|
||||
~CProfiler2GPU_ARB_timer_query()
|
||||
{
|
||||
// Pop frames to return queries to the free list
|
||||
while (!m_Frames.empty())
|
||||
PopFrontFrame();
|
||||
}
|
||||
|
||||
void FrameStart()
|
||||
{
|
||||
ProcessFrames();
|
||||
|
||||
SFrame frame;
|
||||
frame.num = m_Profiler.GetFrameNumber();
|
||||
|
||||
// On (at least) some NVIDIA Windows drivers, when GPU-bound, or when
|
||||
// vsync enabled and not CPU-bound, the first glGet* call at the start
|
||||
// of a frame appears to trigger a wait (to stop the GPU getting too
|
||||
// far behind, or to wait for the vsync period).
|
||||
// That will be this GL_TIMESTAMP get, which potentially distorts the
|
||||
// reported results. So we'll only do it fairly rarely, and for most
|
||||
// frames we'll just assume the clocks don't drift much
|
||||
|
||||
const double RESYNC_PERIOD = 1.0; // seconds
|
||||
|
||||
double now = m_Profiler.GetTime();
|
||||
|
||||
if (m_Frames.empty() || now > m_Frames.back().syncTimeStart + RESYNC_PERIOD)
|
||||
{
|
||||
PROFILE2("profile timestamp resync");
|
||||
|
||||
pglGetInteger64v(GL_TIMESTAMP, &frame.syncTimestampStart);
|
||||
ogl_WarnIfError();
|
||||
|
||||
frame.syncTimeStart = m_Profiler.GetTime();
|
||||
// (Have to do GetTime again after GL_TIMESTAMP, because GL_TIMESTAMP
|
||||
// might wait a while before returning its now-current timestamp)
|
||||
}
|
||||
else
|
||||
{
|
||||
// Reuse the previous frame's sync data
|
||||
frame.syncTimeStart = m_Frames[m_Frames.size()-1].syncTimeStart;
|
||||
frame.syncTimestampStart = m_Frames[m_Frames.size()-1].syncTimestampStart;
|
||||
}
|
||||
|
||||
m_Frames.push_back(frame);
|
||||
|
||||
RegionEnter("frame");
|
||||
}
|
||||
|
||||
void FrameEnd()
|
||||
{
|
||||
RegionLeave("frame");
|
||||
}
|
||||
|
||||
void RecordRegion(const char* id, bool isEnter)
|
||||
{
|
||||
ENSURE(!m_Frames.empty());
|
||||
SFrame& frame = m_Frames.back();
|
||||
|
||||
SEvent event;
|
||||
event.id = id;
|
||||
event.query = NewQuery();
|
||||
event.isEnter = isEnter;
|
||||
|
||||
pglQueryCounter(event.query, GL_TIMESTAMP);
|
||||
ogl_WarnIfError();
|
||||
|
||||
frame.events.push_back(event);
|
||||
}
|
||||
|
||||
void RegionEnter(const char* id)
|
||||
{
|
||||
RecordRegion(id, true);
|
||||
}
|
||||
|
||||
void RegionLeave(const char* id)
|
||||
{
|
||||
RecordRegion(id, false);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void ProcessFrames()
|
||||
{
|
||||
while (!m_Frames.empty())
|
||||
{
|
||||
SFrame& frame = m_Frames.front();
|
||||
|
||||
// Queries become available in order so we only need to check the last one
|
||||
GLint available = 0;
|
||||
pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
|
||||
ogl_WarnIfError();
|
||||
if (!available)
|
||||
break;
|
||||
|
||||
// The frame's queries are now available, so retrieve and record all their results:
|
||||
|
||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
||||
{
|
||||
GLuint64 queryTimestamp = 0;
|
||||
pglGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryTimestamp);
|
||||
ogl_WarnIfError();
|
||||
|
||||
// Convert to absolute CPU-clock time
|
||||
double t = frame.syncTimeStart + (double)(queryTimestamp - frame.syncTimestampStart) / 1e9;
|
||||
|
||||
// Record a frame-start for syncing
|
||||
if (i == 0)
|
||||
m_Storage.RecordFrameStart(t);
|
||||
|
||||
if (frame.events[i].isEnter)
|
||||
m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
|
||||
else
|
||||
m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id);
|
||||
|
||||
// Associate the frame number with the "frame" region
|
||||
if (i == 0)
|
||||
m_Storage.RecordAttributePrintf("%d", frame.num);
|
||||
}
|
||||
|
||||
PopFrontFrame();
|
||||
}
|
||||
}
|
||||
|
||||
void PopFrontFrame()
|
||||
{
|
||||
ENSURE(!m_Frames.empty());
|
||||
SFrame& frame = m_Frames.front();
|
||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
||||
m_FreeQueries.push_back(frame.events[i].query);
|
||||
m_Frames.pop_front();
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
* GL_EXT_timer_query only supports async queries for elapsed time,
|
||||
* and only a single simultaneous query.
|
||||
* We can't correctly convert it to absolute time, so we just pretend
|
||||
* each GPU frame starts the same time as the CPU for that frame.
|
||||
* We do a query for elapsed time between every adjacent enter/leave-region event.
|
||||
* When all the queries for a frame have their results available,
|
||||
* we sum the elapsed times to calculate when each event occurs within the
|
||||
* frame, and record the data.
|
||||
*/
|
||||
class CProfiler2GPU_EXT_timer_query : public CProfiler2GPU_timer_query
|
||||
{
|
||||
struct SEvent
|
||||
{
|
||||
const char* id;
|
||||
GLuint query; // query for time elapsed from this event until the next, or 0 for final event
|
||||
bool isEnter; // true if entering region; false if leaving
|
||||
};
|
||||
|
||||
struct SFrame
|
||||
{
|
||||
u32 num;
|
||||
double timeStart; // CPU time at frame start
|
||||
std::vector<SEvent> events;
|
||||
};
|
||||
|
||||
std::deque<SFrame> m_Frames;
|
||||
|
||||
public:
|
||||
static bool IsSupported()
|
||||
{
|
||||
return ogl_HaveExtension("GL_EXT_timer_query");
|
||||
}
|
||||
|
||||
CProfiler2GPU_EXT_timer_query(CProfiler2& profiler) :
|
||||
CProfiler2GPU_timer_query(profiler, "gpu_ext")
|
||||
{
|
||||
}
|
||||
|
||||
~CProfiler2GPU_EXT_timer_query()
|
||||
{
|
||||
// Pop frames to return queries to the free list
|
||||
while (!m_Frames.empty())
|
||||
PopFrontFrame();
|
||||
}
|
||||
|
||||
void FrameStart()
|
||||
{
|
||||
ProcessFrames();
|
||||
|
||||
SFrame frame;
|
||||
frame.num = m_Profiler.GetFrameNumber();
|
||||
frame.timeStart = m_Profiler.GetTime();
|
||||
|
||||
m_Frames.push_back(frame);
|
||||
|
||||
RegionEnter("frame");
|
||||
}
|
||||
|
||||
void FrameEnd()
|
||||
{
|
||||
RegionLeave("frame");
|
||||
|
||||
pglEndQueryARB(GL_TIME_ELAPSED);
|
||||
ogl_WarnIfError();
|
||||
}
|
||||
|
||||
void RecordRegion(const char* id, bool isEnter)
|
||||
{
|
||||
ENSURE(!m_Frames.empty());
|
||||
SFrame& frame = m_Frames.back();
|
||||
|
||||
SEvent event;
|
||||
event.id = id;
|
||||
event.query = NewQuery();
|
||||
event.isEnter = isEnter;
|
||||
|
||||
if (!frame.events.empty())
|
||||
{
|
||||
pglEndQueryARB(GL_TIME_ELAPSED);
|
||||
ogl_WarnIfError();
|
||||
}
|
||||
|
||||
pglBeginQueryARB(GL_TIME_ELAPSED, event.query);
|
||||
ogl_WarnIfError();
|
||||
|
||||
frame.events.push_back(event);
|
||||
}
|
||||
|
||||
void RegionEnter(const char* id)
|
||||
{
|
||||
RecordRegion(id, true);
|
||||
}
|
||||
|
||||
void RegionLeave(const char* id)
|
||||
{
|
||||
RecordRegion(id, false);
|
||||
}
|
||||
|
||||
private:
|
||||
void ProcessFrames()
|
||||
{
|
||||
while (!m_Frames.empty())
|
||||
{
|
||||
SFrame& frame = m_Frames.front();
|
||||
|
||||
// Queries become available in order so we only need to check the last one
|
||||
GLint available = 0;
|
||||
pglGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
|
||||
ogl_WarnIfError();
|
||||
if (!available)
|
||||
break;
|
||||
|
||||
// The frame's queries are now available, so retrieve and record all their results:
|
||||
|
||||
double t = frame.timeStart;
|
||||
m_Storage.RecordFrameStart(t);
|
||||
|
||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
||||
{
|
||||
if (frame.events[i].isEnter)
|
||||
m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
|
||||
else
|
||||
m_Storage.Record(CProfiler2::ITEM_LEAVE, t, frame.events[i].id);
|
||||
|
||||
// Associate the frame number with the "frame" region
|
||||
if (i == 0)
|
||||
m_Storage.RecordAttributePrintf("%d", frame.num);
|
||||
|
||||
// Advance by the elapsed time to the next event
|
||||
GLuint64 queryElapsed = 0;
|
||||
pglGetQueryObjectui64v(frame.events[i].query, GL_QUERY_RESULT, &queryElapsed);
|
||||
ogl_WarnIfError();
|
||||
t += (double)queryElapsed / 1e9;
|
||||
}
|
||||
|
||||
PopFrontFrame();
|
||||
}
|
||||
}
|
||||
|
||||
void PopFrontFrame()
|
||||
{
|
||||
ENSURE(!m_Frames.empty());
|
||||
SFrame& frame = m_Frames.front();
|
||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
||||
m_FreeQueries.push_back(frame.events[i].query);
|
||||
m_Frames.pop_front();
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
* GL_INTEL_performance_queries is not officially documented
|
||||
* (see http://zaynar.co.uk/docs/gl-intel-performance-queries.html)
|
||||
* but it's potentially useful so we'll support it anyway.
|
||||
* It supports async queries giving elapsed time plus a load of other
|
||||
* counters that we'd like to use, and supports many simultaneous queries
|
||||
* (unlike GL_EXT_timer_query).
|
||||
* There are multiple query types (typically 2), each with its own set of
|
||||
* multiple counters.
|
||||
* On each enter-region event, we start a new set of queries.
|
||||
* On each leave-region event, we end the corresponding set of queries.
|
||||
* We can't tell the offsets between the enter events of nested regions,
|
||||
* so we pretend they all got entered at the same time.
|
||||
*/
|
||||
class CProfiler2GPU_INTEL_performance_queries : public CProfiler2GPU_base
|
||||
{
|
||||
struct SEvent
|
||||
{
|
||||
const char* id;
|
||||
bool isEnter;
|
||||
std::vector<GLuint> queries; // if isEnter, one per SPerfQueryType; else empty
|
||||
};
|
||||
|
||||
struct SFrame
|
||||
{
|
||||
u32 num;
|
||||
double timeStart; // CPU time at frame start
|
||||
std::vector<SEvent> events;
|
||||
std::vector<size_t> activeRegions; // stack of indexes into events
|
||||
};
|
||||
|
||||
std::deque<SFrame> m_Frames;
|
||||
|
||||
// Counters listed by the graphics driver for a particular query type
|
||||
struct SPerfCounter
|
||||
{
|
||||
std::string name;
|
||||
std::string desc;
|
||||
GLuint offset;
|
||||
GLuint size;
|
||||
GLuint type;
|
||||
};
|
||||
|
||||
// Query types listed by the graphics driver
|
||||
struct SPerfQueryType
|
||||
{
|
||||
GLuint queryTypeId;
|
||||
std::string name;
|
||||
GLuint counterBufferSize;
|
||||
std::vector<SPerfCounter> counters;
|
||||
|
||||
std::vector<GLuint> freeQueries; // query objects that are allocated but not currently in use
|
||||
};
|
||||
|
||||
std::vector<SPerfQueryType> m_QueryTypes;
|
||||
|
||||
#define INTEL_PERFQUERIES_NONBLOCK 0x83FA
|
||||
#define INTEL_PERFQUERIES_BLOCK 0x83FB
|
||||
#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT 0x9402
|
||||
#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64 0x9403
|
||||
#define INTEL_PERFQUERIES_TYPE_FLOAT 0x9404
|
||||
#define INTEL_PERFQUERIES_TYPE_BOOL 0x9406
|
||||
|
||||
public:
|
||||
static bool IsSupported()
|
||||
{
|
||||
return ogl_HaveExtension("GL_INTEL_performance_queries");
|
||||
}
|
||||
|
||||
CProfiler2GPU_INTEL_performance_queries(CProfiler2& profiler) :
|
||||
CProfiler2GPU_base(profiler, "gpu_intel")
|
||||
{
|
||||
LoadPerfCounters();
|
||||
}
|
||||
|
||||
~CProfiler2GPU_INTEL_performance_queries()
|
||||
{
|
||||
// Pop frames to return queries to the free list
|
||||
while (!m_Frames.empty())
|
||||
PopFrontFrame();
|
||||
|
||||
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
|
||||
for (size_t j = 0; j < m_QueryTypes[i].freeQueries.size(); ++j)
|
||||
pglDeletePerfQueryINTEL(m_QueryTypes[i].freeQueries[j]);
|
||||
|
||||
ogl_WarnIfError();
|
||||
}
|
||||
|
||||
void FrameStart()
|
||||
{
|
||||
ProcessFrames();
|
||||
|
||||
SFrame frame;
|
||||
frame.num = m_Profiler.GetFrameNumber();
|
||||
frame.timeStart = m_Profiler.GetTime();
|
||||
|
||||
m_Frames.push_back(frame);
|
||||
|
||||
RegionEnter("frame");
|
||||
}
|
||||
|
||||
void FrameEnd()
|
||||
{
|
||||
RegionLeave("frame");
|
||||
}
|
||||
|
||||
void RegionEnter(const char* id)
|
||||
{
|
||||
ENSURE(!m_Frames.empty());
|
||||
SFrame& frame = m_Frames.back();
|
||||
|
||||
SEvent event;
|
||||
event.id = id;
|
||||
event.isEnter = true;
|
||||
|
||||
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
|
||||
{
|
||||
GLuint id = NewQuery(i);
|
||||
pglBeginPerfQueryINTEL(id);
|
||||
ogl_WarnIfError();
|
||||
event.queries.push_back(id);
|
||||
}
|
||||
|
||||
frame.activeRegions.push_back(frame.events.size());
|
||||
|
||||
frame.events.push_back(event);
|
||||
}
|
||||
|
||||
void RegionLeave(const char* id)
|
||||
{
|
||||
ENSURE(!m_Frames.empty());
|
||||
SFrame& frame = m_Frames.back();
|
||||
|
||||
ENSURE(!frame.activeRegions.empty());
|
||||
SEvent& activeEvent = frame.events[frame.activeRegions.back()];
|
||||
|
||||
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
|
||||
{
|
||||
pglEndPerfQueryINTEL(activeEvent.queries[i]);
|
||||
ogl_WarnIfError();
|
||||
}
|
||||
|
||||
frame.activeRegions.pop_back();
|
||||
|
||||
SEvent event;
|
||||
event.id = id;
|
||||
event.isEnter = false;
|
||||
frame.events.push_back(event);
|
||||
}
|
||||
|
||||
private:
|
||||
GLuint NewQuery(size_t queryIdx)
|
||||
{
|
||||
ENSURE(queryIdx < m_QueryTypes.size());
|
||||
|
||||
if (m_QueryTypes[queryIdx].freeQueries.empty())
|
||||
{
|
||||
GLuint id;
|
||||
pglCreatePerfQueryINTEL(m_QueryTypes[queryIdx].queryTypeId, &id);
|
||||
ogl_WarnIfError();
|
||||
return id;
|
||||
}
|
||||
|
||||
GLuint id = m_QueryTypes[queryIdx].freeQueries.back();
|
||||
m_QueryTypes[queryIdx].freeQueries.pop_back();
|
||||
return id;
|
||||
}
|
||||
|
||||
void ProcessFrames()
|
||||
{
|
||||
while (!m_Frames.empty())
|
||||
{
|
||||
SFrame& frame = m_Frames.front();
|
||||
|
||||
// Queries don't become available in order, so check them all before
|
||||
// trying to read the results from any
|
||||
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
|
||||
{
|
||||
size_t size = m_QueryTypes[j].counterBufferSize;
|
||||
shared_ptr<char> buf(new char[size], ArrayDeleter());
|
||||
|
||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
||||
{
|
||||
if (!frame.events[i].isEnter)
|
||||
continue;
|
||||
|
||||
GLuint length = 0;
|
||||
pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_NONBLOCK, size, buf.get(), &length);
|
||||
ogl_WarnIfError();
|
||||
if (length == 0)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
double lastTime = frame.timeStart;
|
||||
std::stack<double> endTimes;
|
||||
|
||||
m_Storage.RecordFrameStart(frame.timeStart);
|
||||
|
||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
||||
{
|
||||
if (frame.events[i].isEnter)
|
||||
{
|
||||
m_Storage.Record(CProfiler2::ITEM_ENTER, lastTime, frame.events[i].id);
|
||||
|
||||
if (i == 0)
|
||||
m_Storage.RecordAttributePrintf("%d", frame.num);
|
||||
|
||||
double elapsed = 0.0;
|
||||
|
||||
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
|
||||
{
|
||||
GLuint length;
|
||||
char* buf = new char[m_QueryTypes[j].counterBufferSize];
|
||||
pglGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_BLOCK, m_QueryTypes[j].counterBufferSize, buf, &length);
|
||||
ogl_WarnIfError();
|
||||
ENSURE(length == m_QueryTypes[j].counterBufferSize);
|
||||
|
||||
m_Storage.RecordAttributePrintf("-- %hs --", m_QueryTypes[j].name.c_str());
|
||||
|
||||
for (size_t k = 0; k < m_QueryTypes[j].counters.size(); ++k)
|
||||
{
|
||||
SPerfCounter& counter = m_QueryTypes[j].counters[k];
|
||||
|
||||
if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT)
|
||||
{
|
||||
ENSURE(counter.size == 4);
|
||||
GLuint value;
|
||||
memcpy(&value, buf + counter.offset, counter.size);
|
||||
m_Storage.RecordAttributePrintf("%hs: %d", counter.name.c_str(), value);
|
||||
}
|
||||
else if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64)
|
||||
{
|
||||
ENSURE(counter.size == 8);
|
||||
GLuint64 value;
|
||||
memcpy(&value, buf + counter.offset, counter.size);
|
||||
m_Storage.RecordAttributePrintf("%hs: %.0f", counter.name.c_str(), (double)value);
|
||||
|
||||
if (counter.name == "TotalTime")
|
||||
elapsed = (double)value / 1e6;
|
||||
}
|
||||
else if (counter.type == INTEL_PERFQUERIES_TYPE_FLOAT)
|
||||
{
|
||||
ENSURE(counter.size == 4);
|
||||
GLfloat value;
|
||||
memcpy(&value, buf + counter.offset, counter.size);
|
||||
m_Storage.RecordAttributePrintf("%hs: %f", counter.name.c_str(), value);
|
||||
}
|
||||
else if (counter.type == INTEL_PERFQUERIES_TYPE_BOOL)
|
||||
{
|
||||
ENSURE(counter.size == 4);
|
||||
GLuint value;
|
||||
memcpy(&value, buf + counter.offset, counter.size);
|
||||
ENSURE(value == 0 || value == 1);
|
||||
m_Storage.RecordAttributePrintf("%hs: %d", counter.name.c_str(), value);
|
||||
}
|
||||
else
|
||||
{
|
||||
debug_warn(L"unrecognised Intel performance counter type");
|
||||
}
|
||||
}
|
||||
|
||||
delete[] buf;
|
||||
}
|
||||
|
||||
endTimes.push(lastTime + elapsed);
|
||||
}
|
||||
else
|
||||
{
|
||||
lastTime = endTimes.top();
|
||||
endTimes.pop();
|
||||
m_Storage.Record(CProfiler2::ITEM_LEAVE, lastTime, frame.events[i].id);
|
||||
}
|
||||
}
|
||||
|
||||
PopFrontFrame();
|
||||
}
|
||||
}
|
||||
|
||||
void PopFrontFrame()
|
||||
{
|
||||
ENSURE(!m_Frames.empty());
|
||||
SFrame& frame = m_Frames.front();
|
||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
||||
if (frame.events[i].isEnter)
|
||||
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
|
||||
m_QueryTypes[j].freeQueries.push_back(frame.events[i].queries[j]);
|
||||
m_Frames.pop_front();
|
||||
}
|
||||
|
||||
void LoadPerfCounters()
|
||||
{
|
||||
GLuint queryTypeId;
|
||||
pglGetFirstPerfQueryIdINTEL(&queryTypeId);
|
||||
ogl_WarnIfError();
|
||||
do
|
||||
{
|
||||
char queryName[256];
|
||||
GLuint counterBufferSize, numCounters, maxQueries, unknown;
|
||||
pglGetPerfQueryInfoINTEL(queryTypeId, ARRAY_SIZE(queryName), queryName, &counterBufferSize, &numCounters, &maxQueries, &unknown);
|
||||
ogl_WarnIfError();
|
||||
ENSURE(unknown == 1);
|
||||
|
||||
SPerfQueryType query;
|
||||
query.queryTypeId = queryTypeId;
|
||||
query.name = queryName;
|
||||
query.counterBufferSize = counterBufferSize;
|
||||
|
||||
for (GLuint counterId = 1; counterId <= numCounters; ++counterId)
|
||||
{
|
||||
char counterName[256];
|
||||
char counterDesc[2048];
|
||||
GLuint counterOffset, counterSize, counterUsage, counterType;
|
||||
GLuint64 unknown2;
|
||||
pglGetPerfCounterInfoINTEL(queryTypeId, counterId, ARRAY_SIZE(counterName), counterName, ARRAY_SIZE(counterDesc), counterDesc, &counterOffset, &counterSize, &counterUsage, &counterType, &unknown2);
|
||||
ogl_WarnIfError();
|
||||
ENSURE(unknown2 == 0 || unknown2 == 1);
|
||||
|
||||
SPerfCounter counter;
|
||||
counter.name = counterName;
|
||||
counter.desc = counterDesc;
|
||||
counter.offset = counterOffset;
|
||||
counter.size = counterSize;
|
||||
counter.type = counterType;
|
||||
query.counters.push_back(counter);
|
||||
}
|
||||
|
||||
m_QueryTypes.push_back(query);
|
||||
|
||||
pglGetNextPerfQueryIdINTEL(queryTypeId, &queryTypeId);
|
||||
ogl_WarnIfError();
|
||||
|
||||
} while (queryTypeId);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
|
||||
m_Profiler(profiler), m_ProfilerARB(NULL), m_ProfilerEXT(NULL), m_ProfilerINTEL(NULL)
|
||||
{
|
||||
bool enabledARB = false;
|
||||
bool enabledEXT = false;
|
||||
bool enabledINTEL = false;
|
||||
CFG_GET_USER_VAL("profiler2.gpu.arb.enable", Bool, enabledARB);
|
||||
CFG_GET_USER_VAL("profiler2.gpu.ext.enable", Bool, enabledEXT);
|
||||
CFG_GET_USER_VAL("profiler2.gpu.intel.enable", Bool, enabledINTEL);
|
||||
|
||||
if (enabledARB && CProfiler2GPU_ARB_timer_query::IsSupported())
|
||||
{
|
||||
m_ProfilerARB = new CProfiler2GPU_ARB_timer_query(m_Profiler);
|
||||
}
|
||||
|
||||
if (enabledEXT && CProfiler2GPU_EXT_timer_query::IsSupported())
|
||||
{
|
||||
m_ProfilerEXT = new CProfiler2GPU_EXT_timer_query(m_Profiler);
|
||||
}
|
||||
|
||||
if (enabledINTEL && CProfiler2GPU_INTEL_performance_queries::IsSupported())
|
||||
{
|
||||
m_ProfilerINTEL = new CProfiler2GPU_INTEL_performance_queries(m_Profiler);
|
||||
}
|
||||
}
|
||||
|
||||
CProfiler2GPU::~CProfiler2GPU()
|
||||
{
|
||||
SAFE_DELETE(m_ProfilerARB);
|
||||
SAFE_DELETE(m_ProfilerEXT);
|
||||
SAFE_DELETE(m_ProfilerINTEL);
|
||||
}
|
||||
|
||||
void CProfiler2GPU::FrameStart()
|
||||
{
|
||||
if (m_ProfilerARB)
|
||||
m_ProfilerARB->FrameStart();
|
||||
|
||||
if (m_ProfilerEXT)
|
||||
m_ProfilerEXT->FrameStart();
|
||||
|
||||
if (m_ProfilerINTEL)
|
||||
m_ProfilerINTEL->FrameStart();
|
||||
}
|
||||
|
||||
void CProfiler2GPU::FrameEnd()
|
||||
{
|
||||
if (m_ProfilerARB)
|
||||
m_ProfilerARB->FrameEnd();
|
||||
|
||||
if (m_ProfilerEXT)
|
||||
m_ProfilerEXT->FrameEnd();
|
||||
|
||||
if (m_ProfilerINTEL)
|
||||
m_ProfilerINTEL->FrameEnd();
|
||||
}
|
||||
|
||||
void CProfiler2GPU::RegionEnter(const char* id)
|
||||
{
|
||||
if (m_ProfilerARB)
|
||||
m_ProfilerARB->RegionEnter(id);
|
||||
|
||||
if (m_ProfilerEXT)
|
||||
m_ProfilerEXT->RegionEnter(id);
|
||||
|
||||
if (m_ProfilerINTEL)
|
||||
m_ProfilerINTEL->RegionEnter(id);
|
||||
}
|
||||
|
||||
void CProfiler2GPU::RegionLeave(const char* id)
|
||||
{
|
||||
if (m_ProfilerARB)
|
||||
m_ProfilerARB->RegionLeave(id);
|
||||
|
||||
if (m_ProfilerEXT)
|
||||
m_ProfilerEXT->RegionLeave(id);
|
||||
|
||||
if (m_ProfilerINTEL)
|
||||
m_ProfilerINTEL->RegionLeave(id);
|
||||
}
|
50
source/ps/Profiler2GPU.h
Normal file
50
source/ps/Profiler2GPU.h
Normal file
@ -0,0 +1,50 @@
|
||||
/* Copyright (c) 2011 Wildfire Games
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
class CProfiler2;
|
||||
class CProfiler2GPU_ARB_timer_query;
|
||||
class CProfiler2GPU_EXT_timer_query;
|
||||
class CProfiler2GPU_INTEL_performance_queries;
|
||||
|
||||
/**
|
||||
* Used by CProfiler2 for GPU profiling support.
|
||||
*/
|
||||
class CProfiler2GPU
|
||||
{
|
||||
NONCOPYABLE(CProfiler2GPU);
|
||||
|
||||
public:
|
||||
CProfiler2GPU(CProfiler2& profiler);
|
||||
~CProfiler2GPU();
|
||||
|
||||
void FrameStart();
|
||||
void FrameEnd();
|
||||
void RegionEnter(const char* id);
|
||||
void RegionLeave(const char* id);
|
||||
|
||||
private:
|
||||
CProfiler2& m_Profiler;
|
||||
|
||||
CProfiler2GPU_ARB_timer_query* m_ProfilerARB;
|
||||
CProfiler2GPU_EXT_timer_query* m_ProfilerEXT;
|
||||
CProfiler2GPU_INTEL_performance_queries* m_ProfilerINTEL;
|
||||
};
|
@ -28,6 +28,7 @@
|
||||
#include "lib/sysdep/sysdep.h"
|
||||
#include "ps/ConfigDB.h"
|
||||
#include "ps/Filesystem.h"
|
||||
#include "ps/Profiler2.h"
|
||||
#include "ps/ThreadUtil.h"
|
||||
|
||||
#define DEBUG_UPLOADS 0
|
||||
@ -230,6 +231,7 @@ private:
|
||||
static void* RunThread(void* data)
|
||||
{
|
||||
debug_SetThreadName("CUserReportWorker");
|
||||
g_Profiler2.RegisterCurrentThread("userreport");
|
||||
|
||||
static_cast<CUserReporterWorker*>(data)->Run();
|
||||
|
||||
@ -242,8 +244,12 @@ private:
|
||||
// (This has to be done in the thread because it's potentially very slow)
|
||||
SetStatus("proxy");
|
||||
std::wstring proxy;
|
||||
if (sys_get_proxy_config(wstring_from_utf8(m_URL), proxy) == INFO::OK)
|
||||
curl_easy_setopt(m_Curl, CURLOPT_PROXY, utf8_from_wstring(proxy).c_str());
|
||||
|
||||
{
|
||||
PROFILE2("get proxy config");
|
||||
if (sys_get_proxy_config(wstring_from_utf8(m_URL), proxy) == INFO::OK)
|
||||
curl_easy_setopt(m_Curl, CURLOPT_PROXY, utf8_from_wstring(proxy).c_str());
|
||||
}
|
||||
|
||||
SetStatus("waiting");
|
||||
|
||||
@ -267,9 +273,13 @@ private:
|
||||
* occasionally so it can check its timer.
|
||||
*/
|
||||
|
||||
g_Profiler2.RecordRegionEnter("semaphore wait");
|
||||
|
||||
// Wait until the main thread wakes us up
|
||||
while (SDL_SemWait(m_WorkerSem) == 0)
|
||||
{
|
||||
g_Profiler2.RecordRegionLeave("semaphore wait");
|
||||
|
||||
// Handle shutdown requests as soon as possible
|
||||
if (GetShutdown())
|
||||
return;
|
||||
@ -291,6 +301,8 @@ private:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
g_Profiler2.RecordRegionLeave("semaphore wait");
|
||||
}
|
||||
|
||||
bool GetEnabled()
|
||||
@ -316,6 +328,8 @@ private:
|
||||
|
||||
bool ProcessReport()
|
||||
{
|
||||
PROFILE2("process report");
|
||||
|
||||
shared_ptr<CUserReport> report;
|
||||
|
||||
{
|
||||
|
@ -138,7 +138,7 @@ void OverlayRenderer::PrepareForRendering()
|
||||
|
||||
void OverlayRenderer::RenderOverlaysBeforeWater()
|
||||
{
|
||||
PROFILE3("render overlays (before)");
|
||||
PROFILE3_GPU("overlays (before)");
|
||||
|
||||
glDisable(GL_TEXTURE_2D);
|
||||
glEnable(GL_BLEND);
|
||||
@ -166,7 +166,7 @@ void OverlayRenderer::RenderOverlaysBeforeWater()
|
||||
|
||||
void OverlayRenderer::RenderOverlaysAfterWater()
|
||||
{
|
||||
PROFILE3("render overlays (after)");
|
||||
PROFILE3_GPU("overlays (after)");
|
||||
|
||||
if (!m->texlines.empty())
|
||||
{
|
||||
@ -262,7 +262,7 @@ void OverlayRenderer::RenderOverlaysAfterWater()
|
||||
|
||||
void OverlayRenderer::RenderForegroundOverlays(const CCamera& viewCamera)
|
||||
{
|
||||
PROFILE3("render overlays (fg)");
|
||||
PROFILE3_GPU("overlays (fg)");
|
||||
|
||||
glEnable(GL_TEXTURE_2D);
|
||||
glEnable(GL_BLEND);
|
||||
|
@ -933,7 +933,7 @@ void CRenderer::SetClearColor(SColor4ub color)
|
||||
|
||||
void CRenderer::RenderShadowMap()
|
||||
{
|
||||
PROFILE3("render shadow map");
|
||||
PROFILE3_GPU("shadow map");
|
||||
|
||||
m->shadow->BeginRender();
|
||||
|
||||
@ -978,7 +978,7 @@ void CRenderer::RenderShadowMap()
|
||||
|
||||
void CRenderer::RenderPatches(const CFrustum* frustum)
|
||||
{
|
||||
PROFILE3("render patches");
|
||||
PROFILE3_GPU("patches");
|
||||
|
||||
bool filtered = false;
|
||||
if (frustum)
|
||||
@ -1050,7 +1050,7 @@ private:
|
||||
|
||||
void CRenderer::RenderModels(const CFrustum* frustum)
|
||||
{
|
||||
PROFILE3("render models");
|
||||
PROFILE3_GPU("models");
|
||||
|
||||
int flags = 0;
|
||||
if (frustum)
|
||||
@ -1087,7 +1087,7 @@ void CRenderer::RenderModels(const CFrustum* frustum)
|
||||
|
||||
void CRenderer::RenderTransparentModels(ETransparentMode transparentMode, const CFrustum* frustum)
|
||||
{
|
||||
PROFILE3("render transparent models");
|
||||
PROFILE3_GPU("transparent models");
|
||||
|
||||
int flags = 0;
|
||||
if (frustum)
|
||||
@ -1165,7 +1165,10 @@ void CRenderer::SetObliqueFrustumClipping(const CVector4D& worldPlane)
|
||||
CVector4D camPlane = normalMatrix.Transform(worldPlane);
|
||||
|
||||
// Grab the current projection matrix from OpenGL
|
||||
{
|
||||
PROFILE3("get proj matrix (oblique clipping)"); // sometimes the vsync delay gets accounted here
|
||||
glGetFloatv(GL_PROJECTION_MATRIX, matrix);
|
||||
}
|
||||
|
||||
// Calculate the clip-space corner point opposite the clipping plane
|
||||
// as (sgn(camPlane.x), sgn(camPlane.y), 1, 1) and
|
||||
@ -1197,7 +1200,7 @@ void CRenderer::SetObliqueFrustumClipping(const CVector4D& worldPlane)
|
||||
// RenderReflections: render the water reflections to the reflection texture
|
||||
SScreenRect CRenderer::RenderReflections(const CBound& scissor)
|
||||
{
|
||||
PROFILE("render reflections");
|
||||
PROFILE3_GPU("water reflections");
|
||||
|
||||
WaterManager& wm = m->waterManager;
|
||||
|
||||
@ -1284,7 +1287,7 @@ SScreenRect CRenderer::RenderReflections(const CBound& scissor)
|
||||
// RenderRefractions: render the water refractions to the refraction texture
|
||||
SScreenRect CRenderer::RenderRefractions(const CBound &scissor)
|
||||
{
|
||||
PROFILE("render refractions");
|
||||
PROFILE3_GPU("water refractions");
|
||||
|
||||
WaterManager& wm = m->waterManager;
|
||||
|
||||
@ -1360,7 +1363,7 @@ SScreenRect CRenderer::RenderRefractions(const CBound &scissor)
|
||||
|
||||
void CRenderer::RenderSilhouettes()
|
||||
{
|
||||
PROFILE3("render silhouettes");
|
||||
PROFILE3_GPU("silhouettes");
|
||||
|
||||
// Render silhouettes of units hidden behind terrain or occluders.
|
||||
// To avoid breaking the standard rendering of alpha-blended objects, this
|
||||
@ -1473,7 +1476,7 @@ void CRenderer::RenderParticles()
|
||||
if (GetRenderPath() != RP_SHADER)
|
||||
return;
|
||||
|
||||
PROFILE3("render particles");
|
||||
PROFILE3_GPU("particles");
|
||||
|
||||
m->particleRenderer.RenderParticles();
|
||||
|
||||
@ -1529,11 +1532,11 @@ void CRenderer::RenderSubmissions()
|
||||
RenderShadowMap();
|
||||
}
|
||||
|
||||
// clear buffers
|
||||
PROFILE_START("clear buffers");
|
||||
glClearColor(m_ClearColor[0],m_ClearColor[1],m_ClearColor[2],m_ClearColor[3]);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
|
||||
PROFILE_END("clear buffers");
|
||||
{
|
||||
PROFILE3_GPU("clear buffers");
|
||||
glClearColor(m_ClearColor[0], m_ClearColor[1], m_ClearColor[2], m_ClearColor[3]);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
|
||||
}
|
||||
|
||||
ogl_WarnIfError();
|
||||
|
||||
@ -1545,6 +1548,8 @@ void CRenderer::RenderSubmissions()
|
||||
{
|
||||
SScreenRect reflectionScissor = RenderReflections(waterScissor);
|
||||
SScreenRect refractionScissor = RenderRefractions(waterScissor);
|
||||
|
||||
PROFILE3_GPU("water scissor");
|
||||
SScreenRect dirty;
|
||||
dirty.x1 = std::min(reflectionScissor.x1, refractionScissor.x1);
|
||||
dirty.y1 = std::min(reflectionScissor.y1, refractionScissor.y1);
|
||||
@ -1690,7 +1695,7 @@ void CRenderer::DisplayFrustum()
|
||||
// Text overlay rendering
|
||||
void CRenderer::RenderTextOverlays()
|
||||
{
|
||||
PROFILE("render text overlays");
|
||||
PROFILE3_GPU("text overlays");
|
||||
|
||||
if (m_DisplayTerrainPriorities)
|
||||
m->terrainRenderer->RenderPriorities();
|
||||
|
@ -93,7 +93,7 @@ void TerrainOverlay::RenderOverlays()
|
||||
if (g_TerrainOverlayList.size() == 0)
|
||||
return;
|
||||
|
||||
PROFILE3("render terrain overlays");
|
||||
PROFILE3_GPU("terrain overlays");
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
||||
|
@ -610,7 +610,7 @@ CBound TerrainRenderer::ScissorWater(const CMatrix3D &viewproj)
|
||||
// Render fancy water
|
||||
bool TerrainRenderer::RenderFancyWater()
|
||||
{
|
||||
PROFILE3("render fancy water");
|
||||
PROFILE3_GPU("fancy water");
|
||||
|
||||
// If we're using fancy water, make sure its shader is loaded
|
||||
if (!m->fancyWaterShader)
|
||||
@ -739,7 +739,7 @@ bool TerrainRenderer::RenderFancyWater()
|
||||
|
||||
void TerrainRenderer::RenderSimpleWater()
|
||||
{
|
||||
PROFILE3("render simple water");
|
||||
PROFILE3_GPU("simple water");
|
||||
|
||||
WaterManager* WaterMgr = g_Renderer.GetWaterManager();
|
||||
CLOSTexture& losTexture = g_Game->GetView()->GetLOSTexture();
|
||||
@ -849,7 +849,7 @@ void TerrainRenderer::RenderWater()
|
||||
|
||||
void TerrainRenderer::RenderPriorities()
|
||||
{
|
||||
PROFILE("render priorities");
|
||||
PROFILE("priorities");
|
||||
|
||||
ENSURE(m->phase == Phase_Render);
|
||||
|
||||
|
@ -85,6 +85,9 @@ public:
|
||||
{
|
||||
JSContext* cx = GetSimContext().GetScriptInterface().GetContext();
|
||||
|
||||
PROFILE2_EVENT("post net command");
|
||||
PROFILE2_ATTR("command: %s", GetSimContext().GetScriptInterface().StringifyJSON(cmd.get(), false).c_str());
|
||||
|
||||
// TODO: would be nicer to not use globals
|
||||
g_Game->GetTurnManager()->PostCommand(CScriptValRooted(cx, cmd));
|
||||
}
|
||||
|
@ -7,7 +7,8 @@ $(refresh);
|
||||
</script>
|
||||
<style>
|
||||
canvas { border: 1px #ddd solid; }
|
||||
#tooltip { background: #ffd; padding: 4px; font: 16px sans-serif; }
|
||||
#tooltip { background: #ffd; padding: 4px; font: 12px sans-serif; border: 1px #880 solid; }
|
||||
#tooltip.long { -moz-column-count: 2; }
|
||||
</style>
|
||||
|
||||
<button onclick="refresh()">Refresh</button>
|
||||
|
@ -130,6 +130,7 @@ function update_display(range)
|
||||
// display_top_items(main_events, g_data.text_output);
|
||||
|
||||
display_frames(processed_main, g_data.canvas_frames);
|
||||
display_events(processed_main, g_data.canvas_frames);
|
||||
|
||||
$(g_data.threads[0].canvas).unbind();
|
||||
$(g_data.canvas_zoom).unbind();
|
||||
@ -284,21 +285,66 @@ function compute_intervals(data, range)
|
||||
var num_colours = 0;
|
||||
|
||||
var events = [];
|
||||
var intervals = [];
|
||||
|
||||
var stack = [];
|
||||
for (var i = start; i <= end; ++i)
|
||||
|
||||
// Read events for the entire data period (not just start..end)
|
||||
var lastWasEvent = false;
|
||||
for (var i = 0; i < data.length; ++i)
|
||||
{
|
||||
if (data[i][0] == ITEM_EVENT)
|
||||
{
|
||||
events.push({'t': data[i][1], 'id': data[i][2]});
|
||||
lastWasEvent = true;
|
||||
}
|
||||
else if (data[i][0] == ITEM_ATTRIBUTE)
|
||||
{
|
||||
if (lastWasEvent)
|
||||
{
|
||||
if (!events[events.length-1].attrs)
|
||||
events[events.length-1].attrs = [];
|
||||
events[events.length-1].attrs.push(data[i][1]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
lastWasEvent = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
var intervals = [];
|
||||
|
||||
// Read intervals from the focused data period (start..end)
|
||||
var stack = [];
|
||||
var lastT = 0;
|
||||
var lastWasEvent = false;
|
||||
for (var i = start; i <= end; ++i)
|
||||
{
|
||||
if (data[i][0] == ITEM_EVENT)
|
||||
{
|
||||
// if (data[i][1] < lastT)
|
||||
// console.log('Time went backwards: ' + (data[i][1] - lastT));
|
||||
|
||||
lastT = data[i][1];
|
||||
lastWasEvent = true;
|
||||
}
|
||||
else if (data[i][0] == ITEM_ENTER)
|
||||
{
|
||||
// if (data[i][1] < lastT)
|
||||
// console.log('Time went backwards: ' + (data[i][1] - lastT));
|
||||
|
||||
stack.push({'t0': data[i][1], 'id': data[i][2]});
|
||||
|
||||
lastT = data[i][1];
|
||||
lastWasEvent = false;
|
||||
}
|
||||
else if (data[i][0] == ITEM_LEAVE)
|
||||
{
|
||||
// if (data[i][1] < lastT)
|
||||
// console.log('Time went backwards: ' + (data[i][1] - lastT));
|
||||
|
||||
lastT = data[i][1];
|
||||
lastWasEvent = false;
|
||||
|
||||
if (!stack.length)
|
||||
continue;
|
||||
var interval = stack.pop();
|
||||
@ -319,13 +365,7 @@ function compute_intervals(data, range)
|
||||
}
|
||||
else if (data[i][0] == ITEM_ATTRIBUTE)
|
||||
{
|
||||
if (i > 0 && data[i-1][0] == ITEM_EVENT)
|
||||
{
|
||||
if (!events[events.length-1].attrs)
|
||||
events[events.length-1].attrs = [];
|
||||
events[events.length-1].attrs.push(data[i][1]);
|
||||
}
|
||||
else if (stack.length)
|
||||
if (!lastWasEvent && stack.length)
|
||||
{
|
||||
if (!stack[stack.length-1].attrs)
|
||||
stack[stack.length-1].attrs = [];
|
||||
@ -365,6 +405,9 @@ function display_frames(data, canvas)
|
||||
canvas._zoomData = {
|
||||
'x_to_t': function(x) {
|
||||
return tmin + (x - xpadding) / dx;
|
||||
},
|
||||
't_to_x': function(t) {
|
||||
return (t - tmin) * dx + xpadding;
|
||||
}
|
||||
};
|
||||
|
||||
@ -411,6 +454,56 @@ function display_frames(data, canvas)
|
||||
ctx.restore();
|
||||
}
|
||||
|
||||
function display_events(data, canvas)
|
||||
{
|
||||
var ctx = canvas.getContext('2d');
|
||||
ctx.save();
|
||||
|
||||
var x_to_time = canvas._zoomData.x_to_t;
|
||||
var time_to_x = canvas._zoomData.t_to_x;
|
||||
|
||||
for (var i = 0; i < data.events.length; ++i)
|
||||
{
|
||||
var event = data.events[i];
|
||||
|
||||
if (event.id == '__framestart')
|
||||
continue;
|
||||
|
||||
if (event.id == 'gui event' && event.attrs && event.attrs[0] == 'type: mousemove')
|
||||
continue;
|
||||
|
||||
var x = time_to_x(event.t);
|
||||
var y = 32;
|
||||
|
||||
var x0 = x;
|
||||
var x1 = x;
|
||||
var y0 = y-4;
|
||||
var y1 = y+4;
|
||||
|
||||
ctx.strokeStyle = 'rgb(255, 0, 0)';
|
||||
ctx.beginPath();
|
||||
ctx.moveTo(x0, y0);
|
||||
ctx.lineTo(x1, y1);
|
||||
ctx.stroke();
|
||||
canvas._tooltips.push({
|
||||
'x0': x0, 'x1': x1,
|
||||
'y0': y0, 'y1': y1,
|
||||
'text': function(event) { return function() {
|
||||
var t = '<b>' + event.id + '</b><br>';
|
||||
if (event.attrs)
|
||||
{
|
||||
event.attrs.forEach(function(attr) {
|
||||
t += attr + '<br>';
|
||||
});
|
||||
}
|
||||
return t;
|
||||
}} (event)
|
||||
});
|
||||
}
|
||||
|
||||
ctx.restore();
|
||||
}
|
||||
|
||||
function display_hierarchy(main_data, data, canvas, range, zoom)
|
||||
{
|
||||
canvas._tooltips = [];
|
||||
@ -483,7 +576,12 @@ function display_hierarchy(main_data, data, canvas, range, zoom)
|
||||
|
||||
var label = interval.id;
|
||||
if (interval.attrs)
|
||||
label += ' [...]';
|
||||
{
|
||||
if (/^\d+$/.exec(interval.attrs[0]))
|
||||
label += ' ' + interval.attrs[0];
|
||||
else
|
||||
label += ' [...]';
|
||||
}
|
||||
var x0 = Math.floor(time_to_x(interval.t0));
|
||||
var x1 = Math.floor(time_to_x(interval.t1));
|
||||
var y0 = padding_top + interval.depth * BAR_SPACING;
|
||||
@ -532,7 +630,7 @@ function display_hierarchy(main_data, data, canvas, range, zoom)
|
||||
ctx.moveTo(x+0.5, 0);
|
||||
ctx.lineTo(x+0.5, canvas.height);
|
||||
ctx.stroke();
|
||||
ctx.fillText('Frame [' + ((frame.t1 - frame.t0) * 1000).toFixed(0)+'ms]', x+2, padding_top - 24);
|
||||
ctx.fillText(((frame.t1 - frame.t0) * 1000).toFixed(0)+'ms', x+2, padding_top - 24);
|
||||
ctx.restore();
|
||||
}
|
||||
|
||||
@ -636,7 +734,7 @@ function set_tooltip_handlers(canvas)
|
||||
for (var i = 0; i < tooltips.length; ++i)
|
||||
{
|
||||
var t = tooltips[i];
|
||||
if (t.x0 <= relativeX && relativeX <= t.x1 && t.y0 <= relativeY && relativeY <= t.y1)
|
||||
if (t.x0-1 <= relativeX && relativeX <= t.x1+1 && t.y0 <= relativeY && relativeY <= t.y1)
|
||||
{
|
||||
text = t.text();
|
||||
break;
|
||||
@ -644,6 +742,10 @@ function set_tooltip_handlers(canvas)
|
||||
}
|
||||
if (text)
|
||||
{
|
||||
if (text.length > 512)
|
||||
$('#tooltip').addClass('long');
|
||||
else
|
||||
$('#tooltip').removeClass('long');
|
||||
$('#tooltip').css('left', (event.pageX+16)+'px');
|
||||
$('#tooltip').css('top', (event.pageY+8)+'px');
|
||||
$('#tooltip').html(text);
|
||||
|
Loading…
Reference in New Issue
Block a user