Removes usages of EXT and INTEL timer query GL extensions.
Tested By: Langbart
Differential Revision: https://code.wildfiregames.com/D4571
This was SVN commit r26724.
This commit is contained in:
parent
4013004040
commit
c842678591
@ -529,9 +529,7 @@ netwarnings = "true" ; Show warnings if the network connection is b
|
|||||||
|
|
||||||
[profiler2]
|
[profiler2]
|
||||||
autoenable = false ; Enable HTTP server output at startup (default off for security/performance)
|
autoenable = false ; Enable HTTP server output at startup (default off for security/performance)
|
||||||
gpu.arb.enable = true ; Allow GL_ARB_timer_query timing mode when available
|
gpu.arb.enable = true ; Allow GL_ARB_timer_query timing mode when available.
|
||||||
gpu.ext.enable = true ; Allow GL_EXT_timer_query timing mode when available
|
|
||||||
gpu.intel.enable = true ; Allow GL_INTEL_performance_queries timing mode when available
|
|
||||||
|
|
||||||
[rlinterface]
|
[rlinterface]
|
||||||
address = "127.0.0.1:6000"
|
address = "127.0.0.1:6000"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2021 Wildfire Games.
|
/* Copyright (C) 2022 Wildfire Games.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
* a copy of this software and associated documentation files (the
|
* a copy of this software and associated documentation files (the
|
||||||
@ -95,7 +95,7 @@ class CProfiler2GPU;
|
|||||||
|
|
||||||
class CProfiler2
|
class CProfiler2
|
||||||
{
|
{
|
||||||
friend class CProfiler2GPU_base;
|
friend class CProfiler2GPUARB;
|
||||||
friend class CProfile2SpikeRegion;
|
friend class CProfile2SpikeRegion;
|
||||||
friend class CProfile2AggregatedRegion;
|
friend class CProfile2AggregatedRegion;
|
||||||
public:
|
public:
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2021 Wildfire Games.
|
/* Copyright (C) 2022 Wildfire Games.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
* a copy of this software and associated documentation files (the
|
* a copy of this software and associated documentation files (the
|
||||||
@ -34,68 +34,6 @@
|
|||||||
|
|
||||||
#if !CONFIG2_GLES
|
#if !CONFIG2_GLES
|
||||||
|
|
||||||
class CProfiler2GPU_base
|
|
||||||
{
|
|
||||||
NONCOPYABLE(CProfiler2GPU_base);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
CProfiler2GPU_base(CProfiler2& profiler, const char* name) :
|
|
||||||
m_Profiler(profiler), m_Storage(*new CProfiler2::ThreadStorage(profiler, name))
|
|
||||||
{
|
|
||||||
m_Storage.RecordSyncMarker(m_Profiler.GetTime());
|
|
||||||
m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");
|
|
||||||
|
|
||||||
m_Profiler.AddThreadStorage(&m_Storage);
|
|
||||||
}
|
|
||||||
|
|
||||||
~CProfiler2GPU_base()
|
|
||||||
{
|
|
||||||
m_Profiler.RemoveThreadStorage(&m_Storage);
|
|
||||||
}
|
|
||||||
|
|
||||||
CProfiler2& m_Profiler;
|
|
||||||
CProfiler2::ThreadStorage& m_Storage;
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// Base class for ARB_timer_query, EXT_timer_query
|
|
||||||
class CProfiler2GPU_timer_query : public CProfiler2GPU_base
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
CProfiler2GPU_timer_query(CProfiler2& profiler, const char* name) :
|
|
||||||
CProfiler2GPU_base(profiler, name)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
~CProfiler2GPU_timer_query()
|
|
||||||
{
|
|
||||||
if (!m_FreeQueries.empty())
|
|
||||||
glDeleteQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns a new GL query object (or a recycled old one)
|
|
||||||
GLuint NewQuery()
|
|
||||||
{
|
|
||||||
if (m_FreeQueries.empty())
|
|
||||||
{
|
|
||||||
// Generate a batch of new queries
|
|
||||||
m_FreeQueries.resize(8);
|
|
||||||
glGenQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
}
|
|
||||||
|
|
||||||
GLuint query = m_FreeQueries.back();
|
|
||||||
m_FreeQueries.pop_back();
|
|
||||||
return query;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in used
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GL_ARB_timer_query supports sync and async queries for absolute GPU
|
* GL_ARB_timer_query supports sync and async queries for absolute GPU
|
||||||
* timestamps, which lets us time regions of code relative to the CPU.
|
* timestamps, which lets us time regions of code relative to the CPU.
|
||||||
@ -105,8 +43,10 @@ protected:
|
|||||||
* When all the queries for a frame have their results available,
|
* When all the queries for a frame have their results available,
|
||||||
* we convert their GPU timestamps into CPU times and record the data.
|
* we convert their GPU timestamps into CPU times and record the data.
|
||||||
*/
|
*/
|
||||||
class CProfiler2GPU_ARB_timer_query : public CProfiler2GPU_timer_query
|
class CProfiler2GPUARB
|
||||||
{
|
{
|
||||||
|
NONCOPYABLE(CProfiler2GPUARB);
|
||||||
|
|
||||||
struct SEvent
|
struct SEvent
|
||||||
{
|
{
|
||||||
const char* id;
|
const char* id;
|
||||||
@ -132,18 +72,29 @@ public:
|
|||||||
return ogl_HaveExtension("GL_ARB_timer_query");
|
return ogl_HaveExtension("GL_ARB_timer_query");
|
||||||
}
|
}
|
||||||
|
|
||||||
CProfiler2GPU_ARB_timer_query(CProfiler2& profiler) :
|
CProfiler2GPUARB(CProfiler2& profiler)
|
||||||
CProfiler2GPU_timer_query(profiler, "gpu_arb")
|
: m_Profiler(profiler), m_Storage(*new CProfiler2::ThreadStorage(profiler, "gpu_arb"))
|
||||||
{
|
{
|
||||||
// TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
|
// TODO: maybe we should check QUERY_COUNTER_BITS to ensure it's
|
||||||
// high enough (but apparently it might trigger GL errors on ATI)
|
// high enough (but apparently it might trigger GL errors on ATI)
|
||||||
|
|
||||||
|
m_Storage.RecordSyncMarker(m_Profiler.GetTime());
|
||||||
|
m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");
|
||||||
|
|
||||||
|
m_Profiler.AddThreadStorage(&m_Storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
~CProfiler2GPU_ARB_timer_query()
|
~CProfiler2GPUARB()
|
||||||
{
|
{
|
||||||
// Pop frames to return queries to the free list
|
// Pop frames to return queries to the free list
|
||||||
while (!m_Frames.empty())
|
while (!m_Frames.empty())
|
||||||
PopFrontFrame();
|
PopFrontFrame();
|
||||||
|
|
||||||
|
if (!m_FreeQueries.empty())
|
||||||
|
glDeleteQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
|
||||||
|
ogl_WarnIfError();
|
||||||
|
|
||||||
|
m_Profiler.RemoveThreadStorage(&m_Storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
void FrameStart()
|
void FrameStart()
|
||||||
@ -272,591 +223,65 @@ private:
|
|||||||
m_FreeQueries.push_back(frame.events[i].query);
|
m_FreeQueries.push_back(frame.events[i].query);
|
||||||
m_Frames.pop_front();
|
m_Frames.pop_front();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a new GL query object (or a recycled old one)
|
||||||
|
GLuint NewQuery()
|
||||||
|
{
|
||||||
|
if (m_FreeQueries.empty())
|
||||||
|
{
|
||||||
|
// Generate a batch of new queries
|
||||||
|
m_FreeQueries.resize(8);
|
||||||
|
glGenQueriesARB(m_FreeQueries.size(), &m_FreeQueries[0]);
|
||||||
|
ogl_WarnIfError();
|
||||||
|
}
|
||||||
|
|
||||||
|
GLuint query = m_FreeQueries.back();
|
||||||
|
m_FreeQueries.pop_back();
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
|
CProfiler2& m_Profiler;
|
||||||
|
CProfiler2::ThreadStorage& m_Storage;
|
||||||
|
|
||||||
|
std::vector<GLuint> m_FreeQueries; // query objects that are allocated but not currently in used
|
||||||
};
|
};
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GL_EXT_timer_query only supports async queries for elapsed time,
|
|
||||||
* and only a single simultaneous query.
|
|
||||||
* We can't correctly convert it to absolute time, so we just pretend
|
|
||||||
* each GPU frame starts the same time as the CPU for that frame.
|
|
||||||
* We do a query for elapsed time between every adjacent enter/leave-region event.
|
|
||||||
* When all the queries for a frame have their results available,
|
|
||||||
* we sum the elapsed times to calculate when each event occurs within the
|
|
||||||
* frame, and record the data.
|
|
||||||
*/
|
|
||||||
class CProfiler2GPU_EXT_timer_query : public CProfiler2GPU_timer_query
|
|
||||||
{
|
|
||||||
struct SEvent
|
|
||||||
{
|
|
||||||
const char* id;
|
|
||||||
GLuint query; // query for time elapsed from this event until the next, or 0 for final event
|
|
||||||
bool isEnter; // true if entering region; false if leaving
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SFrame
|
|
||||||
{
|
|
||||||
u32 num;
|
|
||||||
double timeStart; // CPU time at frame start
|
|
||||||
std::vector<SEvent> events;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::deque<SFrame> m_Frames;
|
|
||||||
|
|
||||||
public:
|
|
||||||
static bool IsSupported()
|
|
||||||
{
|
|
||||||
return ogl_HaveExtension("GL_EXT_timer_query");
|
|
||||||
}
|
|
||||||
|
|
||||||
CProfiler2GPU_EXT_timer_query(CProfiler2& profiler) :
|
|
||||||
CProfiler2GPU_timer_query(profiler, "gpu_ext")
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
~CProfiler2GPU_EXT_timer_query()
|
|
||||||
{
|
|
||||||
// Pop frames to return queries to the free list
|
|
||||||
while (!m_Frames.empty())
|
|
||||||
PopFrontFrame();
|
|
||||||
}
|
|
||||||
|
|
||||||
void FrameStart()
|
|
||||||
{
|
|
||||||
ProcessFrames();
|
|
||||||
|
|
||||||
SFrame frame;
|
|
||||||
frame.num = m_Profiler.GetFrameNumber();
|
|
||||||
frame.timeStart = m_Profiler.GetTime();
|
|
||||||
|
|
||||||
m_Frames.push_back(frame);
|
|
||||||
|
|
||||||
RegionEnter("frame");
|
|
||||||
}
|
|
||||||
|
|
||||||
void FrameEnd()
|
|
||||||
{
|
|
||||||
RegionLeave("frame");
|
|
||||||
|
|
||||||
glEndQueryARB(GL_TIME_ELAPSED);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
}
|
|
||||||
|
|
||||||
void RecordRegion(const char* id, bool isEnter)
|
|
||||||
{
|
|
||||||
ENSURE(!m_Frames.empty());
|
|
||||||
SFrame& frame = m_Frames.back();
|
|
||||||
|
|
||||||
// Must call glEndQuery before calling glGenQueries (via NewQuery),
|
|
||||||
// for compatibility with the GL_EXT_timer_query spec (which says
|
|
||||||
// GL_INVALID_OPERATION if a query of any target is active; the ARB
|
|
||||||
// spec and OpenGL specs don't appear to say that, but the AMD drivers
|
|
||||||
// implement that error (see Trac #1033))
|
|
||||||
|
|
||||||
if (!frame.events.empty())
|
|
||||||
{
|
|
||||||
glEndQueryARB(GL_TIME_ELAPSED);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
}
|
|
||||||
|
|
||||||
SEvent event;
|
|
||||||
event.id = id;
|
|
||||||
event.query = NewQuery();
|
|
||||||
event.isEnter = isEnter;
|
|
||||||
|
|
||||||
glBeginQueryARB(GL_TIME_ELAPSED, event.query);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
|
|
||||||
frame.events.push_back(event);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RegionEnter(const char* id)
|
|
||||||
{
|
|
||||||
RecordRegion(id, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RegionLeave(const char* id)
|
|
||||||
{
|
|
||||||
RecordRegion(id, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
void ProcessFrames()
|
|
||||||
{
|
|
||||||
while (!m_Frames.empty())
|
|
||||||
{
|
|
||||||
SFrame& frame = m_Frames.front();
|
|
||||||
|
|
||||||
// Queries become available in order so we only need to check the last one
|
|
||||||
GLint available = 0;
|
|
||||||
glGetQueryObjectivARB(frame.events.back().query, GL_QUERY_RESULT_AVAILABLE, &available);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
if (!available)
|
|
||||||
break;
|
|
||||||
|
|
||||||
// The frame's queries are now available, so retrieve and record all their results:
|
|
||||||
|
|
||||||
double t = frame.timeStart;
|
|
||||||
m_Storage.RecordFrameStart(t);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
|
||||||
{
|
|
||||||
if (frame.events[i].isEnter)
|
|
||||||
m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
|
|
||||||
else
|
|
||||||
m_Storage.RecordLeave(t);
|
|
||||||
|
|
||||||
// Associate the frame number with the "frame" region
|
|
||||||
if (i == 0)
|
|
||||||
m_Storage.RecordAttributePrintf("%u", frame.num);
|
|
||||||
|
|
||||||
// Advance by the elapsed time to the next event
|
|
||||||
GLuint64 queryElapsed = 0;
|
|
||||||
glGetQueryObjectui64vEXT(frame.events[i].query, GL_QUERY_RESULT, &queryElapsed);
|
|
||||||
// (use the EXT-suffixed function here, as defined by GL_EXT_timer_query)
|
|
||||||
ogl_WarnIfError();
|
|
||||||
t += (double)queryElapsed / 1e9;
|
|
||||||
}
|
|
||||||
|
|
||||||
PopFrontFrame();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void PopFrontFrame()
|
|
||||||
{
|
|
||||||
ENSURE(!m_Frames.empty());
|
|
||||||
SFrame& frame = m_Frames.front();
|
|
||||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
|
||||||
m_FreeQueries.push_back(frame.events[i].query);
|
|
||||||
m_Frames.pop_front();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GL_INTEL_performance_queries is not officially documented
|
|
||||||
* (see http://zaynar.co.uk/docs/gl-intel-performance-queries.html)
|
|
||||||
* but it's potentially useful so we'll support it anyway.
|
|
||||||
* It supports async queries giving elapsed time plus a load of other
|
|
||||||
* counters that we'd like to use, and supports many simultaneous queries
|
|
||||||
* (unlike GL_EXT_timer_query).
|
|
||||||
* There are multiple query types (typically 2), each with its own set of
|
|
||||||
* multiple counters.
|
|
||||||
* On each enter-region event, we start a new set of queries.
|
|
||||||
* On each leave-region event, we end the corresponding set of queries.
|
|
||||||
* We can't tell the offsets between the enter events of nested regions,
|
|
||||||
* so we pretend they all got entered at the same time.
|
|
||||||
*/
|
|
||||||
class CProfiler2GPU_INTEL_performance_queries : public CProfiler2GPU_base
|
|
||||||
{
|
|
||||||
struct SEvent
|
|
||||||
{
|
|
||||||
const char* id;
|
|
||||||
bool isEnter;
|
|
||||||
std::vector<GLuint> queries; // if isEnter, one per SPerfQueryType; else empty
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SFrame
|
|
||||||
{
|
|
||||||
u32 num;
|
|
||||||
double timeStart; // CPU time at frame start
|
|
||||||
std::vector<SEvent> events;
|
|
||||||
std::vector<size_t> activeRegions; // stack of indexes into events
|
|
||||||
};
|
|
||||||
|
|
||||||
std::deque<SFrame> m_Frames;
|
|
||||||
|
|
||||||
// Counters listed by the graphics driver for a particular query type
|
|
||||||
struct SPerfCounter
|
|
||||||
{
|
|
||||||
std::string name;
|
|
||||||
std::string desc;
|
|
||||||
GLuint offset;
|
|
||||||
GLuint size;
|
|
||||||
GLuint type;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Query types listed by the graphics driver
|
|
||||||
struct SPerfQueryType
|
|
||||||
{
|
|
||||||
GLuint queryTypeId;
|
|
||||||
std::string name;
|
|
||||||
GLuint counterBufferSize;
|
|
||||||
std::vector<SPerfCounter> counters;
|
|
||||||
|
|
||||||
std::vector<GLuint> freeQueries; // query objects that are allocated but not currently in use
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<SPerfQueryType> m_QueryTypes;
|
|
||||||
|
|
||||||
#define INTEL_PERFQUERIES_NONBLOCK 0x83FA
|
|
||||||
#define INTEL_PERFQUERIES_BLOCK 0x83FB
|
|
||||||
#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT 0x9402
|
|
||||||
#define INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64 0x9403
|
|
||||||
#define INTEL_PERFQUERIES_TYPE_FLOAT 0x9404
|
|
||||||
#define INTEL_PERFQUERIES_TYPE_BOOL 0x9406
|
|
||||||
|
|
||||||
public:
|
|
||||||
static bool IsSupported()
|
|
||||||
{
|
|
||||||
return ogl_HaveExtension("GL_INTEL_performance_queries");
|
|
||||||
}
|
|
||||||
|
|
||||||
CProfiler2GPU_INTEL_performance_queries(CProfiler2& profiler) :
|
|
||||||
CProfiler2GPU_base(profiler, "gpu_intel")
|
|
||||||
{
|
|
||||||
LoadPerfCounters();
|
|
||||||
}
|
|
||||||
|
|
||||||
~CProfiler2GPU_INTEL_performance_queries()
|
|
||||||
{
|
|
||||||
// Pop frames to return queries to the free list
|
|
||||||
while (!m_Frames.empty())
|
|
||||||
PopFrontFrame();
|
|
||||||
|
|
||||||
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
|
|
||||||
for (size_t j = 0; j < m_QueryTypes[i].freeQueries.size(); ++j)
|
|
||||||
glDeletePerfQueryINTEL(m_QueryTypes[i].freeQueries[j]);
|
|
||||||
|
|
||||||
ogl_WarnIfError();
|
|
||||||
}
|
|
||||||
|
|
||||||
void FrameStart()
|
|
||||||
{
|
|
||||||
ProcessFrames();
|
|
||||||
|
|
||||||
SFrame frame;
|
|
||||||
frame.num = m_Profiler.GetFrameNumber();
|
|
||||||
frame.timeStart = m_Profiler.GetTime();
|
|
||||||
|
|
||||||
m_Frames.push_back(frame);
|
|
||||||
|
|
||||||
RegionEnter("frame");
|
|
||||||
}
|
|
||||||
|
|
||||||
void FrameEnd()
|
|
||||||
{
|
|
||||||
RegionLeave("frame");
|
|
||||||
}
|
|
||||||
|
|
||||||
void RegionEnter(const char* id)
|
|
||||||
{
|
|
||||||
ENSURE(!m_Frames.empty());
|
|
||||||
SFrame& frame = m_Frames.back();
|
|
||||||
|
|
||||||
SEvent event;
|
|
||||||
event.id = id;
|
|
||||||
event.isEnter = true;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
|
|
||||||
{
|
|
||||||
GLuint local_id = NewQuery(i);
|
|
||||||
glBeginPerfQueryINTEL(local_id);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
event.queries.push_back(local_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
frame.activeRegions.push_back(frame.events.size());
|
|
||||||
|
|
||||||
frame.events.push_back(event);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RegionLeave(const char* id)
|
|
||||||
{
|
|
||||||
ENSURE(!m_Frames.empty());
|
|
||||||
SFrame& frame = m_Frames.back();
|
|
||||||
|
|
||||||
ENSURE(!frame.activeRegions.empty());
|
|
||||||
SEvent& activeEvent = frame.events[frame.activeRegions.back()];
|
|
||||||
|
|
||||||
for (size_t i = 0; i < m_QueryTypes.size(); ++i)
|
|
||||||
{
|
|
||||||
glEndPerfQueryINTEL(activeEvent.queries[i]);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
}
|
|
||||||
|
|
||||||
frame.activeRegions.pop_back();
|
|
||||||
|
|
||||||
SEvent event;
|
|
||||||
event.id = id;
|
|
||||||
event.isEnter = false;
|
|
||||||
frame.events.push_back(event);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
GLuint NewQuery(size_t queryIdx)
|
|
||||||
{
|
|
||||||
ENSURE(queryIdx < m_QueryTypes.size());
|
|
||||||
|
|
||||||
if (m_QueryTypes[queryIdx].freeQueries.empty())
|
|
||||||
{
|
|
||||||
GLuint id;
|
|
||||||
glCreatePerfQueryINTEL(m_QueryTypes[queryIdx].queryTypeId, &id);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
GLuint id = m_QueryTypes[queryIdx].freeQueries.back();
|
|
||||||
m_QueryTypes[queryIdx].freeQueries.pop_back();
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ProcessFrames()
|
|
||||||
{
|
|
||||||
std::vector<char> buffer;
|
|
||||||
while (!m_Frames.empty())
|
|
||||||
{
|
|
||||||
SFrame& frame = m_Frames.front();
|
|
||||||
|
|
||||||
// Queries don't become available in order, so check them all before
|
|
||||||
// trying to read the results from any
|
|
||||||
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
|
|
||||||
{
|
|
||||||
const size_t size = m_QueryTypes[j].counterBufferSize;
|
|
||||||
buffer.resize(size);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
|
||||||
{
|
|
||||||
if (!frame.events[i].isEnter)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
GLuint length = 0;
|
|
||||||
glGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_NONBLOCK, size, buffer.data(), &length);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
if (length == 0)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double lastTime = frame.timeStart;
|
|
||||||
std::stack<double> endTimes;
|
|
||||||
|
|
||||||
m_Storage.RecordFrameStart(frame.timeStart);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
|
||||||
{
|
|
||||||
if (frame.events[i].isEnter)
|
|
||||||
{
|
|
||||||
m_Storage.Record(CProfiler2::ITEM_ENTER, lastTime, frame.events[i].id);
|
|
||||||
|
|
||||||
if (i == 0)
|
|
||||||
m_Storage.RecordAttributePrintf("%u", frame.num);
|
|
||||||
|
|
||||||
double elapsed = 0.0;
|
|
||||||
|
|
||||||
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
|
|
||||||
{
|
|
||||||
GLuint length;
|
|
||||||
buffer.resize(m_QueryTypes[j].counterBufferSize);
|
|
||||||
glGetPerfQueryDataINTEL(frame.events[i].queries[j], INTEL_PERFQUERIES_BLOCK, m_QueryTypes[j].counterBufferSize, buffer.data(), &length);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
ENSURE(length == m_QueryTypes[j].counterBufferSize);
|
|
||||||
|
|
||||||
m_Storage.RecordAttributePrintf("-- %s --", m_QueryTypes[j].name.c_str());
|
|
||||||
|
|
||||||
for (size_t k = 0; k < m_QueryTypes[j].counters.size(); ++k)
|
|
||||||
{
|
|
||||||
SPerfCounter& counter = m_QueryTypes[j].counters[k];
|
|
||||||
|
|
||||||
if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT)
|
|
||||||
{
|
|
||||||
ENSURE(counter.size == 4);
|
|
||||||
GLuint value = 0;
|
|
||||||
memcpy(&value, buffer.data() + counter.offset, counter.size);
|
|
||||||
m_Storage.RecordAttributePrintf("%s: %u", counter.name.c_str(), value);
|
|
||||||
}
|
|
||||||
else if (counter.type == INTEL_PERFQUERIES_TYPE_UNSIGNED_INT64)
|
|
||||||
{
|
|
||||||
ENSURE(counter.size == 8);
|
|
||||||
GLuint64 value = 0;
|
|
||||||
memcpy(&value, buffer.data() + counter.offset, counter.size);
|
|
||||||
m_Storage.RecordAttributePrintf("%s: %.0f", counter.name.c_str(), (double)value);
|
|
||||||
|
|
||||||
if (counter.name == "TotalTime")
|
|
||||||
elapsed = (double)value / 1e6;
|
|
||||||
}
|
|
||||||
else if (counter.type == INTEL_PERFQUERIES_TYPE_FLOAT)
|
|
||||||
{
|
|
||||||
ENSURE(counter.size == 4);
|
|
||||||
GLfloat value = 0;
|
|
||||||
memcpy(&value, buffer.data() + counter.offset, counter.size);
|
|
||||||
m_Storage.RecordAttributePrintf("%s: %f", counter.name.c_str(), value);
|
|
||||||
}
|
|
||||||
else if (counter.type == INTEL_PERFQUERIES_TYPE_BOOL)
|
|
||||||
{
|
|
||||||
ENSURE(counter.size == 4);
|
|
||||||
GLuint value = 0;
|
|
||||||
memcpy(&value, buffer.data() + counter.offset, counter.size);
|
|
||||||
ENSURE(value == 0 || value == 1);
|
|
||||||
m_Storage.RecordAttributePrintf("%s: %u", counter.name.c_str(), value);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//debug_warn(L"unrecognised Intel performance counter type");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
endTimes.push(lastTime + elapsed);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
lastTime = endTimes.top();
|
|
||||||
endTimes.pop();
|
|
||||||
m_Storage.RecordLeave(lastTime);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PopFrontFrame();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void PopFrontFrame()
|
|
||||||
{
|
|
||||||
ENSURE(!m_Frames.empty());
|
|
||||||
SFrame& frame = m_Frames.front();
|
|
||||||
for (size_t i = 0; i < frame.events.size(); ++i)
|
|
||||||
if (frame.events[i].isEnter)
|
|
||||||
for (size_t j = 0; j < m_QueryTypes.size(); ++j)
|
|
||||||
m_QueryTypes[j].freeQueries.push_back(frame.events[i].queries[j]);
|
|
||||||
m_Frames.pop_front();
|
|
||||||
}
|
|
||||||
|
|
||||||
void LoadPerfCounters()
|
|
||||||
{
|
|
||||||
GLuint queryTypeId;
|
|
||||||
glGetFirstPerfQueryIdINTEL(&queryTypeId);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
do
|
|
||||||
{
|
|
||||||
char queryName[256];
|
|
||||||
GLuint counterBufferSize, numCounters, maxQueries, unknown;
|
|
||||||
glGetPerfQueryInfoINTEL(queryTypeId, ARRAY_SIZE(queryName), queryName, &counterBufferSize, &numCounters, &maxQueries, &unknown);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
ENSURE(unknown == 1);
|
|
||||||
|
|
||||||
SPerfQueryType query;
|
|
||||||
query.queryTypeId = queryTypeId;
|
|
||||||
query.name = queryName;
|
|
||||||
query.counterBufferSize = counterBufferSize;
|
|
||||||
|
|
||||||
for (GLuint counterId = 1; counterId <= numCounters; ++counterId)
|
|
||||||
{
|
|
||||||
char counterName[256];
|
|
||||||
char counterDesc[2048];
|
|
||||||
GLuint counterOffset, counterSize, counterUsage, counterType;
|
|
||||||
GLuint64 unknown2;
|
|
||||||
glGetPerfCounterInfoINTEL(queryTypeId, counterId, ARRAY_SIZE(counterName), counterName, ARRAY_SIZE(counterDesc), counterDesc, &counterOffset, &counterSize, &counterUsage, &counterType, &unknown2);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
ENSURE(unknown2 == 0 || unknown2 == 1);
|
|
||||||
|
|
||||||
SPerfCounter counter;
|
|
||||||
counter.name = counterName;
|
|
||||||
counter.desc = counterDesc;
|
|
||||||
counter.offset = counterOffset;
|
|
||||||
counter.size = counterSize;
|
|
||||||
counter.type = counterType;
|
|
||||||
query.counters.push_back(counter);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_QueryTypes.push_back(query);
|
|
||||||
|
|
||||||
glGetNextPerfQueryIdINTEL(queryTypeId, &queryTypeId);
|
|
||||||
ogl_WarnIfError();
|
|
||||||
|
|
||||||
} while (queryTypeId);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
|
CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
|
||||||
m_Profiler(profiler), m_ProfilerARB(NULL), m_ProfilerEXT(NULL), m_ProfilerINTEL(NULL)
|
m_Profiler(profiler)
|
||||||
{
|
{
|
||||||
bool enabledARB = false;
|
bool enabledARB = false;
|
||||||
bool enabledEXT = false;
|
|
||||||
bool enabledINTEL = false;
|
|
||||||
CFG_GET_VAL("profiler2.gpu.arb.enable", enabledARB);
|
CFG_GET_VAL("profiler2.gpu.arb.enable", enabledARB);
|
||||||
CFG_GET_VAL("profiler2.gpu.ext.enable", enabledEXT);
|
|
||||||
CFG_GET_VAL("profiler2.gpu.intel.enable", enabledINTEL);
|
|
||||||
|
|
||||||
// Only enable either ARB or EXT, not both, because they are redundant
|
if (enabledARB && CProfiler2GPUARB::IsSupported())
|
||||||
// (EXT is only needed for compatibility with older systems), and because
|
|
||||||
// using both triggers GL_INVALID_OPERATION on AMD drivers (see comment
|
|
||||||
// in CProfiler2GPU_EXT_timer_query::RecordRegion)
|
|
||||||
if (enabledARB && CProfiler2GPU_ARB_timer_query::IsSupported())
|
|
||||||
{
|
{
|
||||||
m_ProfilerARB = new CProfiler2GPU_ARB_timer_query(m_Profiler);
|
m_ProfilerARB = std::make_unique<CProfiler2GPUARB>(m_Profiler);
|
||||||
}
|
|
||||||
else if (enabledEXT && CProfiler2GPU_EXT_timer_query::IsSupported())
|
|
||||||
{
|
|
||||||
m_ProfilerEXT = new CProfiler2GPU_EXT_timer_query(m_Profiler);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The INTEL mode should be compatible with ARB/EXT (though no current
|
|
||||||
// drivers support both), and provides complementary data, so enable it
|
|
||||||
// when possible
|
|
||||||
if (enabledINTEL && CProfiler2GPU_INTEL_performance_queries::IsSupported())
|
|
||||||
{
|
|
||||||
m_ProfilerINTEL = new CProfiler2GPU_INTEL_performance_queries(m_Profiler);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CProfiler2GPU::~CProfiler2GPU()
|
CProfiler2GPU::~CProfiler2GPU() = default;
|
||||||
{
|
|
||||||
SAFE_DELETE(m_ProfilerARB);
|
|
||||||
SAFE_DELETE(m_ProfilerEXT);
|
|
||||||
SAFE_DELETE(m_ProfilerINTEL);
|
|
||||||
}
|
|
||||||
|
|
||||||
void CProfiler2GPU::FrameStart()
|
void CProfiler2GPU::FrameStart()
|
||||||
{
|
{
|
||||||
if (m_ProfilerARB)
|
if (m_ProfilerARB)
|
||||||
m_ProfilerARB->FrameStart();
|
m_ProfilerARB->FrameStart();
|
||||||
|
|
||||||
if (m_ProfilerEXT)
|
|
||||||
m_ProfilerEXT->FrameStart();
|
|
||||||
|
|
||||||
if (m_ProfilerINTEL)
|
|
||||||
m_ProfilerINTEL->FrameStart();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CProfiler2GPU::FrameEnd()
|
void CProfiler2GPU::FrameEnd()
|
||||||
{
|
{
|
||||||
if (m_ProfilerARB)
|
if (m_ProfilerARB)
|
||||||
m_ProfilerARB->FrameEnd();
|
m_ProfilerARB->FrameEnd();
|
||||||
|
|
||||||
if (m_ProfilerEXT)
|
|
||||||
m_ProfilerEXT->FrameEnd();
|
|
||||||
|
|
||||||
if (m_ProfilerINTEL)
|
|
||||||
m_ProfilerINTEL->FrameEnd();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CProfiler2GPU::RegionEnter(const char* id)
|
void CProfiler2GPU::RegionEnter(const char* id)
|
||||||
{
|
{
|
||||||
if (m_ProfilerARB)
|
if (m_ProfilerARB)
|
||||||
m_ProfilerARB->RegionEnter(id);
|
m_ProfilerARB->RegionEnter(id);
|
||||||
|
|
||||||
if (m_ProfilerEXT)
|
|
||||||
m_ProfilerEXT->RegionEnter(id);
|
|
||||||
|
|
||||||
if (m_ProfilerINTEL)
|
|
||||||
m_ProfilerINTEL->RegionEnter(id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CProfiler2GPU::RegionLeave(const char* id)
|
void CProfiler2GPU::RegionLeave(const char* id)
|
||||||
{
|
{
|
||||||
if (m_ProfilerARB)
|
if (m_ProfilerARB)
|
||||||
m_ProfilerARB->RegionLeave(id);
|
m_ProfilerARB->RegionLeave(id);
|
||||||
|
|
||||||
if (m_ProfilerEXT)
|
|
||||||
m_ProfilerEXT->RegionLeave(id);
|
|
||||||
|
|
||||||
if (m_ProfilerINTEL)
|
|
||||||
m_ProfilerINTEL->RegionLeave(id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#else // CONFIG2_GLES
|
#else // CONFIG2_GLES
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Wildfire Games.
|
/* Copyright (C) 2022 Wildfire Games.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
* a copy of this software and associated documentation files (the
|
* a copy of this software and associated documentation files (the
|
||||||
@ -20,10 +20,10 @@
|
|||||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
class CProfiler2;
|
class CProfiler2;
|
||||||
class CProfiler2GPU_ARB_timer_query;
|
class CProfiler2GPUARB;
|
||||||
class CProfiler2GPU_EXT_timer_query;
|
|
||||||
class CProfiler2GPU_INTEL_performance_queries;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used by CProfiler2 for GPU profiling support.
|
* Used by CProfiler2 for GPU profiling support.
|
||||||
@ -44,7 +44,5 @@ public:
|
|||||||
private:
|
private:
|
||||||
CProfiler2& m_Profiler;
|
CProfiler2& m_Profiler;
|
||||||
|
|
||||||
CProfiler2GPU_ARB_timer_query* m_ProfilerARB;
|
std::unique_ptr<CProfiler2GPUARB> m_ProfilerARB;
|
||||||
CProfiler2GPU_EXT_timer_query* m_ProfilerEXT;
|
|
||||||
CProfiler2GPU_INTEL_performance_queries* m_ProfilerINTEL;
|
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user