1
1
forked from 0ad/0ad
0ad/source/ps/Profiler2.h
2021-03-03 23:01:08 +00:00

558 lines
14 KiB
C++

/* Copyright (C) 2021 Wildfire Games.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @file
* New profiler (complementing the older CProfileManager)
*
* The profiler is designed for analysing framerate fluctuations or glitches,
* and temporal relationships between threads.
* This contrasts with CProfilerManager and most external profiling tools,
* which are designed more for measuring average throughput over a number of
* frames.
*
* To view the profiler output, press F11 to enable the HTTP output mode
* and then open source/tools/profiler2/profiler2.html in a web browser.
*
* There is a single global CProfiler2 instance (g_Profiler2),
* providing the API used by the rest of the game.
* The game can record the entry/exit timings of a region of code
* using the PROFILE2 macro, and can record other events using
* PROFILE2_EVENT.
* Regions and events can be annotated with arbitrary string attributes,
* specified with printf-style format strings, using PROFILE2_ATTR
* (e.g. PROFILE2_ATTR("frame: %d", m_FrameNum) ).
*
* This is designed for relatively coarse-grained profiling, or for rare events.
* Don't use it for regions that are typically less than ~0.1msecs, or that are
* called hundreds of times per frame. (The old CProfilerManager is better
* for that.)
*
* New threads must call g_Profiler2.RegisterCurrentThread before any other
* profiler functions.
*
* The main thread should call g_Profiler2.RecordFrameStart at the start of
* each frame.
* Other threads should call g_Profiler2.RecordSyncMarker occasionally,
* especially if it's been a long time since their last call to the profiler,
* or if they've made thousands of calls since the last sync marker.
*
* The profiler is implemented with thread-local fixed-size ring buffers,
* which store a sequence of variable-length items indicating the time
* of the event and associated data (pointers to names, attribute strings, etc).
* An HTTP server provides access to the data: when requested, it will make
* a copy of a thread's buffer, then parse the items and return them in JSON
* format. The profiler2.html requests and processes and visualises this data.
*
* The RecordSyncMarker calls are necessary to correct for time drift and to
* let the buffer parser accurately detect the start of an item in the byte stream.
*
* This design aims to minimise the performance overhead of recording data,
* and to simplify the visualisation of the data by doing it externally in an
* environment with better UI tools (i.e. HTML) instead of within the game engine.
*
* The initial setup of g_Profiler2 must happen in the game's main thread.
* RegisterCurrentThread and the Record functions may be called from any thread.
* The HTTP server runs its own threads, which may call the ConstructJSON functions.
*/
#ifndef INCLUDED_PROFILER2
#define INCLUDED_PROFILER2
#include "lib/timer.h"
#include "ps/ThreadUtil.h"
#include <mutex>
#include <string>
#include <thread>
#include <vector>
struct mg_context;
// Note: Lots of functions are defined inline, to hypothetically
// minimise performance overhead.
class CProfiler2GPU;
class CProfiler2
{
friend class CProfiler2GPU_base;
friend class CProfile2SpikeRegion;
friend class CProfile2AggregatedRegion;
public:
// Items stored in the buffers:
/// Item type identifiers
enum EItem
{
ITEM_NOP = 0,
ITEM_SYNC = 1, // magic value used for parse syncing
ITEM_EVENT = 2, // single event
ITEM_ENTER = 3, // entering a region
ITEM_LEAVE = 4, // leaving a region (must be correctly nested)
ITEM_ATTRIBUTE = 5, // arbitrary string associated with current region, or latest event (if the previous item was an event)
};
static const size_t MAX_ATTRIBUTE_LENGTH; // includes null terminator, which isn't stored
/// An arbitrary number to help resyncing with the item stream when parsing.
static const u8 RESYNC_MAGIC[8];
/**
* An item with a relative time and an ID string pointer.
*/
struct SItem_dt_id
{
float dt; // time relative to last event
const char* id;
};
private:
// TODO: different threads might want different sizes
static const size_t BUFFER_SIZE;
static const size_t HOLD_BUFFER_SIZE;
/**
* Class instantiated in every registered thread.
*/
class ThreadStorage
{
NONCOPYABLE(ThreadStorage);
public:
ThreadStorage(CProfiler2& profiler, const std::string& name);
~ThreadStorage();
enum { BUFFER_NORMAL, BUFFER_SPIKE, BUFFER_AGGREGATE };
void RecordSyncMarker(double t)
{
// Store the magic string followed by the absolute time
// (to correct for drift caused by the precision of relative
// times stored in other items)
u8 buffer[sizeof(RESYNC_MAGIC) + sizeof(t)];
memcpy(buffer, &RESYNC_MAGIC, sizeof(RESYNC_MAGIC));
memcpy(buffer + sizeof(RESYNC_MAGIC), &t, sizeof(t));
Write(ITEM_SYNC, buffer, ARRAY_SIZE(buffer));
m_LastTime = t;
}
void Record(EItem type, double t, const char* id)
{
// Store a relative time instead of absolute, so we can use floats
// (to save memory) without suffering from precision problems
SItem_dt_id item = { (float)(t - m_LastTime), id };
Write(type, &item, sizeof(item));
}
void RecordFrameStart(double t)
{
RecordSyncMarker(t);
Record(ITEM_EVENT, t, "__framestart"); // magic string recognised by the visualiser
}
void RecordLeave(double t)
{
float time = (float)(t - m_LastTime);
Write(ITEM_LEAVE, &time, sizeof(float));
}
void RecordAttribute(const char* fmt, va_list argp) VPRINTF_ARGS(2);
void RecordAttributePrintf(const char* fmt, ...) PRINTF_ARGS(2)
{
va_list argp;
va_start(argp, fmt);
RecordAttribute(fmt, argp);
va_end(argp);
}
size_t HoldLevel();
u8 HoldType();
void PutOnHold(u8 type);
void HoldToBuffer(bool condensed);
void ThrowawayHoldBuffer();
CProfiler2& GetProfiler()
{
return m_Profiler;
}
const std::string& GetName()
{
return m_Name;
}
/**
* Returns a copy of a subset of the thread's buffer.
* Not guaranteed to start on an item boundary.
* May be called by any thread.
*/
std::string GetBuffer();
private:
/**
* Store an item into the buffer.
*/
void Write(EItem type, const void* item, u32 itemSize);
void WriteHold(EItem type, const void* item, u32 itemSize);
CProfiler2& m_Profiler;
std::string m_Name;
double m_LastTime; // used for computing relative times
u8* m_Buffer;
struct HoldBuffer
{
friend class ThreadStorage;
public:
HoldBuffer()
{
buffer = new u8[HOLD_BUFFER_SIZE];
memset(buffer, ITEM_NOP, HOLD_BUFFER_SIZE);
pos = 0;
}
~HoldBuffer()
{
delete[] buffer;
}
void clear()
{
pos = 0;
}
void setType(u8 newType)
{
type = newType;
}
u8* buffer;
u32 pos;
u8 type;
};
HoldBuffer m_HoldBuffers[8];
size_t m_HeldDepth;
// To allow hopefully-safe reading of the buffer from a separate thread,
// without any expensive synchronisation in the recording thread,
// two copies of the current buffer write position are stored.
// BufferPos0 is updated before writing; BufferPos1 is updated after writing.
// GetBuffer can read Pos1, copy the buffer, read Pos0, then assume any bytes
// outside the range Pos1 <= x < Pos0 are safe to use. (Any in that range might
// be half-written and corrupted.) (All ranges are modulo BUFFER_SIZE.)
// Outside of Write(), these will always be equal.
//
// TODO: does this attempt at synchronisation (plus use of COMPILER_FENCE etc)
// actually work in practice?
u32 m_BufferPos0;
u32 m_BufferPos1;
};
public:
CProfiler2();
~CProfiler2();
/**
* Call in main thread to set up the profiler,
* before calling any other profiler functions.
*/
void Initialise();
/**
* Call in main thread to enable the HTTP server.
* (Disabled by default for security and performance
* and to avoid annoying a firewall.)
*/
void EnableHTTP();
/**
* Call in main thread to enable the GPU profiling support,
* after OpenGL has been initialised.
*/
void EnableGPU();
/**
* Call in main thread to shut down the GPU profiling support,
* before shutting down OpenGL.
*/
void ShutdownGPU();
/**
* Call in main thread to shut down the profiler's HTTP server
*/
void ShutDownHTTP();
/**
* Call in main thread to enable/disable the profiler
*/
void Toggle();
/**
* Call in main thread to shut everything down.
* All other profiled threads should have been terminated already.
*/
void Shutdown();
/**
* Call in any thread to enable the profiler in that thread.
* @p name should be unique, and is used by the visualiser to identify
* this thread.
*/
void RegisterCurrentThread(const std::string& name);
/**
* Non-main threads should call this occasionally,
* especially if it's been a long time since their last call to the profiler,
* or if they've made thousands of calls since the last sync marker.
*/
void RecordSyncMarker()
{
GetThreadStorage().RecordSyncMarker(GetTime());
}
/**
* Call in main thread at the start of a frame.
*/
void RecordFrameStart()
{
ENSURE(Threading::IsMainThread());
GetThreadStorage().RecordFrameStart(GetTime());
}
void RecordEvent(const char* id)
{
GetThreadStorage().Record(ITEM_EVENT, GetTime(), id);
}
void RecordRegionEnter(const char* id)
{
GetThreadStorage().Record(ITEM_ENTER, GetTime(), id);
}
void RecordRegionEnter(const char* id, double time)
{
GetThreadStorage().Record(ITEM_ENTER, time, id);
}
void RecordRegionLeave()
{
GetThreadStorage().RecordLeave(GetTime());
}
void RecordAttribute(const char* fmt, ...) PRINTF_ARGS(2)
{
va_list argp;
va_start(argp, fmt);
GetThreadStorage().RecordAttribute(fmt, argp);
va_end(argp);
}
void RecordGPUFrameStart();
void RecordGPUFrameEnd();
void RecordGPURegionEnter(const char* id);
void RecordGPURegionLeave(const char* id);
/**
* Hold onto messages until a call to release or write the held messages.
*/
size_t HoldLevel()
{
return GetThreadStorage().HoldLevel();
}
u8 HoldType()
{
return GetThreadStorage().HoldType();
}
void HoldMessages(u8 type)
{
GetThreadStorage().PutOnHold(type);
}
void StopHoldingMessages(bool writeToBuffer, bool condensed = false)
{
if (writeToBuffer)
GetThreadStorage().HoldToBuffer(condensed);
else
GetThreadStorage().ThrowawayHoldBuffer();
}
/**
* Call in any thread to produce a JSON representation of the general
* state of the application.
*/
void ConstructJSONOverview(std::ostream& stream);
/**
* Call in any thread to produce a JSON representation of the buffer
* for a given thread.
* Returns NULL on success, or an error string.
*/
const char* ConstructJSONResponse(std::ostream& stream, const std::string& thread);
/**
* Call in any thread to save a JSONP representation of the buffers
* for all threads, to a file named profile2.jsonp in the logs directory.
*/
void SaveToFile();
double GetTime()
{
return timer_Time();
}
int GetFrameNumber()
{
return m_FrameNumber;
}
void IncrementFrameNumber()
{
++m_FrameNumber;
}
void AddThreadStorage(ThreadStorage* storage);
void RemoveThreadStorage(ThreadStorage* storage);
private:
void InitialiseGPU();
ThreadStorage& GetThreadStorage()
{
ENSURE(m_CurrentStorage);
return *m_CurrentStorage;
}
bool m_Initialised;
int m_FrameNumber;
mg_context* m_MgContext;
CProfiler2GPU* m_GPU;
std::mutex m_Mutex;
static thread_local ThreadStorage* m_CurrentStorage;
std::vector<std::unique_ptr<ThreadStorage>> m_Threads; // thread-safe; protected by m_Mutex
};
extern CProfiler2 g_Profiler2;
/**
* Scope-based enter/leave helper.
*/
class CProfile2Region
{
public:
CProfile2Region(const char* name) : m_Name(name)
{
g_Profiler2.RecordRegionEnter(m_Name);
}
~CProfile2Region()
{
g_Profiler2.RecordRegionLeave();
}
protected:
const char* m_Name;
};
/**
* Scope-based enter/leave helper.
*/
class CProfile2SpikeRegion
{
public:
CProfile2SpikeRegion(const char* name, double spikeLimit);
~CProfile2SpikeRegion();
private:
const char* m_Name;
double m_Limit;
double m_StartTime;
bool m_PushedHold;
};
/**
* Scope-based enter/leave helper.
*/
class CProfile2AggregatedRegion
{
public:
CProfile2AggregatedRegion(const char* name, double spikeLimit);
~CProfile2AggregatedRegion();
private:
const char* m_Name;
double m_Limit;
double m_StartTime;
bool m_PushedHold;
};
/**
* Scope-based GPU enter/leave helper.
*/
class CProfile2GPURegion
{
public:
CProfile2GPURegion(const char* name) : m_Name(name)
{
g_Profiler2.RecordGPURegionEnter(m_Name);
}
~CProfile2GPURegion()
{
g_Profiler2.RecordGPURegionLeave(m_Name);
}
private:
const char* m_Name;
};
/**
* Starts timing from now until the end of the current scope.
* @p region is the name to associate with this region (should be
* a constant string literal; the pointer must remain valid forever).
* Regions may be nested, but preferably shouldn't be nested deeply since
* it hurts the visualisation.
*/
#define PROFILE2(region) CProfile2Region profile2__(region)
#define PROFILE2_IFSPIKE(region, limit) CProfile2SpikeRegion profile2__(region, limit)
#define PROFILE2_AGGREGATED(region, limit) CProfile2AggregatedRegion profile2__(region, limit)
#define PROFILE2_GPU(region) CProfile2GPURegion profile2gpu__(region)
/**
* Record the named event at the current time.
*/
#define PROFILE2_EVENT(name) g_Profiler2.RecordEvent(name)
/**
* Associates a string (with printf-style formatting) with the current
* region or event.
* (If the last profiler call was PROFILE2_EVENT, it associates with that
* event; otherwise it associates with the currently-active region.)
*/
#define PROFILE2_ATTR g_Profiler2.RecordAttribute
#endif // INCLUDED_PROFILER2