/* Copyright (c) 2011 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * @file * New profiler (complementing the older CProfileManager) * * The profiler is designed for analysing framerate fluctuations or glitches, * and temporal relationships between threads. * This contrasts with CProfilerManager and most external profiling tools, * which are designed more for measuring average throughput over a number of * frames. * * To view the profiler output, press F11 to enable the HTTP output mode * and then open source/tools/profiler2/profiler2.html in a web browser. * * There is a single global CProfiler2 instance (g_Profiler2), * providing the API used by the rest of the game. * The game can record the entry/exit timings of a region of code * using the PROFILE2 macro, and can record other events using * PROFILE2_EVENT. * Regions and events can be annotated with arbitrary string attributes, * specified with printf-style format strings, using PROFILE2_ATTR * (e.g. PROFILE2_ATTR("frame: %d", m_FrameNum) ). * * This is designed for relatively coarse-grained profiling, or for rare events. * Don't use it for regions that are typically less than ~0.1msecs, or that are * called hundreds of times per frame. (The old CProfilerManager is better * for that.) * * New threads must call g_Profiler2.RegisterCurrentThread before any other * profiler functions. * * The main thread should call g_Profiler2.RecordFrameStart at the start of * each frame. * Other threads should call g_Profiler2.RecordSyncMarker occasionally, * especially if it's been a long time since their last call to the profiler, * or if they've made thousands of calls since the last sync marker. * * The profiler is implemented with thread-local fixed-size ring buffers, * which store a sequence of variable-length items indicating the time * of the event and associated data (pointers to names, attribute strings, etc). * An HTTP server provides access to the data: when requested, it will make * a copy of a thread's buffer, then parse the items and return them in JSON * format. The profiler2.html requests and processes and visualises this data. * * The RecordSyncMarker calls are necessary to correct for time drift and to * let the buffer parser accurately detect the start of an item in the byte stream. * * This design aims to minimise the performance overhead of recording data, * and to simplify the visualisation of the data by doing it externally in an * environment with better UI tools (i.e. HTML) instead of within the game engine. * * The initial setup of g_Profiler2 must happen in the game's main thread. * RegisterCurrentThread and the Record functions may be called from any thread. * The HTTP server runs its own threads, which may call the ConstructJSON functions. */ #ifndef INCLUDED_PROFILER2 #define INCLUDED_PROFILER2 #include "lib/timer.h" #include "ps/ThreadUtil.h" struct mg_context; // Note: Lots of functions are defined inline, to hypothetically // minimise performance overhead. class CProfiler2GPU; class CProfiler2 { friend class CProfiler2GPU_base; public: // Items stored in the buffers: /// Item type identifiers enum EItem { ITEM_NOP = 0, ITEM_SYNC = 1, // magic value used for parse syncing ITEM_EVENT = 2, // single event ITEM_ENTER = 3, // entering a region ITEM_LEAVE = 4, // leaving a region (must be correctly nested) ITEM_ATTRIBUTE = 5, // arbitrary string associated with current region, or latest event (if the previous item was an event) }; static const size_t MAX_ATTRIBUTE_LENGTH = 256; // includes null terminator, which isn't stored /// An arbitrary number to help resyncing with the item stream when parsing. static const u8 RESYNC_MAGIC[8]; /** * An item with a relative time and an ID string pointer. */ struct SItem_dt_id { float dt; // time relative to last event const char* id; }; private: // TODO: what's a good size? // TODO: different threads might want different sizes static const size_t BUFFER_SIZE = 1024*1024; /** * Class instantiated in every registered thread. */ class ThreadStorage { NONCOPYABLE(ThreadStorage); public: ThreadStorage(CProfiler2& profiler, const std::string& name); ~ThreadStorage(); void RecordSyncMarker(double t) { // Store the magic string followed by the absolute time // (to correct for drift caused by the precision of relative // times stored in other items) u8 buffer[sizeof(RESYNC_MAGIC) + sizeof(t)]; memcpy(buffer, &RESYNC_MAGIC, sizeof(RESYNC_MAGIC)); memcpy(buffer + sizeof(RESYNC_MAGIC), &t, sizeof(t)); Write(ITEM_SYNC, buffer, ARRAY_SIZE(buffer)); m_LastTime = t; } void Record(EItem type, double t, const char* id) { // Store a relative time instead of absolute, so we can use floats // (to save memory) without suffering from precision problems SItem_dt_id item = { (float)(t - m_LastTime), id }; Write(type, &item, sizeof(item)); m_LastTime = t; } void RecordFrameStart(double t) { RecordSyncMarker(t); Record(ITEM_EVENT, t, "__framestart"); // magic string recognised by the visualiser } void RecordAttribute(const char* fmt, va_list argp) VPRINTF_ARGS(2); void RecordAttributePrintf(const char* fmt, ...) PRINTF_ARGS(2) { va_list argp; va_start(argp, fmt); RecordAttribute(fmt, argp); va_end(argp); } CProfiler2& GetProfiler() { return m_Profiler; } const std::string& GetName() { return m_Name; } /** * Returns a copy of a subset of the thread's buffer. * Not guaranteed to start on an item boundary. * May be called by any thread. */ std::string GetBuffer(); private: /** * Store an item into the buffer. */ void Write(EItem type, const void* item, u32 itemSize) { // See m_BufferPos0 etc for comments on synchronisation u32 size = 1 + itemSize; u32 start = m_BufferPos0; if (start + size > BUFFER_SIZE) { // The remainder of the buffer is too small - fill the rest // with NOPs then start from offset 0, so we don't have to // bother splitting the real item across the end of the buffer m_BufferPos0 = size; COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer memset(m_Buffer + start, 0, BUFFER_SIZE - start); start = 0; } else { m_BufferPos0 = start + size; COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer } m_Buffer[start] = (u8)type; memcpy(&m_Buffer[start + 1], item, itemSize); COMPILER_FENCE; // must write m_BufferPos1 after m_Buffer m_BufferPos1 = start + size; } CProfiler2& m_Profiler; std::string m_Name; double m_LastTime; // used for computing relative times u8* m_Buffer; // To allow hopefully-safe reading of the buffer from a separate thread, // without any expensive synchronisation in the recording thread, // two copies of the current buffer write position are stored. // BufferPos0 is updated before writing; BufferPos1 is updated after writing. // GetBuffer can read Pos1, copy the buffer, read Pos0, then assume any bytes // outside the range Pos1 <= x < Pos0 are safe to use. (Any in that range might // be half-written and corrupted.) (All ranges are modulo BUFFER_SIZE.) // Outside of Write(), these will always be equal. // // TODO: does this attempt at synchronisation (plus use of COMPILER_FENCE etc) // actually work in practice? u32 m_BufferPos0; u32 m_BufferPos1; }; public: CProfiler2(); ~CProfiler2(); /** * Call in main thread to set up the profiler, * before calling any other profiler functions. */ void Initialise(); /** * Call in main thread to enable the HTTP server. * (Disabled by default for security and performance * and to avoid annoying a firewall.) */ void EnableHTTP(); /** * Call in main thread to enable the GPU profiling support, * after OpenGL has been initialised. */ void EnableGPU(); /** * Call in main thread to shut down the GPU profiling support, * before shutting down OpenGL. */ void ShutdownGPU(); /** * Call in main thread to shut everything down. * All other profiled threads should have been terminated already. */ void Shutdown(); /** * Call in any thread to enable the profiler in that thread. * @p name should be unique, and is used by the visualiser to identify * this thread. */ void RegisterCurrentThread(const std::string& name); /** * Non-main threads should call this occasionally, * especially if it's been a long time since their last call to the profiler, * or if they've made thousands of calls since the last sync marker. */ void RecordSyncMarker() { GetThreadStorage().RecordSyncMarker(GetTime()); } /** * Call in main thread at the start of a frame. */ void RecordFrameStart() { ENSURE(ThreadUtil::IsMainThread()); GetThreadStorage().RecordFrameStart(GetTime()); } void RecordEvent(const char* id) { GetThreadStorage().Record(ITEM_EVENT, GetTime(), id); } void RecordRegionEnter(const char* id) { GetThreadStorage().Record(ITEM_ENTER, GetTime(), id); } void RecordRegionLeave(const char* id) { GetThreadStorage().Record(ITEM_LEAVE, GetTime(), id); } void RecordAttribute(const char* fmt, ...) PRINTF_ARGS(2) { va_list argp; va_start(argp, fmt); GetThreadStorage().RecordAttribute(fmt, argp); va_end(argp); } void RecordGPUFrameStart(); void RecordGPUFrameEnd(); void RecordGPURegionEnter(const char* id); void RecordGPURegionLeave(const char* id); /** * Call in any thread to produce a JSON representation of the general * state of the application. */ void ConstructJSONOverview(std::ostream& stream); /** * Call in any thread to produce a JSON representation of the buffer * for a given thread. * Returns NULL on success, or an error string. */ const char* ConstructJSONResponse(std::ostream& stream, const std::string& thread); /** * Call in any thread to save a JSONP representation of the buffers * for all threads, to a file named profile2.jsonp in the logs directory. */ void SaveToFile(); double GetTime() { return timer_Time(); } int GetFrameNumber() { return m_FrameNumber; } void IncrementFrameNumber() { ++m_FrameNumber; } void AddThreadStorage(ThreadStorage* storage); void RemoveThreadStorage(ThreadStorage* storage); private: void InitialiseGPU(); static void TLSDtor(void* data); ThreadStorage& GetThreadStorage() { ThreadStorage* storage = (ThreadStorage*)pthread_getspecific(m_TLS); ASSERT(storage); return *storage; } bool m_Initialised; int m_FrameNumber; mg_context* m_MgContext; pthread_key_t m_TLS; CProfiler2GPU* m_GPU; CMutex m_Mutex; std::vector m_Threads; // thread-safe; protected by m_Mutex }; extern CProfiler2 g_Profiler2; /** * Scope-based enter/leave helper. */ class CProfile2Region { public: CProfile2Region(const char* name) : m_Name(name) { g_Profiler2.RecordRegionEnter(m_Name); } ~CProfile2Region() { g_Profiler2.RecordRegionLeave(m_Name); } private: const char* m_Name; }; /** * Scope-based GPU enter/leave helper. */ class CProfile2GPURegion { public: CProfile2GPURegion(const char* name) : m_Name(name) { g_Profiler2.RecordGPURegionEnter(m_Name); } ~CProfile2GPURegion() { g_Profiler2.RecordGPURegionLeave(m_Name); } private: const char* m_Name; }; /** * Starts timing from now until the end of the current scope. * @p region is the name to associate with this region (should be * a constant string literal; the pointer must remain valid forever). * Regions may be nested, but preferably shouldn't be nested deeply since * it hurts the visualisation. */ #define PROFILE2(region) CProfile2Region profile2__(region) #define PROFILE2_GPU(region) CProfile2GPURegion profile2gpu__(region) /** * Record the named event at the current time. */ #define PROFILE2_EVENT(name) g_Profiler2.RecordEvent(name) /** * Associates a string (with printf-style formatting) with the current * region or event. * (If the last profiler call was PROFILE2_EVENT, it associates with that * event; otherwise it associates with the currently-active region.) */ #define PROFILE2_ATTR g_Profiler2.RecordAttribute #endif // INCLUDED_PROFILER2