Removes unused Windows sample-based custom profiler. Usage was removed in 56bd5b59b4.

Differential Revision: https://code.wildfiregames.com/D5047 This was SVN commit r27712.
2023-06-16 16:38:18 +00:00 · 2023-06-16 16:38:18 +00:00 · 186bedae56
commit 186bedae56
parent e4bcfd0556
1 changed files with 0 additions and 242 deletions
--- a/source/lib/sysdep/os/win/wprofiler.cpp
+++ b/source/lib/sysdep/os/win/wprofiler.cpp
@ -1,242 +0,0 @@
-/* Copyright (C) 2010 Wildfire Games.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "precompiled.h"
-
-#if 0
-
-// we need a means of measuring performance, since it is hard to predict and
-// depends on many factors. to cover a wider range of configurations, this
-// must also be possible on end-user systems lacking specialized developer
-// tools. therefore, we must ship our own implementation; this complements
-// Intel VTune et al.
-//
-// there are 3 approaches to the problem:
-// - single-step analysis logs every executed instruction. very thorough, but
-//   intolerably slow (~1000x) and not suitable for performance measurement.
-// - intrusive measuring tracks execution time of explicitly marked
-//   functions or 'zones'. more complex, requires adding code, and
-//   inaccurate when thread switches are frequent.
-// - IP sampling records the current instruction pointer at regular
-//   intervals; slow sections of code will over time appear more often.
-//   not exact, but simple and low-overhead.
-//
-// we implement IP sampling due to its simplicity. an intrusive approach
-// might also be added later to account for performance per-module
-// (helps spot the culprit in case hotspots are called from multiple sites).
-
-
-// on Windows, we retrieve the current IP with GetThreadContext. dox require
-// this to happen from another thread, and for the target to be suspended
-// (now enforced by XP SP2). this leads to all sorts of problems:
-// - if the suspended thread was dispatching an exception in the kernel,
-//   register state may be a mix between the correct values and
-//   those captured from the exception.
-// - if running on Win9x with real-mode drivers, interrupts may interfere
-//   with GetThreadContext. however, it's not supported anyway due to other
-//   deficiencies (e.g. lack of proper mmap support).
-// - the suspended thread may be holding locks; we need to be extremely
-//   careful to avoid deadlock! many win api functions acquire locks in
-//   non-obvious ways.
-
-static HANDLE prof_target_thread;
-
-static pthread_t prof_thread;
-
-// delay [ms] between samples. OS sleep timers usually provide only
-// ms resolution. increasing interval reduces overhead and accuracy.
-static const int PROFILE_INTERVAL_MS = 1;
-
-
-static uintptr_t get_target_pc()
-{
-	DWORD ret;
-	HANDLE hThread = prof_target_thread;	// convenience
-
-	ret = SuspendThread(hThread);
-	if(ret == (DWORD)-1)
-	{
-		DEBUG_WARN_ERR(ERR::LOGIC);	// get_target_pc: SuspendThread failed
-		return 0;
-	}
-	// note: we don't need to call more than once: this increments a DWORD
-	// 'suspend count'; target is guaranteed to be suspended unless
-	// the function failed.
-
-	/////////////////////////////////////////////
-
-	// be VERY CAREFUL to avoid anything that may acquire a lock until
-	// after ResumeThread! this includes locks taken by the OS,
-	// e.g. malloc -> heap or GetProcAddress -> loader.
-	// reason is, if the target thread was holding a lock we try to
-	// acquire here, a classic deadlock results.
-
-	uintptr_t pc = 0;	// => will return 0 if GetThreadContext fails
-
-	CONTEXT context;
-	context.ContextFlags = CONTEXT_CONTROL;
-	if(GetThreadContext(hThread, &context))
-		pc = context.PC_;
-
-	/////////////////////////////////////////////
-
-	ret = ResumeThread(hThread);
-	ENSURE(ret != 0);
-	// don't fail (we have a valid PC), but warn
-
-	return pc;
-}
-
-
-static pthread_t thread;
-static sem_t exit_flag;
-
-static void* prof_thread_func(void* UNUSED(data))
-{
-	debug_SetThreadName("eip_sampler");
-
-	const long _1e6 = 1000000;
-	const long _1e9 = 1000000000;
-
-	for(;;)
-	{
-		// calculate absolute timeout for sem_timedwait
-		struct timespec abs_timeout;
-		clock_gettime(CLOCK_REALTIME, &abs_timeout);
-		abs_timeout.tv_nsec += PROFILE_INTERVAL_MS * _1e6;
-		// .. handle nanosecond wraparound (must not be > 1000m)
-		if(abs_timeout.tv_nsec >= _1e9)
-		{
-			abs_timeout.tv_nsec -= _1e9;
-			abs_timeout.tv_sec++;
-		}
-
-		errno = 0;
-		// if we acquire the semaphore, exit was requested.
-		if(sem_timedwait(&exit_flag, &abs_timeout) == 0)
-			break;
-		// actual error: warn
-		if(errno != ETIMEDOUT)
-			DEBUG_WARN_ERR(ERR::LOGIC);	// wpcu prof_thread_func: sem_timedwait failed
-
-		uintptr_t pc = get_target_pc();
-		UNUSED2(pc);
-
-		// ADD TO LIST
-	}
-
-	return 0;
-}
-
-
-
-// call from thread that is to be profiled
-Status prof_start()
-{
-	// we need a real HANDLE to the target thread for use with
-	// Suspend|ResumeThread and GetThreadContext.
-	// alternative: DuplicateHandle on the current thread pseudo-HANDLE.
-	// this way is a bit more obvious/simple.
-	const DWORD access = THREAD_GET_CONTEXT|THREAD_SUSPEND_RESUME;
-	HANDLE hThread = OpenThread(access, FALSE, GetCurrentThreadId());
-	if(hThread == INVALID_HANDLE_VALUE)
-		WARN_RETURN(ERR::FAIL);
-
-	prof_target_thread = hThread;
-
-	sem_init(&exit_flag, 0, 0);
-	pthread_create(&thread, 0, prof_thread_func, 0);
-	return INFO::OK;
-}
-
-Status prof_shutdown()
-{
-	WARN_IF_FALSE(CloseHandle(prof_target_thread));
-	return INFO::OK;
-}
-
-
-
-/*
-open question: how to store the EIP values returned? some background:
-the mechanism above churns out an EIP value (may be in our process, but might
-also be bogus); we need to store it somehow pending analysis.
-
-when done with the current run, we'd want to resolve EIP -> function name,
-source file etc. (rather slow, so don't do it at runtime).
-
-so, how to store it in the meantime? 2 possibilities:
- simple array/vector of addresses (of course optimized to reduce allocs)
- fixed size array of 'bins' (range of addresses; may be as fine as 1 byte);
-each bin has a counter which is incremented when the bin's corresponding
-address has been hit.
-
-it's a size tradeoff here; for simple runs of < 1 min (60,000 ms), #1
-would use 240kb of mem. #2 requires sizeof_whole_program * bytes_per_counter
-up front, and has problems measuring DLLs (we'd have to explicitly map
-the DLL address range into a bin - ugh). however, if we ever want to
-test for say an hour (improves accuracy of profiling due to larger sample size),
-#1 would guzzle 15mb of memory.
-
-hm, another idea would be to write out #1's list of addresses periodically.
-to make sure the disk I/O doesn't come at a bad time, we could have the main
-thread call into the profiler and request it write out at that time.
-this would require extreme caution to avoid the deadlock problem, but looks
-doable.
-
-------- [2] ----------
-
-realistic profiler runs will take up to an hour.
-
-writing out to disk would work: could have main thread call back.
-that and adding EIP to list would be atomic (locked).
-BUT: large amount of data, that's bad (loading at 30mb/s => 500ms load time alone)
-
-problem with enumerating all symbols at startup: how do we enum all DLLs?
-
-hybrid idea: std::map of EIPs. we don't build the map at startup,
-but add when first seen and subsequently increment counter stored there.
-problem: uses more memory/slower access than list.
-would have to make sure EIPs are reused.
-to help that, could quantize down to 4 byte (or so) bins.
-accessing debug information at runtime to determine function length is too slow.
-
-maybe some weird data structure: one bucket controls say 256 bytes of code
-bucket is found by stripping off lower 8 bits. then, store only
-the hit count for that byte. where's the savings over normal count?
-
-TODO: what if the thread is sleeping at the time we query EIP?
-can't detect that - suspend count is only set by SuspendThread
-do we want to report that point (it's good to know), or try to access other threads?
-
-TODO split off target thread / get PC into sysdep; profiler thread is portable!
-
-
-at exit: resolve list to hotspots
-probably hard; a start would be just the function in which the address is, then hit count
-
-
-==========================================
-
-
-*/
-#endif