// partial pthread implementation for Win32 // // Copyright (c) 2003-2005 Jan Wassenberg // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of the // License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // Contact info: // Jan.Wassenberg@stud.uni-karlsruhe.de // http://www.stud.uni-karlsruhe.de/~urkt/ #include "precompiled.h" #include #include #include "lib.h" #include "posix.h" #include "win_internal.h" #include "../cpu.h" // CAS static HANDLE HANDLE_from_pthread(pthread_t p) { return (HANDLE)((char*)0 + p); } static pthread_t pthread_from_HANDLE(HANDLE h) { return (pthread_t)(uintptr_t)h; } ////////////////////////////////////////////////////////////////////////////// // // misc // ////////////////////////////////////////////////////////////////////////////// pthread_t pthread_self(void) { return pthread_from_HANDLE(GetCurrentThread()); } int pthread_once(pthread_once_t* once, void (*init_routine)(void)) { if(CAS(once, 0, 1)) init_routine(); return 0; } int pthread_getschedparam(pthread_t thread, int* policy, struct sched_param* param) { if(policy) { DWORD pc = GetPriorityClass(GetCurrentProcess()); *policy = (pc >= HIGH_PRIORITY_CLASS)? SCHED_FIFO : SCHED_RR; } if(param) { const HANDLE hThread = HANDLE_from_pthread(thread); param->sched_priority = GetThreadPriority(hThread); } return 0; } int pthread_setschedparam(pthread_t thread, int policy, const struct sched_param* param) { const int pri = param->sched_priority; // additional boost for policy == SCHED_FIFO DWORD pri_class = NORMAL_PRIORITY_CLASS; if(policy == SCHED_FIFO) { pri_class = HIGH_PRIORITY_CLASS; if(pri == 2) pri_class = REALTIME_PRIORITY_CLASS; } SetPriorityClass(GetCurrentProcess(), pri_class); // choose fixed Windows values from pri const HANDLE hThread = HANDLE_from_pthread(thread); SetThreadPriority(hThread, pri); return 0; } ////////////////////////////////////////////////////////////////////////////// // // thread-local storage // ////////////////////////////////////////////////////////////////////////////// // minimum amount of TLS slots every Windows version provides; // used to validate indices. static const uint TLS_LIMIT = 64; // rationale: don't use an array of dtors for every possible TLS slot. // other DLLs may allocate any number of them in their DllMain, so the // array would have to be quite large. instead, store both key and dtor - // we are thus limited only by pthread_key_create calls (which we control). static const uint MAX_DTORS = 4; static struct { pthread_key_t key; void (*dtor)(void*); } dtors[MAX_DTORS]; int pthread_key_create(pthread_key_t* key, void (*dtor)(void*)) { DWORD idx = TlsAlloc(); if(idx == TLS_OUT_OF_INDEXES) return -ENOMEM; debug_assert(idx < TLS_LIMIT); *key = (pthread_key_t)idx; // acquire a free dtor slot uint i; for(i = 0; i < MAX_DTORS; i++) { if(CAS(&dtors[i].dtor, 0, dtor)) goto have_slot; } // not enough slots; we have a valid key, but its dtor won't be called. debug_warn("increase pthread MAX_DTORS"); return -1; have_slot: dtors[i].key = *key; return 0; } int pthread_key_delete(pthread_key_t key) { DWORD idx = (DWORD)key; debug_assert(idx < TLS_LIMIT); BOOL ret = TlsFree(idx); debug_assert(ret != 0); return 0; } void* pthread_getspecific(pthread_key_t key) { DWORD idx = (DWORD)key; debug_assert(idx < TLS_LIMIT); // TlsGetValue sets last error to 0 on success (boo). // we don't want this to hide previous errors, so it's restored below. DWORD last_err = GetLastError(); void* data = TlsGetValue(idx); // no error if(GetLastError() == 0) { // we care about performance here. SetLastError is low overhead, // but last error = 0 is expected. if(last_err != 0) SetLastError(last_err); } else debug_warn("pthread_getspecific: TlsGetValue failed"); return data; } int pthread_setspecific(pthread_key_t key, const void* value) { DWORD idx = (DWORD)key; debug_assert(idx < TLS_LIMIT); BOOL ret = TlsSetValue(idx, (void*)value); debug_assert(ret != 0); return 0; } static void call_tls_dtors() { again: bool had_valid_tls = false; // for each registered dtor: (call order unspecified by SUSv3) for(uint i = 0; i < MAX_DTORS; i++) { // is slot #i in use? void (*dtor)(void*) = dtors[i].dtor; if(!dtor) continue; // clear slot and call dtor with its previous value. const pthread_key_t key = dtors[i].key; void* tls = pthread_getspecific(key); if(tls) { WARN_ERR(pthread_setspecific(key, 0)); dtor(tls); had_valid_tls = true; } } // rationale: SUSv3 says we're allowed to loop infinitely. we do so to // expose any dtor bugs - this shouldn't normally happen. if(had_valid_tls) goto again; } ////////////////////////////////////////////////////////////////////////////// // // threads // ////////////////////////////////////////////////////////////////////////////// // _beginthreadex cannot call the user's thread function directly due to // differences in calling convention; we need to pass its address and // the user-specified data pointer to our trampoline. // a) a local variable in pthread_create isn't safe because the // new thread might not start before pthread_create returns. // b) allocating on the heap would work but we're trying to keep that // to a minimum. // c) we therefore use static data protected by a critical section. static struct FuncAndArg { void* (*func)(void*); void* arg; } func_and_arg; // bridge calling conventions required by _beginthreadex and POSIX. static unsigned __stdcall thread_start(void* UNUSED(param)) { void* (*func)(void*) = func_and_arg.func; void* arg = func_and_arg.arg; win_unlock(WPTHREAD_CS); void* ret = (void*)-1; __try { ret = func(arg); } __except(wdbg_exception_filter(GetExceptionInformation())) { } call_tls_dtors(); return (unsigned)(uintptr_t)ret; } int pthread_create(pthread_t* thread_id, const void* UNUSED(attr), void* (*func)(void*), void* arg) { win_lock(WPTHREAD_CS); func_and_arg.func = func; func_and_arg.arg = arg; // _beginthreadex has more overhead and no value added vs. // CreateThread, but it avoids small memory leaks in // ExitThread when using the statically-linked CRT (-> MSDN). const uintptr_t id = _beginthreadex(0, 0, thread_start, 0, 0, 0); if(!id) { win_unlock(WPTHREAD_CS); debug_warn("_beginthreadex failed"); return -1; } // SUSv3 doesn't specify whether this is optional - go the safe route. if(thread_id) *thread_id = (pthread_t)id; return 0; } int pthread_cancel(pthread_t thread) { HANDLE hThread = HANDLE_from_pthread(thread); TerminateThread(hThread, 0); debug_printf("WARNING: pthread_cancel is unsafe\n"); return 0; } int pthread_join(pthread_t thread, void** value_ptr) { HANDLE hThread = HANDLE_from_pthread(thread); // note: pthread_join doesn't call for a timeout. if this wait // locks up the process, at least it'll be easy to see why. DWORD ret = WaitForSingleObject(hThread, INFINITE); if(ret != WAIT_OBJECT_0) { debug_warn("pthread_join: WaitForSingleObject failed"); return -1; } // pass back the code that was passed to pthread_exit. // SUS says <*value_ptr> need only be set on success! if(value_ptr) GetExitCodeThread(hThread, (LPDWORD)value_ptr); CloseHandle(hThread); return 0; } ////////////////////////////////////////////////////////////////////////////// // // locks // ////////////////////////////////////////////////////////////////////////////// // rationale: CRITICAL_SECTIONS have less overhead than Win32 Mutex. // disadvantage is that pthread_mutex_timedlock isn't supported, but // the user can switch to semaphores if this facility is important. // DeleteCriticalSection currently doesn't complain if we double-free // (e.g. user calls destroy() and static initializer atexit runs), // and dox are ambiguous. // note: pthread_mutex_t must not be an opaque struct, because the // initializer returns pthread_mutex_t directly and CRITICAL_SECTIONS // shouldn't be copied. // // note: must not use new/malloc to allocate the critical section // because mmgr.cpp uses a mutex and must not be called to allocate // anything before it is initialized. pthread_mutex_t pthread_mutex_initializer() { CRITICAL_SECTION* cs = (CRITICAL_SECTION*)win_alloc(sizeof(CRITICAL_SECTION)); InitializeCriticalSection(cs); return (pthread_mutex_t)cs; } int pthread_mutex_destroy(pthread_mutex_t* m) { CRITICAL_SECTION* cs = (CRITICAL_SECTION*)(*m); DeleteCriticalSection(cs); win_free(cs); return 0; } int pthread_mutex_init(pthread_mutex_t* m, const pthread_mutexattr_t*) { *m = pthread_mutex_initializer(); return 0; } int pthread_mutex_lock(pthread_mutex_t* m) { CRITICAL_SECTION* cs = (CRITICAL_SECTION*)(*m); EnterCriticalSection(cs); return 0; } int pthread_mutex_trylock(pthread_mutex_t* m) { CRITICAL_SECTION* cs = (CRITICAL_SECTION*)(*m); BOOL got_it = TryEnterCriticalSection(cs); return got_it? 0 : -1; } int pthread_mutex_unlock(pthread_mutex_t* m) { CRITICAL_SECTION* cs = (CRITICAL_SECTION*)(*m); LeaveCriticalSection(cs); return 0; } // not implemented - pthread_mutex is based on CRITICAL_SECTION, // which doesn't support timeouts. use sem_timedwait instead. int pthread_mutex_timedlock(pthread_mutex_t* UNUSED(m), const struct timespec* UNUSED(abs_timeout)) { return -ENOSYS; } ////////////////////////////////////////////////////////////////////////////// static HANDLE HANDLE_from_sem_t(sem_t* sem) { return (HANDLE)*sem; } int sem_init(sem_t* sem, int pshared, unsigned value) { SECURITY_ATTRIBUTES sec = { sizeof(SECURITY_ATTRIBUTES) }; sec.bInheritHandle = (BOOL)pshared; HANDLE h = CreateSemaphore(&sec, (LONG)value, 0x7fffffff, 0); WARN_IF_FALSE(h); *sem = (uintptr_t)h; return 0; } int sem_post(sem_t* sem) { HANDLE h = HANDLE_from_sem_t(sem); WARN_IF_FALSE(ReleaseSemaphore(h, 1, 0)); return 0; } int sem_wait(sem_t* sem) { HANDLE h = HANDLE_from_sem_t(sem); DWORD ret = WaitForSingleObject(h, INFINITE); if(ret != WAIT_OBJECT_0) debug_warn("unexpected WaitForSingleObject return value"); return 0; } int sem_destroy(sem_t* sem) { HANDLE h = HANDLE_from_sem_t(sem); WARN_IF_FALSE(CloseHandle(h)); return 0; } // helper function for sem_timedwait - multiple return is convenient. // converts an absolute timeout deadline into a relative length for use with // WaitForSingleObject with the following peculiarity: if the semaphore // could be locked immediately, abs_timeout must be ignored (see SUS). // to that end, we return a timeout of 0 and pass back = false if // abs_timeout is invalid. static DWORD calc_timeout_length_ms(const struct timespec* abs_timeout, bool& timeout_is_valid) { timeout_is_valid = false; if(!abs_timeout) return 0; // SUS requires we fail if not normalized if(abs_timeout->tv_nsec >= 1000000000) return 0; struct timespec cur_time; if(clock_gettime(CLOCK_REALTIME, &cur_time) != 0) return 0; timeout_is_valid = true; // convert absolute deadline to relative length, rounding up to [ms]. // note: use i64 to avoid overflow in multiply. const i64 ds = abs_timeout->tv_sec - cur_time.tv_sec; const long dn = abs_timeout->tv_nsec - cur_time.tv_nsec; i64 length_ms = ds*1000 + (dn+500000)/1000000; // .. deadline already reached; we'll still attempt to lock once if(length_ms < 0) return 0; // .. length > 49 days => result won't fit in 32 bits. most likely bogus. // note: we're careful to avoid returning exactly -1 since // that's the Win32 INFINITE value. if(length_ms >= 0xffffffff) { debug_warn("calc_timeout_length_ms: 32-bit overflow"); length_ms = 0xfffffffe; } return (DWORD)(length_ms & 0xffffffff); } int sem_timedwait(sem_t* sem, const struct timespec* abs_timeout) { bool timeout_is_valid; DWORD timeout_ms = calc_timeout_length_ms(abs_timeout, timeout_is_valid); HANDLE h = HANDLE_from_sem_t(sem); DWORD ret = WaitForSingleObject(h, timeout_ms); // successfully decremented semaphore; bail. if(ret == WAIT_OBJECT_0) return 0; // we're going to return -1. decide what happened: // .. abs_timeout was invalid (must not check this before trying to lock) if(!timeout_is_valid) errno = EINVAL; // .. timeout reached (not a failure) else if(ret == WAIT_TIMEOUT) errno = ETIMEDOUT; return -1; } // wait until semaphore is locked or a message arrives. non-portable. // // background: on Win32, UI threads must periodically pump messages, or // else deadlock may result (see WaitForSingleObject docs). that entails // avoiding any blocking functions. when event waiting is needed, // one cheap workaround would be to time out periodically and pump messages. // that would work, but either wastes CPU time waiting, or introduces // message latency. to avoid this, we provide an API similar to sem_wait and // sem_timedwait that gives MsgWaitForMultipleObjects functionality. // // return value: 0 if the semaphore has been locked (SUS terminology), // -1 otherwise. errno differentiates what happened: ETIMEDOUT if a // message arrived (this is to ease switching between message waiting and // periodic timeout), or an error indication. int sem_msgwait_np(sem_t* sem) { HANDLE h = HANDLE_from_sem_t(sem); DWORD ret = MsgWaitForMultipleObjects(1, &h, FALSE, INFINITE, QS_ALLEVENTS); // semaphore is signalled if(ret == WAIT_OBJECT_0) return 0; // something else: // .. message came up if(ret == WAIT_OBJECT_0+1) errno = ETIMEDOUT; // .. error else { errno = EINVAL; debug_warn("unexpected MsgWaitForMultipleObjects return value"); } return -1; }