From 121780b733a60c7ac0e7403912e7ca25a20407cb Mon Sep 17 00:00:00 2001 From: janwas Date: Sat, 4 Jun 2005 18:17:56 +0000 Subject: [PATCH] win_internal: add DataKind (fix for dbghelp.h which left it out) wdbg: work around several dbghelp flaws (most importantly concerning address of symbol, which was leading to completely wrong display). refactored sym_dump_*. also further cleanup + dox. This was SVN commit r2373. --- source/lib/sysdep/win/wdbg.cpp | 1279 +++++++++++++++++--------------- 1 file changed, 688 insertions(+), 591 deletions(-) diff --git a/source/lib/sysdep/win/wdbg.cpp b/source/lib/sysdep/win/wdbg.cpp index 56057fb56f..e33f60a87a 100755 --- a/source/lib/sysdep/win/wdbg.cpp +++ b/source/lib/sysdep/win/wdbg.cpp @@ -71,6 +71,12 @@ static void unlock() } +enum WdbgError +{ + WDBG_UNRETRIEVABLE_STATIC = -100000 +}; + + ////////////////////////////////////////////////////////////////////////////// @@ -525,12 +531,6 @@ pcontext = &context; the intrinsics only give us EIP reliably. AoRA - 4 is a hack -#ifdef __cplusplus -#define EXTERNC extern "C" -#else -#define EXTERNC -#endif - // _ReturnAddress and _AddressOfReturnAddress should be prototyped before use EXTERNC void * _AddressOfReturnAddress(void); EXTERNC void * _ReturnAddress(void); @@ -548,7 +548,7 @@ fp = (DWORD64)_AddressOfReturnAddress()-sizeof(void*); -// _ReturnAddress and _AddressOfReturnAddress should be prototyped before use +// _ReturnAddress must be prototyped before use EXTERN_C void* _ReturnAddress(void); #pragma intrinsic(_ReturnAddress) @@ -564,19 +564,18 @@ EXTERN_C void* _ReturnAddress(void); # define PC_ Rip # define FP_ Rbp # define SP_ Rsp -# define GET_FP __asm mov [fp], rbp -# define GET_SP __asm mov [sp], rsp +# define GET_FP_SP\ + __asm mov [fp], rbp\ + __asm mov [sp], rsp #elif defined(_M_IX86) # define PC_ Eip # define FP_ Ebp # define SP_ Esp -# define GET_FP\ +# define GET_FP_SP\ + __asm xor eax, eax\ __asm mov dword ptr [fp_], ebp\ - __asm xor eax, eax\ - __asm mov dword ptr 
[fp_+4], eax -# define GET_SP\ + __asm mov dword ptr [fp_+4], eax\ __asm mov dword ptr [sp_], esp\ - __asm xor eax, eax\ __asm mov dword ptr [sp_+4], eax #else # error "port" @@ -645,9 +644,7 @@ static int walk_stack(StackFrameCallback cb, void* user_arg = 0, uint skip = 0, // rationale: see above. STACKFRAME64 sf; - static DWORD64 fp_, sp_; - GET_FP; - GET_SP; + static DWORD64 fp_, sp_; GET_FP_SP; init_STACKFRAME64(&sf, pcontext, fp_, sp_); @@ -657,19 +654,19 @@ static int walk_stack(StackFrameCallback cb, void* user_arg = 0, uint skip = 0, if(!pcontext) skip++; -/* + CONTEXT context; EXCEPTION_POINTERS* ep; __try { - RaiseException(0x10000, 0, 0,0); + RaiseException(0, 0, 0, 0); } __except(ep = GetExceptionInformation(), memcpy(&context, ep->ContextRecord, sizeof(CONTEXT)), EXCEPTION_CONTINUE_EXECUTION) { assert(0); // never reached } pcontext = &context; -*/ + // StackWalk64 may write to pcontext, but there's no mention of // EXCEPTION_POINTERS.ContextRecord being read-only, so don't copy it. @@ -678,7 +675,8 @@ pcontext = &context; for(;;) { lock(); - BOOL ok = StackWalk64(machine, hProcess, hThread, &sf, (void*)pcontext, 0, SymFunctionTableAccess64, SymGetModuleBase64, 0); + BOOL ok = StackWalk64(machine, hProcess, hThread, &sf, (void*)pcontext, + 0, SymFunctionTableAccess64, SymGetModuleBase64, 0); unlock(); /* @@ -690,7 +688,7 @@ display_msg("after StackWalk", buf); // callback never indicated success and no (more) frames found: abort. // note: also test FP because StackWalk64 sometimes erroneously // reports success. unfortunately it doesn't SetLastError either, - // so we can't indicate the cause of fialure *sigh*. + // so we can't indicate the cause of failure. 
*sigh* if(!ok || !sf.AddrFrame.Offset) return -911; // distinctive error value @@ -751,11 +749,13 @@ struct DumpState // keep in sync with MAX_* above uint level : 8; uint indirection : 8; + uint fits_on_one_line : 1; DumpState() { level = 0; indirection = 0; + fits_on_one_line = 0; } }; @@ -804,7 +804,7 @@ static void out_reset() // algorithm: scan the "string" and count # text chars vs. garbage. static bool is_string(const u8* p, size_t stride) { - // note: access violations are caught by dump_data_sym; output is "?". + // note: access violations are caught by dump_sym; output is "?". int score = 0; for(;;) { @@ -844,500 +844,86 @@ static bool is_bogus_pointer(const void* p) } -////////////////////////////////////////////////////////////////////////////// -// -// output values of specific types of local variables -// -////////////////////////////////////////////////////////////////////////////// - -// forward decl; called by dump_UDT. -static int dump_data_sym(DWORD data_idx, const u8* p, DumpState state); - -// forward decl; called by dump_array, dump_pointer and dump_typedef. -static int dump_type_sym(DWORD type_idx, const u8* p, DumpState state); - - -// these functions return -1 if they're not able to produce any reasonable -// output; dump_data_sym will display value as "?" - - - -static int dump_sequence(const u8* p, uint num_elements, DWORD el_type_idx, size_t el_size, DumpState state) +// provide c_str() access for any specialization of std::basic_string +// (since dump_string doesn't know type at compile-time). +// also performs a basic sanity check to see if the object is initialized. 
+struct AnyString : public std::string { - // special case for character arrays: display as string - if(el_size == sizeof(char) || el_size == sizeof(wchar_t)) - if(is_string(p, el_size)) - { - // make sure it's 0-terminated - wchar_t buf[512]; - if(el_size == sizeof(wchar_t)) - wcscpy_s(buf, ARRAY_SIZE(buf), (const wchar_t*)p); - else - { - size_t i; - for(i = 0; i < ARRAY_SIZE(buf)-1; i++) - { - buf[i] = (wchar_t)p[i]; - if(buf[i] == '\0') - break; - } - buf[i] = '\0'; - } - - out(L"\"%s\"", buf); + const void* safe_c_str(size_t el_size) const + { + // bogus + if(_Myres < _Mysize) return 0; - } - - // regular array: - const uint num_elements_to_show = MIN(20, num_elements); - - const bool fits_on_one_line = - (el_size == sizeof(char) && num_elements <= 16) || - (el_size <= sizeof(int ) && num_elements <= 8); - - out(fits_on_one_line? L"{ " : L"\r\n"); - state.level++; - - int err = 0; - for(uint i = 0; i < num_elements_to_show; i++) - { - if(!fits_on_one_line) - INDENT; - - int ret = dump_type_sym(el_type_idx, p + i*el_size, state); - if(err == 0) // remember first error - err = ret; - - // add separator unless this is the last element - if(i != num_elements_to_show-1) - out(fits_on_one_line? L", " : L",\r\n"); + return (_Myres < 16/el_size)? _Bx._Buf : _Bx._Ptr; } - // we truncated some - if(num_elements != num_elements_to_show) - out(L" ..."); - - if(fits_on_one_line) - out(L" }"); - return err; -} - - - -// is a SymTagPointerType; output its value. -// called by dump_type_sym; lock is held. -static int dump_pointer_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - // read+output pointer's value. - p = (const u8*)movzx_64le(p, size); - out(L"0x%p", p); - - // bail if it's obvious the pointer is bogus - // (=> can't display what it's pointing to) - if(is_bogus_pointer(p)) - return 0; - - // display what the pointer is pointing to. 
if the pointer is invalid - // (despite "bogus" check above), dump_type_sym recovers via SEH and - // returns < 0; dump_data_sym will print "?". - out(L" -> "); // we out_erase this if it's a void* pointer - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_TYPEID, &type_idx)) - return -1; - state.indirection++; - return dump_type_sym(type_idx, p, state); -} - - -////////////////////////////////////////////////////////////////////////////// - - -// is a SymTagBaseType; output its value. -// called by dump_type_sym; lock is held. -static int dump_base_type_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - DWORD base_type; - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_BASETYPE, &base_type)) - return -1; - - u64 data = movzx_64le(p, size); - - // single out() call. note: we pass a single u64 for all sizes, - // which will only work on little-endian systems. - const wchar_t* fmt; - - switch(base_type) - { - // boolean - case btBool: - assert(size == sizeof(bool)); - fmt = L"%hs"; - data = (u64)(data? "true " : "false"); - break; - - // floating-point - // note: we special-case 0xCC..CC ("uninitialized mem"); - // interpreting that as float|double results in garbage. - case btFloat: - if(size == sizeof(float)) - fmt = (data != 0xCCCCCCCC)? L"%g" : L"0x%08X"; - else if(size == sizeof(double)) - fmt = (data != 0xCCCCCCCCCCCCCCCC)? L"%lg" : L"0x%016I64X"; - else - debug_warn("dump_base_type_sym: invalid float size"); - break; - - // signed integers (displayed as decimal) - case btInt: - case btLong: - if(size == 1 || size == 2 || size == 4 || size == 8) - fmt = L"%I64d"; - else - debug_warn("dump_base_type_sym: invalid int size"); - break; - - // unsigned integers (displayed as hex) - // note: 0x00000000 can get annoying (0 would be nicer), - // but it indicates the variable size and makes for consistently - // formatted structs/arrays. 
(0x1234 0 0x5678 is ugly) - case btUInt: - case btULong: - if(size == 1) - { - // _TUCHAR - if(state.indirection) - { - state.indirection = 0; - return dump_sequence(p, 8, type_idx, size, state); - } - fmt = L"0x%02X"; - } - else if(size == 2) - fmt = L"0x%04X"; - else if(size == 4) - fmt = L"0x%08X"; - else if(size == 8) - fmt = L"0x%016I64X"; - else - debug_warn("dump_base_type_sym: invalid uint size"); - break; - - // character - case btChar: - case btWChar: - assert(size == sizeof(char) || size == sizeof(wchar_t)); - // char*, wchar_t* - if(state.indirection) - { - state.indirection = 0; - return dump_sequence(p, 8, type_idx, size, state); - } - // either integer or character; - // if printable, the character will be appended below. - fmt = L"%d"; - break; - - // note: void* is sometimes indicated as (pointer, btNoType). - case btVoid: - case btNoType: - // void* - cannot display what it's pointing to (type unknown). - if(state.indirection) - { - out_erase(4); // " -> " - fmt = L""; - } - else - debug_warn("dump_base_type_sym: non-pointer btVoid or btNoType"); - break; - - default: - debug_warn("dump_base_type_sym: unknown type"); - //-fallthrough - - // unsupported complex types - case btBCD: - case btCurrency: - case btDate: - case btVariant: - case btComplex: - case btBit: - case btBSTR: - case btHresult: - return -1; - } - - out(fmt, data); - - // if the current value is a printable character, display in that form. - // this isn't only done in btChar because sometimes ints store characters. - if(data < 0x100) - { - int c = (int)data; - if(isprint(c)) - out(L" ('%hc')", c); - } - - return 0; -} - - -////////////////////////////////////////////////////////////////////////////// - - -// is a SymTagEnum; output its value. -// called by dump_type_sym; lock is held. 
-static int dump_enum_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - const i64 current_value = movsx_64le(p, size); - - DWORD num_children; - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_CHILDRENCOUNT, &num_children)) - goto name_unavailable; - - // alloc an array to hold child IDs - const size_t MAX_CHILDREN = 1000; - char child_buf[sizeof(TI_FINDCHILDREN_PARAMS)+MAX_CHILDREN*sizeof(DWORD)]; - TI_FINDCHILDREN_PARAMS* fcp = (TI_FINDCHILDREN_PARAMS*)child_buf; - fcp->Start = 0; - fcp->Count = MIN(num_children, MAX_CHILDREN); - - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_FINDCHILDREN, fcp)) - goto name_unavailable; - - for(uint i = 0; i < fcp->Count; i++) - { - DWORD child_data_idx = fcp->ChildId[i]; - - // get enum value. don't make any assumptions about the - // variant's type (i.e. size) - no restriction is documented. - // also don't do this manually - it's tedious and we might not - // cover everything. OLE DLL is already pulled in anyway. - VARIANT v; - SymGetTypeInfo(hProcess, mod_base, child_data_idx, TI_GET_VALUE, &v); - if(VariantChangeType(&v, &v, 0, VT_I8) != S_OK) - continue; - - if(current_value == v.llVal) - { - WCHAR* name; - if(!SymGetTypeInfo(hProcess, mod_base, child_data_idx, TI_GET_SYMNAME, &name)) - goto name_unavailable; - - out(L"%s", name); - LocalFree(name); - return 0; - } - } - -name_unavailable: - // we can produce reasonable output (the numeric value), - // but weren't able to retrieve the matching enum name. - out(L"%I64d", current_value); - return 1; -} - - -////////////////////////////////////////////////////////////////////////////// - - -// is a SymTagArrayType; output its value. -// called by dump_type_sym; lock is held. -static int dump_array_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - // get element count and size - DWORD el_type_idx = 0; - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_TYPEID, &el_type_idx)) - return -1; - // .. 
workaround: TI_GET_COUNT returns total struct size for - // arrays-of-struct. therefore, calculate as size / el_size. - ULONG64 el_size_; - if(!SymGetTypeInfo(hProcess, mod_base, el_type_idx, TI_GET_LENGTH, &el_size_)) - return -1; - const size_t el_size = (size_t)el_size_; - const uint num_elements = (uint)(size / el_size); - assert2(num_elements != 0); - - // display element count - out_erase(3); // " = " - out(L"[%d] = ", num_elements); - - return dump_sequence(p, num_elements, el_type_idx, el_size, state); -} - - -////////////////////////////////////////////////////////////////////////////// - - -// is a SymTagTypedef; output its value. -// called by dump_type_sym; lock is held. -static int dump_typedef_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_TYPEID, &type_idx)) - return -1; - return dump_type_sym(type_idx, p, state); -} - - -////////////////////////////////////////////////////////////////////////////// - - -// is a SymTagFunction; output its value. -// called by dump_type_sym; lock is held. -static int dump_function_type_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - // this symbol gives class parent, return type, and parameter count. - // unfortunately the one thing we care about, its name, - // isn't exposed via TI_GET_SYMNAME, so we resolve it ourselves. - - // output address in case resolve below fails. - out(L"0x%p", p); - - char name[DBG_SYMBOL_LEN]; - int err = debug_resolve_symbol((void*)p, name, 0, 0); - if(err == 0) - out(L" (%hs)", name); - return 0; -} - - -////////////////////////////////////////////////////////////////////////////// - - -// is a SymTagUDT; output its value. -// called by dump_type_sym; lock is held. -static int dump_udt_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - // get array of child symbols (one for each member, plus base class). 
- DWORD num_children; - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_CHILDRENCOUNT, &num_children)) - return -1; - const size_t MAX_CHILDREN = 1000; - char child_buf[sizeof(TI_FINDCHILDREN_PARAMS)+MAX_CHILDREN*sizeof(DWORD)]; - TI_FINDCHILDREN_PARAMS* fcp = (TI_FINDCHILDREN_PARAMS*)child_buf; - fcp->Start = 0; - fcp->Count = MIN(num_children, MAX_CHILDREN); - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_FINDCHILDREN, fcp)) - return -1; - - const size_t avg_size = size / num_children; - // note: no need to check if avg_size == 0. if num_children is huge - // (e.g. due to base class info), fits_on_one_line is false anyway. - const bool fits_on_one_line = (num_children <= 3) && (avg_size <= sizeof(int)); - - if(!fits_on_one_line) - out(L"\r\n"); - - // recursively display each child (call back to dump_data_sym) - state.level++; - int err = 0; - for(uint i = 0; i < fcp->Count; i++) - { - DWORD child_data_idx = fcp->ChildId[i]; - - // make sure this is a data member (avoids confusing dump_data_sym and - // messing up indentation). - DWORD type_tag; - if(!SymGetTypeInfo(hProcess, mod_base, child_data_idx, TI_GET_SYMTAG, &type_tag)) - continue; - if(type_tag != SymTagData) - continue; - DWORD ofs; - if(!SymGetTypeInfo(hProcess, mod_base, child_data_idx, TI_GET_OFFSET, &ofs)) - continue; - assert(ofs < size); - - if(!fits_on_one_line) - INDENT; - - int ret = dump_data_sym(child_data_idx, p+ofs, state); - if(err == 0) // remember first error - err = ret; - - out(fits_on_one_line? L"; " : L"\r\n"); - } - - // note: we can't prevent this from being written by checking - // if i == fcp->Count-1: that symbol may not be a data member. - out_erase(2); // "; " or "\r\n" - return err; -} - - -////////////////////////////////////////////////////////////////////////////// - - -static int dump_unknown_sym(DWORD type_idx, const u8* p, size_t size, DumpState state) -{ - // redundant (already done in dump_type_sym), but this is rare. 
- DWORD type_tag; - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_SYMTAG, &type_tag)) - { - debug_warn("dump_unknown_sym: tag query failed"); - return -1; - } - - debug_printf("Unknown tag: %d\n", type_tag); - return -1; -} - - -////////////////////////////////////////////////////////////////////////////// -// -// stack trace -// -////////////////////////////////////////////////////////////////////////////// - -struct string -{ - union _Bxty - { // storage for small buffer or pointer to larger one - u8 _Buf[16]; - void* _Ptr; - } _Bx; - - size_t _Mysize; // current length of string - size_t _Myres; // current storage reserved for string }; -static bool special_case_udt(WCHAR* type_name, const u8* p, size_t size) +static int dump_string(WCHAR* type_name, const u8* p, size_t size, DumpState state) { - if(!wcsncmp(type_name, L"std::basic_string", 17)) + size_t el_size; + const WCHAR* pretty_name = type_name; + const void* string_data = 0; + + // Pyrogenesis CStr + if(!wcsncmp(type_name, L"CStr", 4)) { - assert(size == sizeof(std::string)); + assert(size == 32/*sizeof(CStr)*/); + + // determine type + if(type_name[4] == '8') + el_size = sizeof(char); + else if(type_name[4] == 'W') + el_size = sizeof(wchar_t); + // .. 
unknown, shouldn't handle it + else + return 1; + + p += 4; // skip vptr (mixed in by ISerializable) + string_data = ((AnyString*)p)->safe_c_str(el_size); + } + // std::basic_string and its specializations + else if(!wcsncmp(type_name, L"std::basic_string", 17)) + { + assert(size == sizeof(std::string) || size == 16); + // dbghelp bug: std::wstring size is given as 16 // determine type - size_t el_size = sizeof(char); - const wchar_t* fmt = L"\"%hs\""; if(!wcsncmp(type_name+18, L"char", 4)) - ; // already set above - else if(!wcsncmp(type_name+18, L"wchar_t", 7)) + { + el_size = sizeof(char); + pretty_name = L"std::string"; + } + else if(!wcsncmp(type_name+18, L"unsigned short", 14)) { el_size = sizeof(wchar_t); - fmt = L"\"%s\""; + pretty_name = L"std::wstring"; } // .. unknown, shouldn't handle it else - return false; + return 1; - // try to see if it's initialized and valid - string* s = (string*)p; - const bool uses_buf = s->_Myres < 16/el_size; - void* string_data = uses_buf? s->_Bx._Buf : s->_Bx._Ptr; - if(s->_Myres < s->_Mysize || - is_bogus_pointer(string_data) || - !is_string((const u8*)string_data, el_size)) - { - out(L"uninitialized/invalid std::basic_string"); - return true; - } + string_data = ((AnyString*)p)->safe_c_str(el_size); + } + // type_name isn't a known string object; we can't handle it. + else + return 1; + // type_name is known but its contents are bogus; so indicate. + if(is_bogus_pointer(string_data) || !is_string((const u8*)string_data, el_size)) + out(L"(uninitialized/invalid %s)", pretty_name); + // valid; display it. + else + { + const wchar_t* fmt = (el_size == sizeof(wchar_t))? L"\"%s\"" : L"\"%hs\""; out(fmt, string_data); - return true; } - return false; + // it was a string object (valid or not) -> we handled it. + return 0; } -static bool suppress_udt(WCHAR* type_name) +static bool should_suppress_udt(WCHAR* type_name) { // specialized HANDLEs are defined as pointers to structs by // DECLARE_HANDLE. 
we only want the numerical value (pointer address), @@ -1345,9 +931,9 @@ static bool suppress_udt(WCHAR* type_name) // note: no need to check for indirection; these are only found in // HANDLEs (which are pointers). // removed obsolete defs: HEVENT, HFILE, HUMPD -#define SUPPRESS_HANDLE(name) if(!wcscmp(type_name, L#name L"__")) return true; if(type_name[0] != 'H') goto not_handle; +#define SUPPRESS_HANDLE(name) if(!wcscmp(type_name, L#name L"__")) return true; SUPPRESS_HANDLE(HACCEL); SUPPRESS_HANDLE(HBITMAP); SUPPRESS_HANDLE(HBRUSH); @@ -1386,115 +972,621 @@ not_handle: } -// given a data symbol's type identifier, output its type name (if -// applicable), determine what kind of variable it describes, and -// call the appropriate dump_* routine. -// -// split out of dump_data_sym so we can recurse for typedefs (cleaner than -// 'restart' via goto or loop). lock is held. -static int dump_type_sym(DWORD type_idx, const u8* p, DumpState state) +// forward decl; called by dump_sequence and some of dump_sym_*. 
+static int dump_sym(DWORD idx, const u8* p, DumpState state); + + +static int dump_sequence(const u8* p, uint num_elements, DWORD el_idx, size_t el_size, DumpState state) { - DWORD type_tag; - if(!SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_SYMTAG, &type_tag)) + // special case for character arrays: display as string + if(el_size == sizeof(char) || el_size == sizeof(wchar_t)) + if(is_string(p, el_size)) + { + // make sure it's 0-terminated + wchar_t buf[512]; + if(el_size == sizeof(wchar_t)) + wcscpy_s(buf, ARRAY_SIZE(buf), (const wchar_t*)p); + else + { + size_t i; + for(i = 0; i < ARRAY_SIZE(buf)-1; i++) + { + buf[i] = (wchar_t)p[i]; + if(buf[i] == '\0') + break; + } + buf[i] = '\0'; + } + + out(L"\"%s\"", buf); + return 0; + } + + // regular array: + const uint num_elements_to_show = MIN(20, num_elements); + const bool fits_on_one_line = + (el_size == sizeof(char) && num_elements <= 16) || + (el_size <= sizeof(int ) && num_elements <= 8); + + state.level++; + state.fits_on_one_line = fits_on_one_line; + + out(fits_on_one_line? 
L"{ " : L"\r\n"); + + int err = 0; + for(uint i = 0; i < num_elements_to_show; i++) + { + int ret = dump_sym(el_idx, p + i*el_size, state); + if(err == 0) // remember first error + err = ret; + + // add separator unless this is the last element + // (can't just erase below due to additional "...") + if(i != num_elements_to_show-1 && fits_on_one_line) + out(L", "); + } + // we truncated some + if(num_elements != num_elements_to_show) + out(L" ..."); + + if(fits_on_one_line) + out(L" }"); + return err; +} + + +static int determine_symbol_address(DWORD idx, const u8** pp, size_t size) +{ + DWORD data_kind; + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_DATAKIND, &data_kind)) + { +SGTI_FAILED: + debug_warn("determine_symbol_address: SymGetTypeInfo failed"); + return -1; + } + + DWORD ofs = 0; + ULONG64 addr = 0; + switch(data_kind) + { + // plain variables: p is already correct + case DataIsLocal: + case DataIsParam: + break; + + case DataIsGlobal: + case DataIsStaticLocal: + case DataIsFileStatic: + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_ADDRESS, &addr)) + goto SGTI_FAILED; + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_ADDRESSOFFSET, &ofs)) + goto SGTI_FAILED; + *pp = (const u8*)addr + ofs; + break; + + // UDT member: get offset + case DataIsMember: + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_OFFSET, &ofs)) + goto SGTI_FAILED; + assert(!size || ofs < size); + *pp += ofs; + break; + + // note: sometimes erroneously reported, but there's nothing we can do + // because TI_GET_ADDRESS returns mod_base, TI_GET_ADDRESSOFFSET 0, + // and TI_GET_OFFSET fails (it's only for members). 
+ case DataIsStaticMember: + return WDBG_UNRETRIEVABLE_STATIC; + + default: + debug_warn("dump_sym_data: invalid data kind"); + return -1; + } + + // success + return 0; +} + + + +////////////////////////////////////////////////////////////////////////////// +// +// dump routines for each dbghelp symbol type +// +////////////////////////////////////////////////////////////////////////////// + +// these functions return -1 if they're not able to produce any reasonable +// output; dump_data_sym will display value as "?" +// called by dump_sym; lock is held. + + +static int dump_sym_array(DWORD idx, const u8* p, size_t size, DumpState state) +{ + // get element count and size + DWORD el_idx = 0; + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_TYPEID, &el_idx)) + return -1; + // .. workaround: TI_GET_COUNT returns total struct size for + // arrays-of-struct. therefore, calculate as size / el_size. + ULONG64 el_size_; + if(!SymGetTypeInfo(hProcess, mod_base, el_idx, TI_GET_LENGTH, &el_size_)) + return -1; + const size_t el_size = (size_t)el_size_; + const uint num_elements = (uint)(size / el_size); + assert2(num_elements != 0); + + // display element count + out_erase(3); // " = " + out(L"[%d] = ", num_elements); + + return dump_sequence(p, num_elements, el_idx, el_size, state); +} + + +////////////////////////////////////////////////////////////////////////////// + + +static int dump_sym_base_type(DWORD idx, const u8* p, size_t size, DumpState state) +{ + DWORD base_type; + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_BASETYPE, &base_type)) return -1; - ULONG64 size_ = 0; - SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_LENGTH, &size_); - // note: fails when type_tag == SymTagFunction, so don't abort - const size_t size = (size_t)size_; - - // get "type name" (only available for SymTagUDT, SymTagEnum, and - // SymTagTypedef types). - // note: can't use SymFromIndex to get tag as well as name, because it - // fails when name isn't available (e.g. 
if this is a SymTagBaseType). - WCHAR* type_name; - if(SymGetTypeInfo(hProcess, mod_base, type_idx, TI_GET_SYMNAME, &type_name)) + u64 data = movzx_64le(p, size); + // if value is 0xCC..CC (uninitialized mem), we display as hex. + // the output would otherwise be garbage; this makes it obvious. + // note: be very careful to correctly handle size=0 (e.g. void*). + for(size_t i = 0; i < size; i++) { - const bool suppress = suppress_udt(type_name); - const bool handled = special_case_udt(type_name, p, size); - LocalFree(type_name); + if(p[i] != 0xCC) + break; + if(i == size-1) + goto uninitialized; + } - if(handled) - return 0; + // single out() call. note: we pass a single u64 for all sizes, + // which will only work on little-endian systems. + const wchar_t* fmt; - if(suppress) - { - // remove " -> " if it was a pointer + switch(base_type) + { + // boolean + case btBool: + assert(size == sizeof(bool)); + fmt = L"%hs"; + data = (u64)(data? "true " : "false"); + break; + + // floating-point + case btFloat: + if(size == sizeof(float)) + fmt = L"%g"; + else if(size == sizeof(double)) + fmt = L"%lg"; + else + debug_warn("dump_sym_base_type: invalid float size"); + break; + + // signed integers (displayed as decimal) + case btInt: + case btLong: + if(size == 1 || size == 2 || size == 4 || size == 8) + fmt = L"%I64d"; + else + debug_warn("dump_sym_base_type: invalid int size"); + break; + + // unsigned integers (displayed as hex) + // note: 0x00000000 can get annoying (0 would be nicer), + // but it indicates the variable size and makes for consistently + // formatted structs/arrays. 
(0x1234 0 0x5678 is ugly) + case btUInt: + case btULong: +uninitialized: + if(size == 1) + { + // _TUCHAR + if(state.indirection) + { + state.indirection = 0; + return dump_sequence(p, 8, idx, size, state); + } + fmt = L"0x%02X"; + } + else if(size == 2) + fmt = L"0x%04X"; + else if(size == 4) + fmt = L"0x%08X"; + else if(size == 8) + fmt = L"0x%016I64X"; + else + debug_warn("dump_sym_base_type: invalid uint size"); + break; + + // character + case btChar: + case btWChar: + assert(size == sizeof(char) || size == sizeof(wchar_t)); + // char*, wchar_t* if(state.indirection) - out_erase(4); + { + state.indirection = 0; + return dump_sequence(p, 8, idx, size, state); + } + // either integer or character; + // if printable, the character will be appended below. + fmt = L"%d"; + break; + + // note: void* is sometimes indicated as (pointer, btNoType). + case btVoid: + case btNoType: + // void* - cannot display what it's pointing to (type unknown). + if(state.indirection) + { + out_erase(4); // " -> " + fmt = L""; + } + else + debug_warn("dump_sym_base_type: non-pointer btVoid or btNoType"); + break; + + default: + debug_warn("dump_sym_base_type: unknown type"); + //-fallthrough + + // unsupported complex types + case btBCD: + case btCurrency: + case btDate: + case btVariant: + case btComplex: + case btBit: + case btBSTR: + case btHresult: + return -1; + } + + out(fmt, data); + + // if the current value is a printable character, display in that form. + // this isn't only done in btChar because sometimes ints store characters. + if(data < 0x100) + { + int c = (int)data; + if(isprint(c)) + out(L" ('%hc')", c); + } + + return 0; +} + + +////////////////////////////////////////////////////////////////////////////// + + +static int dump_sym_base_class(DWORD idx, const u8* p, size_t size, DumpState state) +{ + // unsupported: virtual base classes would require reading the VTbl, + // which is difficult given lack of documentation and not worth it. 
+ return 0; +} + + +////////////////////////////////////////////////////////////////////////////// + + +static int dump_sym_data(DWORD idx, const u8* p, size_t size, DumpState state) +{ + // SymFromIndexW will fail if dataKind happens to be DataIsMember, so + // we use SymGetTypeInfo (slower and less convenient, but no choice). + DWORD type_idx; + WCHAR* name; + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_TYPEID, &type_idx)) + { +SGTI_FAILED: + debug_warn("dump_sym_data: SymGetTypeInfo failed"); + return -1; + } + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_SYMNAME, &name)) + goto SGTI_FAILED; + + + if(!state.fits_on_one_line) + INDENT; + out(L"%s = ", name); + LocalFree(name); + + int err; + __try + { + err = determine_symbol_address(idx, &p, size); + if(err == 0) + err = dump_sym(type_idx, p, state); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + err = -1; + } + // .. dbghelp flaw; see above. + if(err == WDBG_UNRETRIEVABLE_STATIC) + out(L"(unavailable - located in another module)"); + // .. failed to produce any reasonable output for whatever reason. + else if(err < 0) + out(L"?"); + + out(state.fits_on_one_line? L", " : L"\r\n"); + return 0; + // by aborting *for this symbol* and displaying value as "?", + // any errors are considered handled. we don't want one faulty + // member to prevent the entire remaining UDT from being displayed. + // anything really serious (unknown ATM) should be special-cased. 
+} + + +////////////////////////////////////////////////////////////////////////////// + + +static int dump_sym_enum(DWORD idx, const u8* p, size_t size, DumpState state) +{ + const i64 current_value = movsx_64le(p, size); + + DWORD num_children; + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_CHILDRENCOUNT, &num_children)) + goto name_unavailable; + + // alloc an array to hold child IDs + const size_t MAX_CHILDREN = 1000; + char child_buf[sizeof(TI_FINDCHILDREN_PARAMS)+MAX_CHILDREN*sizeof(DWORD)]; + TI_FINDCHILDREN_PARAMS* fcp = (TI_FINDCHILDREN_PARAMS*)child_buf; + fcp->Start = 0; + fcp->Count = MIN(num_children, MAX_CHILDREN); + + if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_FINDCHILDREN, fcp)) + goto name_unavailable; + + for(uint i = 0; i < fcp->Count; i++) + { + DWORD child_data_idx = fcp->ChildId[i]; + + // get enum value. don't make any assumptions about the + // variant's type (i.e. size) - no restriction is documented. + // also don't do this manually - it's tedious and we might not + // cover everything. OLE DLL is already pulled in anyway. + VARIANT v; + SymGetTypeInfo(hProcess, mod_base, child_data_idx, TI_GET_VALUE, &v); + if(VariantChangeType(&v, &v, 0, VT_I8) != S_OK) + continue; + + if(current_value == v.llVal) + { + WCHAR* name; + if(!SymGetTypeInfo(hProcess, mod_base, child_data_idx, TI_GET_SYMNAME, &name)) + goto name_unavailable; + + out(L"%s", name); + LocalFree(name); return 0; } } +name_unavailable: + // we can produce reasonable output (the numeric value), + // but weren't able to retrieve the matching enum name. 
+	out(L"%I64d", current_value);
+	return 1;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// SymTagFunction symbols produce no output; always returns 0.
+// (presumably these are member functions encountered among a UDT's
+// children - TODO confirm.)
+static int dump_sym_function(DWORD idx, const u8* p, size_t size, DumpState state)
+{
+	return 0;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// output the address <p> and, if debug_resolve_symbol succeeds, the name
+// of the symbol found there. always returns 0.
+// the lock is released around debug_resolve_symbol and re-acquired
+// afterwards, i.e. this expects to be called with the lock held.
+static int dump_sym_function_type(DWORD idx, const u8* p, size_t size, DumpState state)
+{
+	// this symbol gives class parent, return type, and parameter count.
+	// unfortunately the one thing we care about, its name,
+	// isn't exposed via TI_GET_SYMNAME, so we resolve it ourselves.
+
+	unlock();	// prevent recursive lock
+
+	char name[DBG_SYMBOL_LEN];
+	int err = debug_resolve_symbol((void*)p, name, 0, 0);
+
+	lock();
+
+	// the address is always shown; name is only used if resolving succeeded.
+	out(L"0x%p", p);
+	if(err == 0)
+		out(L" (%hs)", name);
+	return 0;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// output the value of the pointer stored at <p> and, unless it is
+// obviously bogus, what it points to.
+static int dump_sym_pointer(DWORD idx, const u8* p, size_t size, DumpState state)
+{
+	// read+output pointer's value.
+	// (from here on, p is the address held by the pointer, no longer the
+	// address of the pointer variable itself.)
+	p = (const u8*)movzx_64le(p, size);
+	out(L"0x%p", p);
+
+	// bail if it's obvious the pointer is bogus
+	// (=> can't display what it's pointing to)
+	if(is_bogus_pointer(p))
+		return 0;
+
+	// display what the pointer is pointing to. if the pointer is invalid
+	// (despite "bogus" check above), dump_sym recovers via SEH and
+	// returns -1; dump_sym_data will print "?"
+	out(L" -> ");	// we out_erase this if it's a void* pointer
+	if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_TYPEID, &idx))
+		return -1;
+	state.indirection++;
+	return dump_sym(idx, p, state);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// a typedef has no representation of its own: look up the type index it
+// refers to and dump that instead. returns -1 if the lookup fails,
+// otherwise whatever dump_sym returns.
+static int dump_sym_typedef(DWORD idx, const u8* p, size_t size, DumpState state)
+{
+	if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_TYPEID, &idx))
+		return -1;
+	return dump_sym(idx, p, state);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// display the UDT of type <idx> located at <p> (<size> bytes).
+// types accepted by should_suppress_udt are not displayed at all, and
+// those dump_string can handle are written by it; in both cases 0 is
+// returned. for everything else, the child symbols are retrieved here and
+// dumped one by one. returns -1 if the children can't be queried.
+static int dump_sym_udt(DWORD idx, const u8* p, size_t size, DumpState state)
+{
+	// handle special cases (e.g. HANDLE, std::string).
+	WCHAR* type_name;
+	if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_SYMNAME, &type_name))
+		debug_warn("dump_sym_udt: TI_GET_SYMNAME failed");
+	else
+	{
+		int dump_err = -1;
+		BOOL suppressed = should_suppress_udt(type_name);
+		if(suppressed)
+		{
+			// the data symbol is pointer-to-UDT. since we won't display its
+			// contents, leave only the pointer's value.
+			if(state.indirection)
+				out_erase(4);	// " -> "
+		}
+		else
+			dump_err = dump_string(type_name, p, size, state);
+
+		// buffer was filled in by SymGetTypeInfo(TI_GET_SYMNAME).
+		LocalFree(type_name);
+
+		if(suppressed || dump_err == 0)
+			return 0;	// done
+	}
+
+	// get array of child symbols (members/functions/base classes).
+	DWORD num_children;
+	if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_CHILDRENCOUNT, &num_children))
+		return -1;
+	// a UDT without any children has nothing to display. handle it here
+	// instead of dividing by zero below (which previously only "worked"
+	// because the resulting exception was caught further up the call chain).
+	if(num_children == 0)
+	{
+		out(L"{ }");
+		return 0;
+	}
+	const size_t MAX_CHILDREN = 1000;
+	char child_buf[sizeof(TI_FINDCHILDREN_PARAMS)+MAX_CHILDREN*sizeof(DWORD)];
+	TI_FINDCHILDREN_PARAMS* fcp = (TI_FINDCHILDREN_PARAMS*)child_buf;
+	fcp->Start = 0;
+	// never request more IDs than child_buf can hold.
+	fcp->Count = MIN(num_children, MAX_CHILDREN);
+	if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_FINDCHILDREN, fcp))
+		return -1;
+
+	// num_children is known to be nonzero at this point.
+	const size_t avg_size = size / num_children;
+	// if num_children ends up large (e.g. due to member functions),
+	// avg_size is 0. fits_on_one_line will then be false anyway.
+	const bool fits_on_one_line = (num_children <= 3) && (avg_size <= sizeof(int));
+
+	// one-line form is wrapped in "{ ... }"; otherwise each child is
+	// written on a line of its own.
+	out(fits_on_one_line? L"{ " : L"\r\n");
+
+	// recursively display each child
+	state.level++;
+	state.fits_on_one_line = fits_on_one_line;
+	// remember the first failure, but keep going so that the remaining
+	// children are still displayed.
+	int err = 0;
+	for(uint i = 0; i < fcp->Count; i++)
+	{
+		int ret = dump_sym(fcp->ChildId[i], p, state);
+		if(err == 0)
+			err = ret;
+	}
+
+	if(fits_on_one_line)
+	{
+		// note: can't avoid writing this by checking if i == fcp->Count-1:
+		// each child might be the last valid data member.
+		out_erase(2);	// ", "
+		out(L" }");
+	}
+
+	return err;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// handler for symbol tags that have no dedicated dump_sym_* routine:
+// reports the tag via debug_printf and returns -1 without writing any
+// output. also returns -1 (after debug_warn) if the tag can't be queried.
+static int dump_sym_unknown(DWORD idx, const u8* p, size_t size, DumpState state)
+{
+	// redundant (already done in dump_sym), but this is rare.
+	DWORD type_tag;
+	if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_SYMTAG, &type_tag))
+	{
+		debug_warn("dump_sym_unknown: tag query failed");
+		return -1;
+	}
+
+	debug_printf("Unknown tag: %d\n", type_tag);
+	return -1;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+
+
+// write name and value of the symbol to the output buffer.
+// delegates to dump_sym_* depending on the symbol's tag.
+// returns -1 if the symbol's tag cannot be determined.
+static int dump_sym(DWORD idx, const u8* p, DumpState state)
+{
+	DWORD type_tag;
+	if(!SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_SYMTAG, &type_tag))
+		return -1;
+
+	// get symbol size. half of dump_sym_* need this, so we query it
+	// here and relay to all of them. this property isn't defined for
+	// all symbol types (e.g. function), so ignore failures.
+ ULONG64 size_ = 0; + SymGetTypeInfo(hProcess, mod_base, idx, TI_GET_LENGTH, &size_); + const size_t size = (size_t)size_; + switch(type_tag) { - case SymTagUDT: - return dump_udt_sym (type_idx, p, size, state); - case SymTagEnum: - return dump_enum_sym (type_idx, p, size, state); - case SymTagFunctionType: - return dump_function_type_sym (type_idx, p, size, state); - case SymTagPointerType: - return dump_pointer_sym (type_idx, p, size, state); case SymTagArrayType: - return dump_array_sym (type_idx, p, size, state); + return dump_sym_array (idx, p, size, state); case SymTagBaseType: - return dump_base_type_sym (type_idx, p, size, state); + return dump_sym_base_type (idx, p, size, state); + case SymTagBaseClass: + return dump_sym_base_class (idx, p, size, state); + case SymTagData: + return dump_sym_data (idx, p, size, state); + case SymTagEnum: + return dump_sym_enum (idx, p, size, state); + case SymTagFunction: + return dump_sym_function (idx, p, size, state); + case SymTagFunctionType: + return dump_sym_function_type (idx, p, size, state); + case SymTagPointerType: + return dump_sym_pointer (idx, p, size, state); case SymTagTypedef: - return dump_typedef_sym (type_idx, p, size, state); + return dump_sym_typedef (idx, p, size, state); + case SymTagUDT: + return dump_sym_udt (idx, p, size, state); default: - return dump_unknown_sym (type_idx, p, size, state); + return dump_sym_unknown (idx, p, size, state); } } ////////////////////////////////////////////////////////////////////////////// - - -// xxx indent to current nesting level, display name, and output value via -// dump_type_sym. // -// split out of dump_sym_cb so dump_UDT can call back here for its members. -// lock is held. -static int dump_data_sym(DWORD data_idx, const u8* p, DumpState state) -{ - // return both type_idx and name in one call for convenience. - // this is also more efficient than TI_GET_SYMNAME (avoids 1 LocalAlloc). 
- SYMBOL_INFO_PACKAGEW sp; - SYMBOL_INFOW* sym = &sp.si; - sym->SizeOfStruct = sizeof(sp.si); - sym->MaxNameLen = MAX_SYM_NAME; - if(!SymFromIndexW(hProcess, mod_base, data_idx, sym)) - return -1; - - if(sym->Tag != SymTagData) - { - debug_warn("dump_data_sym: unexpected tag"); - return -1; - } - - out(L"%s = ", sym->Name); - - int ret; - __try - { - ret = dump_type_sym(sym->TypeIndex, p, state); - } - __except(EXCEPTION_EXECUTE_HANDLER) - { - ret = -1; - } - - // couldn't produce any reasonable output; show value as "?" - if(ret < 0) - out(L"?"); - return ret; -} - - +// stack trace +// ////////////////////////////////////////////////////////////////////////////// - struct DumpSymParams { const STACKFRAME64* sf; @@ -1503,7 +1595,7 @@ struct DumpSymParams // get actual address of what the symbol represents (may be relative // to frame pointer); demarcate local/param sections; output name+value via -// dump_data_sym. +// dump_sym_data. // // called from dump_frame_cb for each local symbol; lock is held. static BOOL CALLBACK dump_sym_cb(SYMBOL_INFO* sym, ULONG sym_size, void* ctx) @@ -1547,9 +1639,7 @@ static BOOL CALLBACK dump_sym_cb(SYMBOL_INFO* sym, ULONG sym_size, void* ctx) } DumpState state; - INDENT; - dump_data_sym(sym->Index, (const u8*)addr, state); - out(L"\r\n"); + dump_sym(sym->Index, (const u8*)addr, state); return TRUE; // continue } @@ -1603,9 +1693,6 @@ static int dump_frame_cb(const STACKFRAME64* sf, void* user_arg) } -////////////////////////////////////////////////////////////////////////////// - - // most recent stack frames will be skipped // (we don't want to show e.g. GetThreadContext / this call) static const wchar_t* dump_stack(uint skip, const CONTEXT* pcontext = 0) @@ -2202,8 +2289,6 @@ static int screwaround() - - // // analyze exceptions; determine their type and locus // @@ -2431,9 +2516,10 @@ static LONG WINAPI unhandled_exception_filter(EXCEPTION_POINTERS* ep) // called from wdbg_init. 
// -// rationale: we want to replace the OS "program error" dialog box because +// rationale: +// we want to replace the OS "program error" dialog box because // it is not all too helpful in debugging. to that end, there are -// 4 ways to make sure unhandled exceptions are caught: +// 4 ways to make sure unhandled SEH exceptions are caught: // - via WaitForDebugEvent; the app is run from a separate debugger process. // this complicates analysis, since the exception is in another // address space. also, we are basically implementing a full-featured @@ -2446,7 +2532,18 @@ static LONG WINAPI unhandled_exception_filter(EXCEPTION_POINTERS* ep) // with the following caveat: it is never called when a debugger is active. // workaround: call from a regular SEH __except, e.g. wrapped around main(). // -// note: this also catches regular C++ exceptions! +// since C++ exceptions are implemented via SEH, we can also catch those here; +// it's nicer than a global try{} and avoids duplicating this code. +// we can still get at the C++ information (std::exception.what()) by +// examining the internal exception data structures. these are +// compiler-specific, but haven't changed from VC5-VC7.1. +// alternatively, _set_se_translator could be used to translate all +// SEH exceptions to C++. this way is more reliable/documented, but has +// several drawbacks: +// - it wouldn't work at all in C programs, +// - a new fat exception class would have to be created to hold the +// SEH exception information (e.g. CONTEXT for a stack trace), and +// - this information would not be available for C++ exceptions. static void set_exception_handler() { void* prev_filter = SetUnhandledExceptionFilter(unhandled_exception_filter);