From d7a057143d71aba53ac03702ec79925d9e1bce71 Mon Sep 17 00:00:00 2001 From: Ykkrosh Date: Fri, 6 Nov 2009 22:48:11 +0000 Subject: [PATCH] An initial Dehydra-based static analysis script for type-checking printf-style functions This was SVN commit r7175. --- build/dehydra/README.txt | 18 ++ build/dehydra/printf-type-check.js | 239 ++++++++++++++++++++++ build/dehydra/tests/printf-type-check.cpp | 42 ++++ build/dehydra/tests/printf-type-check.txt | 18 ++ 4 files changed, 317 insertions(+) create mode 100644 build/dehydra/README.txt create mode 100644 build/dehydra/printf-type-check.js create mode 100644 build/dehydra/tests/printf-type-check.cpp create mode 100644 build/dehydra/tests/printf-type-check.txt diff --git a/build/dehydra/README.txt b/build/dehydra/README.txt new file mode 100644 index 0000000000..95ace8e5f9 --- /dev/null +++ b/build/dehydra/README.txt @@ -0,0 +1,18 @@ +Dehydra is a tool that allows custom static analysis of C++ code, with analysis code written in JavaScript, running as a GCC plugin. + +This directory has some analysis scripts. The setup is a bit ad hoc and not well tested or integrated into the build system or anything, so use at your own risk. + +General usage instructions: + + * Run Linux. (It might work on OS X too.) + + * Install Dehydra, as per https://developer.mozilla.org/En/Dehydra/Installing_Dehydra + + * Build 0 A.D. from build/workspaces/gcc: + export CXX="$HOME/gcc-dehydra/installed/bin/g++ -fplugin=$HOME/gcc-dehydra/dehydra/gcc_treehydra.so -fplugin-arg=../../dehydra/printf-type-check.js -DCONFIG_DEHYDRA=1" + make + # (or "make test -j3 -k" to build the engine and tests and to do 3 files in parallel and continue past errors, etc) + + * Wait (it's quite slow) and look for the new compiler warnings/errors. + +The "tests" directory doesn't actually contain any proper tests, just some example files and expected outputs for rough sanity checking. 
diff --git a/build/dehydra/printf-type-check.js b/build/dehydra/printf-type-check.js
new file mode 100644
index 0000000000..01be3af3d7
--- /dev/null
+++ b/build/dehydra/printf-type-check.js
@@ -0,0 +1,286 @@
+// This script attempts to check argument types of printf/wprintf/scanf/wscanf style functions.
+// Mostly it's similar to built-in GCC warning functionality, but with the benefit that it can check
+// wchar_t* format strings too.
+//
+// (This is somewhat duplicating the functionality of https://bug493996.bugzilla.mozilla.org/attachment.cgi?id=388700)
+
+include('treehydra.js');
+include('gcc_compat.js');
+include('gcc_util.js');
+include('gcc_print.js');
+
+// Get string corresponding to string literal expressions.
+// Looks through NOP_EXPR wrappers; returns undefined when expr is not the
+// address of a STRING_CST (e.g. the format string is a variable).
+function get_string_constant(expr) {
+    if (TREE_CODE(expr) == NOP_EXPR)
+        return get_string_constant(expr.operands()[0]);
+    else if (TREE_CODE(expr) == ADDR_EXPR && TREE_CODE(expr.operands()[0]) == STRING_CST) {
+        return expr.operands()[0].string.str;
+    }
+}
+
+// Return true if the argument type list of decl has no VOID_TYPE entry.
+function is_vararg(decl) {
+    // Non-vararg functions end with a VOID_TYPE sentinel
+    for (var t in flatten_chain(TYPE_ARG_TYPES(TREE_TYPE(decl)))) {
+        if (TREE_CODE(TREE_VALUE(t)) == VOID_TYPE)
+            return false;
+    }
+    return true;
+}
+
+// Return ['a' (ascii) or 'w' (wide), 'printf' or 'scanf', string-index, first-to-check] or undefined.
+// The two indexes are 1-based argument positions (callers subtract 1).
+// loc is a function returning the location to attach diagnostics to.
+function find_printf_type(decl, loc) {
+    if (! is_vararg(decl))
+        return;
+
+    var decl_attrs = translate_attributes(DECL_ATTRIBUTES(decl)); // 'user' attrs are here
+    var type_attrs = translate_attributes(TYPE_ATTRIBUTES(TREE_TYPE(decl))); // 'format' attrs are here
+
+    for each (var a in decl_attrs.concat(type_attrs)) {
+        if (a.name == 'format') {
+            var start = a.value[1];
+            var first = a.value[2];
+            if (a.value[0] == 'printf')
+                return ['a', 'printf', start, first];
+            else if (a.value[0] == 'scanf')
+                return ['a', 'scanf', start, first];
+            else
+                error('Unrecognised format attribute type "'+a.value[0]+'"', loc());
+        } else if (a.name == 'user' && a.value[0].match(/^format/)) {
+            // user("format, <a|w>, <printf|scanf>, <string-index>, <first-to-check>")
+            var [ctype, functype, start, first] = a.value[0].split(/,\s*/).slice(1);
+            if (first == '+1') first = (+start) + 1; // bit ugly, but lets our macros work easily
+            return [ctype, functype, start, first];
+        }
+    }
+
+    var name = decl_name(decl);
+
+    // Special cases for functions we use and aren't declared with attributes:
+    if (name == 'sscanf')
+        return ['a', 'scanf', 2, 3];
+    else if (name == 'swscanf')
+        return ['w', 'scanf', 2, 3];
+    else if (name == 'snprintf')
+        return ['a', 'printf', 3, 4];
+    else if (name == 'wprintf')
+        return ['w', 'printf', 1, 2];
+    else if (name == 'fwprintf')
+        return ['w', 'printf', 2, 3];
+    else if (name == 'swprintf')
+        return ['w', 'printf', 3, 4];
+    else if (name == 'JS_ReportError')
+        return ['a', 'printf', 2, 3];
+
+    // Ignore vararg functions that we know aren't using normal format strings
+    if (name.match(/^(__builtin_va_start|execlp|open|fcntl|ioctl|sem_open|h_alloc|sys_wopen|ogl_HaveExtensions|JS_ConvertArguments)$/))
+        return;
+
+    warning('Ignoring unannotated vararg function "'+name+'"', loc());
+}
+
+// Check one conversion specification (fmt) against the type name of the
+// argument supplied for it (arg). Returns a false value when the caller
+// should report a type mismatch; returns true both when the argument is
+// acceptable and when an error about fmt itself was already reported here.
+function compare_format_type(ctype, functype, fmt, arg, loc) {
+    var m, len, spec;
+    if (functype == 'printf') {
+        m = fmt.match(/^%([-+ #0]*)(\*|\d+)?(\.\*|\.-?\d+)?(hh|h|ll|l|j|z|t|L)?([diouxXfFeEgGaAcspn%])$/);
+        if (m) {
+            len = m[4] || '';
+            spec = m[5];
+        }
+    } else if (functype == 'scanf') {
+        m = fmt.match(/^%(\*?)(\d*)(hh|h|ll|l|j|z|t|L)?([diouxaefgcs[pn%])$/);
+        if (m) {
+            len = m[3] || '';
+            spec = m[4];
+        }
+    } else {
+        error('Internal error: unknown functype '+functype, loc());
+        return true;
+    }
+
+    if (! spec) {
+        error('Invalid format specifier "'+fmt+'"', loc());
+        return true;
+    }
+
+    var t = len+spec;
+
+    if (ctype == 'w' && t == 's')
+        error('Non-portable %s used in wprintf-style function', loc());
+    if (ctype == 'a' && t == 'hs')
+        error('Illegal %hs used in printf-style function', loc());
+
+    if (functype == 'printf') {
+        // char and short arguments are promoted to int when passed through '...'
+        if (t.match(/^(hh|h)?[diouxX]$/) || t == 'c')
+            return (arg == 'int' || arg == 'unsigned int');
+        if (t.match(/^lc$/))
+            return (arg == 'int' || arg == 'unsigned int'); // spec says wint_t
+        if (t.match(/^l[diouxX]$/))
+            return (arg == 'long int' || arg == 'long unsigned int');
+        if (t.match(/^ll[diouxX]$/))
+            return (arg == 'long long int' || arg == 'long long unsigned int');
+        if (t.match(/^[fFeEgGaA]$/))
+            return (arg == 'double');
+        if (t.match(/^p$/))
+            return (arg.match(/\*$/));
+        // ...
+    } else if (functype == 'scanf') {
+        // Signedness is not distinguished, matching the printf checks above
+        if (t.match(/^[dioux]$/))
+            return (arg == 'int*' || arg == 'unsigned int*');
+        if (t.match(/^l[diouxX]$/))
+            return (arg == 'long int*' || arg == 'long unsigned int*');
+        if (t.match(/^z[diouxX]$/))
+            return (arg == 'long unsigned int*'); // spec says size_t*
+        if (t.match(/^[c[]$/))
+            return (arg == 'char*' || arg == 'unsigned char*');
+        if (t.match(/^l[c[]$/))
+            return (arg == 'wchar_t*');
+        if (t.match(/^[aefg]$/))
+            return (arg == 'float*');
+        if (t.match(/^l[aefg]$/))
+            return (arg == 'double*');
+        // ...
+    }
+
+    if (t.match(/^h?s$/))
+        return (arg.match(/^(const )?(unsigned )?char\*$/));
+    if (t.match(/^ls$/))
+        return (arg.match(/^(const )?(unsigned )?wchar_t\*$/));
+
+    error('Unrecognized format specifier "'+fmt+'"', loc());
+    return true;
+}
+
+// Check every variadic argument of one call against its format string.
+// arg_type_names holds the type names of the arguments that follow the
+// format string, in call order. loc is a function returning the location
+// to report errors at.
+function check_arg_types(ctype, functype, fmt_string, arg_type_names, loc) {
+    // Match a superset of printf and scanf format strings
+    var fmt_types = fmt_string.match(/%([-+ #0*]*)(\*|\d+)?(\.\*|\.-?\d+)?(hh|h|ll|l|j|z|t|L)?(.)/g) || [];
+
+    // One entry per argument the format consumes: the conversion
+    // specification itself, or '*' for a printf width/precision argument.
+    var expected = [];
+    for (var i = 0; i < fmt_types.length; ++i) {
+        var fmt_type = fmt_types[i];
+        if (fmt_type == '%%')
+            continue;
+        var stars = fmt_type.match(/\*/g);
+        if (functype == 'scanf') {
+            // "%*..." suppresses assignment in scanf, so no argument is consumed
+            if (stars)
+                continue;
+        } else {
+            // Each '*' width/precision takes an int argument ahead of the value
+            for (var j = 0; stars && j < stars.length; ++j)
+                expected.push('*');
+        }
+        expected.push(fmt_type);
+    }
+
+    if (expected.length != arg_type_names.length) {
+        error('Number of format string specifiers ('+expected.length+') != number of format arguments ('+arg_type_names.length+')', loc());
+        return;
+    }
+
+    for (var k = 0; k < expected.length; ++k) {
+        var arg = arg_type_names[k];
+        if (expected[k] == '*') {
+            if (arg != 'int' && arg != 'unsigned int')
+                error('Invalid argument type "'+arg+'" for "*" width/precision', loc());
+        } else if (! compare_format_type(ctype, functype, expected[k], arg, loc)) {
+            error('Invalid argument type "'+arg+'" for format specifier "'+expected[k]+'"', loc());
+        }
+    }
+}
+
+// Return the printable name of type with typedefs resolved, including
+// through every level of pointer types.
+function type_string_without_typedefs(type) {
+    // Walk up the typedef chain
+    while (TYPE_NAME(type) && TREE_CODE(TYPE_NAME(type)) == TYPE_DECL && DECL_ORIGINAL_TYPE(TYPE_NAME(type)))
+        type = DECL_ORIGINAL_TYPE(TYPE_NAME(type));
+    // Recursively strip typedefs from pointer types
+    if (TREE_CODE(type) == POINTER_TYPE) {
+        // This bit is copied from type_string():
+        let quals = [];
+        if (TYPE_VOLATILE(type)) quals.push('volatile');
+        if (TYPE_RESTRICT(type)) quals.push('restrict');
+        if (TYPE_READONLY(type)) quals.push('const');
+        var suffix = quals.length ? ' ' + quals.join(' ') : '';
+        return type_string_without_typedefs(TREE_TYPE(type)) + '*' + suffix;
+    } else {
+        return type_string(type);
+    }
+}
+
+// Walk the body of fndecl and check every call to a printf/scanf-style function.
+function walk_printfs(fndecl) {
+    function tree_walker(t, stack) {
+        // Best available location: t itself, else the last entry of the
+        // walker's stack that has one, else the function body.
+        function getLocation() {
+            var loc = location_of(t);
+            if (loc) return loc;
+            for (var i = stack.length - 1; i >= 0; --i) {
+                var loc = location_of(stack[i]);
+                if (loc) return loc;
+            }
+            return location_of(DECL_SAVED_TREE(fndecl));
+        }
+
+        var code = TREE_CODE(t);
+        if (code == 'CALL_EXPR') {
+            var decl = call_function_decl(t);
+            if (! decl)
+                return true;
+
+            var printf_type = find_printf_type(decl, getLocation);
+            if (! printf_type)
+                return true;
+
+//            print('--------------');
+//            print(rectify_function_decl(decl));
+//            print(printf_type);
+
+            var fmt_arg = CALL_EXPR_ARG(t, printf_type[2]-1);
+            var fmt_string = get_string_constant(fmt_arg);
+            if (typeof fmt_string == 'undefined') {
+                warning('Non-constant format string argument - can\'t check types', getLocation());
+                return true;
+            }
+
+            // A first-to-check index of 0 means the arguments are not
+            // available for checking (as in GCC's format attribute), so there
+            // is nothing to compare; slice(0-1) below would wrongly pick the
+            // last argument.
+            if (+printf_type[3] == 0)
+                return true;
+
+            var arg_type_names = [];
+            for (var operand in call_arg_iterator(t)) {
+                var type = type_string_without_typedefs(TREE_TYPE(operand));
+                arg_type_names.push(type);
+            }
+            check_arg_types(printf_type[0], printf_type[1], fmt_string, arg_type_names.slice(printf_type[3]-1), getLocation);
+        }
+        return true;
+    }
+    walk_tree (DECL_SAVED_TREE(fndecl), tree_walker);
+}
+
+// Not called from this file; presumably the hook Treehydra invokes per function (confirm).
+function process_cp_pre_genericize(fndecl) {
+    walk_printfs(fndecl);
+}
diff --git a/build/dehydra/tests/printf-type-check.cpp b/build/dehydra/tests/printf-type-check.cpp new file mode 100644 index 0000000000..f170660d31 --- /dev/null +++ b/build/dehydra/tests/printf-type-check.cpp @@ -0,0 +1,42 @@ +#include +#include + +extern void foo(const char*, ...) __attribute__((user("format, a, printf, 1, 2"))); +extern void bar(const char*, ...) 
__attribute__((format(printf, 1, 2))); +extern void baz(const char*, ...); +extern void qux(const char*); + +int main() { + const char* s = "%s"; + char buf[256]; + typedef int i32; + typedef unsigned char u8; + + i32 n = 2; + printf("%d\n", 123); + printf("%z\n", 123); + printf("%d\n", (unsigned long)123); + printf("%lu\n", (long)123); + printf("%d%%\n", 123.0); + printf("%d %+02.7x\n", 123, 456); + foo("%s", 1); + foo("%s", n); + foo("%d", (u8*)"x"); + foo("%s", (unsigned short)3); + bar("%s", 1); + baz("%s", 1); + qux("%s"); + bar("xyz\0%s"); + printf(s, "x"); + printf("%s", "x"); + printf("%d%d", 1, n); + printf("%s", L"x"); + sprintf(buf, "%s", "x"); + sprintf(buf, "%s", L"x"); + wprintf(L"%s", "x"); + wprintf(L"%s", L"x"); + wprintf(L"%hs", "x"); + wprintf(L"%hs", L"x"); + wprintf(L"%ls", "x"); + wprintf(L"%ls", L"x"); +} diff --git a/build/dehydra/tests/printf-type-check.txt b/build/dehydra/tests/printf-type-check.txt new file mode 100644 index 0000000000..fd80b24038 --- /dev/null +++ b/build/dehydra/tests/printf-type-check.txt @@ -0,0 +1,18 @@ +tests/printf-type-check.cpp: In function ‘int main()’: +tests/printf-type-check.cpp:17: error: Invalid format specifier "%z" +tests/printf-type-check.cpp:18: error: Invalid argument type "long unsigned int" for format specifier "%d" +tests/printf-type-check.cpp:20: error: Invalid argument type "double" for format specifier "%d" +tests/printf-type-check.cpp:22: error: Invalid argument type "int" for format specifier "%s" +tests/printf-type-check.cpp:23: error: Invalid argument type "int" for format specifier "%s" +tests/printf-type-check.cpp:24: error: Invalid argument type "unsigned char*" for format specifier "%d" +tests/printf-type-check.cpp:25: error: Invalid argument type "int" for format specifier "%s" +tests/printf-type-check.cpp:26: error: Invalid argument type "int" for format specifier "%s" +tests/printf-type-check.cpp:27: warning: Ignoring unannotated vararg function "baz" +tests/printf-type-check.cpp:29: 
error: Number of format string specifiers (1) != number of format arguments (0) +tests/printf-type-check.cpp:30: warning: Non-constant format string argument - can't check types +tests/printf-type-check.cpp:33: error: Invalid argument type "const wchar_t*" for format specifier "%s" +tests/printf-type-check.cpp:36: error: Non-portable %s used in wprintf-style function +tests/printf-type-check.cpp:37: error: Non-portable %s used in wprintf-style function +tests/printf-type-check.cpp:37: error: Invalid argument type "const wchar_t*" for format specifier "%s" +tests/printf-type-check.cpp:39: error: Invalid argument type "const wchar_t*" for format specifier "%hs" +tests/printf-type-check.cpp:40: error: Invalid argument type "const char*" for format specifier "%ls"