1
0
forked from 0ad/0ad

An initial Dehydra-based static analysis script for type-checking printf-style functions

This was SVN commit r7175.
This commit is contained in:
Ykkrosh 2009-11-06 22:48:11 +00:00
parent 3334c83ce0
commit d7a057143d
4 changed files with 317 additions and 0 deletions

18
build/dehydra/README.txt Normal file
View File

@ -0,0 +1,18 @@
Dehydra is a tool that allows custom static analysis of C++ code, with analysis code written in JavaScript, running as a GCC plugin.
This directory contains some analysis scripts. The setup is ad hoc: it is not well tested and is not integrated into the build system, so use it at your own risk.
General usage instructions:
* Run Linux. (It might work on OS X too.)
* Install Dehydra, as per https://developer.mozilla.org/En/Dehydra/Installing_Dehydra
* Build 0 A.D. from build/workspaces/gcc:
export CXX="$HOME/gcc-dehydra/installed/bin/g++ -fplugin=$HOME/gcc-dehydra/dehydra/gcc_treehydra.so -fplugin-arg=../../dehydra/printf-type-check.js -DCONFIG_DEHYDRA=1"
make
# (or "make test -j3 -k" to build both the engine and the tests, compiling 3 files in parallel and continuing past errors)
* Wait (it's quite slow) and look for the new compiler warnings/errors.
The "tests" directory doesn't actually contain any proper tests, just some example files and expected outputs for rough sanity checking.

View File

@ -0,0 +1,239 @@
// This script attempts to check argument types of printf/wprintf/scanf/wscanf style functions.
// Mostly it's similar to built-in GCC warning functionality, but with the benefit that it can check
// wchar_t* format strings too.
//
// (This is somewhat duplicating the functionality of https://bug493996.bugzilla.mozilla.org/attachment.cgi?id=388700)
include('treehydra.js');
include('gcc_compat.js');
include('gcc_util.js');
include('gcc_print.js');
// Extract the text of a string-literal expression.
// Any number of NOP_EXPR wrappers are unwrapped first; the remaining node must
// be an ADDR_EXPR whose operand is a STRING_CST. Returns the literal's string,
// or undefined for any other kind of expression.
function get_string_constant(expr) {
    var node = expr;

    // Peel off NOP_EXPR wrappers until the underlying expression is reached
    while (TREE_CODE(node) == NOP_EXPR)
        node = node.operands()[0];

    if (TREE_CODE(node) != ADDR_EXPR)
        return undefined;

    var target = node.operands()[0];
    if (TREE_CODE(target) != STRING_CST)
        return undefined;

    return target.string.str;
}
// Report whether the function declaration decl takes variable arguments.
// The parameter type list of a non-vararg function ends with a VOID_TYPE
// sentinel, so a function is vararg exactly when no such entry is found.
function is_vararg(decl) {
    var param_types = TYPE_ARG_TYPES(TREE_TYPE(decl));
    var found_void_sentinel = false;

    for (var node in flatten_chain(param_types)) {
        if (TREE_CODE(TREE_VALUE(node)) == VOID_TYPE) {
            found_void_sentinel = true;
            break;
        }
    }

    return !found_void_sentinel;
}
// Work out whether decl is a printf/scanf-style function and how to check it.
// Returns [ctype, functype, string-index, first-to-check], where ctype is
// 'a' (ascii) or 'w' (wide) and functype is 'printf' or 'scanf'; returns
// undefined when the function is not vararg or should not be checked.
// loc is a function returning the location to attach to diagnostics; it is
// only called when a diagnostic is actually emitted.
function find_printf_type(decl, loc) {
    if (! is_vararg(decl))
        return;

    var decl_attrs = translate_attributes(DECL_ATTRIBUTES(decl)); // 'user' attrs are here
    var type_attrs = translate_attributes(TYPE_ATTRIBUTES(TREE_TYPE(decl))); // 'format' attrs are here
    var attrs = decl_attrs.concat(type_attrs);

    for (var i = 0; i < attrs.length; ++i) {
        var attr = attrs[i];

        if (attr.name == 'format') {
            var kind = attr.value[0];
            var start = attr.value[1];
            var first = attr.value[2];
            if (kind == 'printf')
                return ['a', 'printf', start, first];
            if (kind == 'scanf')
                return ['a', 'scanf', start, first];
            // Keep scanning the remaining attributes after reporting this one
            error('Unrecognised format attribute type "'+kind+'"', loc());
        } else if (attr.name == 'user' && attr.value[0].match(/^format/)) {
            // The attribute string looks like "format, a, printf, 1, 2";
            // drop the leading "format" field and keep the other four
            var fields = attr.value[0].split(/,\s*/).slice(1);
            var first_to_check = fields[3];
            if (first_to_check == '+1')
                first_to_check = (+fields[2]) + 1; // bit ugly, but lets our macros work easily
            return [fields[0], fields[1], fields[2], first_to_check];
        }
    }

    var name = decl_name(decl);

    // Special cases for functions we use and aren't declared with attributes.
    // The table is rebuilt on each call so callers always get a fresh array.
    var known_functions = {
        'sscanf':         ['a', 'scanf', 2, 3],
        'swscanf':        ['w', 'scanf', 2, 3],
        'snprintf':       ['a', 'printf', 3, 4],
        'wprintf':        ['w', 'printf', 1, 2],
        'fwprintf':       ['w', 'printf', 2, 3],
        'swprintf':       ['w', 'printf', 3, 4],
        'JS_ReportError': ['a', 'printf', 2, 3]
    };
    // Own-property check so names like "toString" never hit the prototype
    if (Object.prototype.hasOwnProperty.call(known_functions, name))
        return known_functions[name];

    // Ignore vararg functions that we know aren't using normal format strings
    if (name.match(/^(__builtin_va_start|execlp|open|fcntl|ioctl|sem_open|h_alloc|sys_wopen|ogl_HaveExtensions|JS_ConvertArguments)$/))
        return;

    warning('Ignoring unannotated vararg function "'+name+'"', loc());
}
// Check a single format specifier against the type name of its argument.
//
//   ctype    - 'a' for char format strings, 'w' for wchar_t format strings
//   functype - 'printf' or 'scanf'
//   fmt      - one conversion specification, e.g. "%-5ld"
//   arg      - the argument's type name with typedefs stripped, e.g. "const char*"
//   loc      - function returning the location to attach to diagnostics
//
// Returns true when the argument type is acceptable and false when it is not.
// When the specifier itself cannot be parsed or is not handled here, an error
// is reported and true is returned, so the caller does not add a second,
// misleading "invalid argument type" error.
//
// Signedness of integer arguments is deliberately not distinguished, for
// both printf (values) and scanf (pointers).
function compare_format_type(ctype, functype, fmt, arg, loc) {
    var m, len, spec;

    if (functype == 'printf') {
        m = fmt.match(/^%([-+ #0]*)(\*|\d+)?(\.\*|\.-?\d+)?(hh|h|ll|l|j|z|t|L)?([diouxXfFeEgGaAcspn%])$/);
        if (m) {
            len = m[4] || '';
            spec = m[5];
        }
    } else if (functype == 'scanf') {
        m = fmt.match(/^%(\*?)(\d*)(hh|h|ll|l|j|z|t|L)?([diouxaefgcs[pn%])$/);
        if (m) {
            len = m[3] || '';
            spec = m[4];
        }
    } else {
        error('Internal error: unknown functype '+functype, loc());
        return true;
    }

    if (! spec) {
        error('Invalid format specifier "'+fmt+'"', loc());
        return true;
    }

    // Length modifier plus conversion character, e.g. "ld" or "hs"
    var t = len+spec;

    if (ctype == 'w' && t == 's')
        error('Non-portable %s used in wprintf-style function', loc());
    if (ctype == 'a' && t == 'hs')
        error('Illegal %hs used in printf-style function', loc());

    if (functype == 'printf') {
        // char and short arguments are promoted to int when passed through
        // "...", so the h/hh modifiers still expect an int-sized argument
        if (/^(hh|h)?[diouxX]$/.test(t) || t == 'c')
            return (arg == 'int' || arg == 'unsigned int');
        if (t == 'lc')
            return (arg == 'int' || arg == 'unsigned int'); // spec says wint_t
        if (/^l[diouxX]$/.test(t))
            return (arg == 'long int' || arg == 'long unsigned int');
        if (/^ll[diouxX]$/.test(t))
            return (arg == 'long long int' || arg == 'long long unsigned int');
        // The l modifier has no effect on floating-point conversions
        if (/^l?[fFeEgGaA]$/.test(t))
            return (arg == 'double');
        if (/^L[fFeEgGaA]$/.test(t))
            return (arg == 'long double');
        if (t == 'p')
            return /\*$/.test(arg);
        // ... (j, z, t and %n are not handled yet and are reported below)
    } else {
        if (/^[dioux]$/.test(t))
            return (arg == 'int*' || arg == 'unsigned int*');
        if (/^l[dioux]$/.test(t))
            return (arg == 'long int*' || arg == 'long unsigned int*');
        if (/^z[dioux]$/.test(t))
            return (arg == 'long unsigned int*' || arg == 'long int*'); // spec says size_t*
        if (/^[c[]$/.test(t))
            return (arg == 'char*' || arg == 'unsigned char*');
        if (/^l[c[]$/.test(t))
            return (arg == 'wchar_t*');
        if (/^[aefg]$/.test(t))
            return (arg == 'float*');
        if (/^l[aefg]$/.test(t))
            return (arg == 'double*');
        // ...
    }

    // String conversions are shared between printf and scanf
    if (/^h?s$/.test(t))
        return /^(const )?(unsigned )?char\*$/.test(arg);
    if (t == 'ls')
        return /^(const )?(unsigned )?wchar_t\*$/.test(arg);

    error('Unrecognized format specifier "'+fmt+'"', loc());
    return true;
}
// Check every conversion in fmt_string against the call's argument types.
//
//   ctype          - 'a' for char format strings, 'w' for wchar_t format strings
//   functype       - 'printf' or 'scanf'
//   fmt_string     - the literal format string
//   arg_type_names - type names of the arguments that follow the format
//                    string, in call order (not modified)
//   loc            - function returning the location to attach to diagnostics
//
// Reports an error if the number of arguments the format string consumes
// differs from the number supplied, and otherwise an error for each argument
// whose type does not fit its specifier. "%%" consumes no argument.
//
// '*' is handled according to the function family:
//   printf: each '*' field width or precision consumes one extra int argument
//           before the converted value;
//   scanf:  '*' suppresses assignment, so the conversion consumes no argument
//           (and its type is therefore not checked).
function check_arg_types(ctype, functype, fmt_string, arg_type_names, loc) {
    // Match a superset of printf and scanf format strings.
    // String.match with a /g regexp returns null when nothing matches.
    var fmt_types = fmt_string.match(/%([-+ #0*]*)(\*|\d+)?(\.\*|\.-?\d+)?(hh|h|ll|l|j|z|t|L)?(.)/g) || [];

    // First pass: work out how many arguments each specifier consumes
    var specs = [];
    var num_expected = 0;
    for (var i = 0; i < fmt_types.length; ++i) {
        var fmt_type = fmt_types[i];
        if (fmt_type == '%%')
            continue;

        // Count the '*'s between the leading '%' and the conversion character
        var stars = fmt_type.slice(1, -1).split('*').length - 1;
        var star_args = 0;
        var value_args = 1;
        if (functype == 'printf')
            star_args = stars;
        else if (functype == 'scanf' && stars > 0)
            value_args = 0;

        specs.push({ fmt: fmt_type, star_args: star_args, value_args: value_args });
        num_expected += star_args + value_args;
    }

    if (num_expected != arg_type_names.length) {
        error('Number of format string specifiers ('+num_expected+') != number of format arguments ('+arg_type_names.length+')', loc());
        return;
    }

    // Second pass: pair each specifier with its argument(s). An index is used
    // rather than shift() so the caller's array is left untouched.
    var next = 0;
    for (var j = 0; j < specs.length; ++j) {
        var spec = specs[j];

        for (var k = 0; k < spec.star_args; ++k) {
            var star_arg = arg_type_names[next++];
            // Signedness is not distinguished, matching compare_format_type
            if (star_arg != 'int' && star_arg != 'unsigned int')
                error('Invalid argument type "'+star_arg+'" for "*" width/precision in format specifier "'+spec.fmt+'"', loc());
        }

        if (spec.value_args) {
            var arg = arg_type_names[next++];
            if (! compare_format_type(ctype, functype, spec.fmt, arg, loc))
                error('Invalid argument type "'+arg+'" for format specifier "'+spec.fmt+'"', loc());
        }
    }
}
// Produce the printable name of a type with all typedef names resolved.
// Typedefs on the type itself are followed to the underlying type, and for
// pointer types the pointed-to type is resolved recursively as well.
// Pointer qualifiers are appended after the '*', e.g. "char* const".
function type_string_without_typedefs(type) {
    var resolved = type;

    // Follow the typedef chain for as long as the type is named by a
    // TYPE_DECL that records an original type
    for (;;) {
        var name_decl = TYPE_NAME(resolved);
        if (!name_decl || TREE_CODE(name_decl) != TYPE_DECL)
            break;
        var underlying = DECL_ORIGINAL_TYPE(name_decl);
        if (!underlying)
            break;
        resolved = underlying;
    }

    if (TREE_CODE(resolved) != POINTER_TYPE)
        return type_string(resolved);

    // Qualifier handling mirrors type_string(): volatile, restrict, const
    var suffix = '';
    if (TYPE_VOLATILE(resolved)) suffix += ' volatile';
    if (TYPE_RESTRICT(resolved)) suffix += ' restrict';
    if (TYPE_READONLY(resolved)) suffix += ' const';

    return type_string_without_typedefs(TREE_TYPE(resolved)) + '*' + suffix;
}
// Walk the saved body of fndecl and type-check every call to a function
// that find_printf_type recognises as printf/scanf-style.
function walk_printfs(fndecl) {
    // Visitor handed to walk_tree; always returns true.
    function visit_node(node, ancestors) {
        // Find the best available source location for diagnostics: the node
        // itself, else its nearest ancestor that has one, else the function body.
        function resolve_location() {
            var where = location_of(node);
            for (var depth = ancestors.length - 1; !where && depth >= 0; --depth)
                where = location_of(ancestors[depth]);
            return where || location_of(DECL_SAVED_TREE(fndecl));
        }

        if (TREE_CODE(node) != 'CALL_EXPR')
            return true;

        var callee = call_function_decl(node);
        if (! callee)
            return true;

        // The location resolver is passed uncalled; it is only invoked when
        // a diagnostic needs it
        var printf_type = find_printf_type(callee, resolve_location);
        if (! printf_type)
            return true;

        // printf_type holds 1-based argument positions, hence the "-1"s
        var fmt_string = get_string_constant(CALL_EXPR_ARG(node, printf_type[2]-1));
        if (typeof fmt_string == 'undefined') {
            warning('Non-constant format string argument - can\'t check types', resolve_location());
            return true;
        }

        var arg_type_names = [];
        for (var operand in call_arg_iterator(node))
            arg_type_names.push(type_string_without_typedefs(TREE_TYPE(operand)));

        // Only the arguments from the first-to-check position onwards are
        // matched against the format string
        var checked_args = arg_type_names.slice(printf_type[3]-1);
        check_arg_types(printf_type[0], printf_type[1], fmt_string, checked_args, resolve_location);
        return true;
    }

    walk_tree(DECL_SAVED_TREE(fndecl), visit_node);
}
// Entry point of the script: runs the printf/scanf argument check on the
// function declaration fndecl by passing it to walk_printfs.
// NOTE(review): nothing in this file calls this function; presumably it is the
// Treehydra callback invoked by the plugin for each function - confirm against
// the Dehydra documentation.
function process_cp_pre_genericize(fndecl) {
walk_printfs(fndecl);
}

View File

@ -0,0 +1,42 @@
#include <cstdio>
#include <cwchar>
extern void foo(const char*, ...) __attribute__((user("format, a, printf, 1, 2"))); // annotated via the custom user("format, ...") attribute string
extern void bar(const char*, ...) __attribute__((format(printf, 1, 2))); // annotated via GCC's format attribute
extern void baz(const char*, ...); // vararg, but carries no format annotation
extern void qux(const char*); // not a vararg function
int main() { // Sample calls for the printf type checker; trailing comments summarise the expected-output listing
const char* s = "%s";
char buf[256];
typedef int i32;
typedef unsigned char u8;
i32 n = 2;
printf("%d\n", 123); // no diagnostic expected
printf("%z\n", 123); // expected error: invalid format specifier "%z"
printf("%d\n", (unsigned long)123); // expected error: "long unsigned int" does not fit %d
printf("%lu\n", (long)123); // no diagnostic expected
printf("%d%%\n", 123.0); // expected error: "double" does not fit %d
printf("%d %+02.7x\n", 123, 456); // no diagnostic expected
foo("%s", 1); // expected error: "int" does not fit %s
foo("%s", n); // expected error: reported as "int" (typedef i32 is stripped)
foo("%d", (u8*)"x"); // expected error: reported as "unsigned char*" (typedef u8 is stripped)
foo("%s", (unsigned short)3); // expected error: argument type is reported as "int"
bar("%s", 1); // expected error: "int" does not fit %s
baz("%s", 1); // expected warning: unannotated vararg function is ignored
qux("%s"); // no diagnostic expected
bar("xyz\0%s"); // expected error: 1 specifier vs 0 arguments (the %s after the embedded \0 is still counted)
printf(s, "x"); // expected warning: non-constant format string
printf("%s", "x"); // no diagnostic expected
printf("%d%d", 1, n); // no diagnostic expected
printf("%s", L"x"); // expected error: "const wchar_t*" does not fit %s
sprintf(buf, "%s", "x"); // no diagnostic expected
sprintf(buf, "%s", L"x"); // NOTE(review): the expected output lists no diagnostic for this line - confirm whether sprintf calls are checked
wprintf(L"%s", "x"); // expected error: non-portable %s in a wprintf-style function
wprintf(L"%s", L"x"); // expected errors: non-portable %s, and "const wchar_t*" does not fit %s
wprintf(L"%hs", "x"); // no diagnostic expected
wprintf(L"%hs", L"x"); // expected error: "const wchar_t*" does not fit %hs
wprintf(L"%ls", "x"); // expected error: "const char*" does not fit %ls
wprintf(L"%ls", L"x"); // no diagnostic expected
}

View File

@ -0,0 +1,18 @@
tests/printf-type-check.cpp: In function ‘int main()’:
tests/printf-type-check.cpp:17: error: Invalid format specifier "%z"
tests/printf-type-check.cpp:18: error: Invalid argument type "long unsigned int" for format specifier "%d"
tests/printf-type-check.cpp:20: error: Invalid argument type "double" for format specifier "%d"
tests/printf-type-check.cpp:22: error: Invalid argument type "int" for format specifier "%s"
tests/printf-type-check.cpp:23: error: Invalid argument type "int" for format specifier "%s"
tests/printf-type-check.cpp:24: error: Invalid argument type "unsigned char*" for format specifier "%d"
tests/printf-type-check.cpp:25: error: Invalid argument type "int" for format specifier "%s"
tests/printf-type-check.cpp:26: error: Invalid argument type "int" for format specifier "%s"
tests/printf-type-check.cpp:27: warning: Ignoring unannotated vararg function "baz"
tests/printf-type-check.cpp:29: error: Number of format string specifiers (1) != number of format arguments (0)
tests/printf-type-check.cpp:30: warning: Non-constant format string argument - can't check types
tests/printf-type-check.cpp:33: error: Invalid argument type "const wchar_t*" for format specifier "%s"
tests/printf-type-check.cpp:36: error: Non-portable %s used in wprintf-style function
tests/printf-type-check.cpp:37: error: Non-portable %s used in wprintf-style function
tests/printf-type-check.cpp:37: error: Invalid argument type "const wchar_t*" for format specifier "%s"
tests/printf-type-check.cpp:39: error: Invalid argument type "const wchar_t*" for format specifier "%hs"
tests/printf-type-check.cpp:40: error: Invalid argument type "const char*" for format specifier "%ls"