An initial Dehydra-based static analysis script for type-checking printf-style functions
This was SVN commit r7175.
This commit is contained in:
parent
3334c83ce0
commit
d7a057143d
18
build/dehydra/README.txt
Normal file
18
build/dehydra/README.txt
Normal file
@ -0,0 +1,18 @@
|
||||
Dehydra is a tool that allows custom static analysis of C++ code, with analysis code written in JavaScript, running as a GCC plugin.
|
||||
|
||||
This directory contains some analysis scripts. The setup is ad hoc, not well tested, and not integrated into the build system, so use at your own risk.
|
||||
|
||||
General usage instructions:
|
||||
|
||||
* Run Linux. (It might work on OS X too.)
|
||||
|
||||
* Install Dehydra, as per https://developer.mozilla.org/En/Dehydra/Installing_Dehydra
|
||||
|
||||
* Build 0 A.D. from build/workspaces/gcc:
|
||||
export CXX="$HOME/gcc-dehydra/installed/bin/g++ -fplugin=$HOME/gcc-dehydra/dehydra/gcc_treehydra.so -fplugin-arg=../../dehydra/printf-type-check.js -DCONFIG_DEHYDRA=1"
|
||||
make
|
||||
# (or "make test -j3 -k" to build the engine and tests and to do 3 files in parallel and continue past errors, etc)
|
||||
|
||||
* Wait (it's quite slow) and look for the new compiler warnings/errors.
|
||||
|
||||
The "tests" directory doesn't actually contain any proper tests, just some example files and expected outputs for rough sanity checking.
|
239
build/dehydra/printf-type-check.js
Normal file
239
build/dehydra/printf-type-check.js
Normal file
@ -0,0 +1,239 @@
|
||||
// This script attempts to check argument types of printf/wprintf/scanf/wscanf style functions.
|
||||
// Mostly it's similar to built-in GCC warning functionality, but with the benefit that it can check
|
||||
// wchar_t* format strings too.
|
||||
//
|
||||
// (This is somewhat duplicating the functionality of https://bug493996.bugzilla.mozilla.org/attachment.cgi?id=388700)
|
||||
|
||||
include('treehydra.js');
|
||||
include('gcc_compat.js');
|
||||
include('gcc_util.js');
|
||||
include('gcc_print.js');
|
||||
|
||||
// Resolve an expression to the text of the string literal it refers to.
// NOP_EXPR wrappers are looked through; the result is undefined when what remains
// is anything other than the address (ADDR_EXPR) of a STRING_CST.
function get_string_constant(expr) {
	var node = expr;
	while (TREE_CODE(node) == NOP_EXPR)
		node = node.operands()[0];

	if (TREE_CODE(node) != ADDR_EXPR)
		return undefined;

	var literal = node.operands()[0];
	return (TREE_CODE(literal) == STRING_CST) ? literal.string.str : undefined;
}
|
||||
|
||||
// Report whether a function declaration takes variable arguments.
// The parameter type list of a non-vararg function ends with a VOID_TYPE sentinel,
// so the function is vararg when no such entry is found.
function is_vararg(decl) {
	var has_void_sentinel = false;
	for (var param in flatten_chain(TYPE_ARG_TYPES(TREE_TYPE(decl)))) {
		if (TREE_CODE(TREE_VALUE(param)) == VOID_TYPE) {
			has_void_sentinel = true;
			break;
		}
	}
	return ! has_void_sentinel;
}
|
||||
|
||||
// Work out how a called function uses its format string.
// Returns [ctype, functype, string-index, first-to-check], where ctype is 'a' (ascii) or
// 'w' (wide) and functype is 'printf' or 'scanf'. Returns undefined when the function is
// not vararg, is on the known-harmless list, or cannot be classified (a warning is
// emitted in that last case).
// 'loc' is a callback; it is only invoked when a diagnostic has to be reported.
function find_printf_type(decl, loc) {
	if (! is_vararg(decl))
		return undefined;

	// 'user' attrs live on the decl, 'format' attrs on its type; decl attrs are scanned first
	var attrs = translate_attributes(DECL_ATTRIBUTES(decl))
		.concat(translate_attributes(TYPE_ATTRIBUTES(TREE_TYPE(decl))));

	for (var i = 0; i < attrs.length; ++i) {
		var attr = attrs[i];

		if (attr.name == 'format') {
			var kind = attr.value[0];
			if (kind == 'printf')
				return ['a', 'printf', attr.value[1], attr.value[2]];
			if (kind == 'scanf')
				return ['a', 'scanf', attr.value[1], attr.value[2]];
			// Report it, then keep looking at the remaining attributes
			error('Unrecognised format attribute type "'+kind+'"', loc());
			continue;
		}

		if (attr.name == 'user' && attr.value[0].match(/^format/)) {
			// Fields are "format, ctype, functype, string-index, first-to-check"
			var fields = attr.value[0].split(/,\s*/);
			var string_index = fields[3];
			var first_to_check = fields[4];
			if (first_to_check == '+1')
				first_to_check = (+string_index) + 1; // bit ugly, but lets our macros work easily
			return [fields[1], fields[2], string_index, first_to_check];
		}
	}

	var name = decl_name(decl);

	// Special cases for functions we use and aren't declared with attributes
	var unannotated = {
		'sscanf':         ['a', 'scanf',  2, 3],
		'swscanf':        ['w', 'scanf',  2, 3],
		'snprintf':       ['a', 'printf', 3, 4],
		'wprintf':        ['w', 'printf', 1, 2],
		'fwprintf':       ['w', 'printf', 2, 3],
		'swprintf':       ['w', 'printf', 3, 4],
		'JS_ReportError': ['a', 'printf', 2, 3]
	};
	// hasOwnProperty keeps names such as "toString" from hitting inherited properties
	if (Object.prototype.hasOwnProperty.call(unannotated, name))
		return unannotated[name].slice();

	// Ignore vararg functions that we know aren't using normal format strings
	if (name.match(/^(__builtin_va_start|execlp|open|fcntl|ioctl|sem_open|h_alloc|sys_wopen|ogl_HaveExtensions|JS_ConvertArguments)$/))
		return undefined;

	warning('Ignoring unannotated vararg function "'+name+'"', loc());
	return undefined;
}
|
||||
|
||||
// Check one format specifier against the type of the argument supplied for it.
//   ctype    - 'a' for char format strings, 'w' for wchar_t format strings
//   functype - 'printf' or 'scanf'
//   fmt      - a single specifier, e.g. "%d" or "%-5.2lf"
//   arg      - the argument's type name, e.g. "const char*" or "long unsigned int"
//   loc      - callback returning the location to attach to diagnostics
// Returns false when the caller should report a type mismatch, and true otherwise.
// Specifiers that are malformed, or valid but not handled here, are reported directly
// through error() and then treated as acceptable (true), so that the caller does not
// emit a second diagnostic for the same specifier.
function compare_format_type(ctype, functype, fmt, arg, loc) {
	var m, len, spec;

	if (functype == 'printf') {
		m = fmt.match(/^%([-+ #0]*)(\*|\d+)?(\.\*|\.-?\d+)?(hh|h|ll|l|j|z|t|L)?([diouxXfFeEgGaAcspn%])$/);
		if (m) {
			len = m[4] || '';
			spec = m[5];
		}
	} else if (functype == 'scanf') {
		// The upper-case conversions X, A, E, F, G are valid in scanf too and behave
		// like their lower-case forms
		m = fmt.match(/^%(\*?)(\d*)(hh|h|ll|l|j|z|t|L)?([diouxXaAeEfFgGcs\[pn%])$/);
		if (m) {
			len = m[3] || '';
			spec = m[4];
		}
	} else {
		error('Internal error: unknown functype '+functype, loc());
		return true;
	}

	if (! spec) {
		error('Invalid format specifier "'+fmt+'"', loc());
		return true;
	}

	// Length modifier plus conversion character, e.g. "ld" or "s"
	var t = len+spec;

	// These two are reported in addition to (not instead of) the type check below
	if (ctype == 'w' && t == 's')
		error('Non-portable %s used in wprintf-style function', loc());
	if (ctype == 'a' && t == 'hs')
		error('Illegal %hs used in printf-style function', loc());

	if (functype == 'printf') {
		if (/^[diouxXc]$/.test(t))
			return (arg == 'int' || arg == 'unsigned int');
		if (/^lc$/.test(t))
			return (arg == 'int' || arg == 'unsigned int'); // spec says wint_t
		if (/^l[diouxX]$/.test(t))
			return (arg == 'long int' || arg == 'long unsigned int');
		if (/^[fFeEgGaA]$/.test(t))
			return (arg == 'double');
		if (/^p$/.test(t))
			return /\*$/.test(arg);
		// ...
	} else if (functype == 'scanf') {
		// Signedness is not distinguished, matching the printf branch above; in
		// particular "%u" into an "unsigned int*" must not be reported
		if (/^[diouxX]$/.test(t))
			return (arg == 'int*' || arg == 'unsigned int*');
		if (/^l[diouxX]$/.test(t))
			return (arg == 'long int*' || arg == 'long unsigned int*');
		if (/^z[diouxX]$/.test(t))
			return (arg == 'long unsigned int*'); // spec says size_t*
		if (/^[c\[]$/.test(t))
			return (arg == 'char*' || arg == 'unsigned char*');
		if (/^l[c\[]$/.test(t))
			return (arg == 'wchar_t*');
		if (/^[aAeEfFgG]$/.test(t))
			return (arg == 'float*');
		if (/^l[aAeEfFgG]$/.test(t))
			return (arg == 'double*');
		// ...
	}

	// String conversions are checked the same way for printf and scanf
	if (/^h?s$/.test(t))
		return /^(const )?(unsigned )?char\*$/.test(arg);
	if (/^ls$/.test(t))
		return /^(const )?(unsigned )?wchar_t\*$/.test(arg);

	error('Unrecognized format specifier "'+fmt+'"', loc());
	return true;
}
|
||||
|
||||
// Check the arguments of one printf/scanf-style call against its format string.
//   ctype          - 'a' or 'w', passed through to compare_format_type
//   functype       - 'printf' or 'scanf'
//   fmt_string     - the literal format string
//   arg_type_names - type names of the arguments that follow the format string, in call
//                    order (not modified)
//   loc            - callback returning the location to attach to diagnostics
// Reports through error() either a mismatch between the number of arguments the format
// string consumes and the number supplied, or each individual argument whose type does
// not fit its specifier. Returns nothing.
function check_arg_types(ctype, functype, fmt_string, arg_type_names, loc) {
	// Match a superset of printf and scanf format strings.
	// match() yields null when the string contains no specifier at all.
	var fmt_types = fmt_string.match(/%([-+ #0*]*)(\*|\d+)?(\.\*|\.-?\d+)?(hh|h|ll|l|j|z|t|L)?(.)/g) || [];

	// Expand the specifiers into the list of arguments they consume, in call order.
	// 'fmt' is what gets shown in diagnostics, 'check_as' is what the argument is checked against.
	var expected = [];
	for (var i = 0; i < fmt_types.length; ++i) {
		var fmt_type = fmt_types[i];
		if (fmt_type == '%%')
			continue;

		// Number of '*' before the conversion character (which is always the last character)
		var stars = fmt_type.slice(0, -1).split('*').length - 1;

		if (functype == 'scanf') {
			// In scanf '*' suppresses assignment, so such a specifier takes no argument
			if (stars == 0)
				expected.push({ fmt: fmt_type, check_as: fmt_type });
		} else {
			// In printf each '*' width/precision takes its own int argument, passed
			// before the value being formatted
			for (var s = 0; s < stars; ++s)
				expected.push({ fmt: '*', check_as: '%d' });
			expected.push({ fmt: fmt_type, check_as: fmt_type });
		}
	}

	if (expected.length != arg_type_names.length) {
		error('Number of format string specifiers ('+expected.length+') != number of format arguments ('+arg_type_names.length+')', loc());
		return;
	}

	for (var j = 0; j < expected.length; ++j) {
		var arg = arg_type_names[j];
		if (! compare_format_type(ctype, functype, expected[j].check_as, arg, loc))
			error('Invalid argument type "'+arg+'" for format specifier "'+expected[j].fmt+'"', loc());
	}
}
|
||||
|
||||
// Produce a printable name for a type with typedefs replaced by the types they alias,
// including typedefs reached through pointer types.
function type_string_without_typedefs(type) {
	// Follow typedef declarations back to the type each one was declared from
	for (;;) {
		var name = TYPE_NAME(type);
		if (! (name && TREE_CODE(name) == TYPE_DECL && DECL_ORIGINAL_TYPE(name)))
			break;
		type = DECL_ORIGINAL_TYPE(name);
	}

	// Anything that is not a pointer can be printed directly
	if (TREE_CODE(type) != POINTER_TYPE)
		return type_string(type);

	// Qualifiers of the pointer itself; this handling is copied from type_string()
	var pointer_quals = [];
	if (TYPE_VOLATILE(type))
		pointer_quals.push('volatile');
	if (TYPE_RESTRICT(type))
		pointer_quals.push('restrict');
	if (TYPE_READONLY(type))
		pointer_quals.push('const');

	var qual_suffix = '';
	if (pointer_quals.length)
		qual_suffix = ' ' + pointer_quals.join(' ');

	// Strip typedefs from the pointed-to type recursively
	var pointee = type_string_without_typedefs(TREE_TYPE(type));
	return pointee + '*' + qual_suffix;
}
|
||||
|
||||
// Walk the saved tree of a function and type-check the arguments of every call to a
// recognised printf/scanf-style function found in it.
function walk_printfs(fndecl) {
	// Visitor handed to walk_tree; always returns true.
	function tree_walker(t, stack) {
		// Location for diagnostics about t: t's own location if it has one, otherwise the
		// first one found scanning the stack from its last entry backwards, otherwise
		// that of the function's saved tree.
		function getLocation() {
			var own = location_of(t);
			if (own)
				return own;
			for (var depth = stack.length; depth-- > 0; ) {
				var outer = location_of(stack[depth]);
				if (outer)
					return outer;
			}
			return location_of(DECL_SAVED_TREE(fndecl));
		}

		if (TREE_CODE(t) != 'CALL_EXPR')
			return true;

		var callee = call_function_decl(t);
		if (! callee)
			return true;

		// [ctype, functype, string-index, first-to-check]; the two indices are 1-based
		var format_info = find_printf_type(callee, getLocation);
		if (! format_info)
			return true;

		var fmt_string = get_string_constant(CALL_EXPR_ARG(t, format_info[2]-1));
		if (typeof fmt_string == 'undefined') {
			warning('Non-constant format string argument - can\'t check types', getLocation());
			return true;
		}

		// Collect the typedef-free type name of every argument of the call
		var all_arg_types = [];
		for (var operand in call_arg_iterator(t))
			all_arg_types.push(type_string_without_typedefs(TREE_TYPE(operand)));

		// Only the arguments from first-to-check onwards belong to the format string
		var checked_arg_types = all_arg_types.slice(format_info[3]-1);
		check_arg_types(format_info[0], format_info[1], fmt_string, checked_arg_types, getLocation);
		return true;
	}

	walk_tree(DECL_SAVED_TREE(fndecl), tree_walker);
}
|
||||
|
||||
// Analysis entry point: runs the printf/scanf argument check over one function declaration.
// NOTE(review): presumably invoked by Treehydra once per function - confirm against the Treehydra docs.
function process_cp_pre_genericize(fndecl) {
	walk_printfs(fndecl);
}
|
42
build/dehydra/tests/printf-type-check.cpp
Normal file
42
build/dehydra/tests/printf-type-check.cpp
Normal file
@ -0,0 +1,42 @@
|
||||
#include <cstdio>
|
||||
#include <cwchar>
|
||||
|
||||
extern void foo(const char*, ...) __attribute__((user("format, a, printf, 1, 2")));
|
||||
extern void bar(const char*, ...) __attribute__((format(printf, 1, 2)));
|
||||
extern void baz(const char*, ...);
|
||||
extern void qux(const char*);
|
||||
|
||||
int main() { // fixture: diagnostics are matched by line number, so statements must stay on their lines
|
||||
const char* s = "%s";
|
||||
char buf[256];
|
||||
typedef int i32;
|
||||
typedef unsigned char u8;
|
||||
|
||||
|
||||
i32 n = 2;
|
||||
printf("%d\n", 123);
|
||||
printf("%z\n", 123); // expected: error, invalid format specifier "%z"
|
||||
printf("%d\n", (unsigned long)123); // expected: error, "long unsigned int" passed for "%d"
|
||||
printf("%lu\n", (long)123);
|
||||
printf("%d%%\n", 123.0); // expected: error, "double" passed for "%d"
|
||||
printf("%d %+02.7x\n", 123, 456);
|
||||
foo("%s", 1); // expected: error, "int" passed for "%s" (function annotated via user attribute)
|
||||
foo("%s", n); // expected: error, reported as "int" rather than the typedef name
|
||||
foo("%d", (u8*)"x"); // expected: error, "unsigned char*" passed for "%d"
|
||||
foo("%s", (unsigned short)3); // expected: error, argument type is reported as "int"
|
||||
bar("%s", 1); // expected: error, "int" passed for "%s" (function annotated via format attribute)
|
||||
baz("%s", 1); // expected: warning, unannotated vararg function "baz" is ignored
|
||||
qux("%s");
|
||||
bar("xyz\0%s"); // expected: error, 1 specifier but 0 arguments
|
||||
printf(s, "x"); // expected: warning, non-constant format string cannot be checked
|
||||
printf("%s", "x");
|
||||
printf("%d%d", 1, n);
|
||||
printf("%s", L"x"); // expected: error, "const wchar_t*" passed for "%s"
|
||||
sprintf(buf, "%s", "x");
|
||||
sprintf(buf, "%s", L"x");
|
||||
wprintf(L"%s", "x"); // expected: error, non-portable %s in wprintf-style function
|
||||
wprintf(L"%s", L"x"); // expected: two errors, non-portable %s and "const wchar_t*" passed for "%s"
|
||||
wprintf(L"%hs", "x");
|
||||
wprintf(L"%hs", L"x"); // expected: error, "const wchar_t*" passed for "%hs"
|
||||
wprintf(L"%ls", "x"); // expected: error, "const char*" passed for "%ls"
|
||||
wprintf(L"%ls", L"x");
|
||||
|
||||
}
|
18
build/dehydra/tests/printf-type-check.txt
Normal file
18
build/dehydra/tests/printf-type-check.txt
Normal file
@ -0,0 +1,18 @@
|
||||
tests/printf-type-check.cpp: In function ‘int main()’:
|
||||
tests/printf-type-check.cpp:17: error: Invalid format specifier "%z"
|
||||
tests/printf-type-check.cpp:18: error: Invalid argument type "long unsigned int" for format specifier "%d"
|
||||
tests/printf-type-check.cpp:20: error: Invalid argument type "double" for format specifier "%d"
|
||||
tests/printf-type-check.cpp:22: error: Invalid argument type "int" for format specifier "%s"
|
||||
tests/printf-type-check.cpp:23: error: Invalid argument type "int" for format specifier "%s"
|
||||
tests/printf-type-check.cpp:24: error: Invalid argument type "unsigned char*" for format specifier "%d"
|
||||
tests/printf-type-check.cpp:25: error: Invalid argument type "int" for format specifier "%s"
|
||||
tests/printf-type-check.cpp:26: error: Invalid argument type "int" for format specifier "%s"
|
||||
tests/printf-type-check.cpp:27: warning: Ignoring unannotated vararg function "baz"
|
||||
tests/printf-type-check.cpp:29: error: Number of format string specifiers (1) != number of format arguments (0)
|
||||
tests/printf-type-check.cpp:30: warning: Non-constant format string argument - can't check types
|
||||
tests/printf-type-check.cpp:33: error: Invalid argument type "const wchar_t*" for format specifier "%s"
|
||||
tests/printf-type-check.cpp:36: error: Non-portable %s used in wprintf-style function
|
||||
tests/printf-type-check.cpp:37: error: Non-portable %s used in wprintf-style function
|
||||
tests/printf-type-check.cpp:37: error: Invalid argument type "const wchar_t*" for format specifier "%s"
|
||||
tests/printf-type-check.cpp:39: error: Invalid argument type "const wchar_t*" for format specifier "%hs"
|
||||
tests/printf-type-check.cpp:40: error: Invalid argument type "const char*" for format specifier "%ls"
|
Loading…
Reference in New Issue
Block a user