We looked at several examples in class.
MemCheck for memory analyses
Uninitialized values:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char** argv) {
int* a = malloc(10 * sizeof(int));
a[5] = 0;
if (a[argc])
printf("xx\n");
return 0;
}
clang -g vg1.c
valgrind ./a.out
Allocated Buffer Overflows:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char** argv){
int i;
int *a = malloc(sizeof(int) * 10);
if (!a) return -1; /*malloc failed*/
for (i = 0; i < 11; i++){
a[i] = i;
}
free(a);
return 0;
}
clang -g vg2.c
valgrind ./a.out
Memory Leaks:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char** argv){
int i;
int *a;
for (i=0; i < 10; i++){
a = malloc(sizeof(int) * 100);
}
free(a);
return 0;
}
clang -g vg3.c
valgrind ./a.out
valgrind --leak-check=full ./a.out
Data Races:
#include <pthread.h>
#include <stdio.h>
int global;
void *
thread1(void *x) {
global++;
return NULL;
}
void *
thread2(void *x) {
global--;
return NULL;
}
int
main() {
for (unsigned i = 0; i < 30; ++i) {
pthread_t t[2];
pthread_create(&t[0], NULL, thread1, NULL);
pthread_create(&t[1], NULL, thread2, NULL);
pthread_join(t[0], NULL);
pthread_join(t[1], NULL);
printf("Global was: %d\n", global);
}
return 0;
}
clang race.c -g -lpthread -o race
valgrind --tool=helgrind ./race
valgrind --tool=drd ./race
?????:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char** argv){
int i;
int x =0;
int a[10];
for (i = 0; i < 11; i++)
a[i] = i;
printf("x is %d\n", x);
return 0;
}
clang -g vg4.c
valgrind ./a.out
It can't handle everything!
?????:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char** argv){
char *str = malloc(10);
gets(str);
printf("%s\n",str);
free(str);
return 0;
}
clang -g vg5.c
echo abcabc | valgrind ./a.out
echo abcabcabca | valgrind ./a.out
It can't always handle everything!
You can also track memory usage and reduce memory allocation. (Examples derived from the ACCU)
#include <chrono>
#include <memory>
#include <vector>
#include <thread>
const size_t DATA_SIZE = 100;
const size_t BLOCK_SIZE = 1000000;
int
main() {
std::vector<std::unique_ptr<long[]>> data{DATA_SIZE};
for (auto &element : data) {
element = std::make_unique<long[]>(BLOCK_SIZE);
// do something with element
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
std::this_thread::sleep_for(std::chrono::seconds(1));
return 0;
}
g++ -g -std=c++14 mem1.cpp
valgrind --time-unit=ms --tool=massif ./a.out
massif-visualizer massif.out.PID
#include <chrono>
#include <memory>
#include <vector>
#include <thread>
const size_t DATA_SIZE = 100;
const size_t BLOCK_SIZE = 1000000;
int
main() {
std::vector<std::unique_ptr<long[]>> data{DATA_SIZE};
for (auto &element : data) {
element = std::make_unique<long[]>(BLOCK_SIZE);
// do something with element
std::this_thread::sleep_for(std::chrono::milliseconds(100));
element.reset();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
std::this_thread::sleep_for(std::chrono::seconds(1));
return 0;
}
g++ -g -std=c++14 mem2.cpp
valgrind --time-unit=ms --tool=massif ./a.out
massif-visualizer massif.out.PID
Originally, the sanitizer infrastructure was developed to work within the
clang compiler built on top of
LLVM.
Thankfully, these options are now available for gcc
as well.
Just replace clang
in any of the invocations below with gcc
.
MemorySanitizer
clang -g -fsanitize=memory -fno-omit-frame-pointer vg1.c
AddressSanitizer
clang -g -fsanitize=address -fno-omit-frame-pointer vg2.c
clang -g -fsanitize=address -fno-omit-frame-pointer vg3.c
clang -g -fsanitize=address -fno-omit-frame-pointer vg4.c
clang -g -fsanitize=address -fno-omit-frame-pointer vg5.c
Extracting symbolic information in the reports can require slightly more system configuration.
Thread Sanitizer
clang race.c -fsanitize=thread -g -lpthread -o race
Undefined Behavior
#include <limits.h>
int main (void) {
return INT_MIN / -1;
}
clang -g -fsanitize=undefined ub1.c
#include <limits.h>
int main (void) {
return -INT_MIN;
}
clang -g -fsanitize=undefined ub2.c
int main (void) {
return 0xffff << 16;
}
clang -g -fsanitize=undefined ub3.c
Simply prepend scan-build before the normal build commands:
scan-build clang -g vg5.c
scan-build clang -g vg4.c
scan-build ./configure
scan-build make
java -jar $FINDBUGS_HOME/lib/findbugs.jar
Simply add the jar and/or class files through the GUI interface.
For a simple comparison with the clang static analyzer:
scan-build vg1.c
cbmc --bounds-check --pointer-check vg1.c
cbmc --bounds-check --pointer-check vg4.c
And showing the pitfalls of loops:
cbmc binsearch.c --function binsearch --bounds-check
cbmc binsearch.c --function binsearch --unwind 6 --bounds-check
cbmc lock-example.c
cbmc lock-example.c --unwind 2
See also LLBMC for bounded model checking of programs compiled by clang/LLVM.
Dynamic analysis tools usually have some mechanism of error suppression. Users can specify suppression criteria, and any warning that matches these criteria can be prevented.
You can generate suppression criteria for all warnings in an execution via the
--gen-suppressions=all
option and capture the output using --log-file
:
valgrind --gen-suppressions=all --log-file=suppressionfile ./a.out
Edit this file to match only the warnings you recognize as false positives.
You can then use the suppression file to avoid warnings in the future.
valgrind --suppressions=suppressionfile ./a.out
All Clang Sanitizers support special case lists / blacklists of errors that
ought to be suppressed. Given a particular suppression file, the suppressions
are used at compile time via the -fsanitize-blacklist=
option:
clang -g -fsanitize=address -fsanitize-blacklist=blacklist.txt vg2.c
More information is available here.
The static analyzer learns about potential bugs based on the the analyzed code. Thus, to avoid false positives, you can either add assertions to the code or remove unnecessary statements that imply potentially bad behavior. In more extreme cases, annotations may be helpful.
A more complete description can be found here.
American Fuzzy Lop, or AFL, uses compile time instrumentation to perform more intelligent fuzzing than simple black box testing would allow. In order to start using AFL, you first need to download the tool, build it, and configure your environment:
1
2
3
4
5
git clone https://github.com/AFLplusplus/AFLplusplus
cd AFL<TAB>
make source-only
export PATH=$PATH:`pwd`
export AFL_PATH=`pwd`
And enable core dumps via:
1
echo core > /proc/sys/kernel/core_pattern
or on Ubuntu:
1
sudo service apport stop
Once you have done this once, you can just configure your environment again using the last three lines the next time that you log into your machine.
This particular example comes from a blog post
#include <assert.h>
#define H 7
#define W 11
char maze[H][W] = { "+-+---+---+",
"| | |#|",
"| | --+ | |",
"| | | | |",
"| +-- | | |",
"| | |",
"+-----+---+" };
int
runMaze(char *program, unsigned size) {
int x, y;
int ox, oy;
int i = 0;
x = 1;
y = 1;
maze[y][x]='X';
while (i < size) {
ox = x;
oy = y;
switch (program[i]) {
case 'w':
y--;
break;
case 's':
y++;
break;
case 'a':
x--;
break;
case 'd':
x++;
break;
default:
printf("Wrong command!(only w,s,a,d accepted!)\n");
printf("You lose!\n");
return -1;
}
if (maze[y][x] == '#') {
printf ("You win!\n");
printf ("Your solution \n",program);
return 1;
}
if (maze[y][x] != ' '
&& !((y == 2 && maze[y][x] == '|' && x > 0 && x < W))) {
x = ox;
y = oy;
}
if (ox==x && oy==y){
printf("You lose\n");
return -2;
}
maze[y][x]='X';
i++;
}
printf("You lose\n");
return 0;
}
int
main(int argc, char *argv[]) {
#define ITERS 28
char program[ITERS];
read(0, program, ITERS);
int result = runMaze(program, ITERS);
assert(1 != result);
return 0;
}
1
2
3
4
afl-gcc afl.c
mkdir in out
echo ssssdwdsd > in/seed
AFL_SKIP_CPUFREQ=1 afl-fuzz -o out -i in -- ./a.out
A more complicated/realistic project will require you to integrate the afl-fuzz
compiler into the build system of the project itself.For instance, the following
will fuzz test indent
using its regression
tests as input seeds.
1
2
3
4
5
CC=afl-gcc CXX=afl-g++ ./configure --disable-shared
AFL_HARDEN=1 make clean all
mkdir in out
cp regression/*.c in/
AFL_SKIP_CPUFREQ=1 afl-fuzz -i in/ -o out/ -t 10000 src/indent -st @@
Which in turn produces output like:
These examples assume that you are using the Docker image for KLEE, available here. The examples are drawn from the original KLEE tutorials as well as blog posts that walk through them in greater detail. You can start an instance of the KLEE Docker image with one of your paths mapped/mounted inside the instance via:
docker run -ti --name=<instance name> --ulimit='stack=-1:-1' -v <path/in/filesystem>:</path/in/instance> klee/klee
In this case, we are testing the get_sign()
function, and we include a
main()
that acts as a test driver
#include <klee/klee.h>
int get_sign(int x) {
if (x == 0)
return 0;
if (x < 0)
return -1;
else
return 1;
}
int main() {
int a;
klee_make_symbolic(&a, sizeof(a), "a");
return get_sign(a);
}
1
2
3
4
5
clang -c -g -emit-llvm -I ~/klee_src/include/ klee1.c -o klee1.bc
klee klee1.bc
ktest-tool klee-last/test000001.ktest
ktest-tool klee-last/test000002.ktest
ktest-tool klee-last/test000003.ktest
#include <klee/klee.h>
#define H 7
#define W 11
char maze[H][W] = { "+-+---+---+",
"| | |#|",
"| | --+ | |",
"| | | | |",
"| +-- | | |",
"| | |",
"+-----+---+" };
void
draw() {
int i, j;
for (i = 0; i < H; i++) {
for (j = 0; j < W; j++) {
printf("%c", maze[i][j]);
}
printf("\n");
}
printf("\n");
}
int
runMaze(char *program, unsigned size) {
int x, y;
int ox, oy;
int i = 0;
x = 1;
y = 1;
maze[y][x]='X';
while (i < size) {
ox = x;
oy = y;
switch (program[i]) {
case 'w':
y--;
break;
case 's':
y++;
break;
case 'a':
x--;
break;
case 'd':
x++;
break;
default:
printf("Wrong command!(only w,s,a,d accepted!)\n");
printf("You lose!\n");
return -1;
}
if (maze[y][x] == '#') {
printf ("You win!\n");
printf ("Your solution \n",program);
return 1;
}
if (maze[y][x] != ' '
&& !((y == 2 && maze[y][x] == '|' && x > 0 && x < W))) {
x = ox;
y = oy;
}
if (ox==x && oy==y){
printf("You lose\n");
return -2;
}
maze[y][x]='X';
draw();
i++;
sleep(1);
}
printf("You lose\n");
return 0;
}
int
main(int argc, char *argv[]) {
#define ITERS 28
char program[ITERS];
draw();
//read(0, program, ITERS);
klee_make_symbolic(program,ITERS,"program");
int result = runMaze(program, ITERS);
klee_assert(1 != result);
return 0;
}
1
2
3
clang -c -g -emit-llvm -I ~/klee_src/include/ klee2.c -o klee2.bc
klee -emit-all-errors klee2.bc
ls klee-last/*.err