Program Analysis Tools

We looked at several examples in class.

Dynamic Techniques

Valgrind

MemCheck for memory analyses
Uninitialized values:

vg1.c
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char** argv) {
  int* a = malloc(10 * sizeof(int));
  a[5] = 0;
  if (a[argc])
    printf("xx\n");
  return 0;
}

clang -g vg1.c
valgrind ./a.out

Allocated Buffer Overflows:

vg2.c
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char** argv){
  int i;
  int *a = malloc(sizeof(int) * 10);
  if (!a) return -1; /*malloc failed*/
  for (i = 0; i < 11; i++){
    a[i] = i;
  }
  free(a);
  return 0;
}

clang -g vg2.c
valgrind ./a.out

Memory Leaks:

vg3.c
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char** argv){
  int i;
  int *a;

  for (i=0; i < 10; i++){
    a = malloc(sizeof(int) * 100);
  }
  free(a);
  return 0;
}

clang -g vg3.c
valgrind ./a.out
valgrind --leak-check=full ./a.out

Data Races:

race.c
#include <pthread.h>
#include <stdio.h>

int global;

void *
thread1(void *x) {
  global++;
  return NULL;
}

void *
thread2(void *x) {
  global--;
  return NULL;
}

int
main() {
  for (unsigned i = 0; i < 30; ++i) {
    pthread_t t[2];
    pthread_create(&t[0], NULL, thread1, NULL);
    pthread_create(&t[1], NULL, thread2, NULL);
    pthread_join(t[0], NULL);
    pthread_join(t[1], NULL);
    printf("Global was: %d\n", global);
  }
  return 0;
}

clang race.c -g -lpthread -o race
valgrind --tool=helgrind ./race
valgrind --tool=drd ./race

?????:

vg4.c
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char** argv){
  int i;
  int x =0;
  int a[10];
  for (i = 0; i < 11; i++)
    a[i] = i;
    
  printf("x is %d\n", x);
  return 0;
}

clang -g vg4.c
valgrind ./a.out
It can't handle everything!

?????:

vg5.c
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char** argv){
  char *str = malloc(10);
  gets(str);
  printf("%s\n",str);
  free(str);

  return 0;
}

clang -g vg5.c
echo abcabc | valgrind ./a.out
echo abcabcabca | valgrind ./a.out
It can't always handle everything!

You can also track memory usage and reduce memory allocation. (Examples derived from the ACCU)

mem1.cpp
#include <chrono>
#include <memory>
#include <vector>
#include <thread>

const size_t DATA_SIZE  = 100;
const size_t BLOCK_SIZE = 1000000;

int
main() {
  std::vector<std::unique_ptr<long[]>> data{DATA_SIZE};

  for (auto &element : data) {
    element = std::make_unique<long[]>(BLOCK_SIZE);
    // do something with element
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }

  std::this_thread::sleep_for(std::chrono::seconds(1));
  return 0;
}

g++ -g -std=c++14 mem1.cpp
valgrind --time-unit=ms --tool=massif ./a.out
massif-visualizer massif.out.PID

mem2.cpp
#include <chrono>
#include <memory>
#include <vector>
#include <thread>

const size_t DATA_SIZE  = 100;
const size_t BLOCK_SIZE = 1000000;

int
main() {
  std::vector<std::unique_ptr<long[]>> data{DATA_SIZE};

  for (auto &element : data) {
    element = std::make_unique<long[]>(BLOCK_SIZE);
    // do something with element
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    element.reset();
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }

  std::this_thread::sleep_for(std::chrono::seconds(1));
  return 0;
}

g++ -g -std=c++14 mem2.cpp
valgrind --time-unit=ms --tool=massif ./a.out
massif-visualizer massif.out.PID

Clang/GCC Sanitizers

Originally, the sanitizer infrastructure was developed to work within the clang compiler built on top of LLVM. Thankfully, these options are now available for gcc as well. Just replace clang in any of the invocations below with gcc.

MemorySanitizer
clang -g -fsanitize=memory -fno-omit-frame-pointer vg1.c

AddressSanitizer
clang -g -fsanitize=address -fno-omit-frame-pointer vg2.c
clang -g -fsanitize=address -fno-omit-frame-pointer vg3.c
clang -g -fsanitize=address -fno-omit-frame-pointer vg4.c
clang -g -fsanitize=address -fno-omit-frame-pointer vg5.c

Extracting symbolic information in the reports can require slightly more system configuration.

Thread Sanitizer
clang race.c -fsanitize=thread -g -lpthread -o race

Undefined Behavior

ub1.c
#include <limits.h>

int main (void) {
  return INT_MIN / -1;
}

clang -g -fsanitize=undefined ub1.c

ub2.c
#include <limits.h>

int main (void) {
  return -INT_MIN;
}

clang -g -fsanitize=undefined ub2.c

ub3.c
int main (void) {
  return 0xffff << 16;
}

clang -g -fsanitize=undefined ub3.c

Static Techniques

Clang Static Analyzer (scan-build)

Simply prepend scan-build before the normal build commands:
scan-build clang -g vg5.c
scan-build clang -g vg4.c

scan-build ./configure
scan-build make

Some example results

FindBugs

java -jar $FINDBUGS_HOME/lib/findbugs.jar
Simply add the jar and/or class files through the GUI interface.

CBMC

For a simple comparison with the clang static analyzer:
scan-build vg1.c
cbmc --bounds-check --pointer-check vg1.c
cbmc --bounds-check --pointer-check vg4.c

And showing the pitfalls of loops:
cbmc binsearch.c --function binsearch --bounds-check
cbmc binsearch.c --function binsearch --unwind 6 --bounds-check

cbmc lock-example.c
cbmc lock-example.c --unwind 2

See also LLBMC for bounded model checking of programs compiled by clang/LLVM.

Managing False Positives

Suppression

Dynamic analysis tools usually have some mechanism of error suppression. Users can specify suppression criteria, and any warning that matches these criteria can be prevented.

In Valgrind

You can generate suppression criteria for all warnings in an execution via the --gen-suppressions=all option and capture the output using --log-file:
valgrind --gen-suppressions=all --log-file=suppressionfile ./a.out

Edit this file to match only the warnings you recognize as false positives. You can then use the suppression file to avoid warnings in the future.
valgrind --suppressions=suppressionfile ./a.out

In Clang Sanitizers

All Clang Sanitizers support special case lists / blacklists of errors that ought to be suppressed. Given a particular suppression file, the suppressions are used at compile time via the -fsanitize-blacklist= option:
clang -g -fsanitize=address -fsanitize-blacklist=blacklist.txt vg2.c

More information is available here.

In Clang Static Analyzer

The static analyzer learns about potential bugs based on the the analyzed code. Thus, to avoid false positives, you can either add assertions to the code or remove unnecessary statements that imply potentially bad behavior. In more extreme cases, annotations may be helpful.

A more complete description can be found here.

Automated Test Generation

American Fuzzy Lop

American Fuzzy Lop, or AFL, uses compile time instrumentation to perform more intelligent fuzzing than simple black box testing would allow. In order to start using AFL, you first need to download the tool, build it, and configure your environment:

1
2
3
4
5
git clone https://github.com/AFLplusplus/AFLplusplus
cd AFL<TAB>
make source-only
export PATH=$PATH:`pwd`
export AFL_PATH=`pwd`

And enable core dumps via:

1
echo core > /proc/sys/kernel/core_pattern  

or on Ubuntu:

1
sudo service apport stop  

Once you have done this once, you can just configure your environment again using the last three lines the next time that you log into your machine.

This particular example comes from a blog post

afl.c
#include <assert.h>

#define H 7
#define W 11
char maze[H][W] = { "+-+---+---+",
                    "| |     |#|",
                    "| | --+ | |",
                    "| |   | | |",
                    "| +-- | | |",
                    "|     |   |",
                    "+-----+---+" };

int
runMaze(char *program, unsigned size) {
  int x, y;
  int ox, oy;
  int i = 0;

  x = 1;
  y = 1;
  maze[y][x]='X';

  while (i < size) {
    ox = x;
    oy = y;

    switch (program[i]) {
      case 'w':
        y--;
        break;
      case 's':
        y++;
        break;
      case 'a':
        x--;
        break;
      case 'd':
        x++;
        break;
      default:
        printf("Wrong command!(only w,s,a,d accepted!)\n");
        printf("You lose!\n");
        return -1;
    }

    if (maze[y][x] == '#') {
      printf ("You win!\n");
      printf ("Your solution \n",program);
      return 1;
    }

    if (maze[y][x] != ' '
        && !((y == 2 && maze[y][x] == '|' && x > 0 && x < W))) {
      x = ox;
      y = oy;
    }

    if (ox==x && oy==y){
      printf("You lose\n");
      return -2;
    }

    maze[y][x]='X';
    i++;
  }

  printf("You lose\n");
  return 0;
}

int
main(int argc, char *argv[]) {
  #define ITERS 28
  char program[ITERS];

  read(0, program, ITERS);
  int result = runMaze(program, ITERS);
  assert(1 != result);

  return 0;
}
1
2
3
4
afl-gcc afl.c
mkdir in out
echo ssssdwdsd > in/seed 
AFL_SKIP_CPUFREQ=1 afl-fuzz -o out -i in -- ./a.out

A more complicated/realistic project will require you to integrate the afl-fuzz compiler into the build system of the project itself.For instance, the following will fuzz test indent using its regression tests as input seeds.

1
2
3
4
5
CC=afl-gcc CXX=afl-g++ ./configure --disable-shared
AFL_HARDEN=1 make clean all
mkdir in out
cp regression/*.c in/
AFL_SKIP_CPUFREQ=1 afl-fuzz -i in/ -o out/ -t 10000 src/indent -st @@

Which in turn produces output like:

american fuzzy lop 2.05b (indent) ┌─ process timing ─────────────────────────────────────┬─ overall results ─────┐ │ run time : 0 days, 2 hrs, 20 min, 56 sec │ cycles done : 0 │ │ last new path : 0 days, 0 hrs, 23 min, 8 sec │ total paths : 2437 │ │ last uniq crash : 0 days, 1 hrs, 10 min, 22 sec │ uniq crashes : 86 │ │ last uniq hang : none seen yet │ uniq hangs : 0 │ ├─ cycle progress ────────────────────┬─ map coverage ─┴───────────────────────┤ │ now processing : 258 (10.59%) │ map density : 3670 (5.60%) │ │ paths timed out : 0 (0.00%) │ count coverage : 5.85 bits/tuple │ ├─ stage progress ────────────────────┼─ findings in depth ────────────────────┤ │ now trying : arith 8/8 │ favored paths : 222 (9.11%) │ │ stage execs : 263k/411k (63.87%) │ new edges on : 399 (16.37%) │ │ total execs : 7.00M │ total crashes : 300 (86 unique) │ │ exec speed : 291.0/sec │ total hangs : 0 (0 unique) │ ├─ fuzzing strategy yields ───────────┴───────────────┬─ path geometry ────────┤ │ bit flips : 546/279k, 74/279k, 26/279k │ levels : 3 │ │ byte flips : 0/35.0k, 12/26.2k, 23/26.3k │ pending : 2407 │ │ arithmetics : 103/1.07M, 0/84.3k, 0/0 │ pend fav : 215 │ │ known ints : 4/99.1k, 17/533k, 27/851k │ own finds : 2339 │ │ dictionary : 0/0, 0/0, 19/398k │ imported : n/a │ │ havoc : 1574/2.74M, 0/0 │ variable : 0 │ │ trim : 0.81%/7417, 25.08% ├────────────────────────┘ └─────────────────────────────────────────────────────┘ [cpu: 27%]

KLEE

These examples assume that you are using the Docker image for KLEE, available here. The examples are drawn from the original KLEE tutorials as well as blog posts that walk through them in greater detail. You can start an instance of the KLEE Docker image with one of your paths mapped/mounted inside the instance via:

docker run -ti --name=<instance name> --ulimit='stack=-1:-1' -v <path/in/filesystem>:</path/in/instance> klee/klee

In this case, we are testing the get_sign() function, and we include a main() that acts as a test driver

klee1.c
#include <klee/klee.h>

int get_sign(int x) {
  if (x == 0)
    return 0;
  
  if (x < 0)
    return -1;
  else 
    return 1;
} 

int main() {
  int a;
  klee_make_symbolic(&a, sizeof(a), "a");
  return get_sign(a);
}
1
2
3
4
5
clang -c -g -emit-llvm -I ~/klee_src/include/ klee1.c -o klee1.bc
klee klee1.bc
ktest-tool klee-last/test000001.ktest
ktest-tool klee-last/test000002.ktest
ktest-tool klee-last/test000003.ktest
klee2.c
#include <klee/klee.h>

#define H 7
#define W 11
char maze[H][W] = { "+-+---+---+",
                    "| |     |#|",
                    "| | --+ | |",
                    "| |   | | |",
                    "| +-- | | |",
                    "|     |   |",
                    "+-----+---+" };

void
draw() {
  int i, j;
  for (i = 0; i < H; i++) {
    for (j = 0; j < W; j++) {
      printf("%c", maze[i][j]);
    }
    printf("\n");
  }
  printf("\n");
}

int
runMaze(char *program, unsigned size) {
  int x, y;
  int ox, oy;
  int i = 0;

  x = 1;
  y = 1;
  maze[y][x]='X';

  while (i < size) {
    ox = x;
    oy = y;

    switch (program[i]) {
      case 'w':
        y--;
        break;
      case 's':
        y++;
        break;
      case 'a':
        x--;
        break;
      case 'd':
        x++;
        break;
      default:
        printf("Wrong command!(only w,s,a,d accepted!)\n");
        printf("You lose!\n");
        return -1;
    }

    if (maze[y][x] == '#') {
      printf ("You win!\n");
      printf ("Your solution \n",program);
      return 1;
    }

    if (maze[y][x] != ' '
        && !((y == 2 && maze[y][x] == '|' && x > 0 && x < W))) {
      x = ox;
      y = oy;
    }

    if (ox==x && oy==y){
      printf("You lose\n");
      return -2;
    }

    maze[y][x]='X';
    draw();
    i++;
    sleep(1);
  }

  printf("You lose\n");
  return 0;
}

int
main(int argc, char *argv[]) {
  #define ITERS 28
  char program[ITERS];

  draw();
  //read(0, program, ITERS);
  klee_make_symbolic(program,ITERS,"program");
  int result = runMaze(program, ITERS);
  klee_assert(1 != result);

  return 0;
}
1
2
3
clang -c -g -emit-llvm -I ~/klee_src/include/ klee2.c -o klee2.bc
klee -emit-all-errors klee2.bc
ls klee-last/*.err