From 446d906a523c7eaf1c9694b8c49cdba71debfdd0 Mon Sep 17 00:00:00 2001
From: Kostya Serebryany <kcc@google.com>
Date: Thu, 3 Sep 2015 20:23:46 +0000
Subject: [PATCH] [libFuzzer] adding a parser for AFL-style dictionaries +
 tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246800 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Fuzzer/FuzzerDriver.cpp        |  7 +++
 lib/Fuzzer/FuzzerFlags.def         |  1 +
 lib/Fuzzer/FuzzerInternal.h        | 11 +++++
 lib/Fuzzer/FuzzerUtil.cpp          | 74 ++++++++++++++++++++++++++++++
 lib/Fuzzer/test/FuzzerUnittest.cpp | 48 +++++++++++++++++++
 5 files changed, 141 insertions(+)
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 610b3c9d07e..7cb4f3dc1c5 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -256,6 +256,13 @@ int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF) {
   if (Flags.apply_tokens)
     return ApplyTokens(F, Flags.apply_tokens);
 
+  if (Flags.dict)
+    if (!ParseDictionaryFile(FileToString(Flags.dict), &Options.Dictionary))
+      return 1;
+
+  if (Flags.verbosity > 0 && !Options.Dictionary.empty())
+    Printf("Dictionary: %zd entries\n", Options.Dictionary.size());
+
   unsigned Seed = Flags.seed;
   // Initialize Seed.
   if (Seed == 0)
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 534c867b8e2..ea47565f892 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -63,6 +63,7 @@ FUZZER_FLAG_INT(report_slow_units, 10,
     "Report slowest units if they run for more than this number of seconds.")
 FUZZER_FLAG_INT(only_ascii, 0,
                 "If 1, generate only ASCII (isprint+isspace) inputs.")
+FUZZER_FLAG_STRING(dict, "Experimental. Use the dictionary file.")
 FUZZER_FLAG_INT(tbm_depth, 5, "Apply at most this number of consecutive"
                                "trace-based-mutations (tbm).")
 FUZZER_FLAG_INT(tbm_width, 5, "Apply at most this number of independent"
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index c44f0ea34d5..32edc91636a 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -57,6 +57,16 @@ bool IsASCII(const Unit &U);
 
 int NumberOfCpuCores();
 
+// Dictionary.
+
+// Parses one dictionary entry.
+// If successfull, write the enty to Unit and returns true,
+// otherwise returns false.
+bool ParseOneDictionaryEntry(const std::string &Str, Unit *U);
+// Parses the dictionary file, fills Units, returns true iff all lines
+// were parsed succesfully.
+bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units);
+
 class Fuzzer {
  public:
   struct FuzzingOptions {
@@ -80,6 +90,7 @@ class Fuzzer {
     std::string OutputCorpus;
     std::string SyncCommand;
     std::vector<std::string> Tokens;
+    std::vector<Unit> Dictionary;
   };
   Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options);
   void AddToCorpus(const Unit &U) { Corpus.push_back(U); }
diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp
index b04d76d316d..a8856ab3bce 100644
--- a/lib/Fuzzer/FuzzerUtil.cpp
+++ b/lib/Fuzzer/FuzzerUtil.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <cstring>
 #include <signal.h>
+#include <sstream>
 #include <unistd.h>
 
 namespace fuzzer {
@@ -92,4 +93,77 @@ bool IsASCII(const Unit &U) {
   return true;
 }
 
+bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) {
+  U->clear();
+  if (Str.empty()) return false;
+  size_t L = 0, R = Str.size() - 1;  // We are parsing the range [L,R].
+  // Skip spaces from both sides.
+  while (L < R && isspace(Str[L])) L++;
+  while (R > L && isspace(Str[R])) R--;
+  if (R - L < 2) return false;
+  // Check the closing "
+  if (Str[R] != '"') return false;
+  R--;
+  // Find the opening "
+  while (L < R && Str[L] != '"') L++;
+  if (L >= R) return false;
+  assert(Str[L] == '\"');
+  L++;
+  assert(L <= R);
+  for (size_t Pos = L; Pos <= R; Pos++) {
+    uint8_t V = (uint8_t)Str[Pos];
+    if (!isprint(V) && !isspace(V)) return false;
+    if (V =='\\') {
+      // Handle '\\'
+      if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) {
+        U->push_back(Str[Pos + 1]);
+        Pos++;
+        continue;
+      }
+      // Handle '\xAB'
+      if (Pos + 3 <= R && Str[Pos + 1] == 'x'
+           && isxdigit(Str[Pos + 2]) && isxdigit(Str[Pos + 3])) {
+        char Hex[] = "0xAA";
+        Hex[2] = Str[Pos + 2];
+        Hex[3] = Str[Pos + 3];
+        U->push_back(strtol(Hex, nullptr, 16));
+        Pos += 3;
+        continue;
+      }
+      return false;  // Invalid escape.
+    } else {
+      // Any other character.
+      U->push_back(V);
+    }
+  }
+  return true;
+}
+
+bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units) {
+  if (Text.empty()) {
+    Printf("ParseDictionaryFile: file does not exist or is empty\n");
+    return false;
+  }
+  std::istringstream ISS(Text);
+  Units->clear();
+  Unit U;
+  int LineNo = 0;
+  std::string S;
+  while (std::getline(ISS, S, '\n')) {
+    LineNo++;
+    size_t Pos = 0;
+    while (Pos < S.size() && isspace(S[Pos])) Pos++;  // Skip spaces.
+    if (Pos == S.size()) continue;  // Empty line.
+    if (S[Pos] == '#') continue;  // Comment line.
+    if (ParseOneDictionaryEntry(S, &U)) {
+      Units->push_back(U);
+    } else {
+      Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo,
+             S.c_str());
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace fuzzer
diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp
index 518cfab5d93..a06e0ac4831 100644
--- a/lib/Fuzzer/test/FuzzerUnittest.cpp
+++ b/lib/Fuzzer/test/FuzzerUnittest.cpp
@@ -215,3 +215,51 @@ void TestShuffleBytes(Mutator M, int NumIter) {
 
 TEST(FuzzerMutate, ShuffleBytes1) { TestShuffleBytes(Mutate_ShuffleBytes, 1 << 15); }
 TEST(FuzzerMutate, ShuffleBytes2) { TestShuffleBytes(Mutate, 1 << 16); }
+
+TEST(FuzzerDictionary, ParseOneDictionaryEntry) {
+  Unit U;
+  EXPECT_FALSE(ParseOneDictionaryEntry("", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry(" ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("\t  ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \" ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  zz\" ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \"zz ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \"\" ", &U));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"a\"", &U));
+  EXPECT_EQ(U, Unit({'a'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"abc\"", &U));
+  EXPECT_EQ(U, Unit({'a', 'b', 'c'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("abc=\"abc\"", &U));
+  EXPECT_EQ(U, Unit({'a', 'b', 'c'}));
+  EXPECT_FALSE(ParseOneDictionaryEntry("\"\\\"", &U));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\\\"", &U));
+  EXPECT_EQ(U, Unit({'\\'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xAB\"", &U));
+  EXPECT_EQ(U, Unit({0xAB}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xABz\\xDE\"", &U));
+  EXPECT_EQ(U, Unit({0xAB, 'z', 0xDE}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"#\"", &U));
+  EXPECT_EQ(U, Unit({'#'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\"\"", &U));
+  EXPECT_EQ(U, Unit({'"'}));
+}
+
+TEST(FuzzerDictionary, ParseDictionaryFile) {
+  std::vector<Unit> Units;
+  EXPECT_FALSE(ParseDictionaryFile("zzz\n", &Units));
+  EXPECT_FALSE(ParseDictionaryFile("", &Units));
+  EXPECT_TRUE(ParseDictionaryFile("\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("#zzzz a b c d\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile(" #zzzz\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("  #zzzz\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("  #zzzz\naaa=\"aa\"", &Units));
+  EXPECT_EQ(Units, std::vector<Unit>({Unit({'a', 'a'})}));
+  EXPECT_TRUE(
+      ParseDictionaryFile("  #zzzz\naaa=\"aa\"\n\nabc=\"abc\"", &Units));
+  EXPECT_EQ(Units,
+            std::vector<Unit>({Unit({'a', 'a'}), Unit({'a', 'b', 'c'})}));
+}
-- 
2.34.1