Document the edit-distance algorithm used in StringRef, switch it over
author Douglas Gregor <dgregor@apple.com>
Thu, 31 Dec 2009 04:24:34 +0000 (04:24 +0000)
committer Douglas Gregor <dgregor@apple.com>
Thu, 31 Dec 2009 04:24:34 +0000 (04:24 +0000)
to SmallVector, and add a unit test.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92340 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Support/StringRef.cpp
unittests/ADT/StringRefTest.cpp

index 9084ea6ece01aa9b4a7f1bca95f2cd7e94707584..e4a9984828f3edc74e88270c223395fd34f5cb35 100644 (file)
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringRef.h"
-#include <vector>
+#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 // MSVC emits references to this into the translation units which reference it.
@@ -36,17 +36,26 @@ int StringRef::compare_lower(StringRef RHS) const {
   return Length < RHS.Length ? -1 : 1;
 }
 
-/// \brief Compute the edit distance between the two given strings.
+// Compute the edit distance between the two given strings.
 unsigned StringRef::edit_distance(llvm::StringRef Other, 
                                   bool AllowReplacements) {
+  // The algorithm implemented below is the "classic"
+  // dynamic-programming algorithm for computing the Levenshtein
+  // distance, which is described here:
+  //
+  //   http://en.wikipedia.org/wiki/Levenshtein_distance
+  //
+  // Although the algorithm is typically described using an m x n
+  // array, only two rows are used at a time, so this implementation
+  // just keeps two separate vectors for those two rows.
   size_type m = size();
   size_type n = Other.size();
 
-  std::vector<unsigned> previous(n+1, 0);
-  for (std::vector<unsigned>::size_type i = 0; i <= n; ++i) 
+  SmallVector<unsigned, 32> previous(n+1, 0);
+  for (SmallVector<unsigned, 32>::size_type i = 0; i <= n; ++i) 
     previous[i] = i;
 
-  std::vector<unsigned> current(n+1, 0);
+  SmallVector<unsigned, 32> current(n+1, 0);
   for (size_type y = 1; y <= m; ++y) {
     current.assign(n+1, 0);
     current[0] = y;
index 6507c20b2b1fbfbd5eb19d226e847dbef1012bf4..8507efa1865fb665b6acb76bdd36fcad821ac05f 100644 (file)
@@ -247,6 +247,11 @@ TEST(StringRefTest, Count) {
   EXPECT_EQ(0U, Str.count("zz"));
 }
 
+TEST(StringRefTest, EditDistance) {
+  StringRef Str("hello");  // "hello" -> "hill": replace 'e' with 'i', drop 'o'
+  EXPECT_EQ(2U, Str.edit_distance("hill"));  // 2U: edit_distance is unsigned
+}
+
 TEST(StringRefTest, Misc) {
   std::string Storage;
   raw_string_ostream OS(Storage);