Factor out JSON stripComments().
authorJez Ng <jezng@fb.com>
Fri, 15 Aug 2014 17:29:26 +0000 (10:29 -0700)
committerSara Golemon <sgolemon@fb.com>
Tue, 9 Sep 2014 21:22:22 +0000 (14:22 -0700)
Summary:
Comments are a useful extension to JSON, especially for configuration files.

Facebook: twagent would previously barf on JSON files that contained '//' in their strings, and this commit allows twagent to strip comments properly. Fixes T4686066.

Test Plan: fbconfig common/json mcrouter/lib/config tupperware/agent && fbmake runtests

Reviewed By: aravindn@fb.com

Subscribers: anarayanan, pavlo, stepan, dipanshu, alikhtarov

FB internal diff: D1493963

Tasks: 4686066

folly/json.cpp
folly/json.h
folly/test/JsonTest.cpp
folly/test/json_test_data/commented.json [new file with mode: 0644]
folly/test/json_test_data/commented.json.exp [new file with mode: 0644]

index ddd0afbec0f616feca79b08ecba7f1bd7b972f73..f51b038b4f186891b6be8614bbc467eeded69a68 100644 (file)
@@ -684,6 +684,62 @@ void escapeString(StringPiece input,
   out.push_back('\"');
 }
 
+fbstring stripComments(StringPiece jsonC) {
+  fbstring result;
+  enum class State {
+    None,
+    InString,
+    InlineComment,
+    LineComment
+  } state = State::None;
+
+  for (size_t i = 0; i < jsonC.size(); ++i) {
+    auto s = jsonC.subpiece(i);
+    switch (state) {
+      case State::None:
+        if (s.startsWith("/*")) {
+          state = State::InlineComment;
+          ++i;
+          continue;
+        } else if (s.startsWith("//")) {
+          state = State::LineComment;
+          ++i;
+          continue;
+        } else if (s.startsWith("\"")) {
+          state = State::InString;
+        }
+        result.push_back(s[0]);
+        break;
+      case State::InString:
+        if (s.startsWith("\\\"")) {
+          result.push_back(s[0]);
+          result.push_back(s[1]);
+          ++i;
+          continue;
+        } else if (s.startsWith("\"")) {
+          state = State::None;
+        }
+        result.push_back(s[0]);
+        break;
+      case State::InlineComment:
+        if (s.startsWith("*/")) {
+          state = State::None;
+          ++i;
+        }
+        break;
+      case State::LineComment:
+        if (s.startsWith("\n")) {
+          // skip the line break. It doesn't matter.
+          state = State::None;
+        }
+        break;
+      default:
+        throw std::logic_error("Unknown comment state");
+    }
+  }
+  return result;
+}
+
 }
 
 //////////////////////////////////////////////////////////////////////
index 5610e1cdd87573de2d387cbe82714c3fe400f020..458ab63a3b67686777aa7ee0d7ebc63b8bdeb921 100644 (file)
@@ -142,6 +142,11 @@ namespace json {
   void escapeString(StringPiece input,
                     fbstring& out,
                     const serialization_opts& opts);
+
+  /*
+   * Strip all C99-like comments (i.e. // and / * ... * /) from jsonC and
+   * return the remaining text as a new string.
+   *
+   * Comment markers that appear inside a double-quoted string are kept as
+   * part of the string. The newline that terminates a // comment is removed
+   * along with the comment itself.
+   */
+  fbstring stripComments(StringPiece jsonC);
 }
 
 //////////////////////////////////////////////////////////////////////
index 03bbcb4a516fa299dd206bb1340b87cc80f0736b..b9b1c74d99a7243d1c3583bed6b1b0da2ec3b4ff 100644 (file)
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <folly/FileUtil.h>
 #include <folly/json.h>
 #include <gtest/gtest.h>
 #include <gflags/gflags.h>
@@ -379,6 +380,23 @@ TEST(Json, SortKeys) {
   EXPECT_EQ(sorted_keys, folly::json::serialize(value, opts_on));
 }
 
+// Runs stripComments() over a commented JSON fixture and checks the result
+// against the checked-in expected output, byte for byte.
+TEST(Json, StripComments) {
+  const std::string kInputPath = "folly/test/json_test_data/commented.json";
+  const std::string kExpectedPath =
+    "folly/test/json_test_data/commented.json.exp";
+
+  // Load the commented input; abort the test if the fixture is unreadable.
+  std::string testStr;
+  if (!folly::readFile(kInputPath.c_str(), testStr)) {
+    FAIL() << "can not read test file " << kInputPath;
+  }
+
+  // Load the expected, comment-free output.
+  std::string expectedStr;
+  if (!folly::readFile(kExpectedPath.c_str(), expectedStr)) {
+    FAIL() << "can not read test file " << kExpectedPath;
+  }
+
+  EXPECT_EQ(expectedStr, folly::json::stripComments(testStr));
+}
+
 BENCHMARK(jsonSerialize, iters) {
   folly::json::serialization_opts opts;
   for (int i = 0; i < iters; ++i) {
diff --git a/folly/test/json_test_data/commented.json b/folly/test/json_test_data/commented.json
new file mode 100644 (file)
index 0000000..d718ef1
--- /dev/null
@@ -0,0 +1,11 @@
+{
+  // comment
+  "test": "foo", // comment
+  "test2": "foo // bar", // more comments
+  /*
+  "test3": "baz"
+  */
+  "test4": "foo /* bar", /* comment */
+  "te//": "foo",
+  "te/*": "bar"
+}
diff --git a/folly/test/json_test_data/commented.json.exp b/folly/test/json_test_data/commented.json.exp
new file mode 100644 (file)
index 0000000..637e2c0
--- /dev/null
@@ -0,0 +1,6 @@
+{
+    "test": "foo",   "test2": "foo // bar",   
+  "test4": "foo /* bar", 
+  "te//": "foo",
+  "te/*": "bar"
+}