Use UTF-8 strings for strings with multi-byte Unicode code points in them
author: Christopher Dykes <cdykes@fb.com>
Fri, 1 Jul 2016 01:20:43 +0000 (18:20 -0700)
committer: Facebook Github Bot 8 <facebook-github-bot-8-bot@fb.com>
Fri, 1 Jul 2016 01:24:14 +0000 (18:24 -0700)
Summary: MSVC doesn't support string literals containing multi-byte Unicode code points unless they are UTF-8 (`u8`-prefixed) string literals, so use UTF-8 string literals for them.

Reviewed By: yfeldblum

Differential Revision: D3507197

fbshipit-source-id: 27bff1efee03180716418fbfa9ef98f9c04929d9

folly/test/JsonTest.cpp
folly/test/StringTest.cpp

index e6820f7f01b526ae2f31cf8c6ad438ead13c6e6e..a57b493c127775b49d1722f98538aae2b6d61923 100644 (file)
@@ -24,15 +24,15 @@ using folly::parseJson;
 using folly::toJson;
 
 TEST(Json, Unicode) {
-  auto val = parseJson("\"I \u2665 UTF-8\"");
-  EXPECT_EQ("I \u2665 UTF-8", val.asString());
+  auto val = parseJson(u8"\"I \u2665 UTF-8\"");
+  EXPECT_EQ(u8"I \u2665 UTF-8", val.asString());
   val = parseJson("\"I \\u2665 UTF-8\"");
-  EXPECT_EQ("I \u2665 UTF-8", val.asString());
-  val = parseJson("\"I \U0001D11E playing in G-clef\"");
-  EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString());
+  EXPECT_EQ(u8"I \u2665 UTF-8", val.asString());
+  val = parseJson(u8"\"I \U0001D11E playing in G-clef\"");
+  EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString());
 
   val = parseJson("\"I \\uD834\\uDD1E playing in G-clef\"");
-  EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString());
+  EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString());
 }
 
 TEST(Json, Parse) {
@@ -258,7 +258,7 @@ TEST(Json, JsonNonAsciiEncoding) {
 TEST(Json, UTF8Retention) {
 
   // test retention with valid utf8 strings
-  std::string input = "\u2665";
+  std::string input = u8"\u2665";
   std::string jsonInput = folly::toJson(input);
   std::string output = folly::parseJson(jsonInput).asString();
   std::string jsonOutput = folly::toJson(output);
@@ -280,7 +280,7 @@ TEST(Json, UTF8EncodeNonAsciiRetention) {
   opts.encode_non_ascii = true;
 
   // test encode_non_ascii valid utf8 strings
-  std::string input = "\u2665";
+  std::string input = u8"\u2665";
   std::string jsonInput = folly::json::serialize(input, opts);
   std::string output = folly::parseJson(jsonInput).asString();
   std::string jsonOutput = folly::json::serialize(output, opts);
@@ -313,12 +313,15 @@ TEST(Json, UTF8Validation) {
   EXPECT_ANY_THROW(folly::json::serialize("a\xe0\xa0\x80z\xe0\x80\x80", opts));
 
   opts.skip_invalid_utf8 = true;
-  EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
-            "\"a\xe0\xa0\x80z\ufffd\ufffd\"");
-  EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts),
-            "\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\"");
-  EXPECT_EQ(folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts),
-            "\"z\ufffd\ufffdz\xe0\xa0\x80\"");
+  EXPECT_EQ(
+      folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
+      u8"\"a\xe0\xa0\x80z\ufffd\ufffd\"");
+  EXPECT_EQ(
+      folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts),
+      u8"\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\"");
+  EXPECT_EQ(
+      folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts),
+      u8"\"z\ufffd\ufffdz\xe0\xa0\x80\"");
 
   opts.encode_non_ascii = true;
   EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
index 5e22294fe9b8e1e9b63f0040aec139976384286a..781aacdbbeb7da421b1365db0839a9b1ef43260d 100644 (file)
@@ -1333,7 +1333,7 @@ TEST(String, stripLeftMargin_no_post_whitespace) {
   EXPECT_EQ(expected, stripLeftMargin(input));
 }
 
-const folly::StringPiece kTestUTF8 = "This is \U0001F602 stuff!";
+const folly::StringPiece kTestUTF8 = u8"This is \U0001F602 stuff!";
 
 TEST(UTF8StringPiece, valid_utf8) {
   folly::StringPiece sp = kTestUTF8;
@@ -1357,7 +1357,7 @@ TEST(UTF8StringPiece, invalid_mid_codepoint) {
 }
 
 TEST(UTF8StringPiece, valid_implicit_conversion) {
-  std::string input = "\U0001F602\U0001F602\U0001F602";
+  std::string input = u8"\U0001F602\U0001F602\U0001F602";
   auto checkImplicitCtor = [](UTF8StringPiece implicitCtor) {
     return implicitCtor.walk_size();
   };