From 309ac912849eb8ae73c88da0fafd241ac0d2a04a Mon Sep 17 00:00:00 2001 From: Christopher Dykes Date: Thu, 30 Jun 2016 18:20:43 -0700 Subject: [PATCH] Use UTF-8 strings for strings with multi-byte Unicode code points in them Summary: MSVC doesn't support multi-byte Unicode code points in a string literal unless it is a UTF-8 (`u8`-prefixed) string literal. Reviewed By: yfeldblum Differential Revision: D3507197 fbshipit-source-id: 27bff1efee03180716418fbfa9ef98f9c04929d9 --- folly/test/JsonTest.cpp | 31 +++++++++++++++++-------------- folly/test/StringTest.cpp | 4 ++-- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/folly/test/JsonTest.cpp b/folly/test/JsonTest.cpp index e6820f7f..a57b493c 100644 --- a/folly/test/JsonTest.cpp +++ b/folly/test/JsonTest.cpp @@ -24,15 +24,15 @@ using folly::parseJson; using folly::toJson; TEST(Json, Unicode) { - auto val = parseJson("\"I \u2665 UTF-8\""); - EXPECT_EQ("I \u2665 UTF-8", val.asString()); + auto val = parseJson(u8"\"I \u2665 UTF-8\""); + EXPECT_EQ(u8"I \u2665 UTF-8", val.asString()); val = parseJson("\"I \\u2665 UTF-8\""); - EXPECT_EQ("I \u2665 UTF-8", val.asString()); - val = parseJson("\"I \U0001D11E playing in G-clef\""); - EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString()); + EXPECT_EQ(u8"I \u2665 UTF-8", val.asString()); + val = parseJson(u8"\"I \U0001D11E playing in G-clef\""); + EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString()); val = parseJson("\"I \\uD834\\uDD1E playing in G-clef\""); - EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString()); + EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString()); } TEST(Json, Parse) { @@ -258,7 +258,7 @@ TEST(Json, JsonNonAsciiEncoding) { TEST(Json, UTF8Retention) { // test retention with valid utf8 strings - std::string input = "\u2665"; + std::string input = u8"\u2665"; std::string jsonInput = folly::toJson(input); std::string output = folly::parseJson(jsonInput).asString(); std::string jsonOutput = folly::toJson(output); @@ -280,7 +280,7 @@ 
TEST(Json, UTF8EncodeNonAsciiRetention) { opts.encode_non_ascii = true; // test encode_non_ascii valid utf8 strings - std::string input = "\u2665"; + std::string input = u8"\u2665"; std::string jsonInput = folly::json::serialize(input, opts); std::string output = folly::parseJson(jsonInput).asString(); std::string jsonOutput = folly::json::serialize(output, opts); @@ -313,12 +313,15 @@ TEST(Json, UTF8Validation) { EXPECT_ANY_THROW(folly::json::serialize("a\xe0\xa0\x80z\xe0\x80\x80", opts)); opts.skip_invalid_utf8 = true; - EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts), - "\"a\xe0\xa0\x80z\ufffd\ufffd\""); - EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts), - "\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\""); - EXPECT_EQ(folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts), - "\"z\ufffd\ufffdz\xe0\xa0\x80\""); + EXPECT_EQ( + folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts), + u8"\"a\xe0\xa0\x80z\ufffd\ufffd\""); + EXPECT_EQ( + folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts), + u8"\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\""); + EXPECT_EQ( + folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts), + u8"\"z\ufffd\ufffdz\xe0\xa0\x80\""); opts.encode_non_ascii = true; EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts), diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp index 5e22294f..781aacdb 100644 --- a/folly/test/StringTest.cpp +++ b/folly/test/StringTest.cpp @@ -1333,7 +1333,7 @@ TEST(String, stripLeftMargin_no_post_whitespace) { EXPECT_EQ(expected, stripLeftMargin(input)); } -const folly::StringPiece kTestUTF8 = "This is \U0001F602 stuff!"; +const folly::StringPiece kTestUTF8 = u8"This is \U0001F602 stuff!"; TEST(UTF8StringPiece, valid_utf8) { folly::StringPiece sp = kTestUTF8; @@ -1357,7 +1357,7 @@ TEST(UTF8StringPiece, invalid_mid_codepoint) { } TEST(UTF8StringPiece, valid_implicit_conversion) { - std::string input = "\U0001F602\U0001F602\U0001F602"; + std::string input = 
u8"\U0001F602\U0001F602\U0001F602"; auto checkImplicitCtor = [](UTF8StringPiece implicitCtor) { return implicitCtor.walk_size(); }; -- 2.34.1