1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Support/ConvertUTF.h"
11 #include "llvm/Support/Format.h"
12 #include "gtest/gtest.h"
19 TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
20 // Src is the look of disapproval.
21 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
22 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
24 bool Success = convertUTF16ToUTF8String(Ref, Result);
26 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
27 EXPECT_EQ(Expected, Result);
30 TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
31 // Src is the look of disapproval.
32 static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
33 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
35 bool Success = convertUTF16ToUTF8String(Ref, Result);
37 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
38 EXPECT_EQ(Expected, Result);
41 TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
42 // Src is the look of disapproval.
43 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
44 StringRef Ref(Src, sizeof(Src) - 1);
45 SmallVector<UTF16, 5> Result;
46 bool Success = convertUTF8ToUTF16String(Ref, Result);
48 static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
49 ASSERT_EQ(3u, Result.size());
50 for (int I = 0, E = 3; I != E; ++I)
51 EXPECT_EQ(Expected[I], Result[I]);
54 TEST(ConvertUTFTest, OddLengthInput) {
56 bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);
57 EXPECT_FALSE(Success);
60 TEST(ConvertUTFTest, Empty) {
62 bool Success = convertUTF16ToUTF8String(None, Result);
64 EXPECT_TRUE(Result.empty());
67 TEST(ConvertUTFTest, HasUTF16BOM) {
68 bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
70 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
72 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
73 EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
74 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6));
77 HasBOM = hasUTF16ByteOrderMark(None);
79 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
83 struct ConvertUTFResultContainer {
84 ConversionResult ErrorCode;
85 std::vector<unsigned> UnicodeScalars;
87 ConvertUTFResultContainer(ConversionResult ErrorCode)
88 : ErrorCode(ErrorCode) {}
90 ConvertUTFResultContainer
91 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
92 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
93 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
94 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
95 ConvertUTFResultContainer Result(*this);
97 Result.UnicodeScalars.push_back(US0);
99 Result.UnicodeScalars.push_back(US1);
101 Result.UnicodeScalars.push_back(US2);
103 Result.UnicodeScalars.push_back(US3);
105 Result.UnicodeScalars.push_back(US4);
107 Result.UnicodeScalars.push_back(US5);
109 Result.UnicodeScalars.push_back(US6);
111 Result.UnicodeScalars.push_back(US7);
116 std::pair<ConversionResult, std::vector<unsigned>>
117 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
118 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
120 const UTF8 *SourceNext = SourceStart;
121 std::vector<UTF32> Decoded(S.size(), 0);
122 UTF32 *TargetStart = Decoded.data();
125 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
126 Decoded.data() + Decoded.size(), lenientConversion);
128 Decoded.resize(TargetStart - Decoded.data());
130 return std::make_pair(ErrorCode, Decoded);
133 std::pair<ConversionResult, std::vector<unsigned>>
134 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
135 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
137 const UTF8 *SourceNext = SourceStart;
138 std::vector<UTF32> Decoded(S.size(), 0);
139 UTF32 *TargetStart = Decoded.data();
141 auto ErrorCode = ConvertUTF8toUTF32Partial(
142 &SourceNext, SourceStart + S.size(), &TargetStart,
143 Decoded.data() + Decoded.size(), lenientConversion);
145 Decoded.resize(TargetStart - Decoded.data());
147 return std::make_pair(ErrorCode, Decoded);
150 ::testing::AssertionResult
151 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
152 StringRef S, bool Partial = false) {
153 ConversionResult ErrorCode;
154 std::vector<unsigned> Decoded;
156 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
158 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
160 if (Expected.ErrorCode != ErrorCode)
161 return ::testing::AssertionFailure() << "Expected error code "
162 << Expected.ErrorCode << ", actual "
165 if (Expected.UnicodeScalars != Decoded)
166 return ::testing::AssertionFailure()
167 << "Expected lenient decoded result:\n"
168 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
169 << "Actual result:\n" << ::testing::PrintToString(Decoded);
171 return ::testing::AssertionSuccess();
174 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
180 // U+0041 LATIN CAPITAL LETTER A
181 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
182 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
188 // U+0283 LATIN SMALL LETTER ESH
189 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
190 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
193 // U+03BA GREEK SMALL LETTER KAPPA
194 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
195 // U+03C3 GREEK SMALL LETTER SIGMA
196 // U+03BC GREEK SMALL LETTER MU
197 // U+03B5 GREEK SMALL LETTER EPSILON
198 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
199 ConvertUTFResultContainer(conversionOK)
200 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
201 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
207 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
208 // U+6587 CJK UNIFIED IDEOGRAPH-6587
209 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
210 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
211 "\xe4\xbe\x8b\xe6\x96\x87"));
213 // U+D55C HANGUL SYLLABLE HAN
214 // U+AE00 HANGUL SYLLABLE GEUL
215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
216 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
217 "\xed\x95\x9c\xea\xb8\x80"));
219 // U+1112 HANGUL CHOSEONG HIEUH
220 // U+1161 HANGUL JUNGSEONG A
221 // U+11AB HANGUL JONGSEONG NIEUN
222 // U+1100 HANGUL CHOSEONG KIYEOK
223 // U+1173 HANGUL JUNGSEONG EU
224 // U+11AF HANGUL JONGSEONG RIEUL
225 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
226 ConvertUTFResultContainer(conversionOK)
227 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
228 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
235 // U+E0100 VARIATION SELECTOR-17
236 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
237 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
238 "\xf3\xa0\x84\x80"));
241 // First possible sequence of a certain length
245 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
246 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
247 StringRef("\x00", 1)));
249 // U+0080 PADDING CHARACTER
250 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
251 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
254 // U+0800 SAMARITAN LETTER ALAF
255 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
256 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
259 // U+10000 LINEAR B SYLLABLE B008 A
260 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
261 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
262 "\xf0\x90\x80\x80"));
264 // U+200000 (invalid)
265 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
266 ConvertUTFResultContainer(sourceIllegal)
267 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
268 "\xf8\x88\x80\x80\x80"));
270 // U+4000000 (invalid)
271 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
272 ConvertUTFResultContainer(sourceIllegal)
273 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
274 "\xfc\x84\x80\x80\x80\x80"));
277 // Last possible sequence of a certain length
281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
282 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
284 // U+07FF (unassigned)
285 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
286 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
289 // U+FFFF (noncharacter)
290 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
291 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
294 // U+1FFFFF (invalid)
295 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
296 ConvertUTFResultContainer(sourceIllegal)
297 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
298 "\xf7\xbf\xbf\xbf"));
300 // U+3FFFFFF (invalid)
301 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
302 ConvertUTFResultContainer(sourceIllegal)
303 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
304 "\xfb\xbf\xbf\xbf\xbf"));
306 // U+7FFFFFFF (invalid)
307 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
308 ConvertUTFResultContainer(sourceIllegal)
309 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
310 "\xfd\xbf\xbf\xbf\xbf\xbf"));
313 // Other boundary conditions
316 // U+D7FF (unassigned)
317 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
318 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
321 // U+E000 (private use)
322 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
323 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
326 // U+FFFD REPLACEMENT CHARACTER
327 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
328 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
331 // U+10FFFF (noncharacter)
332 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
333 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
334 "\xf4\x8f\xbf\xbf"));
336 // U+110000 (invalid)
337 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
338 ConvertUTFResultContainer(sourceIllegal)
339 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
340 "\xf4\x90\x80\x80"));
343 // Unexpected continuation bytes
346 // A sequence of unexpected continuation bytes that don't follow a first
347 // byte, every byte is a maximal subpart.
349 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
350 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
351 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
352 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
353 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
354 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
357 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
359 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
360 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
362 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
363 ConvertUTFResultContainer(sourceIllegal)
364 .withScalars(0xfffd, 0xfffd, 0xfffd),
366 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
367 ConvertUTFResultContainer(sourceIllegal)
368 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
369 "\x80\xbf\x80\xbf"));
370 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
371 ConvertUTFResultContainer(sourceIllegal)
372 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
373 "\x80\xbf\x82\xbf\xaa"));
374 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
375 ConvertUTFResultContainer(sourceIllegal)
376 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
377 "\xaa\xb0\xbb\xbf\xaa\xa0"));
378 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
379 ConvertUTFResultContainer(sourceIllegal)
380 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
381 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
383 // All continuation bytes (0x80--0xbf).
384 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
385 ConvertUTFResultContainer(sourceIllegal)
386 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
387 0xfffd, 0xfffd, 0xfffd, 0xfffd)
388 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
389 0xfffd, 0xfffd, 0xfffd, 0xfffd)
390 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
391 0xfffd, 0xfffd, 0xfffd, 0xfffd)
392 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
393 0xfffd, 0xfffd, 0xfffd, 0xfffd)
394 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
395 0xfffd, 0xfffd, 0xfffd, 0xfffd)
396 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
397 0xfffd, 0xfffd, 0xfffd, 0xfffd)
398 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
399 0xfffd, 0xfffd, 0xfffd, 0xfffd)
400 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
401 0xfffd, 0xfffd, 0xfffd, 0xfffd),
402 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
403 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
404 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
405 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
408 // Lonely start bytes
411 // Start bytes of 2-byte sequences (0xc0--0xdf).
412 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
413 ConvertUTFResultContainer(sourceIllegal)
414 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
415 0xfffd, 0xfffd, 0xfffd, 0xfffd)
416 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
417 0xfffd, 0xfffd, 0xfffd, 0xfffd)
418 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
419 0xfffd, 0xfffd, 0xfffd, 0xfffd)
420 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
421 0xfffd, 0xfffd, 0xfffd, 0xfffd),
422 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
423 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
425 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
426 ConvertUTFResultContainer(sourceIllegal)
427 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
428 0xfffd, 0x0020, 0xfffd, 0x0020)
429 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
430 0xfffd, 0x0020, 0xfffd, 0x0020)
431 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
432 0xfffd, 0x0020, 0xfffd, 0x0020)
433 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
434 0xfffd, 0x0020, 0xfffd, 0x0020)
435 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
436 0xfffd, 0x0020, 0xfffd, 0x0020)
437 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
438 0xfffd, 0x0020, 0xfffd, 0x0020)
439 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
440 0xfffd, 0x0020, 0xfffd, 0x0020)
441 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
442 0xfffd, 0x0020, 0xfffd, 0x0020),
443 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
444 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
445 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
446 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
448 // Start bytes of 3-byte sequences (0xe0--0xef).
449 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
450 ConvertUTFResultContainer(sourceIllegal)
451 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
452 0xfffd, 0xfffd, 0xfffd, 0xfffd)
453 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
454 0xfffd, 0xfffd, 0xfffd, 0xfffd),
455 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
457 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
458 ConvertUTFResultContainer(sourceIllegal)
459 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
460 0xfffd, 0x0020, 0xfffd, 0x0020)
461 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
462 0xfffd, 0x0020, 0xfffd, 0x0020)
463 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
464 0xfffd, 0x0020, 0xfffd, 0x0020)
465 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466 0xfffd, 0x0020, 0xfffd, 0x0020),
467 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
468 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
470 // Start bytes of 4-byte sequences (0xf0--0xf7).
471 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
472 ConvertUTFResultContainer(sourceIllegal)
473 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
474 0xfffd, 0xfffd, 0xfffd, 0xfffd),
475 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
477 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
478 ConvertUTFResultContainer(sourceIllegal)
479 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
480 0xfffd, 0x0020, 0xfffd, 0x0020)
481 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
482 0xfffd, 0x0020, 0xfffd, 0x0020),
483 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
485 // Start bytes of 5-byte sequences (0xf8--0xfb).
486 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
487 ConvertUTFResultContainer(sourceIllegal)
488 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
489 "\xf8\xf9\xfa\xfb"));
491 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
492 ConvertUTFResultContainer(sourceIllegal)
493 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
494 0xfffd, 0x0020, 0xfffd, 0x0020),
495 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
497 // Start bytes of 6-byte sequences (0xfc--0xfd).
498 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
499 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
502 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
503 ConvertUTFResultContainer(sourceIllegal)
504 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
505 "\xfc\x20\xfd\x20"));
508 // Other bytes (0xc0--0xc1, 0xfe--0xff).
511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
512 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
513 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
514 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
515 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
516 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
517 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
518 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
520 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
521 ConvertUTFResultContainer(sourceIllegal)
522 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
523 "\xc0\xc1\xfe\xff"));
525 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
526 ConvertUTFResultContainer(sourceIllegal)
527 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
528 "\xfe\xfe\xff\xff"));
530 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
531 ConvertUTFResultContainer(sourceIllegal)
532 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
533 "\xfe\x80\x80\x80\x80\x80"));
535 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
536 ConvertUTFResultContainer(sourceIllegal)
537 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
538 "\xff\x80\x80\x80\x80\x80"));
540 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
541 ConvertUTFResultContainer(sourceIllegal)
542 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
543 0xfffd, 0x0020, 0xfffd, 0x0020),
544 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
547 // Sequences with one continuation byte missing
550 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
551 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
552 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
553 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
554 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
555 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
557 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
558 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
560 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
561 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
563 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
564 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
566 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
567 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
569 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
570 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
572 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
573 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
575 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
576 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
578 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
579 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
581 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
582 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
584 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
585 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
587 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
588 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
593 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
597 // Overlong sequences with one trailing byte missing.
598 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
599 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
601 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
602 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
604 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
605 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
607 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
608 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
610 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
611 ConvertUTFResultContainer(sourceIllegal)
612 .withScalars(0xfffd, 0xfffd, 0xfffd),
614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615 ConvertUTFResultContainer(sourceIllegal)
616 .withScalars(0xfffd, 0xfffd, 0xfffd),
618 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
619 ConvertUTFResultContainer(sourceIllegal)
620 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
621 "\xf8\x80\x80\x80"));
622 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
623 ConvertUTFResultContainer(sourceIllegal)
624 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
625 "\xfc\x80\x80\x80\x80"));
627 // Sequences that represent surrogates with one trailing byte missing.
629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
630 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
632 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
633 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
635 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
636 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
639 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
640 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
642 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
643 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
645 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
646 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
649 // Ill-formed 4-byte sequences.
650 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
651 // U+1100xx (invalid)
652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
653 ConvertUTFResultContainer(sourceIllegal)
654 .withScalars(0xfffd, 0xfffd, 0xfffd),
656 // U+13FBxx (invalid)
657 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
658 ConvertUTFResultContainer(sourceIllegal)
659 .withScalars(0xfffd, 0xfffd, 0xfffd),
661 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
662 ConvertUTFResultContainer(sourceIllegal)
663 .withScalars(0xfffd, 0xfffd, 0xfffd),
665 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
666 ConvertUTFResultContainer(sourceIllegal)
667 .withScalars(0xfffd, 0xfffd, 0xfffd),
669 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
670 ConvertUTFResultContainer(sourceIllegal)
671 .withScalars(0xfffd, 0xfffd, 0xfffd),
673 // U+1FFBxx (invalid)
674 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
675 ConvertUTFResultContainer(sourceIllegal)
676 .withScalars(0xfffd, 0xfffd, 0xfffd),
679 // Ill-formed 5-byte sequences.
680 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
681 // U+2000xx (invalid)
682 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
683 ConvertUTFResultContainer(sourceIllegal)
684 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
685 "\xf8\x88\x80\x80"));
686 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
687 ConvertUTFResultContainer(sourceIllegal)
688 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
689 "\xf8\xbf\xbf\xbf"));
690 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
691 ConvertUTFResultContainer(sourceIllegal)
692 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
693 "\xf9\x80\x80\x80"));
694 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
695 ConvertUTFResultContainer(sourceIllegal)
696 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
697 "\xfa\x80\x80\x80"));
698 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
699 ConvertUTFResultContainer(sourceIllegal)
700 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
701 "\xfb\x80\x80\x80"));
702 // U+3FFFFxx (invalid)
703 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
704 ConvertUTFResultContainer(sourceIllegal)
705 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
706 "\xfb\xbf\xbf\xbf"));
708 // Ill-formed 6-byte sequences.
709 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
710 // U+40000xx (invalid)
711 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
712 ConvertUTFResultContainer(sourceIllegal)
713 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
714 "\xfc\x84\x80\x80\x80"));
715 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
716 ConvertUTFResultContainer(sourceIllegal)
717 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
718 "\xfc\xbf\xbf\xbf\xbf"));
719 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
720 ConvertUTFResultContainer(sourceIllegal)
721 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
722 "\xfd\x80\x80\x80\x80"));
723 // U+7FFFFFxx (invalid)
724 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
725 ConvertUTFResultContainer(sourceIllegal)
726 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
727 "\xfd\xbf\xbf\xbf\xbf"));
730 // Sequences with two continuation bytes missing
733 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
734 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
736 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
737 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
739 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
740 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
742 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
743 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
745 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
746 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
748 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
749 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
752 // Overlong sequences with two trailing bytes missing.
753 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
754 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
755 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
756 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
758 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
759 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
761 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
762 ConvertUTFResultContainer(sourceIllegal)
763 .withScalars(0xfffd, 0xfffd, 0xfffd),
765 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
766 ConvertUTFResultContainer(sourceIllegal)
767 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
768 "\xfc\x80\x80\x80"));
770 // Sequences that represent surrogates with two trailing bytes missing.
771 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
772 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
774 // Ill-formed 4-byte sequences.
775 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
776 // U+110yxx (invalid)
777 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
778 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
780 // U+13Fyxx (invalid)
781 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
784 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
785 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
787 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
788 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
790 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
791 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
793 // U+1FFyxx (invalid)
794 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
795 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
798 // Ill-formed 5-byte sequences.
799 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
800 // U+200yxx (invalid)
801 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
802 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
804 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
805 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
807 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
808 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
810 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
811 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
813 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
814 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
816 // U+3FFFyxx (invalid)
817 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
818 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
821 // Ill-formed 6-byte sequences.
822 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
823 // U+4000yxx (invalid)
824 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
825 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
826 "\xfc\x84\x80\x80"));
827 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
828 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
829 "\xfc\xbf\xbf\xbf"));
830 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
831 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
832 "\xfd\x80\x80\x80"));
833 // U+7FFFFyxx (invalid)
834 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
835 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
836 "\xfd\xbf\xbf\xbf"));
839 // Sequences with three continuation bytes missing
842 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
843 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
844 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
845 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
846 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
847 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
848 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
849 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
850 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
851 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
853 // Broken overlong sequences.
854 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
855 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
856 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
857 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
859 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
860 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
863 // Ill-formed 4-byte sequences.
864 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
865 // U+14yyxx (invalid)
866 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
867 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
868 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
869 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
870 // U+1Cyyxx (invalid)
871 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
872 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
874 // Ill-formed 5-byte sequences.
875 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
876 // U+20yyxx (invalid)
877 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
878 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
880 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
881 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
883 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
884 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
886 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
887 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
889 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
890 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
892 // U+3FCyyxx (invalid)
893 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
894 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
897 // Ill-formed 6-byte sequences.
898 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
899 // U+400yyxx (invalid)
900 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
901 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
903 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
904 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
906 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
907 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
909 // U+7FFCyyxx (invalid)
910 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
911 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
915 // Sequences with four continuation bytes missing
918 // Ill-formed 5-byte sequences.
919 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
920 // U+uzyyxx (invalid)
921 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
922 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
923 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
924 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
925 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
926 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
927 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
928 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
929 // U+3zyyxx (invalid)
930 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
931 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
933 // Broken overlong sequences.
934 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
935 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
936 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
937 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
940 // Ill-formed 6-byte sequences.
941 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
942 // U+uzzyyxx (invalid)
943 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
944 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
946 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
947 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
949 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
950 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
952 // U+7Fzzyyxx (invalid)
953 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
954 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
958 // Sequences with five continuation bytes missing
961 // Ill-formed 6-byte sequences.
962 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
963 // U+uzzyyxx (invalid)
964 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
965 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
966 // U+uuzzyyxx (invalid)
967 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
968 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
971 // Consecutive sequences with trailing bytes missing
974 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
975 ConvertUTFResultContainer(sourceIllegal)
976 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
977 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
978 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
979 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
980 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
981 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
982 "\xc0" "\xe0\x80" "\xf0\x80\x80"
984 "\xfc\x80\x80\x80\x80"
985 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
987 "\xfd\xbf\xbf\xbf\xbf"));
990 // Overlong UTF-8 sequences
994 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
995 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
997 // Overlong sequences of the above.
998 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
999 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1001 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1002 ConvertUTFResultContainer(sourceIllegal)
1003 .withScalars(0xfffd, 0xfffd, 0xfffd),
1005 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1006 ConvertUTFResultContainer(sourceIllegal)
1007 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1008 "\xf0\x80\x80\xaf"));
1009 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1010 ConvertUTFResultContainer(sourceIllegal)
1011 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1012 "\xf8\x80\x80\x80\xaf"));
1013 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1014 ConvertUTFResultContainer(sourceIllegal)
1015 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1016 "\xfc\x80\x80\x80\x80\xaf"));
1019 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1020 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1021 StringRef("\x00", 1)));
1023 // Overlong sequences of the above.
1024 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1025 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1027 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1028 ConvertUTFResultContainer(sourceIllegal)
1029 .withScalars(0xfffd, 0xfffd, 0xfffd),
1031 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1032 ConvertUTFResultContainer(sourceIllegal)
1033 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1034 "\xf0\x80\x80\x80"));
1035 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1036 ConvertUTFResultContainer(sourceIllegal)
1037 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1038 "\xf8\x80\x80\x80\x80"));
1039 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1040 ConvertUTFResultContainer(sourceIllegal)
1041 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1042 "\xfc\x80\x80\x80\x80\x80"));
1044 // Other overlong sequences.
1045 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1046 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1048 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1049 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1051 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1052 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1054 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1055 ConvertUTFResultContainer(sourceIllegal)
1056 .withScalars(0xfffd, 0xfffd, 0xfffd),
1058 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1059 ConvertUTFResultContainer(sourceIllegal)
1060 .withScalars(0xfffd, 0xfffd, 0xfffd),
1062 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1063 ConvertUTFResultContainer(sourceIllegal)
1064 .withScalars(0xfffd, 0xfffd, 0xfffd),
1066 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1067 ConvertUTFResultContainer(sourceIllegal)
1068 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1069 "\xf0\x8f\x80\x80"));
1070 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1071 ConvertUTFResultContainer(sourceIllegal)
1072 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1073 "\xf0\x8f\xbf\xbf"));
1074 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1075 ConvertUTFResultContainer(sourceIllegal)
1076 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1077 "\xf8\x87\xbf\xbf\xbf"));
1078 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1079 ConvertUTFResultContainer(sourceIllegal)
1080 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1081 "\xfc\x83\xbf\xbf\xbf\xbf"));
1084 // Isolated surrogates
1089 // D71. High-surrogate code point: A Unicode code point in the range
1090 // U+D800 to U+DBFF.
1092 // D73. Low-surrogate code point: A Unicode code point in the range
1093 // U+DC00 to U+DFFF.
1095 // Note: U+E0100 is <DB40 DD00> in UTF16.
1100 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1101 ConvertUTFResultContainer(sourceIllegal)
1102 .withScalars(0xfffd, 0xfffd, 0xfffd),
1106 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1107 ConvertUTFResultContainer(sourceIllegal)
1108 .withScalars(0xfffd, 0xfffd, 0xfffd),
1112 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1113 ConvertUTFResultContainer(sourceIllegal)
1114 .withScalars(0xfffd, 0xfffd, 0xfffd),
1120 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1121 ConvertUTFResultContainer(sourceIllegal)
1122 .withScalars(0xfffd, 0xfffd, 0xfffd),
1126 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1127 ConvertUTFResultContainer(sourceIllegal)
1128 .withScalars(0xfffd, 0xfffd, 0xfffd),
1132 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1133 ConvertUTFResultContainer(sourceIllegal)
1134 .withScalars(0xfffd, 0xfffd, 0xfffd),
1140 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1141 ConvertUTFResultContainer(sourceIllegal)
1142 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1143 "\xed\xa0\x80\xed\xb0\x80"));
1146 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1147 ConvertUTFResultContainer(sourceIllegal)
1148 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1149 "\xed\xa0\x80\xed\xb4\x80"));
1152 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1153 ConvertUTFResultContainer(sourceIllegal)
1154 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1155 "\xed\xa0\x80\xed\xbf\xbf"));
1158 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1159 ConvertUTFResultContainer(sourceIllegal)
1160 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1161 "\xed\xac\xa0\xed\xb0\x80"));
1164 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1165 ConvertUTFResultContainer(sourceIllegal)
1166 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1167 "\xed\xac\xa0\xed\xb4\x80"));
1170 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1171 ConvertUTFResultContainer(sourceIllegal)
1172 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1173 "\xed\xac\xa0\xed\xbf\xbf"));
1176 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1177 ConvertUTFResultContainer(sourceIllegal)
1178 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1179 "\xed\xaf\xbf\xed\xb0\x80"));
1182 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1183 ConvertUTFResultContainer(sourceIllegal)
1184 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1185 "\xed\xaf\xbf\xed\xb4\x80"));
1188 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1189 ConvertUTFResultContainer(sourceIllegal)
1190 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1191 "\xed\xaf\xbf\xed\xbf\xbf"));
1199 // D14. Noncharacter: A code point that is permanently reserved for
1200 // internal use and that should never be interchanged. Noncharacters
1201 // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1202 // and the values U+FDD0..U+FDEF.
1205 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1206 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1210 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1211 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1216 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1217 "\xf0\x9f\xbf\xbe"));
1220 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1221 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1222 "\xf0\x9f\xbf\xbf"));
1225 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1226 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1227 "\xf0\xaf\xbf\xbe"));
1230 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1231 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1232 "\xf0\xaf\xbf\xbf"));
1235 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1236 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1237 "\xf0\xbf\xbf\xbe"));
1240 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1241 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1242 "\xf0\xbf\xbf\xbf"));
1245 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1246 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1247 "\xf1\x8f\xbf\xbe"));
1250 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1251 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1252 "\xf1\x8f\xbf\xbf"));
1255 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1256 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1257 "\xf1\x9f\xbf\xbe"));
1260 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1261 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1262 "\xf1\x9f\xbf\xbf"));
1265 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1266 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1267 "\xf1\xaf\xbf\xbe"));
1270 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1271 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1272 "\xf1\xaf\xbf\xbf"));
1275 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1276 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1277 "\xf1\xbf\xbf\xbe"));
1280 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1281 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1282 "\xf1\xbf\xbf\xbf"));
1285 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1286 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1287 "\xf2\x8f\xbf\xbe"));
1290 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1291 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1292 "\xf2\x8f\xbf\xbf"));
1295 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1296 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1297 "\xf2\x9f\xbf\xbe"));
1300 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1301 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1302 "\xf2\x9f\xbf\xbf"));
1305 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1306 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1307 "\xf2\xaf\xbf\xbe"));
1310 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1311 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1312 "\xf2\xaf\xbf\xbf"));
1315 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1316 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1317 "\xf2\xbf\xbf\xbe"));
1320 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1321 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1322 "\xf2\xbf\xbf\xbf"));
1325 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1326 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1327 "\xf3\x8f\xbf\xbe"));
1330 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1331 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1332 "\xf3\x8f\xbf\xbf"));
1335 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1336 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1337 "\xf3\x9f\xbf\xbe"));
1340 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1341 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1342 "\xf3\x9f\xbf\xbf"));
1345 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1346 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1347 "\xf3\xaf\xbf\xbe"));
1350 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1351 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1352 "\xf3\xaf\xbf\xbf"));
1355 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1356 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1357 "\xf3\xbf\xbf\xbe"));
1360 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1361 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1362 "\xf3\xbf\xbf\xbf"));
1365 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1366 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1367 "\xf4\x8f\xbf\xbe"));
1370 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1371 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1372 "\xf4\x8f\xbf\xbf"));
1375 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1376 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1380 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1381 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1385 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1386 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1390 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1391 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1395 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1396 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1400 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1401 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1405 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1406 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1410 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1411 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1415 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1416 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1420 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1421 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1425 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1426 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1430 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1431 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1435 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1436 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1440 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1441 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1445 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1446 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1450 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1451 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1455 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1456 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1460 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1461 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1465 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1466 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1470 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1471 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1475 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1476 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1480 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1481 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1485 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1486 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1490 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1491 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1495 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1496 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1500 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1501 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1505 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1506 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1510 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1511 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1515 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1516 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1520 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1521 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1525 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1526 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1530 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1531 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1535 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1536 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1540 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1541 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1545 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1546 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1550 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1551 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1555 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1556 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1560 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1561 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1565 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1566 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1570 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1571 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1575 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1576 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1580 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1581 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1585 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1586 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1591 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1595 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1596 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1600 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1601 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1605 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1606 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1610 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1611 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1615 TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1616 // U+0041 LATIN CAPITAL LETTER A
1617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1618 ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1622 // Sequences with one continuation byte missing
1625 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1626 ConvertUTFResultContainer(sourceExhausted),
1628 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1629 ConvertUTFResultContainer(sourceExhausted),
1631 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1632 ConvertUTFResultContainer(sourceExhausted),
1634 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1635 ConvertUTFResultContainer(sourceExhausted),
1637 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1638 ConvertUTFResultContainer(sourceExhausted),
1640 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1641 ConvertUTFResultContainer(sourceExhausted),
1643 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1644 ConvertUTFResultContainer(sourceExhausted),
1646 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1647 ConvertUTFResultContainer(sourceExhausted),
1649 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1650 ConvertUTFResultContainer(sourceExhausted),
1652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1653 ConvertUTFResultContainer(sourceExhausted),
1655 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1656 ConvertUTFResultContainer(sourceExhausted),
1657 "\xf0\x90\x80", true));
1658 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1659 ConvertUTFResultContainer(sourceExhausted),
1660 "\xf0\xbf\xbf", true));
1661 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1662 ConvertUTFResultContainer(sourceExhausted),
1663 "\xf1\x80\x80", true));
1664 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1665 ConvertUTFResultContainer(sourceExhausted),
1666 "\xf3\xbf\xbf", true));
1667 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1668 ConvertUTFResultContainer(sourceExhausted),
1669 "\xf4\x80\x80", true));
1670 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1671 ConvertUTFResultContainer(sourceExhausted),
1672 "\xf4\x8f\xbf", true));
1674 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1675 ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),