X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FConvertUTFWrapper.cpp;h=1bbef233b82f4437d488c0a606fdda7cf5a5dc9d;hb=0bd34e693f223908930aa924e28e4cf23ba46c24;hp=e45335ddcb6c84467c184d95c10f0bea9c3ec04b;hpb=06c847e83e558f0cc6fea742498b2730eb6837c6;p=oota-llvm.git diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp index e45335ddcb6..1bbef233b82 100644 --- a/lib/Support/ConvertUTFWrapper.cpp +++ b/lib/Support/ConvertUTFWrapper.cpp @@ -109,8 +109,9 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE) Src++; - // Just allocate enough space up front. We'll shrink it later. - Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT); + // Just allocate enough space up front. We'll shrink it later. Allocate + // enough that we can fit a null terminator without reallocating. + Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1); UTF8 *Dst = reinterpret_cast(&Out[0]); UTF8 *DstEnd = Dst + Out.size(); @@ -124,6 +125,46 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { } Out.resize(reinterpret_cast(Dst) - &Out[0]); + Out.push_back(0); + Out.pop_back(); + return true; +} + +bool convertUTF8ToUTF16String(StringRef SrcUTF8, + SmallVectorImpl &DstUTF16) { + assert(DstUTF16.empty()); + + // Avoid OOB by returning early on empty input. + if (SrcUTF8.empty()) { + DstUTF16.push_back(0); + DstUTF16.pop_back(); + return true; + } + + const UTF8 *Src = reinterpret_cast(SrcUTF8.begin()); + const UTF8 *SrcEnd = reinterpret_cast(SrcUTF8.end()); + + // Allocate the same number of UTF-16 code units as UTF-8 code units. Encoding + // as UTF-16 should always require the same amount or less code units than the + // UTF-8 encoding. Allocate one extra byte for the null terminator though, + // so that someone calling DstUTF16.data() gets a null terminated string. + // We resize down later so we don't have to worry that this over allocates. + DstUTF16.resize(SrcUTF8.size()+1); + UTF16 *Dst = &DstUTF16[0]; + UTF16 *DstEnd = Dst + DstUTF16.size(); + + ConversionResult CR = + ConvertUTF8toUTF16(&Src, SrcEnd, &Dst, DstEnd, strictConversion); + assert(CR != targetExhausted); + + if (CR != conversionOK) { + DstUTF16.clear(); + return false; + } + + DstUTF16.resize(Dst - &DstUTF16[0]); + DstUTF16.push_back(0); + DstUTF16.pop_back(); return true; }