From b9a39da95e734d78d23a567fd8b3704900408aa8 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Mon, 26 Jan 2015 22:05:50 +0000 Subject: [PATCH] Have the UTF conversion wrappers append a null terminator. This is especially useful for the UTF8 -> UTF16 direction, since there is no equivalent of llvm::SmallString<> for wide characters. This means that anyone who wants a null terminated string is forced to manually push and pop their own null terminator. Reviewed by: Reid Kleckner. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227143 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/ConvertUTFWrapper.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp index 8f77bff4668..4feff01b08e 100644 --- a/lib/Support/ConvertUTFWrapper.cpp +++ b/lib/Support/ConvertUTFWrapper.cpp @@ -109,8 +109,9 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE) Src++; - // Just allocate enough space up front. We'll shrink it later. - Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT); + // Just allocate enough space up front. We'll shrink it later. Allocate + // enough that we can fit a null terminator without reallocating. + Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1); UTF8 *Dst = reinterpret_cast(&Out[0]); UTF8 *DstEnd = Dst + Out.size(); @@ -124,6 +125,8 @@ bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { } Out.resize(reinterpret_cast(Dst) - &Out[0]); + Out.push_back(0); + Out.pop_back(); return true; } @@ -140,8 +143,10 @@ bool convertUTF8ToUTF16String(StringRef SrcUTF8, // Allocate the same number of UTF-16 code units as UTF-8 code units. Encoding // as UTF-16 should always require the same amount or less code units than the - // UTF-8 encoding. - DstUTF16.resize(SrcUTF8.size()); + // UTF-8 encoding. Allocate one extra byte for the null terminator though, + // so that someone calling DstUTF16.data() gets a null terminated string. + // We resize down later so we don't have to worry that this over allocates. + DstUTF16.resize(SrcUTF8.size()+1); UTF16 *Dst = &DstUTF16[0]; UTF16 *DstEnd = Dst + DstUTF16.size(); @@ -155,6 +160,8 @@ bool convertUTF8ToUTF16String(StringRef SrcUTF8, } DstUTF16.resize(Dst - &DstUTF16[0]); + DstUTF16.push_back(0); + DstUTF16.pop_back(); return true; } -- 2.34.1