From 8c74f7f2991a10977fd5a003b2901b56eb2e19a8 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 13 Sep 2011 19:42:16 +0000 Subject: [PATCH] Add the DataExtractor utility class. It is an endian-aware helper that can read data from a StringRef. It will come in handy for DWARF parsing. This class is inspired by LLDB's DataExtractor, but is stripped down to the bare minimum needed for DWARF. Comes with unit tests! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139626 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/DataExtractor.h | 352 ++++++++++++++++++++++++ lib/Support/CMakeLists.txt | 1 + lib/Support/DataExtractor.cpp | 175 ++++++++++++ unittests/Support/DataExtractorTest.cpp | 111 ++++++++ 4 files changed, 639 insertions(+) create mode 100644 include/llvm/Support/DataExtractor.h create mode 100644 lib/Support/DataExtractor.cpp create mode 100644 unittests/Support/DataExtractorTest.cpp diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h new file mode 100644 index 00000000000..506ec96930d --- /dev/null +++ b/include/llvm/Support/DataExtractor.h @@ -0,0 +1,352 @@ +//===-- DataExtractor.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_DATAEXTRACTOR_H +#define LLVM_SUPPORT_DATAEXTRACTOR_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class DataExtractor { + StringRef Data; + uint8_t IsLittleEndian; + uint8_t PointerSize; +public: + /// Construct with a buffer that is owned by the caller. + /// + /// This constructor allows us to use data that is owned by the + /// caller. The data must stay around as long as this object is + /// valid. + DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t PointerSize) + : Data(Data), IsLittleEndian(IsLittleEndian), PointerSize(PointerSize) {} + + /// getData - Get the data pointed to by this extractor. + StringRef getData() const { return Data; } + /// isLittleEndian - Get the endianess for this extractor. + bool isLittleEndian() const { return IsLittleEndian; } + /// getAddressSize - Get the address size for this extractor. + uint8_t getAddressSize() const { return PointerSize; } + + /// Extract a C string from \a *offset_ptr. + /// + /// Returns a pointer to a C String from the data at the offset + /// pointed to by \a offset_ptr. A variable length NULL terminated C + /// string will be extracted and the \a offset_ptr will be + /// updated with the offset of the byte that follows the NULL + /// terminator byte. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// A pointer to the C string value in the data. If the offset + /// pointed to by \a offset_ptr is out of bounds, or if the + /// offset plus the length of the C string is out of bounds, + /// NULL will be returned. + const char *getCStr(uint32_t *offset_ptr) const; + + /// Extract an unsigned integer of size \a byte_size from \a + /// *offset_ptr. + /// + /// Extract a single unsigned integer value and update the offset + /// pointed to by \a offset_ptr. The size of the extracted integer + /// is specified by the \a byte_size argument. \a byte_size should + /// have a value greater than or equal to one and less than or equal + /// to eight since the return value is 64 bits wide. Any + /// \a byte_size values less than 1 or greater than 8 will result in + /// nothing being extracted, and zero being returned. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @param[in] byte_size + /// The size in byte of the integer to extract. + /// + /// @return + /// The unsigned integer value that was extracted, or zero on + /// failure. + uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const; + + /// Extract an signed integer of size \a byte_size from \a *offset_ptr. + /// + /// Extract a single signed integer value (sign extending if required) + /// and update the offset pointed to by \a offset_ptr. The size of + /// the extracted integer is specified by the \a byte_size argument. + /// \a byte_size should have a value greater than or equal to one + /// and less than or equal to eight since the return value is 64 + /// bits wide. Any \a byte_size values less than 1 or greater than + /// 8 will result in nothing being extracted, and zero being returned. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @param[in] byte_size + /// The size in byte of the integer to extract. + /// + /// @return + /// The sign extended signed integer value that was extracted, + /// or zero on failure. + int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const; + + //------------------------------------------------------------------ + /// Extract an pointer from \a *offset_ptr. + /// + /// Extract a single pointer from the data and update the offset + /// pointed to by \a offset_ptr. The size of the extracted pointer + /// comes from the \a m_addr_size member variable and should be + /// set correctly prior to extracting any pointer values. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// The extracted pointer value as a 64 integer. + uint64_t getAddress(uint32_t *offset_ptr) const { + return getUnsigned(offset_ptr, PointerSize); + } + + /// Extract a uint8_t value from \a *offset_ptr. + /// + /// Extract a single uint8_t from the binary data at the offset + /// pointed to by \a offset_ptr, and advance the offset on success. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// The extracted uint8_t value. + uint8_t getU8(uint32_t *offset_ptr) const; + + /// Extract \a count uint8_t values from \a *offset_ptr. + /// + /// Extract \a count uint8_t values from the binary data at the + /// offset pointed to by \a offset_ptr, and advance the offset on + /// success. The extracted values are copied into \a dst. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @param[out] dst + /// A buffer to copy \a count uint8_t values into. \a dst must + /// be large enough to hold all requested data. + /// + /// @param[in] count + /// The number of uint8_t values to extract. + /// + /// @return + /// \a dst if all values were properly extracted and copied, + /// NULL otherise. + uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const; + + //------------------------------------------------------------------ + /// Extract a uint16_t value from \a *offset_ptr. + /// + /// Extract a single uint16_t from the binary data at the offset + /// pointed to by \a offset_ptr, and update the offset on success. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// The extracted uint16_t value. + //------------------------------------------------------------------ + uint16_t getU16(uint32_t *offset_ptr) const; + + /// Extract \a count uint16_t values from \a *offset_ptr. + /// + /// Extract \a count uint16_t values from the binary data at the + /// offset pointed to by \a offset_ptr, and advance the offset on + /// success. The extracted values are copied into \a dst. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @param[out] dst + /// A buffer to copy \a count uint16_t values into. \a dst must + /// be large enough to hold all requested data. + /// + /// @param[in] count + /// The number of uint16_t values to extract. + /// + /// @return + /// \a dst if all values were properly extracted and copied, + /// NULL otherise. + uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const; + + /// Extract a uint32_t value from \a *offset_ptr. + /// + /// Extract a single uint32_t from the binary data at the offset + /// pointed to by \a offset_ptr, and update the offset on success. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// The extracted uint32_t value. + uint32_t getU32(uint32_t *offset_ptr) const; + + /// Extract \a count uint32_t values from \a *offset_ptr. + /// + /// Extract \a count uint32_t values from the binary data at the + /// offset pointed to by \a offset_ptr, and advance the offset on + /// success. The extracted values are copied into \a dst. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @param[out] dst + /// A buffer to copy \a count uint32_t values into. \a dst must + /// be large enough to hold all requested data. + /// + /// @param[in] count + /// The number of uint32_t values to extract. + /// + /// @return + /// \a dst if all values were properly extracted and copied, + /// NULL otherise. + uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const; + + /// Extract a uint64_t value from \a *offset_ptr. + /// + /// Extract a single uint64_t from the binary data at the offset + /// pointed to by \a offset_ptr, and update the offset on success. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// The extracted uint64_t value. + uint64_t getU64(uint32_t *offset_ptr) const; + + /// Extract \a count uint64_t values from \a *offset_ptr. + /// + /// Extract \a count uint64_t values from the binary data at the + /// offset pointed to by \a offset_ptr, and advance the offset on + /// success. The extracted values are copied into \a dst. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @param[out] dst + /// A buffer to copy \a count uint64_t values into. \a dst must + /// be large enough to hold all requested data. + /// + /// @param[in] count + /// The number of uint64_t values to extract. + /// + /// @return + /// \a dst if all values were properly extracted and copied, + /// NULL otherise. + uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const; + + /// Extract a signed LEB128 value from \a *offset_ptr. + /// + /// Extracts an signed LEB128 number from this object's data + /// starting at the offset pointed to by \a offset_ptr. The offset + /// pointed to by \a offset_ptr will be updated with the offset of + /// the byte following the last extracted byte. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// The extracted signed integer value. + int64_t getSLEB128(uint32_t *offset_ptr) const; + + /// Extract a unsigned LEB128 value from \a *offset_ptr. + /// + /// Extracts an unsigned LEB128 number from this object's data + /// starting at the offset pointed to by \a offset_ptr. The offset + /// pointed to by \a offset_ptr will be updated with the offset of + /// the byte following the last extracted byte. + /// + /// @param[in,out] offset_ptr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// @return + /// The extracted unsigned integer value. + uint64_t getULEB128(uint32_t *offset_ptr) const; + + /// Test the validity of \a offset. + /// + /// @return + /// \b true if \a offset is a valid offset into the data in this + /// object, \b false otherwise. + bool isValidOffset(uint32_t offset) const { return Data.size() > offset; } + + /// Test the availability of \a length bytes of data from \a offset. + /// + /// @return + /// \b true if \a offset is a valid offset and there are \a + /// length bytes available at that offset, \b false otherwise. + bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const { + return offset + length >= offset && isValidOffset(offset + length - 1); + } +}; + +} // namespace llvm + +#endif diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 80a237ac74e..63a833c3804 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_library(LLVMSupport CommandLine.cpp ConstantRange.cpp CrashRecoveryContext.cpp + DataExtractor.cpp Debug.cpp DeltaAlgorithm.cpp DAGDeltaAlgorithm.cpp diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp new file mode 100644 index 00000000000..b946c1df836 --- /dev/null +++ b/lib/Support/DataExtractor.cpp @@ -0,0 +1,175 @@ +//===-- DataExtractor.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/SwapByteOrder.h" +using namespace llvm; + +template +static T getU(uint32_t *offset_ptr, const DataExtractor *de, + bool isLittleEndian, const char *Data) { + T val = 0; + uint32_t offset = *offset_ptr; + if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { + std::memcpy(&val, &Data[offset], sizeof(val)); + if (sys::isLittleEndianHost() != isLittleEndian) + val = sys::SwapByteOrder(val); + + // Advance the offset + *offset_ptr += sizeof(val); + } + return val; +} + +template +static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count, + const DataExtractor *de, bool isLittleEndian, const char *Data){ + uint32_t offset = *offset_ptr; + + if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) { + for (T *value_ptr = dst, *end = dst + count; value_ptr != end; + ++value_ptr, offset += sizeof(*dst)) + *value_ptr = getU(offset_ptr, de, isLittleEndian, Data); + // Advance the offset + *offset_ptr = offset; + // Return a non-NULL pointer to the converted data as an indicator of + // success + return dst; + } + return NULL; +} + +uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint8_t * +DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const { + return getUs(offset_ptr, dst, count, this, IsLittleEndian, + Data.data()); +} + + +uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, + uint32_t count) const { + return getUs(offset_ptr, dst, count, this, IsLittleEndian, + Data.data()); +} + +uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst, + uint32_t count) const { + return getUs(offset_ptr, dst, count, this, IsLittleEndian, + Data.data());; +} + +uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data()); +} + +uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst, + uint32_t count) const { + return getUs(offset_ptr, dst, count, this, IsLittleEndian, + Data.data()); +} + +uint64_t +DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const { + switch (byte_size) { + case 1: + return getU8(offset_ptr); + case 2: + return getU16(offset_ptr); + case 4: + return getU32(offset_ptr); + case 8: + return getU64(offset_ptr); + } + llvm_unreachable("getUnsigned unhandled case!"); +} + +int64_t +DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const { + switch (byte_size) { + case 1: + return (int8_t)getU8(offset_ptr); + case 2: + return (int16_t)getU16(offset_ptr); + case 4: + return (int32_t)getU32(offset_ptr); + case 8: + return (int64_t)getU64(offset_ptr); + } + llvm_unreachable("getSigned unhandled case!"); +} + +const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { + uint32_t offset = *offset_ptr; + StringRef::size_type pos = Data.find('\0', offset); + if (pos != StringRef::npos) { + *offset_ptr = pos + 1; + return Data.data() + offset; + } + return NULL; +} + +uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { + uint64_t result = 0; + if (Data.empty()) + return 0; + + unsigned shift = 0; + uint32_t offset = *offset_ptr; + uint8_t byte = 0; + + while (isValidOffset(offset)) { + byte = Data[offset++]; + result |= (byte & 0x7f) << shift; + shift += 7; + if ((byte & 0x80) == 0) + break; + } + + *offset_ptr = offset; + return result; +} + +int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { + int64_t result = 0; + if (Data.empty()) + return 0; + + unsigned shift = 0; + uint32_t offset = *offset_ptr; + uint8_t byte = 0; + + while (isValidOffset(offset)) { + byte = Data[offset++]; + result |= (byte & 0x7f) << shift; + shift += 7; + if ((byte & 0x80) == 0) + break; + } + + // Sign bit of byte is 2nd high order bit (0x40) + if (shift < 64 && (byte & 0x40)) + result |= -(1 << shift); + + *offset_ptr = offset; + return result; +} diff --git a/unittests/Support/DataExtractorTest.cpp b/unittests/Support/DataExtractorTest.cpp new file mode 100644 index 00000000000..6de96c40c40 --- /dev/null +++ b/unittests/Support/DataExtractorTest.cpp @@ -0,0 +1,111 @@ +//===- llvm/unittest/Support/DataExtractorTest.cpp - DataExtractor tests --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "llvm/Support/DataExtractor.h" +using namespace llvm; + +namespace { + +const char numberData[] = "\x80\x90\xFF\xFF\x80\x00\x00\x00"; +const char stringData[] = "hellohello\0hello"; +const char leb128data[] = "\xA6\x49"; + +TEST(DataExtractorTest, OffsetOverflow) { + DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8); + EXPECT_FALSE(DE.isValidOffsetForDataOfSize(-2U, 5)); +} + +TEST(DataExtractorTest, UnsignedNumbers) { + DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8); + uint32_t offset = 0; + + EXPECT_EQ(0x80U, DE.getU8(&offset)); + EXPECT_EQ(1U, offset); + offset = 0; + EXPECT_EQ(0x8090U, DE.getU16(&offset)); + EXPECT_EQ(2U, offset); + offset = 0; + EXPECT_EQ(0x8090FFFFU, DE.getU32(&offset)); + EXPECT_EQ(4U, offset); + offset = 0; + EXPECT_EQ(0x8090FFFF80000000U, DE.getU64(&offset)); + EXPECT_EQ(8U, offset); + offset = 0; + EXPECT_EQ(0x8090FFFF80000000U, DE.getAddress(&offset)); + EXPECT_EQ(8U, offset); + offset = 0; + + uint32_t data[2]; + EXPECT_EQ(data, DE.getU32(&offset, data, 2)); + EXPECT_EQ(0x8090FFFFU, data[0]); + EXPECT_EQ(0x80000000U, data[1]); + EXPECT_EQ(8U, offset); + offset = 0; + + // Now for little endian. + DE = DataExtractor(StringRef(numberData, sizeof(numberData)-1), true, 4); + EXPECT_EQ(0x9080U, DE.getU16(&offset)); + EXPECT_EQ(2U, offset); + offset = 0; + EXPECT_EQ(0xFFFF9080U, DE.getU32(&offset)); + EXPECT_EQ(4U, offset); + offset = 0; + EXPECT_EQ(0x80FFFF9080U, DE.getU64(&offset)); + EXPECT_EQ(8U, offset); + offset = 0; + EXPECT_EQ(0xFFFF9080U, DE.getAddress(&offset)); + EXPECT_EQ(4U, offset); + offset = 0; + + EXPECT_EQ(data, DE.getU32(&offset, data, 2)); + EXPECT_EQ(0xFFFF9080U, data[0]); + EXPECT_EQ(0x80U, data[1]); + EXPECT_EQ(8U, offset); +} + +TEST(DataExtractorTest, SignedNumbers) { + DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8); + uint32_t offset = 0; + + EXPECT_EQ(-128, DE.getSigned(&offset, 1)); + EXPECT_EQ(1U, offset); + offset = 0; + EXPECT_EQ(-32624, DE.getSigned(&offset, 2)); + EXPECT_EQ(2U, offset); + offset = 0; + EXPECT_EQ(-2137980929, DE.getSigned(&offset, 4)); + EXPECT_EQ(4U, offset); + offset = 0; + EXPECT_EQ(-9182558167379214336LL, DE.getSigned(&offset, 8)); + EXPECT_EQ(8U, offset); +} + +TEST(DataExtractorTest, Strings) { + DataExtractor DE(StringRef(stringData, sizeof(stringData)-1), false, 8); + uint32_t offset = 0; + + EXPECT_EQ(stringData, DE.getCStr(&offset)); + EXPECT_EQ(11U, offset); + EXPECT_EQ(NULL, DE.getCStr(&offset)); + EXPECT_EQ(11U, offset); +} + +TEST(DataExtractorTest, LEB128) { + DataExtractor DE(StringRef(leb128data, sizeof(leb128data)-1), false, 8); + uint32_t offset = 0; + + EXPECT_EQ(9382ULL, DE.getULEB128(&offset)); + EXPECT_EQ(2U, offset); + offset = 0; + EXPECT_EQ(-7002LL, DE.getSLEB128(&offset)); + EXPECT_EQ(2U, offset); +} + +} -- 2.34.1