X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FChecksum.cpp;h=5fc17c08574d5f1ca03751bbf18c184e4a123501;hb=79869083465aabfcb9d9abd7f31ecfe812e3464b;hp=74ceea8c11b947ea9b477e657d21b5df8e0e753e;hpb=321542683a01c3f334047531e9b487f047129775;p=folly.git diff --git a/folly/Checksum.cpp b/folly/Checksum.cpp index 74ceea8c..5fc17c08 100644 --- a/folly/Checksum.cpp +++ b/folly/Checksum.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2016 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,57 +15,49 @@ */ #include -#include -#include #include #include +#include +#include +#include + +#if FOLLY_SSE_PREREQ(4, 2) +#include +#endif namespace folly { namespace detail { -#ifndef __has_builtin - /* nolint */ - #define __has_builtin(x) 0 -#endif +uint32_t +crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum); +#if FOLLY_SSE_PREREQ(4, 2) -#if __SSE4_2__ && \ - ((__has_builtin(__builtin_ia32_crc32qi) && \ - __has_builtin(__builtin_ia32_crc32di)) || \ - (FOLLY_X64 && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ - (((__GNUC__ * 100) + __GNUC_MINOR__) >= 407))) +uint32_t +crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum); -// Fast SIMD implementation of CRC-32C for x86 with SSE 4.2 -uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, - uint32_t startingChecksum) { +// Fast SIMD implementation of CRC-32 for x86 with pclmul +uint32_t +crc32_hw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { uint32_t sum = startingChecksum; size_t offset = 0; - // Process bytes one at a time until we reach an 8-byte boundary and can - // start doing aligned 64-bit reads. - static uintptr_t ALIGN_MASK = sizeof(uint64_t) - 1; - size_t mask = (size_t)((uintptr_t)data & ALIGN_MASK); - if (mask != 0) { - size_t limit = std::min(nbytes, sizeof(uint64_t) - mask); - while (offset < limit) { - sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]); - offset++; - } + // Process unaligned bytes + if ((uintptr_t)data & 15) { + size_t limit = std::min(nbytes, -(uintptr_t)data & 15); + sum = crc32_sw(data, limit, sum); + offset += limit; + nbytes -= limit; } - // Process 8 bytes at a time until we have fewer than 8 bytes left. - while (offset + sizeof(uint64_t) <= nbytes) { - const uint64_t* src = (const uint64_t*)(data + offset); - sum = __builtin_ia32_crc32di(sum, *src); - offset += sizeof(uint64_t); + if (nbytes >= 16) { + sum = crc32_hw_aligned(sum, (const __m128i*)(data + offset), nbytes / 16); + offset += nbytes & ~15; + nbytes &= 15; } - // Process any bytes remaining after the last aligned 8-byte block. - while (offset < nbytes) { - sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]); - offset++; - } - return sum; + // Remaining unaligned bytes + return crc32_sw(data + offset, nbytes, sum); } bool crc32c_hw_supported() { @@ -73,9 +65,14 @@ bool crc32c_hw_supported() { return id.sse42(); } +bool crc32_hw_supported() { + static folly::CpuId id; + return id.sse42(); +} + #else -uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, +uint32_t crc32_hw(const uint8_t *data, size_t nbytes, uint32_t startingChecksum) { throw std::runtime_error("crc32_hw is not implemented on this platform"); } @@ -84,11 +81,13 @@ bool crc32c_hw_supported() { return false; } +bool crc32_hw_supported() { + return false; +} #endif -uint32_t crc32c_sw(const uint8_t *data, size_t nbytes, - uint32_t startingChecksum) { - +template +uint32_t crc_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { // Reverse the bits in the starting checksum so they'll be in the // right internal format for Boost's CRC engine. // O(1)-time, branchless bit reversal algorithm from @@ -104,14 +103,25 @@ uint32_t crc32c_sw(const uint8_t *data, size_t nbytes, startingChecksum = (startingChecksum >> 16) | (startingChecksum << 16); - static const uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41; - boost::crc_optimal<32, CRC32C_POLYNOMIAL, ~0U, 0, true, true> sum( + boost::crc_optimal<32, CRC_POLYNOMIAL, ~0U, 0, true, true> sum( startingChecksum); sum.process_bytes(data, nbytes); return sum.checksum(); } -} // folly::detail +uint32_t +crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { + constexpr uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41; + return crc_sw(data, nbytes, startingChecksum); +} + +uint32_t +crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { + constexpr uint32_t CRC32_POLYNOMIAL = 0x04C11DB7; + return crc_sw(data, nbytes, startingChecksum); +} + +} // namespace detail uint32_t crc32c(const uint8_t *data, size_t nbytes, uint32_t startingChecksum) { @@ -122,4 +132,17 @@ uint32_t crc32c(const uint8_t *data, size_t nbytes, } } -} // folly +uint32_t crc32(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { + if (detail::crc32_hw_supported()) { + return detail::crc32_hw(data, nbytes, startingChecksum); + } else { + return detail::crc32_sw(data, nbytes, startingChecksum); + } +} + +uint32_t +crc32_type(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { + return ~crc32(data, nbytes, startingChecksum); +} + +} // namespace folly