folly/Checksum.cpp

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <folly/Checksum.h>
  18 #include <boost/crc.hpp>
  19 #include <folly/CpuId.h>
  20 #include <folly/detail/ChecksumDetail.h>
  21 #include <algorithm>
  22 #include <stdexcept>
  23
  24 #if FOLLY_SSE_PREREQ(4, 2)
  25 #include <nmmintrin.h>
  26 #endif
  27
  28 namespace folly {
  29
  30 namespace detail {
  31
  32 uint32_t
  33 crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum);
  34 #if FOLLY_SSE_PREREQ(4, 2)
  35
  36 // Fast SIMD implementation of CRC-32C for x86 with SSE 4.2
  37 FOLLY_TARGET_ATTRIBUTE("sse4.2")
  38 uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
  39     uint32_t startingChecksum) {
  40   uint32_t sum = startingChecksum;
  41   size_t offset = 0;
  42
  43   // Process bytes one at a time until we reach an 8-byte boundary and can
  44   // start doing aligned 64-bit reads.
  45   static uintptr_t ALIGN_MASK = sizeof(uint64_t) - 1;
  46   size_t mask = (size_t)((uintptr_t)data & ALIGN_MASK);
  47   if (mask != 0) {
  48     size_t limit = std::min(nbytes, sizeof(uint64_t) - mask);
  49     while (offset < limit) {
  50       sum = (uint32_t)_mm_crc32_u8(sum, data[offset]);
  51       offset++;
  52     }
  53   }
  54
  55   // Process 8 bytes at a time until we have fewer than 8 bytes left.
  56   while (offset + sizeof(uint64_t) <= nbytes) {
  57     const uint64_t* src = (const uint64_t*)(data + offset);
  58     sum = uint32_t(_mm_crc32_u64(sum, *src));
  59     offset += sizeof(uint64_t);
  60   }
  61
  62   // Process any bytes remaining after the last aligned 8-byte block.
  63   while (offset < nbytes) {
  64     sum = (uint32_t)_mm_crc32_u8(sum, data[offset]);
  65     offset++;
  66   }
  67   return sum;
  68 }
  69
  70 uint32_t
  71 crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum);
  72
  73 // Fast SIMD implementation of CRC-32 for x86 with pclmul
  74 uint32_t
  75 crc32_hw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
  76   uint32_t sum = startingChecksum;
  77   size_t offset = 0;
  78
  79   // Process unaligned bytes
  80   if ((uintptr_t)data & 15) {
  81     size_t limit = std::min(nbytes, -(uintptr_t)data & 15);
  82     sum = crc32_sw(data, limit, sum);
  83     offset += limit;
  84     nbytes -= limit;
  85   }
  86
  87   if (nbytes >= 16) {
  88     sum = crc32_hw_aligned(sum, (const __m128i*)(data + offset), nbytes / 16);
  89     offset += nbytes & ~15;
  90     nbytes &= 15;
  91   }
  92
  93   // Remaining unaligned bytes
  94   return crc32_sw(data + offset, nbytes, sum);
  95 }
  96
  97 bool crc32c_hw_supported() {
  98   static folly::CpuId id;
  99   return id.sse42();
 100 }
 101
 102 bool crc32_hw_supported() {
 103   static folly::CpuId id;
 104   return id.sse42();
 105 }
 106
 107 #else
 108
 109 uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
 110     uint32_t startingChecksum) {
 111   throw std::runtime_error("crc32_hw is not implemented on this platform");
 112 }
 113
 114 uint32_t crc32_hw(const uint8_t *data, size_t nbytes,
 115     uint32_t startingChecksum) {
 116   throw std::runtime_error("crc32_hw is not implemented on this platform");
 117 }
 118
 119 bool crc32c_hw_supported() {
 120   return false;
 121 }
 122
 123 bool crc32_hw_supported() {
 124   return false;
 125 }
 126 #endif
 127
 128 template <uint32_t CRC_POLYNOMIAL>
 129 uint32_t crc_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
 130   // Reverse the bits in the starting checksum so they'll be in the
 131   // right internal format for Boost's CRC engine.
 132   //     O(1)-time, branchless bit reversal algorithm from
 133   //     http://graphics.stanford.edu/~seander/bithacks.html
 134   startingChecksum = ((startingChecksum >> 1) & 0x55555555) |
 135       ((startingChecksum & 0x55555555) << 1);
 136   startingChecksum = ((startingChecksum >> 2) & 0x33333333) |
 137       ((startingChecksum & 0x33333333) << 2);
 138   startingChecksum = ((startingChecksum >> 4) & 0x0f0f0f0f) |
 139       ((startingChecksum & 0x0f0f0f0f) << 4);
 140   startingChecksum = ((startingChecksum >> 8) & 0x00ff00ff) |
 141       ((startingChecksum & 0x00ff00ff) << 8);
 142   startingChecksum = (startingChecksum >> 16) |
 143       (startingChecksum << 16);
 144
 145   boost::crc_optimal<32, CRC_POLYNOMIAL, ~0U, 0, true, true> sum(
 146       startingChecksum);
 147   sum.process_bytes(data, nbytes);
 148   return sum.checksum();
 149 }
 150
 151 uint32_t
 152 crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
 153   constexpr uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41;
 154   return crc_sw<CRC32C_POLYNOMIAL>(data, nbytes, startingChecksum);
 155 }
 156
 157 uint32_t
 158 crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
 159   constexpr uint32_t CRC32_POLYNOMIAL = 0x04C11DB7;
 160   return crc_sw<CRC32_POLYNOMIAL>(data, nbytes, startingChecksum);
 161 }
 162
 163 } // folly::detail
 164
 165 uint32_t crc32c(const uint8_t *data, size_t nbytes,
 166     uint32_t startingChecksum) {
 167   if (detail::crc32c_hw_supported()) {
 168     return detail::crc32c_hw(data, nbytes, startingChecksum);
 169   } else {
 170     return detail::crc32c_sw(data, nbytes, startingChecksum);
 171   }
 172 }
 173
 174 uint32_t crc32(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
 175   if (detail::crc32_hw_supported()) {
 176     return detail::crc32_hw(data, nbytes, startingChecksum);
 177   } else {
 178     return detail::crc32_sw(data, nbytes, startingChecksum);
 179   }
 180 }
 181
 182 } // folly