/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "folly/Checksum.h"
+#include <folly/Checksum.h>
#include <algorithm>
#include <stdexcept>
#include <boost/crc.hpp>
-#include "folly/CpuId.h"
+#include <folly/CpuId.h>
+
+#if FOLLY_X64 && (__SSE4_2__ || defined(__clang__) || __GNUC_PREREQ(4, 9))
+#include <nmmintrin.h>
+#endif
namespace folly {
namespace detail {
-#if defined(__x86_64__) && defined (__GNUC__)
+#if FOLLY_X64 && (__SSE4_2__ || defined(__clang__) || __GNUC_PREREQ(4, 9))
// Fast SIMD implementation of CRC-32C for x86 with SSE 4.2
+FOLLY_TARGET_ATTRIBUTE("sse4.2")
uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
uint32_t startingChecksum) {
uint32_t sum = startingChecksum;
if (mask != 0) {
size_t limit = std::min(nbytes, sizeof(uint64_t) - mask);
while (offset < limit) {
- sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
+ sum = (uint32_t)_mm_crc32_u8(sum, data[offset]);
offset++;
}
}
// Process 8 bytes at a time until we have fewer than 8 bytes left.
while (offset + sizeof(uint64_t) <= nbytes) {
const uint64_t* src = (const uint64_t*)(data + offset);
- sum = __builtin_ia32_crc32di(sum, *src);
+ sum = _mm_crc32_u64(sum, *src);
offset += sizeof(uint64_t);
}
// Process any bytes remaining after the last aligned 8-byte block.
while (offset < nbytes) {
- sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
+ sum = (uint32_t)_mm_crc32_u8(sum, data[offset]);
offset++;
}
return sum;