folly/experimental/StampedPtr.h

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <assert.h>
  20 #include <stdint.h>
  21
  22 namespace folly {
  23
  24 /**
  25  * StampedPtr packs both a pointer to T and a uint16_t into a 64-bit value,
  26  * exploiting the fact that current addresses are limited to 48 bits on
  27  * all current x86-64 and ARM64 processors.
  28  *
  29  * For both x86-64 and ARM64, 64-bit pointers have a canonical
  30  * form in which the upper 16 bits are equal to bit 47.  Intel has
  31  * announced a 57-bit addressing mode (see https://software.intel.com/
  32  * sites/default/files/managed/2b/80/5-level_paging_white_paper.pdf),
  33  * but it is not yet available.  The first problematic platform will
  34  * probably be ARMv8.2, which supports 52-bit virtual addresses.
  35  *
  36  * This code works on all of the platforms I have available for test,
  37  * and probably on all currently-shipping platforms that have a hope of
  38  * compiling folly.  Rather than enumerating the supported platforms via
  39  * ifdef, this code dynamically validates its packing assumption in debug
  40  * builds on each call to a mutating function.  Presumably by the time we
  41  * are running this process in an operating system image that can address
  42  * more than 256TB of memory, RAM cost and the latency of 128-bit CAS
  43  * will have improved enough that this optimization is no longer impactful.
  44  *
  45  * A common approach to this kind of packing seems to be to just assume
  46  * the top 16 bits are zero, but https://github.com/LuaJIT/LuaJIT/issues/49
  47  * indicates that ARM64 platforms in the wild are actually setting bit 47
  48  * in their stack addresses.  That means that we need to extend bit 47 to
  49  * do the right thing (it's not expensive, it compiles to one instruction
  50  * on x86-64 and arm64).
  51  *
  52  * Compare to PackedSyncPtr and DiscriminatedPtr, which perform similar
  53  * packing but add additional functionality.  The name is taken from
  54  * Java's AtomicStampedReference.  Unlike PackedSyncPtr, which tries to
  55  * act pointer-like, this class acts more like a pair whose elements are
  56  * named ptr and stamp.  It also allows direct access to the internal
  57  * raw field: since we're already at the metal you might want to play
  58  * additional games.  It is guaranteed that a zero raw value gets decoded
  59  * as a (ptr,stamp) of (nullptr,0).
  60  */
  61 template <typename T>
  62 struct StampedPtr {
  63   /**
  64    * The packing is not guaranteed, except that it is guaranteed that
  65    * raw == 0 iff ptr() == nullptr && stamp() == 0.
  66    */
  67   uint64_t raw;
  68
  69   /* IMPORTANT: default initialization doesn't result in a sane state */
  70
  71   T* ptr() const {
  72     return unpackPtr(raw);
  73   }
  74
  75   uint16_t stamp() const {
  76     return unpackStamp(raw);
  77   }
  78
  79   void set(T* ptr, uint16_t stamp) {
  80     raw = pack(ptr, stamp);
  81   }
  82
  83   void setPtr(T* ptr) {
  84     raw = pack(ptr, unpackStamp(raw));
  85   }
  86
  87   void setStamp(uint16_t stamp) {
  88     raw = pack(unpackPtr(raw), stamp);
  89   }
  90
  91   static T* unpackPtr(uint64_t raw) {
  92     // Canonical form means we need to extend bit 47 of the pointer to
  93     // bits 48..63 (unless the operating system never hands those pointers
  94     // to us, which is difficult to prove).  Signed right-shift of a
  95     // negative number is implementation-defined in C++ (not undefined!),
  96     // but actually does the right thing on all the platforms I can find.
  97     auto extended = static_cast<int64_t>(raw) >> kInternalStampBits;
  98     return reinterpret_cast<T*>(static_cast<intptr_t>(extended));
  99   }
 100
 101   static uint16_t unpackStamp(uint64_t raw) {
 102     return static_cast<uint16_t>(raw);
 103   }
 104
 105   static uint64_t pack(T* ptr, uint16_t stamp) {
 106     auto shifted = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr))
 107         << kInternalStampBits;
 108     uint64_t raw = shifted | stamp;
 109     assert(unpackPtr(raw) == ptr);
 110     assert(unpackStamp(raw) == stamp);
 111     return raw;
 112   }
 113
 114  private:
 115   // On 32-bit platforms it works okay to store a ptr in the top 48
 116   // bits of a 64-bit value, but it will result in unnecessary work.
 117   // If we align the pointer part at word granularity when we have the
 118   // space then no shifting will ever be needed.
 119   static constexpr unsigned kInternalStampBits = sizeof(void*) == 4 ? 32 : 16;
 120 };
 121
 122 template <typename T>
 123 StampedPtr<T> makeStampedPtr(T* ptr, uint16_t stamp) {
 124   return StampedPtr<T>{StampedPtr<T>::pack(ptr, stamp)};
 125 }
 126
 127 } // namespace folly