tweak

[cdsspec-compiler.git] / benchmark / cliffc-hashtable / NonBlockingHashMap.java
diff --git a/benchmark/cliffc-hashtable/NonBlockingHashMap.java b/benchmark/cliffc-hashtable/NonBlockingHashMap.java

new file mode 100644 (file)

index 0000000..5995e48
--- /dev/null
+++ b/benchmark/cliffc-hashtable/NonBlockingHashMap.java
@@ -0,0 +1,1292 @@
+/*\r
+ * Written by Cliff Click and released to the public domain, as explained at\r
+ * http://creativecommons.org/licenses/publicdomain\r
+ */\r
+\r
+package org.cliffc.high_scale_lib;\r
+import java.io.IOException;\r
+import java.io.Serializable;\r
+import java.lang.reflect.Field;\r
+import java.util.*;\r
+import java.util.concurrent.ConcurrentMap;\r
+import java.util.concurrent.atomic.*;\r
+import sun.misc.Unsafe;\r
+\r
+/**\r
+ * A lock-free alternate implementation of {@link java.util.concurrent.ConcurrentHashMap}\r
+ * with better scaling properties and generally lower costs to mutate the Map.\r
+ * It provides identical correctness properties as ConcurrentHashMap.  All\r
+ * operations are non-blocking and multi-thread safe, including all update\r
+ * operations.  {@link NonBlockingHashMap} scales substatially better than\r
+ * {@link java.util.concurrent.ConcurrentHashMap} for high update rates, even with a\r
+ * large concurrency factor.  Scaling is linear up to 768 CPUs on a 768-CPU\r
+ * Azul box, even with 100% updates or 100% reads or any fraction in-between.\r
+ * Linear scaling up to all cpus has been observed on a 32-way Sun US2 box,\r
+ * 32-way Sun Niagra box, 8-way Intel box and a 4-way Power box.\r
+ *\r
+ * This class obeys the same functional specification as {@link\r
+ * java.util.Hashtable}, and includes versions of methods corresponding to\r
+ * each method of <tt>Hashtable</tt>. However, even though all operations are\r
+ * thread-safe, operations do <em>not</em> entail locking and there is\r
+ * <em>not</em> any support for locking the entire table in a way that\r
+ * prevents all access.  This class is fully interoperable with\r
+ * <tt>Hashtable</tt> in programs that rely on its thread safety but not on\r
+ * its synchronization details.\r
+ *\r
+ * <p> Operations (including <tt>put</tt>) generally do not block, so may\r
+ * overlap with other update operations (including other <tt>puts</tt> and\r
+ * <tt>removes</tt>).  Retrievals reflect the results of the most recently\r
+ * <em>completed</em> update operations holding upon their onset.  For\r
+ * aggregate operations such as <tt>putAll</tt>, concurrent retrievals may\r
+ * reflect insertion or removal of only some entries.  Similarly, Iterators\r
+ * and Enumerations return elements reflecting the state of the hash table at\r
+ * some point at or since the creation of the iterator/enumeration.  They do\r
+ * <em>not</em> throw {@link ConcurrentModificationException}.  However,\r
+ * iterators are designed to be used by only one thread at a time.\r
+ *\r
+ * <p> Very full tables, or tables with high reprobe rates may trigger an\r
+ * internal resize operation to move into a larger table.  Resizing is not\r
+ * terribly expensive, but it is not free either; during resize operations\r
+ * table throughput may drop somewhat.  All threads that visit the table\r
+ * during a resize will 'help' the resizing but will still be allowed to\r
+ * complete their operation before the resize is finished (i.e., a simple\r
+ * 'get' operation on a million-entry table undergoing resizing will not need\r
+ * to block until the entire million entries are copied).\r
+ *\r
+ * <p>This class and its views and iterators implement all of the\r
+ * <em>optional</em> methods of the {@link Map} and {@link Iterator}\r
+ * interfaces.\r
+ *\r
+ * <p> Like {@link Hashtable} but unlike {@link HashMap}, this class\r
+ * does <em>not</em> allow <tt>null</tt> to be used as a key or value.\r
+ *\r
+ *\r
+ * @since 1.5\r
+ * @author Cliff Click\r
+ * @param <TypeK> the type of keys maintained by this map\r
+ * @param <TypeV> the type of mapped values\r
+ *\r
+ * @version 1.1.2\r
+ * @author Prashant Deva - moved hash() function out of get_impl() so it is\r
+ * not calculated multiple times.\r
+ */\r
+\r
+public class NonBlockingHashMap<TypeK, TypeV>\r
+  extends AbstractMap<TypeK, TypeV>\r
+  implements ConcurrentMap<TypeK, TypeV>, Cloneable, Serializable {\r
+\r
+  private static final long serialVersionUID = 1234123412341234123L;\r
+\r
+  private static final int REPROBE_LIMIT=10; // Too many reprobes then force a table-resize\r
+\r
+  // --- Bits to allow Unsafe access to arrays\r
+  private static final Unsafe _unsafe = UtilUnsafe.getUnsafe();\r
+  private static final int _Obase  = _unsafe.arrayBaseOffset(Object[].class);\r
+  private static final int _Oscale = _unsafe.arrayIndexScale(Object[].class);\r
+  private static long rawIndex(final Object[] ary, final int idx) {\r
+    assert idx >= 0 && idx < ary.length;\r
+    return _Obase + idx * _Oscale;\r
+  }\r
+\r
+  // --- Setup to use Unsafe\r
+  private static final long _kvs_offset;\r
+  static {                      // <clinit>\r
+    Field f = null;\r
+    try { f = NonBlockingHashMap.class.getDeclaredField("_kvs"); }\r
+    catch( java.lang.NoSuchFieldException e ) { throw new RuntimeException(e); }\r
+    _kvs_offset = _unsafe.objectFieldOffset(f);\r
+  }\r
+  private final boolean CAS_kvs( final Object[] oldkvs, final Object[] newkvs ) {\r
+    return _unsafe.compareAndSwapObject(this, _kvs_offset, oldkvs, newkvs );\r
+  }\r
+\r
+  // --- Adding a 'prime' bit onto Values via wrapping with a junk wrapper class\r
+  private static final class Prime {\r
+    final Object _V;\r
+    Prime( Object V ) { _V = V; }\r
+    static Object unbox( Object V ) { return V instanceof Prime ? ((Prime)V)._V : V; }\r
+  }\r
+\r
+  // --- hash ----------------------------------------------------------------\r
+  // Helper function to spread lousy hashCodes\r
+  private static final int hash(final Object key) {\r
+    int h = key.hashCode();     // The real hashCode call\r
+    // Spread bits to regularize both segment and index locations,\r
+    // using variant of single-word Wang/Jenkins hash.\r
+    h += (h <<  15) ^ 0xffffcd7d;\r
+    h ^= (h >>> 10);\r
+    h += (h <<   3);\r
+    h ^= (h >>>  6);\r
+    h += (h <<   2) + (h << 14);\r
+    return h ^ (h >>> 16);\r
+  }\r
+\r
+  // --- The Hash Table --------------------\r
+  // Slot 0 is always used for a 'CHM' entry below to hold the interesting\r
+  // bits of the hash table.  Slot 1 holds full hashes as an array of ints.\r
+  // Slots {2,3}, {4,5}, etc hold {Key,Value} pairs.  The entire hash table\r
+  // can be atomically replaced by CASing the _kvs field.\r
+  //\r
+  // Why is CHM buried inside the _kvs Object array, instead of the other way\r
+  // around?  The CHM info is used during resize events and updates, but not\r
+  // during standard 'get' operations.  I assume 'get' is much more frequent\r
+  // than 'put'.  'get' can skip the extra indirection of skipping through the\r
+  // CHM to reach the _kvs array.\r
+  private transient Object[] _kvs;\r
+  private static final CHM   chm   (Object[] kvs) { return (CHM  )kvs[0]; }\r
+  private static final int[] hashes(Object[] kvs) { return (int[])kvs[1]; }\r
+  // Number of K,V pairs in the table\r
+  private static final int len(Object[] kvs) { return (kvs.length-2)>>1; }\r
+\r
+  // Time since last resize\r
+  private transient long _last_resize_milli;\r
+\r
+  // --- Minimum table size ----------------\r
+  // Pick size 8 K/V pairs, which turns into (8*2+2)*4+12 = 84 bytes on a\r
+  // standard 32-bit HotSpot, and (8*2+2)*8+12 = 156 bytes on 64-bit Azul.\r
+  private static final int MIN_SIZE_LOG=3;             //\r
+  private static final int MIN_SIZE=(1<<MIN_SIZE_LOG); // Must be power of 2\r
+\r
+  // --- Sentinels -------------------------\r
+  // No-Match-Old - putIfMatch does updates only if it matches the old value,\r
+  // and NO_MATCH_OLD basically counts as a wildcard match.\r
+  private static final Object NO_MATCH_OLD = new Object(); // Sentinel\r
+  // Match-Any-not-null - putIfMatch does updates only if it find a real old\r
+  // value.\r
+  private static final Object MATCH_ANY = new Object(); // Sentinel\r
+  // This K/V pair has been deleted (but the Key slot is forever claimed).\r
+  // The same Key can be reinserted with a new value later.\r
+  private static final Object TOMBSTONE = new Object();\r
+  // Prime'd or box'd version of TOMBSTONE.  This K/V pair was deleted, then a\r
+  // table resize started.  The K/V pair has been marked so that no new\r
+  // updates can happen to the old table (and since the K/V pair was deleted\r
+  // nothing was copied to the new table).\r
+  private static final Prime TOMBPRIME = new Prime(TOMBSTONE);\r
+\r
+  // --- key,val -------------------------------------------------------------\r
+  // Access K,V for a given idx\r
+  //\r
+  // Note that these are static, so that the caller is forced to read the _kvs\r
+  // field only once, and share that read across all key/val calls - lest the\r
+  // _kvs field move out from under us and back-to-back key & val calls refer\r
+  // to different _kvs arrays.\r
+  private static final Object key(Object[] kvs,int idx) { return kvs[(idx<<1)+2]; }\r
+  private static final Object val(Object[] kvs,int idx) { return kvs[(idx<<1)+3]; }\r
+  private static final boolean CAS_key( Object[] kvs, int idx, Object old, Object key ) {\r
+    return _unsafe.compareAndSwapObject( kvs, rawIndex(kvs,(idx<<1)+2), old, key );\r
+  }\r
+  private static final boolean CAS_val( Object[] kvs, int idx, Object old, Object val ) {\r
+    return _unsafe.compareAndSwapObject( kvs, rawIndex(kvs,(idx<<1)+3), old, val );\r
+  }\r
+\r
+\r
+  // --- dump ----------------------------------------------------------------\r
+  /** Verbose printout of table internals, useful for debugging.  */\r
+  public final void print() {\r
+    System.out.println("=========");\r
+    print2(_kvs);\r
+    System.out.println("=========");\r
+  }\r
+  // print the entire state of the table\r
+  private final void print( Object[] kvs ) {\r
+    for( int i=0; i<len(kvs); i++ ) {\r
+      Object K = key(kvs,i);\r
+      if( K != null ) {\r
+        String KS = (K == TOMBSTONE) ? "XXX" : K.toString();\r
+        Object V = val(kvs,i);\r
+        Object U = Prime.unbox(V);\r
+        String p = (V==U) ? "" : "prime_";\r
+        String US = (U == TOMBSTONE) ? "tombstone" : U.toString();\r
+        System.out.println(""+i+" ("+KS+","+p+US+")");\r
+      }\r
+    }\r
+    Object[] newkvs = chm(kvs)._newkvs; // New table, if any\r
+    if( newkvs != null ) {\r
+      System.out.println("----");\r
+      print(newkvs);\r
+    }\r
+  }\r
+  // print only the live values, broken down by the table they are in\r
+  private final void print2( Object[] kvs) {\r
+    for( int i=0; i<len(kvs); i++ ) {\r
+      Object key = key(kvs,i);\r
+      Object val = val(kvs,i);\r
+      Object U = Prime.unbox(val);\r
+      if( key != null && key != TOMBSTONE &&  // key is sane\r
+          val != null && U   != TOMBSTONE ) { // val is sane\r
+        String p = (val==U) ? "" : "prime_";\r
+        System.out.println(""+i+" ("+key+","+p+val+")");\r
+      }\r
+    }\r
+    Object[] newkvs = chm(kvs)._newkvs; // New table, if any\r
+    if( newkvs != null ) {\r
+      System.out.println("----");\r
+      print2(newkvs);\r
+    }\r
+  }\r
+\r
+  // Count of reprobes\r
+  private transient Counter _reprobes = new Counter();\r
+  /** Get and clear the current count of reprobes.  Reprobes happen on key\r
+   *  collisions, and a high reprobe rate may indicate a poor hash function or\r
+   *  weaknesses in the table resizing function.\r
+   *  @return the count of reprobes since the last call to {@link #reprobes}\r
+   *  or since the table was created.   */\r
+  public long reprobes() { long r = _reprobes.get(); _reprobes = new Counter(); return r; }\r
+\r
+\r
+  // --- reprobe_limit -----------------------------------------------------\r
+  // Heuristic to decide if we have reprobed toooo many times.  Running over\r
+  // the reprobe limit on a 'get' call acts as a 'miss'; on a 'put' call it\r
+  // can trigger a table resize.  Several places must have exact agreement on\r
+  // what the reprobe_limit is, so we share it here.\r
+  private static final int reprobe_limit( int len ) {\r
+    return REPROBE_LIMIT + (len>>2);\r
+  }\r
+\r
+  // --- NonBlockingHashMap --------------------------------------------------\r
+  // Constructors\r
+\r
+  /** Create a new NonBlockingHashMap with default minimum size (currently set\r
+   *  to 8 K/V pairs or roughly 84 bytes on a standard 32-bit JVM). */\r
+  public NonBlockingHashMap( ) { this(MIN_SIZE); }\r
+\r
+  /** Create a new NonBlockingHashMap with initial room for the given number of\r
+   *  elements, thus avoiding internal resizing operations to reach an\r
+   *  appropriate size.  Large numbers here when used with a small count of\r
+   *  elements will sacrifice space for a small amount of time gained.  The\r
+   *  initial size will be rounded up internally to the next larger power of 2. */\r
+  public NonBlockingHashMap( final int initial_sz ) { initialize(initial_sz); }\r
+  private final void initialize( int initial_sz ) {\r
+    if( initial_sz < 0 ) throw new IllegalArgumentException();\r
+    int i;                      // Convert to next largest power-of-2\r
+    if( initial_sz > 1024*1024 ) initial_sz = 1024*1024;\r
+    for( i=MIN_SIZE_LOG; (1<<i) < (initial_sz<<2); i++ ) ;\r
+    // Double size for K,V pairs, add 1 for CHM and 1 for hashes\r
+    _kvs = new Object[((1<<i)<<1)+2];\r
+    _kvs[0] = new CHM(new Counter()); // CHM in slot 0\r
+    _kvs[1] = new int[1<<i];          // Matching hash entries\r
+    _last_resize_milli = System.currentTimeMillis();\r
+  }\r
+  // Version for subclassed readObject calls, to be called after the defaultReadObject\r
+  protected final void initialize() { initialize(MIN_SIZE); }\r
+\r
+  // --- wrappers ------------------------------------------------------------\r
+\r
+  /** Returns the number of key-value mappings in this map.\r
+   *  @return the number of key-value mappings in this map */\r
+  @Override \r
+  public int     size       ( )                       { return chm(_kvs).size(); }\r
+  /** Returns <tt>size() == 0</tt>.\r
+   *  @return <tt>size() == 0</tt> */\r
+  @Override \r
+  public boolean isEmpty    ( )                       { return size() == 0;      }\r
+\r
+  /** Tests if the key in the table using the <tt>equals</tt> method.\r
+   * @return <tt>true</tt> if the key is in the table using the <tt>equals</tt> method\r
+   * @throws NullPointerException if the specified key is null  */\r
+  @Override \r
+  public boolean containsKey( Object key )            { return get(key) != null; }\r
+\r
+  /** Legacy method testing if some key maps into the specified value in this\r
+   *  table.  This method is identical in functionality to {@link\r
+   *  #containsValue}, and exists solely to ensure full compatibility with\r
+   *  class {@link java.util.Hashtable}, which supported this method prior to\r
+   *  introduction of the Java Collections framework.\r
+   *  @param  val a value to search for\r
+   *  @return <tt>true</tt> if this map maps one or more keys to the specified value\r
+   *  @throws NullPointerException if the specified value is null */\r
+  public boolean contains   ( Object val )            { return containsValue(val); }\r
+\r
+  /** Maps the specified key to the specified value in the table.  Neither key\r
+   *  nor value can be null.\r
+   *  <p> The value can be retrieved by calling {@link #get} with a key that is\r
+   *  equal to the original key.\r
+   *  @param key key with which the specified value is to be associated\r
+   *  @param val value to be associated with the specified key\r
+   *  @return the previous value associated with <tt>key</tt>, or\r
+   *          <tt>null</tt> if there was no mapping for <tt>key</tt>\r
+   *  @throws NullPointerException if the specified key or value is null  */\r
+  @Override\r
+  public TypeV   put        ( TypeK  key, TypeV val ) { return putIfMatch( key,      val, NO_MATCH_OLD); }\r
+\r
+  /** Atomically, do a {@link #put} if-and-only-if the key is not mapped.\r
+   *  Useful to ensure that only a single mapping for the key exists, even if\r
+   *  many threads are trying to create the mapping in parallel.\r
+   *  @return the previous value associated with the specified key,\r
+   *         or <tt>null</tt> if there was no mapping for the key\r
+   *  @throws NullPointerException if the specified key or value is null  */\r
+  public TypeV   putIfAbsent( TypeK  key, TypeV val ) { return putIfMatch( key,      val, TOMBSTONE   ); }\r
+\r
+  /** Removes the key (and its corresponding value) from this map.\r
+   *  This method does nothing if the key is not in the map.\r
+   *  @return the previous value associated with <tt>key</tt>, or\r
+   *         <tt>null</tt> if there was no mapping for <tt>key</tt>\r
+   *  @throws NullPointerException if the specified key is null */\r
+  @Override\r
+  public TypeV   remove     ( Object key )            { return putIfMatch( key,TOMBSTONE, NO_MATCH_OLD); }\r
+\r
+  /** Atomically do a {@link #remove(Object)} if-and-only-if the key is mapped\r
+   *  to a value which is <code>equals</code> to the given value.\r
+   *  @throws NullPointerException if the specified key or value is null */\r
+  public boolean remove     ( Object key,Object val ) { return putIfMatch( key,TOMBSTONE, val ) == val; }\r
+\r
+  /** Atomically do a <code>put(key,val)</code> if-and-only-if the key is\r
+   *  mapped to some value already.\r
+   *  @throws NullPointerException if the specified key or value is null */\r
+  public TypeV   replace    ( TypeK  key, TypeV val ) { return putIfMatch( key,      val,MATCH_ANY   ); }\r
+\r
+  /** Atomically do a <code>put(key,newValue)</code> if-and-only-if the key is\r
+   *  mapped a value which is <code>equals</code> to <code>oldValue</code>.\r
+   *  @throws NullPointerException if the specified key or value is null */\r
+  public boolean replace    ( TypeK  key, TypeV  oldValue, TypeV newValue ) {\r
+    return putIfMatch( key, newValue, oldValue ) == oldValue;\r
+  }\r
+\r
+  private final TypeV putIfMatch( Object key, Object newVal, Object oldVal ) {\r
+    if (oldVal == null || newVal == null) throw new NullPointerException();\r
+    final Object res = putIfMatch( this, _kvs, key, newVal, oldVal );\r
+    assert !(res instanceof Prime);\r
+    assert res != null;\r
+    return res == TOMBSTONE ? null : (TypeV)res;\r
+  }\r
+\r
+\r
+  /** Copies all of the mappings from the specified map to this one, replacing\r
+   *  any existing mappings.\r
+   *  @param m mappings to be stored in this map */\r
+  @Override\r
+  public void putAll(Map<? extends TypeK, ? extends TypeV> m) {\r
+    for (Map.Entry<? extends TypeK, ? extends TypeV> e : m.entrySet())\r
+      put(e.getKey(), e.getValue());\r
+  }\r
+\r
+  /** Removes all of the mappings from this map. */\r
+  @Override\r
+  public void clear() {         // Smack a new empty table down\r
+    Object[] newkvs = new NonBlockingHashMap(MIN_SIZE)._kvs;\r
+    while( !CAS_kvs(_kvs,newkvs) ) // Spin until the clear works\r
+      ;\r
+  }\r
+\r
+  /** Returns <tt>true</tt> if this Map maps one or more keys to the specified\r
+   *  value.  <em>Note</em>: This method requires a full internal traversal of the\r
+   *  hash table and is much slower than {@link #containsKey}.\r
+   *  @param val value whose presence in this map is to be tested\r
+   *  @return <tt>true</tt> if this map maps one or more keys to the specified value\r
+   *  @throws NullPointerException if the specified value is null */\r
+  @Override\r
+  public boolean containsValue( final Object val ) {\r
+    if( val == null ) throw new NullPointerException();\r
+    for( TypeV V : values() )\r
+      if( V == val || V.equals(val) )\r
+        return true;\r
+    return false;\r
+  }\r
+\r
+  // This function is supposed to do something for Hashtable, and the JCK\r
+  // tests hang until it gets called... by somebody ... for some reason,\r
+  // any reason....\r
+  protected void rehash() {\r
+  }\r
+\r
+  /**\r
+   * Creates a shallow copy of this hashtable. All the structure of the\r
+   * hashtable itself is copied, but the keys and values are not cloned.\r
+   * This is a relatively expensive operation.\r
+   *\r
+   * @return  a clone of the hashtable.\r
+   */\r
+  @Override\r
+  public Object clone() {\r
+    try {\r
+      // Must clone, to get the class right; NBHM might have been\r
+      // extended so it would be wrong to just make a new NBHM.\r
+      NonBlockingHashMap<TypeK,TypeV> t = (NonBlockingHashMap<TypeK,TypeV>) super.clone();\r
+      // But I don't have an atomic clone operation - the underlying _kvs\r
+      // structure is undergoing rapid change.  If I just clone the _kvs\r
+      // field, the CHM in _kvs[0] won't be in sync.\r
+      //\r
+      // Wipe out the cloned array (it was shallow anyways).\r
+      t.clear();\r
+      // Now copy sanely\r
+      for( TypeK K : keySet() ) {\r
+        final TypeV V = get(K);  // Do an official 'get'\r
+        t.put(K,V);\r
+      }\r
+      return t;\r
+    } catch (CloneNotSupportedException e) {\r
+      // this shouldn't happen, since we are Cloneable\r
+      throw new InternalError();\r
+    }\r
+  }\r
+\r
+  /**\r
+   * Returns a string representation of this map.  The string representation\r
+   * consists of a list of key-value mappings in the order returned by the\r
+   * map's <tt>entrySet</tt> view's iterator, enclosed in braces\r
+   * (<tt>"{}"</tt>).  Adjacent mappings are separated by the characters\r
+   * <tt>", "</tt> (comma and space).  Each key-value mapping is rendered as\r
+   * the key followed by an equals sign (<tt>"="</tt>) followed by the\r
+   * associated value.  Keys and values are converted to strings as by\r
+   * {@link String#valueOf(Object)}.\r
+   *\r
+   * @return a string representation of this map\r
+   */\r
+  @Override\r
+  public String toString() {\r
+    Iterator<Entry<TypeK,TypeV>> i = entrySet().iterator();\r
+    if( !i.hasNext())\r
+      return "{}";\r
+\r
+    StringBuilder sb = new StringBuilder();\r
+    sb.append('{');\r
+    for (;;) {\r
+      Entry<TypeK,TypeV> e = i.next();\r
+      TypeK key = e.getKey();\r
+      TypeV value = e.getValue();\r
+      sb.append(key   == this ? "(this Map)" : key);\r
+      sb.append('=');\r
+      sb.append(value == this ? "(this Map)" : value);\r
+      if( !i.hasNext())\r
+        return sb.append('}').toString();\r
+      sb.append(", ");\r
+    }\r
+  }\r
+\r
+  // --- keyeq ---------------------------------------------------------------\r
+  // Check for key equality.  Try direct pointer compare first, then see if\r
+  // the hashes are unequal (fast negative test) and finally do the full-on\r
+  // 'equals' v-call.\r
+  private static boolean keyeq( Object K, Object key, int[] hashes, int hash, int fullhash ) {\r
+    return\r
+      K==key ||                 // Either keys match exactly OR\r
+      // hash exists and matches?  hash can be zero during the install of a\r
+      // new key/value pair.\r
+      ((hashes[hash] == 0 || hashes[hash] == fullhash) &&\r
+       // Do not call the users' "equals()" call with a Tombstone, as this can\r
+       // surprise poorly written "equals()" calls that throw exceptions\r
+       // instead of simply returning false.\r
+       K != TOMBSTONE &&        // Do not call users' equals call with a Tombstone\r
+       // Do the match the hard way - with the users' key being the loop-\r
+       // invariant "this" pointer.  I could have flipped the order of\r
+       // operands (since equals is commutative), but I'm making mega-morphic\r
+       // v-calls in a reprobing loop and nailing down the 'this' argument\r
+       // gives both the JIT and the hardware a chance to prefetch the call target.\r
+       key.equals(K));          // Finally do the hard match\r
+  }\r
+\r
+  // --- get -----------------------------------------------------------------\r
+  /** Returns the value to which the specified key is mapped, or {@code null}\r
+   *  if this map contains no mapping for the key.\r
+   *  <p>More formally, if this map contains a mapping from a key {@code k} to\r
+   *  a value {@code v} such that {@code key.equals(k)}, then this method\r
+   *  returns {@code v}; otherwise it returns {@code null}.  (There can be at\r
+   *  most one such mapping.)\r
+   * @throws NullPointerException if the specified key is null */\r
+  // Never returns a Prime nor a Tombstone.\r
+  @Override\r
+  public TypeV get( Object key ) {\r
+    final int fullhash= hash (key); // throws NullPointerException if key is null\r
+    final Object V = get_impl(this,_kvs,key,fullhash);\r
+    assert !(V instanceof Prime); // Never return a Prime\r
+    return (TypeV)V;\r
+  }\r
+\r
+  private static final Object get_impl( final NonBlockingHashMap topmap, final Object[] kvs, final Object key, final int fullhash ) {\r
+    final int len     = len  (kvs); // Count of key/value pairs, reads kvs.length\r
+    final CHM chm     = chm  (kvs); // The CHM, for a volatile read below; reads slot 0 of kvs\r
+    final int[] hashes=hashes(kvs); // The memoized hashes; reads slot 1 of kvs\r
+\r
+    int idx = fullhash & (len-1); // First key hash\r
+\r
+    // Main spin/reprobe loop, looking for a Key hit\r
+    int reprobe_cnt=0;\r
+    while( true ) {\r
+      // Probe table.  Each read of 'val' probably misses in cache in a big\r
+      // table; hopefully the read of 'key' then hits in cache.\r
+      final Object K = key(kvs,idx); // Get key   before volatile read, could be null\r
+      final Object V = val(kvs,idx); // Get value before volatile read, could be null or Tombstone or Prime\r
+      if( K == null ) return null;   // A clear miss\r
+\r
+      // We need a volatile-read here to preserve happens-before semantics on\r
+      // newly inserted Keys.  If the Key body was written just before inserting\r
+      // into the table a Key-compare here might read the uninitalized Key body.\r
+      // Annoyingly this means we have to volatile-read before EACH key compare.\r
+      // .\r
+      // We also need a volatile-read between reading a newly inserted Value\r
+      // and returning the Value (so the user might end up reading the stale\r
+      // Value contents).  Same problem as with keys - and the one volatile\r
+      // read covers both.\r
+      final Object[] newkvs = chm._newkvs; // VOLATILE READ before key compare\r
+\r
+      // Key-compare\r
+      if( keyeq(K,key,hashes,idx,fullhash) ) {\r
+        // Key hit!  Check for no table-copy-in-progress\r
+        if( !(V instanceof Prime) ) // No copy?\r
+          return (V == TOMBSTONE) ? null : V; // Return the value\r
+        // Key hit - but slot is (possibly partially) copied to the new table.\r
+        // Finish the copy & retry in the new table.\r
+        return get_impl(topmap,chm.copy_slot_and_check(topmap,kvs,idx,key),key,fullhash); // Retry in the new table\r
+      }\r
+      // get and put must have the same key lookup logic!  But only 'put'\r
+      // needs to force a table-resize for a too-long key-reprobe sequence.\r
+      // Check for too-many-reprobes on get - and flip to the new table.\r
+         // ???? Why a TOMBSTONE key means no more keys in this table\r
+         // because a TOMBSTONE key should be null before\r
+      if( ++reprobe_cnt >= reprobe_limit(len) || // too many probes\r
+          key == TOMBSTONE ) // found a TOMBSTONE key, means no more keys in this table\r
+        return newkvs == null ? null : get_impl(topmap,topmap.help_copy(newkvs),key,fullhash); // Retry in the new table\r
+\r
+      idx = (idx+1)&(len-1);    // Reprobe by 1!  (could now prefetch)\r
+    }\r
+  }\r
+\r
+  // --- putIfMatch ---------------------------------------------------------\r
+  // Put, Remove, PutIfAbsent, etc.  Return the old value.  If the returned\r
+  // value is equal to expVal (or expVal is NO_MATCH_OLD) then the put can be\r
+  // assumed to work (although might have been immediately overwritten).  Only\r
+  // the path through copy_slot passes in an expected value of null, and\r
+  // putIfMatch only returns a null if passed in an expected null.\r
+  private static final Object putIfMatch( final NonBlockingHashMap topmap, final Object[] kvs, final Object key, final Object putval, final Object expVal ) {\r
+    assert putval != null;\r
+    assert !(putval instanceof Prime);\r
+    assert !(expVal instanceof Prime);\r
+    final int fullhash = hash  (key); // throws NullPointerException if key null\r
+    final int len      = len   (kvs); // Count of key/value pairs, reads kvs.length\r
+    final CHM chm      = chm   (kvs); // Reads kvs[0]\r
+    final int[] hashes = hashes(kvs); // Reads kvs[1], read before kvs[0]\r
+    int idx = fullhash & (len-1);\r
+\r
+    // ---\r
+    // Key-Claim stanza: spin till we can claim a Key (or force a resizing).\r
+    int reprobe_cnt=0;\r
+    Object K=null, V=null;\r
+    Object[] newkvs=null;\r
+    while( true ) {             // Spin till we get a Key slot\r
+      V = val(kvs,idx);         // Get old value (before volatile read below!)\r
+      K = key(kvs,idx);         // Get current key\r
+      if( K == null ) {         // Slot is free?\r
+        // Found an empty Key slot - which means this Key has never been in\r
+        // this table.  No need to put a Tombstone - the Key is not here!\r
+        if( putval == TOMBSTONE ) return putval; // Not-now & never-been in this table\r
+        // Claim the null key-slot\r
+        if( CAS_key(kvs,idx, null, key ) ) { // Claim slot for Key\r
+          chm._slots.add(1);      // Raise key-slots-used count\r
+          hashes[idx] = fullhash; // Memoize fullhash\r
+          break;                  // Got it!\r
+        }\r
+        // CAS to claim the key-slot failed.\r
+        //\r
+        // This re-read of the Key points out an annoying short-coming of Java\r
+        // CAS.  Most hardware CAS's report back the existing value - so that\r
+        // if you fail you have a *witness* - the value which caused the CAS\r
+        // to fail.  The Java API turns this into a boolean destroying the\r
+        // witness.  Re-reading does not recover the witness because another\r
+        // thread can write over the memory after the CAS.  Hence we can be in\r
+        // the unfortunate situation of having a CAS fail *for cause* but\r
+        // having that cause removed by a later store.  This turns a\r
+        // non-spurious-failure CAS (such as Azul has) into one that can\r
+        // apparently spuriously fail - and we avoid apparent spurious failure\r
+        // by not allowing Keys to ever change.\r
+        K = key(kvs,idx);       // CAS failed, get updated value\r
+        assert K != null;       // If keys[idx] is null, CAS shoulda worked\r
+      }\r
+      // Key slot was not null, there exists a Key here\r
+\r
+      // We need a volatile-read here to preserve happens-before semantics on\r
+      // newly inserted Keys.  If the Key body was written just before inserting\r
+      // into the table a Key-compare here might read the uninitalized Key body.\r
+      // Annoyingly this means we have to volatile-read before EACH key compare.\r
+      newkvs = chm._newkvs;     // VOLATILE READ before key compare\r
+\r
+      if( keyeq(K,key,hashes,idx,fullhash) )\r
+        break;                  // Got it!\r
+\r
+      // get and put must have the same key lookup logic!  Lest 'get' give\r
+      // up looking too soon.\r
+      //topmap._reprobes.add(1);\r
+      if( ++reprobe_cnt >= reprobe_limit(len) || // too many probes or\r
+          key == TOMBSTONE ) { // found a TOMBSTONE key, means no more keys\r
+        // We simply must have a new table to do a 'put'.  At this point a\r
+        // 'get' will also go to the new table (if any).  We do not need\r
+        // to claim a key slot (indeed, we cannot find a free one to claim!).\r
+        newkvs = chm.resize(topmap,kvs);\r
+        if( expVal != null ) topmap.help_copy(newkvs); // help along an existing copy\r
+        return putIfMatch(topmap,newkvs,key,putval,expVal);\r
+      }\r
+\r
+      idx = (idx+1)&(len-1); // Reprobe!\r
+    } // End of spinning till we get a Key slot\r
+\r
+    // ---\r
+    // Found the proper Key slot, now update the matching Value slot.  We\r
+    // never put a null, so Value slots monotonically move from null to\r
+    // not-null (deleted Values use Tombstone).  Thus if 'V' is null we\r
+    // fail this fast cutout and fall into the check for table-full.\r
+    if( putval == V ) return V; // Fast cutout for no-change\r
+\r
+    // See if we want to move to a new table (to avoid high average re-probe\r
+    // counts).  We only check on the initial set of a Value from null to\r
+    // not-null (i.e., once per key-insert).  Of course we got a 'free' check\r
+    // of newkvs once per key-compare (not really free, but paid-for by the\r
+    // time we get here).\r
+    if( newkvs == null &&       // New table-copy already spotted?\r
+        // Once per fresh key-insert check the hard way\r
+        ((V == null && chm.tableFull(reprobe_cnt,len)) ||\r
+         // Or we found a Prime, but the JMM allowed reordering such that we\r
+         // did not spot the new table (very rare race here: the writing\r
+         // thread did a CAS of _newkvs then a store of a Prime.  This thread\r
+         // reads the Prime, then reads _newkvs - but the read of Prime was so\r
+         // delayed (or the read of _newkvs was so accelerated) that they\r
+         // swapped and we still read a null _newkvs.  The resize call below\r
+         // will do a CAS on _newkvs forcing the read.\r
+         V instanceof Prime) )\r
+      newkvs = chm.resize(topmap,kvs); // Force the new table copy to start\r
+    // See if we are moving to a new table.\r
+    // If so, copy our slot and retry in the new table.\r
+    if( newkvs != null )\r
+      return putIfMatch(topmap,chm.copy_slot_and_check(topmap,kvs,idx,expVal),key,putval,expVal);\r
+\r
+    // ---\r
+    // We are finally prepared to update the existing table\r
+    while( true ) {\r
+      assert !(V instanceof Prime);\r
+\r
+      // Must match old, and we do not?  Then bail out now.  Note that either V\r
+      // or expVal might be TOMBSTONE.  Also V can be null, if we've never\r
+      // inserted a value before.  expVal can be null if we are called from\r
+      // copy_slot.\r
+\r
+      if( expVal != NO_MATCH_OLD && // Do we care about expected-Value at all?\r
+          V != expVal &&            // No instant match already?\r
+          (expVal != MATCH_ANY || V == TOMBSTONE || V == null) &&\r
+          !(V==null && expVal == TOMBSTONE) &&    // Match on null/TOMBSTONE combo\r
+          (expVal == null || !expVal.equals(V)) ) // Expensive equals check at the last\r
+        return V;                                 // Do not update!\r
+\r
+      // Actually change the Value in the Key,Value pair\r
+      if( CAS_val(kvs, idx, V, putval ) ) {\r
+        // CAS succeeded - we did the update!\r
+        // Both normal put's and table-copy calls putIfMatch, but table-copy\r
+        // does not (effectively) increase the number of live k/v pairs.\r
+        if( expVal != null ) {\r
+          // Adjust sizes - a striped counter\r
+          if(  (V == null || V == TOMBSTONE) && putval != TOMBSTONE ) chm._size.add( 1);\r
+          if( !(V == null || V == TOMBSTONE) && putval == TOMBSTONE ) chm._size.add(-1);\r
+        }\r
+        return (V==null && expVal!=null) ? TOMBSTONE : V;\r
+      } \r
+      // Else CAS failed\r
+      V = val(kvs,idx);         // Get new value\r
+      // If a Prime'd value got installed, we need to re-run the put on the\r
+      // new table.  Otherwise we lost the CAS to another racing put.\r
+      // Simply retry from the start.\r
+      if( V instanceof Prime )\r
+        return putIfMatch(topmap,chm.copy_slot_and_check(topmap,kvs,idx,expVal),key,putval,expVal);\r
+    }\r
+  }\r
+\r
+  // --- help_copy ---------------------------------------------------------\r
+  // Help along an existing resize operation.  This is just a fast cut-out\r
+  // wrapper, to encourage inlining for the fast no-copy-in-progress case.  We\r
+  // always help the top-most table copy, even if there are nested table\r
+  // copies in progress.\r
+  private final Object[] help_copy( Object[] helper ) {\r
+    // Read the top-level KVS only once.  We'll try to help this copy along,\r
+    // even if it gets promoted out from under us (i.e., the copy completes\r
+    // and another KVS becomes the top-level copy).\r
+    Object[] topkvs = _kvs;\r
+    CHM topchm = chm(topkvs);\r
+    if( topchm._newkvs == null ) return helper; // No copy in-progress\r
+    topchm.help_copy_impl(this,topkvs,false);\r
+    return helper;\r
+  }\r
+\r
+\r
+  // --- CHM -----------------------------------------------------------------\r
+  // The control structure for the NonBlockingHashMap\r
+  private static final class CHM<TypeK,TypeV> {\r
+    // Size in active K,V pairs\r
+    private final Counter _size;\r
+    public int size () { return (int)_size.get(); }\r
+\r
+    // ---\r
+    // These next 2 fields are used in the resizing heuristics, to judge when\r
+    // it is time to resize or copy the table.  Slots is a count of used-up\r
+    // key slots, and when it nears a large fraction of the table we probably\r
+    // end up reprobing too much.  Last-resize-milli is the time since the\r
+    // last resize; if we are running back-to-back resizes without growing\r
+    // (because there are only a few live keys but many slots full of dead\r
+    // keys) then we need a larger table to cut down on the churn.\r
+\r
+    // Count of used slots, to tell when table is full of dead unusable slots\r
+    private final Counter _slots;\r
+    public int slots() { return (int)_slots.get(); }\r
+\r
+    // ---\r
+    // New mappings, used during resizing.\r
+    // The 'new KVs' array - created during a resize operation.  This\r
+    // represents the new table being copied from the old one.  It's the\r
+    // volatile variable that is read as we cross from one table to the next,\r
+    // to get the required memory orderings.  It monotonically transits from\r
+    // null to set (once).\r
+    volatile Object[] _newkvs;\r
+    private final AtomicReferenceFieldUpdater<CHM,Object[]> _newkvsUpdater =\r
+      AtomicReferenceFieldUpdater.newUpdater(CHM.class,Object[].class, "_newkvs");\r
+    // Set the _next field if we can.\r
+    boolean CAS_newkvs( Object[] newkvs ) {\r
+      while( _newkvs == null )\r
+        if( _newkvsUpdater.compareAndSet(this,null,newkvs) )\r
+          return true;\r
+      return false;\r
+    }\r
+    // Sometimes many threads race to create a new very large table.  Only 1\r
+    // wins the race, but the losers all allocate a junk large table with\r
+    // hefty allocation costs.  Attempt to control the overkill here by\r
+    // throttling attempts to create a new table.  I cannot really block here\r
+    // (lest I lose the non-blocking property) but late-arriving threads can\r
+    // give the initial resizing thread a little time to allocate the initial\r
+    // new table.  The Right Long Term Fix here is to use array-lets and\r
+    // incrementally create the new very large array.  In C I'd make the array\r
+    // with malloc (which would mmap under the hood) which would only eat\r
+    // virtual-address and not real memory - and after Somebody wins then we\r
+    // could in parallel initialize the array.  Java does not allow\r
+    // un-initialized array creation (especially of ref arrays!).\r
+    volatile long _resizers; // count of threads attempting an initial resize\r
+    private static final AtomicLongFieldUpdater<CHM> _resizerUpdater =\r
+      AtomicLongFieldUpdater.newUpdater(CHM.class, "_resizers");\r
+\r
+    // ---\r
+    // Simple constructor\r
+    CHM( Counter size ) {\r
+      _size = size;\r
+      _slots= new Counter();\r
+    }\r
+\r
+    // --- tableFull ---------------------------------------------------------\r
+    // Heuristic to decide if this table is too full, and we should start a\r
+    // new table.  Note that if a 'get' call has reprobed too many times and\r
+    // decided the table must be full, then always the estimate_sum must be\r
+    // high and we must report the table is full.  If we do not, then we might\r
+    // end up deciding that the table is not full and inserting into the\r
+    // current table, while a 'get' has decided the same key cannot be in this\r
+    // table because of too many reprobes.  The invariant is:\r
+    //   slots.estimate_sum >= max_reprobe_cnt >= reprobe_limit(len)\r
+    private final boolean tableFull( int reprobe_cnt, int len ) {\r
+      return\r
+        // Do the cheap check first: we allow some number of reprobes always\r
+        reprobe_cnt >= REPROBE_LIMIT &&\r
+        // More expensive check: see if the table is > 1/4 full.\r
+        _slots.estimate_get() >= reprobe_limit(len);\r
+    }\r
+\r
+    // --- resize ------------------------------------------------------------\r
+    // Resizing after too many probes.  "How Big???" heuristics are here.\r
+    // Callers will (not this routine) will 'help_copy' any in-progress copy.\r
+    // Since this routine has a fast cutout for copy-already-started, callers\r
+    // MUST 'help_copy' lest we have a path which forever runs through\r
+    // 'resize' only to discover a copy-in-progress which never progresses.\r
+    private final Object[] resize( NonBlockingHashMap topmap, Object[] kvs) {\r
+      assert chm(kvs) == this;\r
+\r
+      // Check for resize already in progress, probably triggered by another thread\r
+      Object[] newkvs = _newkvs; // VOLATILE READ\r
+      if( newkvs != null )       // See if resize is already in progress\r
+        return newkvs;           // Use the new table already\r
+\r
+      // No copy in-progress, so start one.  First up: compute new table size.\r
+      int oldlen = len(kvs);    // Old count of K,V pairs allowed\r
+      int sz = size();          // Get current table count of active K,V pairs\r
+      int newsz = sz;           // First size estimate\r
+\r
+      // Heuristic to determine new size.  We expect plenty of dead-slots-with-keys\r
+      // and we need some decent padding to avoid endless reprobing.\r
+      if( sz >= (oldlen>>2) ) { // If we are >25% full of keys then...\r
+        newsz = oldlen<<1;      // Double size\r
+        if( sz >= (oldlen>>1) ) // If we are >50% full of keys then...\r
+          newsz = oldlen<<2;    // Double double size\r
+      }\r
+      // This heuristic in the next 2 lines leads to a much denser table\r
+      // with a higher reprobe rate\r
+      //if( sz >= (oldlen>>1) ) // If we are >50% full of keys then...\r
+      //  newsz = oldlen<<1;    // Double size\r
+\r
+      // Last (re)size operation was very recent?  Then double again; slows\r
+      // down resize operations for tables subject to a high key churn rate.\r
+      long tm = System.currentTimeMillis();\r
+      long q=0;\r
+      if( newsz <= oldlen && // New table would shrink or hold steady?\r
+          tm <= topmap._last_resize_milli+10000 && // Recent resize (less than 1 sec ago)\r
+          (q=_slots.estimate_get()) >= (sz<<1) ) // 1/2 of keys are dead?\r
+        newsz = oldlen<<1;      // Double the existing size\r
+\r
+      // Do not shrink, ever\r
+      if( newsz < oldlen ) newsz = oldlen;\r
+\r
+      // Convert to power-of-2\r
+      int log2;\r
+      for( log2=MIN_SIZE_LOG; (1<<log2) < newsz; log2++ ) ; // Compute log2 of size\r
+\r
+      // Now limit the number of threads actually allocating memory to a\r
+      // handful - lest we have 750 threads all trying to allocate a giant\r
+      // resized array.\r
+      long r = _resizers;\r
+      while( !_resizerUpdater.compareAndSet(this,r,r+1) )\r
+        r = _resizers;\r
+      // Size calculation: 2 words (K+V) per table entry, plus a handful.  We\r
+      // guess at 32-bit pointers; 64-bit pointers screws up the size calc by\r
+      // 2x but does not screw up the heuristic very much.\r
+      int megs = ((((1<<log2)<<1)+4)<<3/*word to bytes*/)>>20/*megs*/;\r
+      if( r >= 2 && megs > 0 ) { // Already 2 guys trying; wait and see\r
+        newkvs = _newkvs;        // Between dorking around, another thread did it\r
+        if( newkvs != null )     // See if resize is already in progress\r
+          return newkvs;         // Use the new table already\r
+        // TODO - use a wait with timeout, so we'll wakeup as soon as the new table\r
+        // is ready, or after the timeout in any case.\r
+        //synchronized( this ) { wait(8*megs); }         // Timeout - we always wakeup\r
+        // For now, sleep a tad and see if the 2 guys already trying to make\r
+        // the table actually get around to making it happen.\r
+        try { Thread.sleep(8*megs); } catch( Exception e ) { }\r
+      }\r
+      // Last check, since the 'new' below is expensive and there is a chance\r
+      // that another thread slipped in a new thread while we ran the heuristic.\r
+      newkvs = _newkvs;\r
+      if( newkvs != null )      // See if resize is already in progress\r
+        return newkvs;          // Use the new table already\r
+\r
+      // Double size for K,V pairs, add 1 for CHM\r
+      newkvs = new Object[((1<<log2)<<1)+2]; // This can get expensive for big arrays\r
+      newkvs[0] = new CHM(_size); // CHM in slot 0\r
+      newkvs[1] = new int[1<<log2]; // hashes in slot 1\r
+\r
+      // Another check after the slow allocation\r
+      if( _newkvs != null )     // See if resize is already in progress\r
+        return _newkvs;         // Use the new table already\r
+\r
+      // The new table must be CAS'd in so only 1 winner amongst duplicate\r
+      // racing resizing threads.  Extra CHM's will be GC'd.\r
+      if( CAS_newkvs( newkvs ) ) { // NOW a resize-is-in-progress!\r
+        //notifyAll();            // Wake up any sleepers\r
+        //long nano = System.nanoTime();\r
+        //System.out.println(" "+nano+" Resize from "+oldlen+" to "+(1<<log2)+" and had "+(_resizers-1)+" extras" );\r
+        //if( System.out != null ) System.out.print("["+log2);\r
+        topmap.rehash();        // Call for Hashtable's benefit\r
+      } else                    // CAS failed?\r
+        newkvs = _newkvs;       // Reread new table\r
+      return newkvs;\r
+    }\r
+\r
+\r
+    // The next part of the table to copy.  It monotonically transits from zero\r
+    // to _kvs.length.  Visitors to the table can claim 'work chunks' by\r
+    // CAS'ing this field up, then copying the indicated indices from the old\r
+    // table to the new table.  Workers are not required to finish any chunk;\r
+    // the counter simply wraps and work is copied duplicately until somebody\r
+    // somewhere completes the count.\r
+    volatile long _copyIdx = 0;\r
+    static private final AtomicLongFieldUpdater<CHM> _copyIdxUpdater =\r
+      AtomicLongFieldUpdater.newUpdater(CHM.class, "_copyIdx");\r
+\r
+    // Work-done reporting.  Used to efficiently signal when we can move to\r
+    // the new table.  From 0 to len(oldkvs) refers to copying from the old\r
+    // table to the new.\r
+    volatile long _copyDone= 0;\r
+    static private final AtomicLongFieldUpdater<CHM> _copyDoneUpdater =\r
+      AtomicLongFieldUpdater.newUpdater(CHM.class, "_copyDone");\r
+\r
+    // --- help_copy_impl ----------------------------------------------------\r
+    // Help along an existing resize operation.  We hope its the top-level\r
+    // copy (it was when we started) but this CHM might have been promoted out\r
+    // of the top position.\r
+    private final void help_copy_impl( NonBlockingHashMap topmap, Object[] oldkvs, boolean copy_all ) {\r
+      assert chm(oldkvs) == this;\r
+      Object[] newkvs = _newkvs;\r
+      assert newkvs != null;    // Already checked by caller\r
+      int oldlen = len(oldkvs); // Total amount to copy\r
+      final int MIN_COPY_WORK = Math.min(oldlen,1024); // Limit per-thread work\r
+\r
+      // ---\r
+      int panic_start = -1;\r
+      int copyidx=-9999;            // Fool javac to think it's initialized\r
+      while( _copyDone < oldlen ) { // Still needing to copy?\r
+        // Carve out a chunk of work.  The counter wraps around so every\r
+        // thread eventually tries to copy every slot repeatedly.\r
+\r
+        // We "panic" if we have tried TWICE to copy every slot - and it still\r
+        // has not happened.  i.e., twice some thread somewhere claimed they\r
+        // would copy 'slot X' (by bumping _copyIdx) but they never claimed to\r
+        // have finished (by bumping _copyDone).  Our choices become limited:\r
+        // we can wait for the work-claimers to finish (and become a blocking\r
+        // algorithm) or do the copy work ourselves.  Tiny tables with huge\r
+        // thread counts trying to copy the table often 'panic'.\r
+        if( panic_start == -1 ) { // No panic?\r
+          copyidx = (int)_copyIdx;\r
+          while( copyidx < (oldlen<<1) && // 'panic' check\r
+                 !_copyIdxUpdater.compareAndSet(this,copyidx,copyidx+MIN_COPY_WORK) )\r
+            copyidx = (int)_copyIdx;      // Re-read\r
+          if( !(copyidx < (oldlen<<1)) )  // Panic!\r
+            panic_start = copyidx;        // Record where we started to panic-copy\r
+        }\r
+\r
+        // We now know what to copy.  Try to copy.\r
+        int workdone = 0;\r
+        for( int i=0; i<MIN_COPY_WORK; i++ )\r
+          if( copy_slot(topmap,(copyidx+i)&(oldlen-1),oldkvs,newkvs) ) // Made an oldtable slot go dead?\r
+            workdone++;         // Yes!\r
+        if( workdone > 0 )      // Report work-done occasionally\r
+          copy_check_and_promote( topmap, oldkvs, workdone );// See if we can promote\r
+        //for( int i=0; i<MIN_COPY_WORK; i++ )\r
+        //  if( copy_slot(topmap,(copyidx+i)&(oldlen-1),oldkvs,newkvs) ) // Made an oldtable slot go dead?\r
+        //    copy_check_and_promote( topmap, oldkvs, 1 );// See if we can promote\r
+\r
+        copyidx += MIN_COPY_WORK;\r
+        // Uncomment these next 2 lines to turn on incremental table-copy.\r
+        // Otherwise this thread continues to copy until it is all done.\r
+        if( !copy_all && panic_start == -1 ) // No panic?\r
+          return;       // Then done copying after doing MIN_COPY_WORK\r
+      }\r
+      // Extra promotion check, in case another thread finished all copying\r
+      // then got stalled before promoting.\r
+      copy_check_and_promote( topmap, oldkvs, 0 );// See if we can promote\r
+    }\r
+\r
+\r
+    // --- copy_slot_and_check -----------------------------------------------\r
+    // Copy slot 'idx' from the old table to the new table.  If this thread\r
+    // confirmed the copy, update the counters and check for promotion.\r
+    //\r
+    // Returns the result of reading the volatile _newkvs, mostly as a\r
+    // convenience to callers.  We come here with 1-shot copy requests\r
+    // typically because the caller has found a Prime, and has not yet read\r
+    // the _newkvs volatile - which must have changed from null-to-not-null\r
+    // before any Prime appears.  So the caller needs to read the _newkvs\r
+    // field to retry his operation in the new table, but probably has not\r
+    // read it yet.\r
+    private final Object[] copy_slot_and_check( NonBlockingHashMap topmap, Object[] oldkvs, int idx, Object should_help ) {\r
+      assert chm(oldkvs) == this;\r
+      Object[] newkvs = _newkvs; // VOLATILE READ\r
+      // We're only here because the caller saw a Prime, which implies a\r
+      // table-copy is in progress.\r
+      assert newkvs != null;\r
+      if( copy_slot(topmap,idx,oldkvs,_newkvs) )   // Copy the desired slot\r
+        copy_check_and_promote(topmap, oldkvs, 1); // Record the slot copied\r
+      // Generically help along any copy (except if called recursively from a helper)\r
+      return (should_help == null) ? newkvs : topmap.help_copy(newkvs);\r
+    }\r
+\r
+    // --- copy_check_and_promote --------------------------------------------\r
+    private final void copy_check_and_promote( NonBlockingHashMap topmap, Object[] oldkvs, int workdone ) {\r
+      assert chm(oldkvs) == this;\r
+      int oldlen = len(oldkvs);\r
+      // We made a slot unusable and so did some of the needed copy work\r
+      long copyDone = _copyDone;\r
+      assert (copyDone+workdone) <= oldlen;\r
+      if( workdone > 0 ) {\r
+        while( !_copyDoneUpdater.compareAndSet(this,copyDone,copyDone+workdone) ) {\r
+          copyDone = _copyDone; // Reload, retry\r
+          assert (copyDone+workdone) <= oldlen;\r
+        }\r
+        //if( (10*copyDone/oldlen) != (10*(copyDone+workdone)/oldlen) )\r
+        //System.out.print(" "+(copyDone+workdone)*100/oldlen+"%"+"_"+(_copyIdx*100/oldlen)+"%");\r
+      }\r
+\r
+      // Check for copy being ALL done, and promote.  Note that we might have\r
+      // nested in-progress copies and manage to finish a nested copy before\r
+      // finishing the top-level copy.  We only promote top-level copies.\r
+      if( copyDone+workdone == oldlen && // Ready to promote this table?\r
+          topmap._kvs == oldkvs && // Looking at the top-level table?\r
+          // Attempt to promote\r
+          topmap.CAS_kvs(oldkvs,_newkvs) ) {\r
+        topmap._last_resize_milli = System.currentTimeMillis(); // Record resize time for next check\r
+        //long nano = System.nanoTime();\r
+        //System.out.println(" "+nano+" Promote table to "+len(_newkvs));\r
+        //if( System.out != null ) System.out.print("]");\r
+      }\r
+    }\r
+    // --- copy_slot ---------------------------------------------------------\r
+    // Copy one K/V pair from oldkvs[i] to newkvs.  Returns true if we can\r
+    // confirm that the new table guaranteed has a value for this old-table\r
+    // slot.  We need an accurate confirmed-copy count so that we know when we\r
+    // can promote (if we promote the new table too soon, other threads may\r
+    // 'miss' on values not-yet-copied from the old table).  We don't allow\r
+    // any direct updates on the new table, unless they first happened to the\r
+    // old table - so that any transition in the new table from null to\r
+    // not-null must have been from a copy_slot (or other old-table overwrite)\r
+    // and not from a thread directly writing in the new table.  Thus we can\r
+    // count null-to-not-null transitions in the new table.\r
+    private boolean copy_slot( NonBlockingHashMap topmap, int idx, Object[] oldkvs, Object[] newkvs ) {\r
+      // Blindly set the key slot from null to TOMBSTONE, to eagerly stop\r
+      // fresh put's from inserting new values in the old table when the old\r
+      // table is mid-resize.  We don't need to act on the results here,\r
+      // because our correctness stems from box'ing the Value field.  Slamming\r
+      // the Key field is a minor speed optimization.\r
+      Object key;\r
+      while( (key=key(oldkvs,idx)) == null )\r
+        CAS_key(oldkvs,idx, null, TOMBSTONE);\r
+\r
+      // ---\r
+      // Prevent new values from appearing in the old table.\r
+      // Box what we see in the old table, to prevent further updates.\r
+      Object oldval = val(oldkvs,idx); // Read OLD table\r
+      while( !(oldval instanceof Prime) ) {\r
+        final Prime box = (oldval == null || oldval == TOMBSTONE) ? TOMBPRIME : new Prime(oldval);\r
+        if( CAS_val(oldkvs,idx,oldval,box) ) { // CAS down a box'd version of oldval\r
+          // If we made the Value slot hold a TOMBPRIME, then we both\r
+          // prevented further updates here but also the (absent)\r
+          // oldval is vaccuously available in the new table.  We\r
+          // return with true here: any thread looking for a value for\r
+          // this key can correctly go straight to the new table and\r
+          // skip looking in the old table.\r
+          if( box == TOMBPRIME )\r
+            return true;\r
+          // Otherwise we boxed something, but it still needs to be\r
+          // copied into the new table.\r
+          oldval = box;         // Record updated oldval\r
+          break;                // Break loop; oldval is now boxed by us\r
+        }\r
+        oldval = val(oldkvs,idx); // Else try, try again\r
+      }\r
+      if( oldval == TOMBPRIME ) return false; // Copy already complete here!\r
+\r
+      // ---\r
+      // Copy the value into the new table, but only if we overwrite a null.\r
+      // If another value is already in the new table, then somebody else\r
+      // wrote something there and that write is happens-after any value that\r
+      // appears in the old table.  If putIfMatch does not find a null in the\r
+      // new table - somebody else should have recorded the null-not_null\r
+      // transition in this copy.\r
+      Object old_unboxed = ((Prime)oldval)._V;\r
+      assert old_unboxed != TOMBSTONE;\r
+      boolean copied_into_new = (putIfMatch(topmap, newkvs, key, old_unboxed, null) == null);\r
+\r
+      // ---\r
+      // Finally, now that any old value is exposed in the new table, we can\r
+      // forever hide the old-table value by slapping a TOMBPRIME down.  This\r
+      // will stop other threads from uselessly attempting to copy this slot\r
+      // (i.e., it's a speed optimization not a correctness issue).\r
+      while( !CAS_val(oldkvs,idx,oldval,TOMBPRIME) )\r
+        oldval = val(oldkvs,idx);\r
+\r
+      return copied_into_new;\r
+    } // end copy_slot\r
+  } // End of CHM\r
+\r
+\r
+  // --- Snapshot ------------------------------------------------------------\r
+  // The main class for iterating over the NBHM.  It "snapshots" a clean\r
+  // view of the K/V array.\r
+  private class SnapshotV implements Iterator<TypeV>, Enumeration<TypeV> {\r
+    final Object[] _sskvs;\r
+    public SnapshotV() {\r
+      while( true ) {           // Verify no table-copy-in-progress\r
+        Object[] topkvs = _kvs;\r
+        CHM topchm = chm(topkvs);\r
+        if( topchm._newkvs == null ) { // No table-copy-in-progress\r
+          // The "linearization point" for the iteration.  Every key in this\r
+          // table will be visited, but keys added later might be skipped or\r
+          // even be added to a following table (also not iterated over).\r
+          _sskvs = topkvs;\r
+          break;\r
+        }\r
+        // Table copy in-progress - so we cannot get a clean iteration.  We\r
+        // must help finish the table copy before we can start iterating.\r
+        topchm.help_copy_impl(NonBlockingHashMap.this,topkvs,true);\r
+      }\r
+      // Warm-up the iterator\r
+      next();\r
+    }\r
+    int length() { return len(_sskvs); }\r
+    Object key(int idx) { return NonBlockingHashMap.key(_sskvs,idx); }\r
+    private int _idx;              // Varies from 0-keys.length\r
+    private Object _nextK, _prevK; // Last 2 keys found\r
+    private TypeV  _nextV, _prevV; // Last 2 values found\r
+    public boolean hasNext() { return _nextV != null; }\r
+    public TypeV next() {\r
+      // 'next' actually knows what the next value will be - it had to\r
+      // figure that out last go-around lest 'hasNext' report true and\r
+      // some other thread deleted the last value.  Instead, 'next'\r
+      // spends all its effort finding the key that comes after the\r
+      // 'next' key.\r
+      if( _idx != 0 && _nextV == null ) throw new NoSuchElementException();\r
+      _prevK = _nextK;          // This will become the previous key\r
+      _prevV = _nextV;          // This will become the previous value\r
+      _nextV = null;            // We have no more next-key\r
+      // Attempt to set <_nextK,_nextV> to the next K,V pair.\r
+      // _nextV is the trigger: stop searching when it is != null\r
+      while( _idx<length() ) {  // Scan array\r
+        _nextK = key(_idx++); // Get a key that definitely is in the set (for the moment!)\r
+        if( _nextK != null && // Found something?\r
+            _nextK != TOMBSTONE &&\r
+            (_nextV=get(_nextK)) != null )\r
+          break;                // Got it!  _nextK is a valid Key\r
+      }                         // Else keep scanning\r
+      return _prevV;            // Return current value.\r
+    }\r
+    public void remove() {\r
+      if( _prevV == null ) throw new IllegalStateException();\r
+      putIfMatch( NonBlockingHashMap.this, _sskvs, _prevK, TOMBSTONE, _prevV );\r
+      _prevV = null;\r
+    }\r
+\r
+    public TypeV nextElement() { return next(); }\r
+    public boolean hasMoreElements() { return hasNext(); }\r
+  }\r
+\r
+  /** Returns an enumeration of the values in this table.\r
+   *  @return an enumeration of the values in this table\r
+   *  @see #values()  */\r
+  public Enumeration<TypeV> elements() { return new SnapshotV(); }\r
+\r
+  // --- values --------------------------------------------------------------\r
+  /** Returns a {@link Collection} view of the values contained in this map.\r
+   *  The collection is backed by the map, so changes to the map are reflected\r
+   *  in the collection, and vice-versa.  The collection supports element\r
+   *  removal, which removes the corresponding mapping from this map, via the\r
+   *  <tt>Iterator.remove</tt>, <tt>Collection.remove</tt>,\r
+   *  <tt>removeAll</tt>, <tt>retainAll</tt>, and <tt>clear</tt> operations.\r
+   *  It does not support the <tt>add</tt> or <tt>addAll</tt> operations.\r
+   *\r
+   *  <p>The view's <tt>iterator</tt> is a "weakly consistent" iterator that\r
+   *  will never throw {@link ConcurrentModificationException}, and guarantees\r
+   *  to traverse elements as they existed upon construction of the iterator,\r
+   *  and may (but is not guaranteed to) reflect any modifications subsequent\r
+   *  to construction. */\r
+  @Override\r
+  public Collection<TypeV> values() {\r
+    return new AbstractCollection<TypeV>() {\r
+      @Override public void    clear   (          ) {        NonBlockingHashMap.this.clear        ( ); }\r
+      @Override public int     size    (          ) { return NonBlockingHashMap.this.size         ( ); }\r
+      @Override public boolean contains( Object v ) { return NonBlockingHashMap.this.containsValue(v); }\r
+      @Override public Iterator<TypeV> iterator()   { return new SnapshotV(); }\r
+    };\r
+  }\r
+\r
+  // --- keySet --------------------------------------------------------------\r
+  private class SnapshotK implements Iterator<TypeK>, Enumeration<TypeK> {\r
+    final SnapshotV _ss;\r
+    public SnapshotK() { _ss = new SnapshotV(); }\r
+    public void remove() { _ss.remove(); }\r
+    public TypeK next() { _ss.next(); return (TypeK)_ss._prevK; }\r
+    public boolean hasNext() { return _ss.hasNext(); }\r
+    public TypeK nextElement() { return next(); }\r
+    public boolean hasMoreElements() { return hasNext(); }\r
+  }\r
+\r
+  /** Returns an enumeration of the keys in this table.\r
+   *  @return an enumeration of the keys in this table\r
+   *  @see #keySet()  */\r
+  public Enumeration<TypeK> keys() { return new SnapshotK(); }\r
+\r
+  /** Returns a {@link Set} view of the keys contained in this map.  The set\r
+   *  is backed by the map, so changes to the map are reflected in the set,\r
+   *  and vice-versa.  The set supports element removal, which removes the\r
+   *  corresponding mapping from this map, via the <tt>Iterator.remove</tt>,\r
+   *  <tt>Set.remove</tt>, <tt>removeAll</tt>, <tt>retainAll</tt>, and\r
+   *  <tt>clear</tt> operations.  It does not support the <tt>add</tt> or\r
+   *  <tt>addAll</tt> operations.\r
+   *\r
+   *  <p>The view's <tt>iterator</tt> is a "weakly consistent" iterator that\r
+   *  will never throw {@link ConcurrentModificationException}, and guarantees\r
+   *  to traverse elements as they existed upon construction of the iterator,\r
+   *  and may (but is not guaranteed to) reflect any modifications subsequent\r
+   *  to construction.  */\r
+  @Override\r
+  public Set<TypeK> keySet() {\r
+    return new AbstractSet<TypeK> () {\r
+      @Override public void    clear   (          ) {        NonBlockingHashMap.this.clear   ( ); }\r
+      @Override public int     size    (          ) { return NonBlockingHashMap.this.size    ( ); }\r
+      @Override public boolean contains( Object k ) { return NonBlockingHashMap.this.containsKey(k); }\r
+      @Override public boolean remove  ( Object k ) { return NonBlockingHashMap.this.remove  (k) != null; }\r
+      @Override public Iterator<TypeK> iterator()   { return new SnapshotK(); }\r
+    };\r
+  }\r
+\r
+\r
+  // --- entrySet ------------------------------------------------------------\r
+  // Warning: Each call to 'next' in this iterator constructs a new NBHMEntry.\r
+  private class NBHMEntry extends AbstractEntry<TypeK,TypeV> {\r
+    NBHMEntry( final TypeK k, final TypeV v ) { super(k,v); }\r
+    public TypeV setValue(final TypeV val) {\r
+      if( val == null ) throw new NullPointerException();\r
+      _val = val;\r
+      return put(_key, val);\r
+    }\r
+  }\r
+\r
+  private class SnapshotE implements Iterator<Map.Entry<TypeK,TypeV>> {\r
+    final SnapshotV _ss;\r
+    public SnapshotE() { _ss = new SnapshotV(); }\r
+    public void remove() { _ss.remove(); }\r
+    public Map.Entry<TypeK,TypeV> next() { _ss.next(); return new NBHMEntry((TypeK)_ss._prevK,_ss._prevV); }\r
+    public boolean hasNext() { return _ss.hasNext(); }\r
+  }\r
+\r
+  /** Returns a {@link Set} view of the mappings contained in this map.  The\r
+   *  set is backed by the map, so changes to the map are reflected in the\r
+   *  set, and vice-versa.  The set supports element removal, which removes\r
+   *  the corresponding mapping from the map, via the\r
+   *  <tt>Iterator.remove</tt>, <tt>Set.remove</tt>, <tt>removeAll</tt>,\r
+   *  <tt>retainAll</tt>, and <tt>clear</tt> operations.  It does not support\r
+   *  the <tt>add</tt> or <tt>addAll</tt> operations.\r
+   *\r
+   *  <p>The view's <tt>iterator</tt> is a "weakly consistent" iterator\r
+   *  that will never throw {@link ConcurrentModificationException},\r
+   *  and guarantees to traverse elements as they existed upon\r
+   *  construction of the iterator, and may (but is not guaranteed to)\r
+   *  reflect any modifications subsequent to construction.\r
+   *\r
+   *  <p><strong>Warning:</strong> the iterator associated with this Set\r
+   *  requires the creation of {@link java.util.Map.Entry} objects with each\r
+   *  iteration.  The {@link NonBlockingHashMap} does not normally create or\r
+   *  using {@link java.util.Map.Entry} objects so they will be created soley\r
+   *  to support this iteration.  Iterating using {@link #keySet} or {@link\r
+   *  #values} will be more efficient.\r
+   */\r
+  @Override\r
+  public Set<Map.Entry<TypeK,TypeV>> entrySet() {\r
+    return new AbstractSet<Map.Entry<TypeK,TypeV>>() {\r
+      @Override public void    clear   (          ) {        NonBlockingHashMap.this.clear( ); }\r
+      @Override public int     size    (          ) { return NonBlockingHashMap.this.size ( ); }\r
+      @Override public boolean remove( final Object o ) {\r
+        if( !(o instanceof Map.Entry)) return false;\r
+        final Map.Entry<?,?> e = (Map.Entry<?,?>)o;\r
+        return NonBlockingHashMap.this.remove(e.getKey(), e.getValue());\r
+      }\r
+      @Override public boolean contains(final Object o) {\r
+        if( !(o instanceof Map.Entry)) return false;\r
+        final Map.Entry<?,?> e = (Map.Entry<?,?>)o;\r
+        TypeV v = get(e.getKey());\r
+        return v.equals(e.getValue());\r
+      }\r
+      @Override public Iterator<Map.Entry<TypeK,TypeV>> iterator() { return new SnapshotE(); }\r
+    };\r
+  }\r
+\r
+  // --- writeObject -------------------------------------------------------\r
+  // Write a NBHM to a stream\r
+  private void writeObject(java.io.ObjectOutputStream s) throws IOException  {\r
+    s.defaultWriteObject();     // Nothing to write\r
+    for( Object K : keySet() ) {\r
+      final Object V = get(K);  // Do an official 'get'\r
+      s.writeObject(K);         // Write the <TypeK,TypeV> pair\r
+      s.writeObject(V);\r
+    }\r
+    s.writeObject(null);        // Sentinel to indicate end-of-data\r
+    s.writeObject(null);\r
+  }\r
+\r
+  // --- readObject --------------------------------------------------------\r
+  // Read a CHM from a stream\r
+  private void readObject(java.io.ObjectInputStream s) throws IOException, ClassNotFoundException {\r
+    s.defaultReadObject();      // Read nothing\r
+    initialize(MIN_SIZE);\r
+    for(;;) {\r
+      final TypeK K = (TypeK) s.readObject();\r
+      final TypeV V = (TypeV) s.readObject();\r
+      if( K == null ) break;\r
+      put(K,V);                 // Insert with an offical put\r
+    }\r
+  }\r
+\r
+} // End NonBlockingHashMap class\r