From 18609d4482268ee61246f182a6a5cb1144e34573 Mon Sep 17 00:00:00 2001 From: Peizhao Ou Date: Tue, 15 Oct 2013 23:12:08 -0700 Subject: [PATCH] tweak --- .../cliffc-hashtable/NonBlockingHashMap.java | 1292 +++++++++++++++++ .../simplified_cliffc_hashtable.cc | 57 + .../simplified_cliffc_hashtable.h | 837 +++++++++++ benchmark/linuxrwlocks/.gitignore | 1 + benchmark/linuxrwlocks/Makefile | 11 + benchmark/linuxrwlocks/linuxrwlocks.c | 292 ++++ benchmark/ms-queue/.gitignore | 1 + benchmark/ms-queue/Makefile | 17 + benchmark/ms-queue/main.c | 80 + benchmark/ms-queue/my_queue.c | 221 +++ benchmark/ms-queue/my_queue.h | 31 + .../codeGenerator/CodeGenerator.java | 24 +- 12 files changed, 2863 insertions(+), 1 deletion(-) create mode 100644 benchmark/cliffc-hashtable/NonBlockingHashMap.java create mode 100644 benchmark/cliffc-hashtable/simplified_cliffc_hashtable.cc create mode 100644 benchmark/cliffc-hashtable/simplified_cliffc_hashtable.h create mode 100644 benchmark/linuxrwlocks/.gitignore create mode 100644 benchmark/linuxrwlocks/Makefile create mode 100644 benchmark/linuxrwlocks/linuxrwlocks.c create mode 100644 benchmark/ms-queue/.gitignore create mode 100644 benchmark/ms-queue/Makefile create mode 100644 benchmark/ms-queue/main.c create mode 100644 benchmark/ms-queue/my_queue.c create mode 100644 benchmark/ms-queue/my_queue.h diff --git a/benchmark/cliffc-hashtable/NonBlockingHashMap.java b/benchmark/cliffc-hashtable/NonBlockingHashMap.java new file mode 100644 index 0000000..5995e48 --- /dev/null +++ b/benchmark/cliffc-hashtable/NonBlockingHashMap.java @@ -0,0 +1,1292 @@ +/* + * Written by Cliff Click and released to the public domain, as explained at + * http://creativecommons.org/licenses/publicdomain + */ + +package org.cliffc.high_scale_lib; +import java.io.IOException; +import java.io.Serializable; +import java.lang.reflect.Field; +import java.util.*; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.*; +import sun.misc.Unsafe; + +/** + * A lock-free alternate implementation of {@link java.util.concurrent.ConcurrentHashMap} + * with better scaling properties and generally lower costs to mutate the Map. + * It provides identical correctness properties as ConcurrentHashMap. All + * operations are non-blocking and multi-thread safe, including all update + * operations. {@link NonBlockingHashMap} scales substatially better than + * {@link java.util.concurrent.ConcurrentHashMap} for high update rates, even with a + * large concurrency factor. Scaling is linear up to 768 CPUs on a 768-CPU + * Azul box, even with 100% updates or 100% reads or any fraction in-between. + * Linear scaling up to all cpus has been observed on a 32-way Sun US2 box, + * 32-way Sun Niagra box, 8-way Intel box and a 4-way Power box. + * + * This class obeys the same functional specification as {@link + * java.util.Hashtable}, and includes versions of methods corresponding to + * each method of Hashtable. However, even though all operations are + * thread-safe, operations do not entail locking and there is + * not any support for locking the entire table in a way that + * prevents all access. This class is fully interoperable with + * Hashtable in programs that rely on its thread safety but not on + * its synchronization details. + * + *
<p>
Operations (including put) generally do not block, so may + * overlap with other update operations (including other puts and + * removes). Retrievals reflect the results of the most recently + * completed update operations holding upon their onset. For + * aggregate operations such as putAll, concurrent retrievals may + * reflect insertion or removal of only some entries. Similarly, Iterators + * and Enumerations return elements reflecting the state of the hash table at + * some point at or since the creation of the iterator/enumeration. They do + * not throw {@link ConcurrentModificationException}. However, + * iterators are designed to be used by only one thread at a time. + * + *
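+ * <p>A minimal illustrative sketch (not from the original file; assumes a
+ * populated {@code NonBlockingHashMap<String,Integer> map} variable):
+ * <pre>{@code
+ *   // Safe while other threads put/remove concurrently: the loop never
+ *   // throws ConcurrentModificationException, though it may or may not
+ *   // observe updates made after the iterator was created.
+ *   for( Map.Entry<String,Integer> e : map.entrySet() )
+ *     System.out.println(e.getKey() + "=" + e.getValue());
+ * }</pre>
+ *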
<p>
Very full tables, or tables with high reprobe rates may trigger an + * internal resize operation to move into a larger table. Resizing is not + * terribly expensive, but it is not free either; during resize operations + * table throughput may drop somewhat. All threads that visit the table + * during a resize will 'help' the resizing but will still be allowed to + * complete their operation before the resize is finished (i.e., a simple + * 'get' operation on a million-entry table undergoing resizing will not need + * to block until the entire million entries are copied). + * + *
<p>
This class and its views and iterators implement all of the + * optional methods of the {@link Map} and {@link Iterator} + * interfaces. + * + *
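+ * <p>A minimal usage sketch (illustrative only; the variable name and values
+ * are assumptions, not part of the original file):
+ * <pre>{@code
+ *   NonBlockingHashMap<String,Integer> map = new NonBlockingHashMap<String,Integer>();
+ *   map.put("count", 1);           // returns null (no prior mapping)
+ *   map.putIfAbsent("count", 2);   // returns 1; existing mapping unchanged
+ *   map.replace("count", 1, 3);    // returns true; value is now 3
+ *   map.remove("count");           // returns 3
+ * }</pre>
+ *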
<p>
Like {@link Hashtable} but unlike {@link HashMap}, this class + * does not allow null to be used as a key or value. + * + * + * @since 1.5 + * @author Cliff Click + * @param the type of keys maintained by this map + * @param the type of mapped values + * + * @version 1.1.2 + * @author Prashant Deva - moved hash() function out of get_impl() so it is + * not calculated multiple times. + */ + +public class NonBlockingHashMap + extends AbstractMap + implements ConcurrentMap, Cloneable, Serializable { + + private static final long serialVersionUID = 1234123412341234123L; + + private static final int REPROBE_LIMIT=10; // Too many reprobes then force a table-resize + + // --- Bits to allow Unsafe access to arrays + private static final Unsafe _unsafe = UtilUnsafe.getUnsafe(); + private static final int _Obase = _unsafe.arrayBaseOffset(Object[].class); + private static final int _Oscale = _unsafe.arrayIndexScale(Object[].class); + private static long rawIndex(final Object[] ary, final int idx) { + assert idx >= 0 && idx < ary.length; + return _Obase + idx * _Oscale; + } + + // --- Setup to use Unsafe + private static final long _kvs_offset; + static { // + Field f = null; + try { f = NonBlockingHashMap.class.getDeclaredField("_kvs"); } + catch( java.lang.NoSuchFieldException e ) { throw new RuntimeException(e); } + _kvs_offset = _unsafe.objectFieldOffset(f); + } + private final boolean CAS_kvs( final Object[] oldkvs, final Object[] newkvs ) { + return _unsafe.compareAndSwapObject(this, _kvs_offset, oldkvs, newkvs ); + } + + // --- Adding a 'prime' bit onto Values via wrapping with a junk wrapper class + private static final class Prime { + final Object _V; + Prime( Object V ) { _V = V; } + static Object unbox( Object V ) { return V instanceof Prime ? ((Prime)V)._V : V; } + } + + // --- hash ---------------------------------------------------------------- + // Helper function to spread lousy hashCodes + private static final int hash(final Object key) { + int h = key.hashCode(); // The real hashCode call + // Spread bits to regularize both segment and index locations, + // using variant of single-word Wang/Jenkins hash. + h += (h << 15) ^ 0xffffcd7d; + h ^= (h >>> 10); + h += (h << 3); + h ^= (h >>> 6); + h += (h << 2) + (h << 14); + return h ^ (h >>> 16); + } + + // --- The Hash Table -------------------- + // Slot 0 is always used for a 'CHM' entry below to hold the interesting + // bits of the hash table. Slot 1 holds full hashes as an array of ints. + // Slots {2,3}, {4,5}, etc hold {Key,Value} pairs. The entire hash table + // can be atomically replaced by CASing the _kvs field. + // + // Why is CHM buried inside the _kvs Object array, instead of the other way + // around? The CHM info is used during resize events and updates, but not + // during standard 'get' operations. I assume 'get' is much more frequent + // than 'put'. 'get' can skip the extra indirection of skipping through the + // CHM to reach the _kvs array. + private transient Object[] _kvs; + private static final CHM chm (Object[] kvs) { return (CHM )kvs[0]; } + private static final int[] hashes(Object[] kvs) { return (int[])kvs[1]; } + // Number of K,V pairs in the table + private static final int len(Object[] kvs) { return (kvs.length-2)>>1; } + + // Time since last resize + private transient long _last_resize_milli; + + // --- Minimum table size ---------------- + // Pick size 8 K/V pairs, which turns into (8*2+2)*4+12 = 84 bytes on a + // standard 32-bit HotSpot, and (8*2+2)*8+12 = 156 bytes on 64-bit Azul. 
+ private static final int MIN_SIZE_LOG=3; // + private static final int MIN_SIZE=(1<>2); + } + + // --- NonBlockingHashMap -------------------------------------------------- + // Constructors + + /** Create a new NonBlockingHashMap with default minimum size (currently set + * to 8 K/V pairs or roughly 84 bytes on a standard 32-bit JVM). */ + public NonBlockingHashMap( ) { this(MIN_SIZE); } + + /** Create a new NonBlockingHashMap with initial room for the given number of + * elements, thus avoiding internal resizing operations to reach an + * appropriate size. Large numbers here when used with a small count of + * elements will sacrifice space for a small amount of time gained. The + * initial size will be rounded up internally to the next larger power of 2. */ + public NonBlockingHashMap( final int initial_sz ) { initialize(initial_sz); } + private final void initialize( int initial_sz ) { + if( initial_sz < 0 ) throw new IllegalArgumentException(); + int i; // Convert to next largest power-of-2 + if( initial_sz > 1024*1024 ) initial_sz = 1024*1024; + for( i=MIN_SIZE_LOG; (1<size() == 0. + * @return size() == 0 */ + @Override + public boolean isEmpty ( ) { return size() == 0; } + + /** Tests if the key in the table using the equals method. + * @return true if the key is in the table using the equals method + * @throws NullPointerException if the specified key is null */ + @Override + public boolean containsKey( Object key ) { return get(key) != null; } + + /** Legacy method testing if some key maps into the specified value in this + * table. This method is identical in functionality to {@link + * #containsValue}, and exists solely to ensure full compatibility with + * class {@link java.util.Hashtable}, which supported this method prior to + * introduction of the Java Collections framework. + * @param val a value to search for + * @return true if this map maps one or more keys to the specified value + * @throws NullPointerException if the specified value is null */ + public boolean contains ( Object val ) { return containsValue(val); } + + /** Maps the specified key to the specified value in the table. Neither key + * nor value can be null. + *
<p>
The value can be retrieved by calling {@link #get} with a key that is + * equal to the original key. + * @param key key with which the specified value is to be associated + * @param val value to be associated with the specified key + * @return the previous value associated with key, or + * null if there was no mapping for key + * @throws NullPointerException if the specified key or value is null */ + @Override + public TypeV put ( TypeK key, TypeV val ) { return putIfMatch( key, val, NO_MATCH_OLD); } + + /** Atomically, do a {@link #put} if-and-only-if the key is not mapped. + * Useful to ensure that only a single mapping for the key exists, even if + * many threads are trying to create the mapping in parallel. + * @return the previous value associated with the specified key, + * or null if there was no mapping for the key + * @throws NullPointerException if the specified key or value is null */ + public TypeV putIfAbsent( TypeK key, TypeV val ) { return putIfMatch( key, val, TOMBSTONE ); } + + /** Removes the key (and its corresponding value) from this map. + * This method does nothing if the key is not in the map. + * @return the previous value associated with key, or + * null if there was no mapping for key + * @throws NullPointerException if the specified key is null */ + @Override + public TypeV remove ( Object key ) { return putIfMatch( key,TOMBSTONE, NO_MATCH_OLD); } + + /** Atomically do a {@link #remove(Object)} if-and-only-if the key is mapped + * to a value which is equals to the given value. + * @throws NullPointerException if the specified key or value is null */ + public boolean remove ( Object key,Object val ) { return putIfMatch( key,TOMBSTONE, val ) == val; } + + /** Atomically do a put(key,val) if-and-only-if the key is + * mapped to some value already. + * @throws NullPointerException if the specified key or value is null */ + public TypeV replace ( TypeK key, TypeV val ) { return putIfMatch( key, val,MATCH_ANY ); } + + /** Atomically do a put(key,newValue) if-and-only-if the key is + * mapped a value which is equals to oldValue. + * @throws NullPointerException if the specified key or value is null */ + public boolean replace ( TypeK key, TypeV oldValue, TypeV newValue ) { + return putIfMatch( key, newValue, oldValue ) == oldValue; + } + + private final TypeV putIfMatch( Object key, Object newVal, Object oldVal ) { + if (oldVal == null || newVal == null) throw new NullPointerException(); + final Object res = putIfMatch( this, _kvs, key, newVal, oldVal ); + assert !(res instanceof Prime); + assert res != null; + return res == TOMBSTONE ? null : (TypeV)res; + } + + + /** Copies all of the mappings from the specified map to this one, replacing + * any existing mappings. + * @param m mappings to be stored in this map */ + @Override + public void putAll(Map m) { + for (Map.Entry e : m.entrySet()) + put(e.getKey(), e.getValue()); + } + + /** Removes all of the mappings from this map. */ + @Override + public void clear() { // Smack a new empty table down + Object[] newkvs = new NonBlockingHashMap(MIN_SIZE)._kvs; + while( !CAS_kvs(_kvs,newkvs) ) // Spin until the clear works + ; + } + + /** Returns true if this Map maps one or more keys to the specified + * value. Note: This method requires a full internal traversal of the + * hash table and is much slower than {@link #containsKey}. 
+ * @param val value whose presence in this map is to be tested + * @return true if this map maps one or more keys to the specified value + * @throws NullPointerException if the specified value is null */ + @Override + public boolean containsValue( final Object val ) { + if( val == null ) throw new NullPointerException(); + for( TypeV V : values() ) + if( V == val || V.equals(val) ) + return true; + return false; + } + + // This function is supposed to do something for Hashtable, and the JCK + // tests hang until it gets called... by somebody ... for some reason, + // any reason.... + protected void rehash() { + } + + /** + * Creates a shallow copy of this hashtable. All the structure of the + * hashtable itself is copied, but the keys and values are not cloned. + * This is a relatively expensive operation. + * + * @return a clone of the hashtable. + */ + @Override + public Object clone() { + try { + // Must clone, to get the class right; NBHM might have been + // extended so it would be wrong to just make a new NBHM. + NonBlockingHashMap t = (NonBlockingHashMap) super.clone(); + // But I don't have an atomic clone operation - the underlying _kvs + // structure is undergoing rapid change. If I just clone the _kvs + // field, the CHM in _kvs[0] won't be in sync. + // + // Wipe out the cloned array (it was shallow anyways). + t.clear(); + // Now copy sanely + for( TypeK K : keySet() ) { + final TypeV V = get(K); // Do an official 'get' + t.put(K,V); + } + return t; + } catch (CloneNotSupportedException e) { + // this shouldn't happen, since we are Cloneable + throw new InternalError(); + } + } + + /** + * Returns a string representation of this map. The string representation + * consists of a list of key-value mappings in the order returned by the + * map's entrySet view's iterator, enclosed in braces + * ("{}"). Adjacent mappings are separated by the characters + * ", " (comma and space). Each key-value mapping is rendered as + * the key followed by an equals sign ("=") followed by the + * associated value. Keys and values are converted to strings as by + * {@link String#valueOf(Object)}. + * + * @return a string representation of this map + */ + @Override + public String toString() { + Iterator> i = entrySet().iterator(); + if( !i.hasNext()) + return "{}"; + + StringBuilder sb = new StringBuilder(); + sb.append('{'); + for (;;) { + Entry e = i.next(); + TypeK key = e.getKey(); + TypeV value = e.getValue(); + sb.append(key == this ? "(this Map)" : key); + sb.append('='); + sb.append(value == this ? "(this Map)" : value); + if( !i.hasNext()) + return sb.append('}').toString(); + sb.append(", "); + } + } + + // --- keyeq --------------------------------------------------------------- + // Check for key equality. Try direct pointer compare first, then see if + // the hashes are unequal (fast negative test) and finally do the full-on + // 'equals' v-call. + private static boolean keyeq( Object K, Object key, int[] hashes, int hash, int fullhash ) { + return + K==key || // Either keys match exactly OR + // hash exists and matches? hash can be zero during the install of a + // new key/value pair. + ((hashes[hash] == 0 || hashes[hash] == fullhash) && + // Do not call the users' "equals()" call with a Tombstone, as this can + // surprise poorly written "equals()" calls that throw exceptions + // instead of simply returning false. + K != TOMBSTONE && // Do not call users' equals call with a Tombstone + // Do the match the hard way - with the users' key being the loop- + // invariant "this" pointer. 
I could have flipped the order of + // operands (since equals is commutative), but I'm making mega-morphic + // v-calls in a reprobing loop and nailing down the 'this' argument + // gives both the JIT and the hardware a chance to prefetch the call target. + key.equals(K)); // Finally do the hard match + } + + // --- get ----------------------------------------------------------------- + /** Returns the value to which the specified key is mapped, or {@code null} + * if this map contains no mapping for the key. + *
<p>
More formally, if this map contains a mapping from a key {@code k} to + * a value {@code v} such that {@code key.equals(k)}, then this method + * returns {@code v}; otherwise it returns {@code null}. (There can be at + * most one such mapping.) + * @throws NullPointerException if the specified key is null */ + // Never returns a Prime nor a Tombstone. + @Override + public TypeV get( Object key ) { + final int fullhash= hash (key); // throws NullPointerException if key is null + final Object V = get_impl(this,_kvs,key,fullhash); + assert !(V instanceof Prime); // Never return a Prime + return (TypeV)V; + } + + private static final Object get_impl( final NonBlockingHashMap topmap, final Object[] kvs, final Object key, final int fullhash ) { + final int len = len (kvs); // Count of key/value pairs, reads kvs.length + final CHM chm = chm (kvs); // The CHM, for a volatile read below; reads slot 0 of kvs + final int[] hashes=hashes(kvs); // The memoized hashes; reads slot 1 of kvs + + int idx = fullhash & (len-1); // First key hash + + // Main spin/reprobe loop, looking for a Key hit + int reprobe_cnt=0; + while( true ) { + // Probe table. Each read of 'val' probably misses in cache in a big + // table; hopefully the read of 'key' then hits in cache. + final Object K = key(kvs,idx); // Get key before volatile read, could be null + final Object V = val(kvs,idx); // Get value before volatile read, could be null or Tombstone or Prime + if( K == null ) return null; // A clear miss + + // We need a volatile-read here to preserve happens-before semantics on + // newly inserted Keys. If the Key body was written just before inserting + // into the table a Key-compare here might read the uninitalized Key body. + // Annoyingly this means we have to volatile-read before EACH key compare. + // . + // We also need a volatile-read between reading a newly inserted Value + // and returning the Value (so the user might end up reading the stale + // Value contents). Same problem as with keys - and the one volatile + // read covers both. + final Object[] newkvs = chm._newkvs; // VOLATILE READ before key compare + + // Key-compare + if( keyeq(K,key,hashes,idx,fullhash) ) { + // Key hit! Check for no table-copy-in-progress + if( !(V instanceof Prime) ) // No copy? + return (V == TOMBSTONE) ? null : V; // Return the value + // Key hit - but slot is (possibly partially) copied to the new table. + // Finish the copy & retry in the new table. + return get_impl(topmap,chm.copy_slot_and_check(topmap,kvs,idx,key),key,fullhash); // Retry in the new table + } + // get and put must have the same key lookup logic! But only 'put' + // needs to force a table-resize for a too-long key-reprobe sequence. + // Check for too-many-reprobes on get - and flip to the new table. + // ???? Why a TOMBSTONE key means no more keys in this table + // because a TOMBSTONE key should be null before + if( ++reprobe_cnt >= reprobe_limit(len) || // too many probes + key == TOMBSTONE ) // found a TOMBSTONE key, means no more keys in this table + return newkvs == null ? null : get_impl(topmap,topmap.help_copy(newkvs),key,fullhash); // Retry in the new table + + idx = (idx+1)&(len-1); // Reprobe by 1! (could now prefetch) + } + } + + // --- putIfMatch --------------------------------------------------------- + // Put, Remove, PutIfAbsent, etc. Return the old value. If the returned + // value is equal to expVal (or expVal is NO_MATCH_OLD) then the put can be + // assumed to work (although might have been immediately overwritten). 
Only + // the path through copy_slot passes in an expected value of null, and + // putIfMatch only returns a null if passed in an expected null. + private static final Object putIfMatch( final NonBlockingHashMap topmap, final Object[] kvs, final Object key, final Object putval, final Object expVal ) { + assert putval != null; + assert !(putval instanceof Prime); + assert !(expVal instanceof Prime); + final int fullhash = hash (key); // throws NullPointerException if key null + final int len = len (kvs); // Count of key/value pairs, reads kvs.length + final CHM chm = chm (kvs); // Reads kvs[0] + final int[] hashes = hashes(kvs); // Reads kvs[1], read before kvs[0] + int idx = fullhash & (len-1); + + // --- + // Key-Claim stanza: spin till we can claim a Key (or force a resizing). + int reprobe_cnt=0; + Object K=null, V=null; + Object[] newkvs=null; + while( true ) { // Spin till we get a Key slot + V = val(kvs,idx); // Get old value (before volatile read below!) + K = key(kvs,idx); // Get current key + if( K == null ) { // Slot is free? + // Found an empty Key slot - which means this Key has never been in + // this table. No need to put a Tombstone - the Key is not here! + if( putval == TOMBSTONE ) return putval; // Not-now & never-been in this table + // Claim the null key-slot + if( CAS_key(kvs,idx, null, key ) ) { // Claim slot for Key + chm._slots.add(1); // Raise key-slots-used count + hashes[idx] = fullhash; // Memoize fullhash + break; // Got it! + } + // CAS to claim the key-slot failed. + // + // This re-read of the Key points out an annoying short-coming of Java + // CAS. Most hardware CAS's report back the existing value - so that + // if you fail you have a *witness* - the value which caused the CAS + // to fail. The Java API turns this into a boolean destroying the + // witness. Re-reading does not recover the witness because another + // thread can write over the memory after the CAS. Hence we can be in + // the unfortunate situation of having a CAS fail *for cause* but + // having that cause removed by a later store. This turns a + // non-spurious-failure CAS (such as Azul has) into one that can + // apparently spuriously fail - and we avoid apparent spurious failure + // by not allowing Keys to ever change. + K = key(kvs,idx); // CAS failed, get updated value + assert K != null; // If keys[idx] is null, CAS shoulda worked + } + // Key slot was not null, there exists a Key here + + // We need a volatile-read here to preserve happens-before semantics on + // newly inserted Keys. If the Key body was written just before inserting + // into the table a Key-compare here might read the uninitalized Key body. + // Annoyingly this means we have to volatile-read before EACH key compare. + newkvs = chm._newkvs; // VOLATILE READ before key compare + + if( keyeq(K,key,hashes,idx,fullhash) ) + break; // Got it! + + // get and put must have the same key lookup logic! Lest 'get' give + // up looking too soon. + //topmap._reprobes.add(1); + if( ++reprobe_cnt >= reprobe_limit(len) || // too many probes or + key == TOMBSTONE ) { // found a TOMBSTONE key, means no more keys + // We simply must have a new table to do a 'put'. At this point a + // 'get' will also go to the new table (if any). We do not need + // to claim a key slot (indeed, we cannot find a free one to claim!). + newkvs = chm.resize(topmap,kvs); + if( expVal != null ) topmap.help_copy(newkvs); // help along an existing copy + return putIfMatch(topmap,newkvs,key,putval,expVal); + } + + idx = (idx+1)&(len-1); // Reprobe! 
+ } // End of spinning till we get a Key slot + + // --- + // Found the proper Key slot, now update the matching Value slot. We + // never put a null, so Value slots monotonically move from null to + // not-null (deleted Values use Tombstone). Thus if 'V' is null we + // fail this fast cutout and fall into the check for table-full. + if( putval == V ) return V; // Fast cutout for no-change + + // See if we want to move to a new table (to avoid high average re-probe + // counts). We only check on the initial set of a Value from null to + // not-null (i.e., once per key-insert). Of course we got a 'free' check + // of newkvs once per key-compare (not really free, but paid-for by the + // time we get here). + if( newkvs == null && // New table-copy already spotted? + // Once per fresh key-insert check the hard way + ((V == null && chm.tableFull(reprobe_cnt,len)) || + // Or we found a Prime, but the JMM allowed reordering such that we + // did not spot the new table (very rare race here: the writing + // thread did a CAS of _newkvs then a store of a Prime. This thread + // reads the Prime, then reads _newkvs - but the read of Prime was so + // delayed (or the read of _newkvs was so accelerated) that they + // swapped and we still read a null _newkvs. The resize call below + // will do a CAS on _newkvs forcing the read. + V instanceof Prime) ) + newkvs = chm.resize(topmap,kvs); // Force the new table copy to start + // See if we are moving to a new table. + // If so, copy our slot and retry in the new table. + if( newkvs != null ) + return putIfMatch(topmap,chm.copy_slot_and_check(topmap,kvs,idx,expVal),key,putval,expVal); + + // --- + // We are finally prepared to update the existing table + while( true ) { + assert !(V instanceof Prime); + + // Must match old, and we do not? Then bail out now. Note that either V + // or expVal might be TOMBSTONE. Also V can be null, if we've never + // inserted a value before. expVal can be null if we are called from + // copy_slot. + + if( expVal != NO_MATCH_OLD && // Do we care about expected-Value at all? + V != expVal && // No instant match already? + (expVal != MATCH_ANY || V == TOMBSTONE || V == null) && + !(V==null && expVal == TOMBSTONE) && // Match on null/TOMBSTONE combo + (expVal == null || !expVal.equals(V)) ) // Expensive equals check at the last + return V; // Do not update! + + // Actually change the Value in the Key,Value pair + if( CAS_val(kvs, idx, V, putval ) ) { + // CAS succeeded - we did the update! + // Both normal put's and table-copy calls putIfMatch, but table-copy + // does not (effectively) increase the number of live k/v pairs. + if( expVal != null ) { + // Adjust sizes - a striped counter + if( (V == null || V == TOMBSTONE) && putval != TOMBSTONE ) chm._size.add( 1); + if( !(V == null || V == TOMBSTONE) && putval == TOMBSTONE ) chm._size.add(-1); + } + return (V==null && expVal!=null) ? TOMBSTONE : V; + } + // Else CAS failed + V = val(kvs,idx); // Get new value + // If a Prime'd value got installed, we need to re-run the put on the + // new table. Otherwise we lost the CAS to another racing put. + // Simply retry from the start. + if( V instanceof Prime ) + return putIfMatch(topmap,chm.copy_slot_and_check(topmap,kvs,idx,expVal),key,putval,expVal); + } + } + + // --- help_copy --------------------------------------------------------- + // Help along an existing resize operation. This is just a fast cut-out + // wrapper, to encourage inlining for the fast no-copy-in-progress case. 
We + // always help the top-most table copy, even if there are nested table + // copies in progress. + private final Object[] help_copy( Object[] helper ) { + // Read the top-level KVS only once. We'll try to help this copy along, + // even if it gets promoted out from under us (i.e., the copy completes + // and another KVS becomes the top-level copy). + Object[] topkvs = _kvs; + CHM topchm = chm(topkvs); + if( topchm._newkvs == null ) return helper; // No copy in-progress + topchm.help_copy_impl(this,topkvs,false); + return helper; + } + + + // --- CHM ----------------------------------------------------------------- + // The control structure for the NonBlockingHashMap + private static final class CHM { + // Size in active K,V pairs + private final Counter _size; + public int size () { return (int)_size.get(); } + + // --- + // These next 2 fields are used in the resizing heuristics, to judge when + // it is time to resize or copy the table. Slots is a count of used-up + // key slots, and when it nears a large fraction of the table we probably + // end up reprobing too much. Last-resize-milli is the time since the + // last resize; if we are running back-to-back resizes without growing + // (because there are only a few live keys but many slots full of dead + // keys) then we need a larger table to cut down on the churn. + + // Count of used slots, to tell when table is full of dead unusable slots + private final Counter _slots; + public int slots() { return (int)_slots.get(); } + + // --- + // New mappings, used during resizing. + // The 'new KVs' array - created during a resize operation. This + // represents the new table being copied from the old one. It's the + // volatile variable that is read as we cross from one table to the next, + // to get the required memory orderings. It monotonically transits from + // null to set (once). + volatile Object[] _newkvs; + private final AtomicReferenceFieldUpdater _newkvsUpdater = + AtomicReferenceFieldUpdater.newUpdater(CHM.class,Object[].class, "_newkvs"); + // Set the _next field if we can. + boolean CAS_newkvs( Object[] newkvs ) { + while( _newkvs == null ) + if( _newkvsUpdater.compareAndSet(this,null,newkvs) ) + return true; + return false; + } + // Sometimes many threads race to create a new very large table. Only 1 + // wins the race, but the losers all allocate a junk large table with + // hefty allocation costs. Attempt to control the overkill here by + // throttling attempts to create a new table. I cannot really block here + // (lest I lose the non-blocking property) but late-arriving threads can + // give the initial resizing thread a little time to allocate the initial + // new table. The Right Long Term Fix here is to use array-lets and + // incrementally create the new very large array. In C I'd make the array + // with malloc (which would mmap under the hood) which would only eat + // virtual-address and not real memory - and after Somebody wins then we + // could in parallel initialize the array. Java does not allow + // un-initialized array creation (especially of ref arrays!). 
+ volatile long _resizers; // count of threads attempting an initial resize + private static final AtomicLongFieldUpdater _resizerUpdater = + AtomicLongFieldUpdater.newUpdater(CHM.class, "_resizers"); + + // --- + // Simple constructor + CHM( Counter size ) { + _size = size; + _slots= new Counter(); + } + + // --- tableFull --------------------------------------------------------- + // Heuristic to decide if this table is too full, and we should start a + // new table. Note that if a 'get' call has reprobed too many times and + // decided the table must be full, then always the estimate_sum must be + // high and we must report the table is full. If we do not, then we might + // end up deciding that the table is not full and inserting into the + // current table, while a 'get' has decided the same key cannot be in this + // table because of too many reprobes. The invariant is: + // slots.estimate_sum >= max_reprobe_cnt >= reprobe_limit(len) + private final boolean tableFull( int reprobe_cnt, int len ) { + return + // Do the cheap check first: we allow some number of reprobes always + reprobe_cnt >= REPROBE_LIMIT && + // More expensive check: see if the table is > 1/4 full. + _slots.estimate_get() >= reprobe_limit(len); + } + + // --- resize ------------------------------------------------------------ + // Resizing after too many probes. "How Big???" heuristics are here. + // Callers will (not this routine) will 'help_copy' any in-progress copy. + // Since this routine has a fast cutout for copy-already-started, callers + // MUST 'help_copy' lest we have a path which forever runs through + // 'resize' only to discover a copy-in-progress which never progresses. + private final Object[] resize( NonBlockingHashMap topmap, Object[] kvs) { + assert chm(kvs) == this; + + // Check for resize already in progress, probably triggered by another thread + Object[] newkvs = _newkvs; // VOLATILE READ + if( newkvs != null ) // See if resize is already in progress + return newkvs; // Use the new table already + + // No copy in-progress, so start one. First up: compute new table size. + int oldlen = len(kvs); // Old count of K,V pairs allowed + int sz = size(); // Get current table count of active K,V pairs + int newsz = sz; // First size estimate + + // Heuristic to determine new size. We expect plenty of dead-slots-with-keys + // and we need some decent padding to avoid endless reprobing. + if( sz >= (oldlen>>2) ) { // If we are >25% full of keys then... + newsz = oldlen<<1; // Double size + if( sz >= (oldlen>>1) ) // If we are >50% full of keys then... + newsz = oldlen<<2; // Double double size + } + // This heuristic in the next 2 lines leads to a much denser table + // with a higher reprobe rate + //if( sz >= (oldlen>>1) ) // If we are >50% full of keys then... + // newsz = oldlen<<1; // Double size + + // Last (re)size operation was very recent? Then double again; slows + // down resize operations for tables subject to a high key churn rate. + long tm = System.currentTimeMillis(); + long q=0; + if( newsz <= oldlen && // New table would shrink or hold steady? + tm <= topmap._last_resize_milli+10000 && // Recent resize (less than 1 sec ago) + (q=_slots.estimate_get()) >= (sz<<1) ) // 1/2 of keys are dead? 
+ newsz = oldlen<<1; // Double the existing size + + // Do not shrink, ever + if( newsz < oldlen ) newsz = oldlen; + + // Convert to power-of-2 + int log2; + for( log2=MIN_SIZE_LOG; (1<>20/*megs*/; + if( r >= 2 && megs > 0 ) { // Already 2 guys trying; wait and see + newkvs = _newkvs; // Between dorking around, another thread did it + if( newkvs != null ) // See if resize is already in progress + return newkvs; // Use the new table already + // TODO - use a wait with timeout, so we'll wakeup as soon as the new table + // is ready, or after the timeout in any case. + //synchronized( this ) { wait(8*megs); } // Timeout - we always wakeup + // For now, sleep a tad and see if the 2 guys already trying to make + // the table actually get around to making it happen. + try { Thread.sleep(8*megs); } catch( Exception e ) { } + } + // Last check, since the 'new' below is expensive and there is a chance + // that another thread slipped in a new thread while we ran the heuristic. + newkvs = _newkvs; + if( newkvs != null ) // See if resize is already in progress + return newkvs; // Use the new table already + + // Double size for K,V pairs, add 1 for CHM + newkvs = new Object[((1< _copyIdxUpdater = + AtomicLongFieldUpdater.newUpdater(CHM.class, "_copyIdx"); + + // Work-done reporting. Used to efficiently signal when we can move to + // the new table. From 0 to len(oldkvs) refers to copying from the old + // table to the new. + volatile long _copyDone= 0; + static private final AtomicLongFieldUpdater _copyDoneUpdater = + AtomicLongFieldUpdater.newUpdater(CHM.class, "_copyDone"); + + // --- help_copy_impl ---------------------------------------------------- + // Help along an existing resize operation. We hope its the top-level + // copy (it was when we started) but this CHM might have been promoted out + // of the top position. + private final void help_copy_impl( NonBlockingHashMap topmap, Object[] oldkvs, boolean copy_all ) { + assert chm(oldkvs) == this; + Object[] newkvs = _newkvs; + assert newkvs != null; // Already checked by caller + int oldlen = len(oldkvs); // Total amount to copy + final int MIN_COPY_WORK = Math.min(oldlen,1024); // Limit per-thread work + + // --- + int panic_start = -1; + int copyidx=-9999; // Fool javac to think it's initialized + while( _copyDone < oldlen ) { // Still needing to copy? + // Carve out a chunk of work. The counter wraps around so every + // thread eventually tries to copy every slot repeatedly. + + // We "panic" if we have tried TWICE to copy every slot - and it still + // has not happened. i.e., twice some thread somewhere claimed they + // would copy 'slot X' (by bumping _copyIdx) but they never claimed to + // have finished (by bumping _copyDone). Our choices become limited: + // we can wait for the work-claimers to finish (and become a blocking + // algorithm) or do the copy work ourselves. Tiny tables with huge + // thread counts trying to copy the table often 'panic'. + if( panic_start == -1 ) { // No panic? + copyidx = (int)_copyIdx; + while( copyidx < (oldlen<<1) && // 'panic' check + !_copyIdxUpdater.compareAndSet(this,copyidx,copyidx+MIN_COPY_WORK) ) + copyidx = (int)_copyIdx; // Re-read + if( !(copyidx < (oldlen<<1)) ) // Panic! + panic_start = copyidx; // Record where we started to panic-copy + } + + // We now know what to copy. Try to copy. 
+ int workdone = 0; + for( int i=0; i 0 ) // Report work-done occasionally + copy_check_and_promote( topmap, oldkvs, workdone );// See if we can promote + //for( int i=0; i 0 ) { + while( !_copyDoneUpdater.compareAndSet(this,copyDone,copyDone+workdone) ) { + copyDone = _copyDone; // Reload, retry + assert (copyDone+workdone) <= oldlen; + } + //if( (10*copyDone/oldlen) != (10*(copyDone+workdone)/oldlen) ) + //System.out.print(" "+(copyDone+workdone)*100/oldlen+"%"+"_"+(_copyIdx*100/oldlen)+"%"); + } + + // Check for copy being ALL done, and promote. Note that we might have + // nested in-progress copies and manage to finish a nested copy before + // finishing the top-level copy. We only promote top-level copies. + if( copyDone+workdone == oldlen && // Ready to promote this table? + topmap._kvs == oldkvs && // Looking at the top-level table? + // Attempt to promote + topmap.CAS_kvs(oldkvs,_newkvs) ) { + topmap._last_resize_milli = System.currentTimeMillis(); // Record resize time for next check + //long nano = System.nanoTime(); + //System.out.println(" "+nano+" Promote table to "+len(_newkvs)); + //if( System.out != null ) System.out.print("]"); + } + } + // --- copy_slot --------------------------------------------------------- + // Copy one K/V pair from oldkvs[i] to newkvs. Returns true if we can + // confirm that the new table guaranteed has a value for this old-table + // slot. We need an accurate confirmed-copy count so that we know when we + // can promote (if we promote the new table too soon, other threads may + // 'miss' on values not-yet-copied from the old table). We don't allow + // any direct updates on the new table, unless they first happened to the + // old table - so that any transition in the new table from null to + // not-null must have been from a copy_slot (or other old-table overwrite) + // and not from a thread directly writing in the new table. Thus we can + // count null-to-not-null transitions in the new table. + private boolean copy_slot( NonBlockingHashMap topmap, int idx, Object[] oldkvs, Object[] newkvs ) { + // Blindly set the key slot from null to TOMBSTONE, to eagerly stop + // fresh put's from inserting new values in the old table when the old + // table is mid-resize. We don't need to act on the results here, + // because our correctness stems from box'ing the Value field. Slamming + // the Key field is a minor speed optimization. + Object key; + while( (key=key(oldkvs,idx)) == null ) + CAS_key(oldkvs,idx, null, TOMBSTONE); + + // --- + // Prevent new values from appearing in the old table. + // Box what we see in the old table, to prevent further updates. + Object oldval = val(oldkvs,idx); // Read OLD table + while( !(oldval instanceof Prime) ) { + final Prime box = (oldval == null || oldval == TOMBSTONE) ? TOMBPRIME : new Prime(oldval); + if( CAS_val(oldkvs,idx,oldval,box) ) { // CAS down a box'd version of oldval + // If we made the Value slot hold a TOMBPRIME, then we both + // prevented further updates here but also the (absent) + // oldval is vaccuously available in the new table. We + // return with true here: any thread looking for a value for + // this key can correctly go straight to the new table and + // skip looking in the old table. + if( box == TOMBPRIME ) + return true; + // Otherwise we boxed something, but it still needs to be + // copied into the new table. 
+ oldval = box; // Record updated oldval + break; // Break loop; oldval is now boxed by us + } + oldval = val(oldkvs,idx); // Else try, try again + } + if( oldval == TOMBPRIME ) return false; // Copy already complete here! + + // --- + // Copy the value into the new table, but only if we overwrite a null. + // If another value is already in the new table, then somebody else + // wrote something there and that write is happens-after any value that + // appears in the old table. If putIfMatch does not find a null in the + // new table - somebody else should have recorded the null-not_null + // transition in this copy. + Object old_unboxed = ((Prime)oldval)._V; + assert old_unboxed != TOMBSTONE; + boolean copied_into_new = (putIfMatch(topmap, newkvs, key, old_unboxed, null) == null); + + // --- + // Finally, now that any old value is exposed in the new table, we can + // forever hide the old-table value by slapping a TOMBPRIME down. This + // will stop other threads from uselessly attempting to copy this slot + // (i.e., it's a speed optimization not a correctness issue). + while( !CAS_val(oldkvs,idx,oldval,TOMBPRIME) ) + oldval = val(oldkvs,idx); + + return copied_into_new; + } // end copy_slot + } // End of CHM + + + // --- Snapshot ------------------------------------------------------------ + // The main class for iterating over the NBHM. It "snapshots" a clean + // view of the K/V array. + private class SnapshotV implements Iterator, Enumeration { + final Object[] _sskvs; + public SnapshotV() { + while( true ) { // Verify no table-copy-in-progress + Object[] topkvs = _kvs; + CHM topchm = chm(topkvs); + if( topchm._newkvs == null ) { // No table-copy-in-progress + // The "linearization point" for the iteration. Every key in this + // table will be visited, but keys added later might be skipped or + // even be added to a following table (also not iterated over). + _sskvs = topkvs; + break; + } + // Table copy in-progress - so we cannot get a clean iteration. We + // must help finish the table copy before we can start iterating. + topchm.help_copy_impl(NonBlockingHashMap.this,topkvs,true); + } + // Warm-up the iterator + next(); + } + int length() { return len(_sskvs); } + Object key(int idx) { return NonBlockingHashMap.key(_sskvs,idx); } + private int _idx; // Varies from 0-keys.length + private Object _nextK, _prevK; // Last 2 keys found + private TypeV _nextV, _prevV; // Last 2 values found + public boolean hasNext() { return _nextV != null; } + public TypeV next() { + // 'next' actually knows what the next value will be - it had to + // figure that out last go-around lest 'hasNext' report true and + // some other thread deleted the last value. Instead, 'next' + // spends all its effort finding the key that comes after the + // 'next' key. + if( _idx != 0 && _nextV == null ) throw new NoSuchElementException(); + _prevK = _nextK; // This will become the previous key + _prevV = _nextV; // This will become the previous value + _nextV = null; // We have no more next-key + // Attempt to set <_nextK,_nextV> to the next K,V pair. + // _nextV is the trigger: stop searching when it is != null + while( _idx elements() { return new SnapshotV(); } + + // --- values -------------------------------------------------------------- + /** Returns a {@link Collection} view of the values contained in this map. + * The collection is backed by the map, so changes to the map are reflected + * in the collection, and vice-versa. 
The collection supports element + * removal, which removes the corresponding mapping from this map, via the + * Iterator.remove, Collection.remove, + * removeAll, retainAll, and clear operations. + * It does not support the add or addAll operations. + * + *
<p>
The view's iterator is a "weakly consistent" iterator that + * will never throw {@link ConcurrentModificationException}, and guarantees + * to traverse elements as they existed upon construction of the iterator, + * and may (but is not guaranteed to) reflect any modifications subsequent + * to construction. */ + @Override + public Collection values() { + return new AbstractCollection() { + @Override public void clear ( ) { NonBlockingHashMap.this.clear ( ); } + @Override public int size ( ) { return NonBlockingHashMap.this.size ( ); } + @Override public boolean contains( Object v ) { return NonBlockingHashMap.this.containsValue(v); } + @Override public Iterator iterator() { return new SnapshotV(); } + }; + } + + // --- keySet -------------------------------------------------------------- + private class SnapshotK implements Iterator, Enumeration { + final SnapshotV _ss; + public SnapshotK() { _ss = new SnapshotV(); } + public void remove() { _ss.remove(); } + public TypeK next() { _ss.next(); return (TypeK)_ss._prevK; } + public boolean hasNext() { return _ss.hasNext(); } + public TypeK nextElement() { return next(); } + public boolean hasMoreElements() { return hasNext(); } + } + + /** Returns an enumeration of the keys in this table. + * @return an enumeration of the keys in this table + * @see #keySet() */ + public Enumeration keys() { return new SnapshotK(); } + + /** Returns a {@link Set} view of the keys contained in this map. The set + * is backed by the map, so changes to the map are reflected in the set, + * and vice-versa. The set supports element removal, which removes the + * corresponding mapping from this map, via the Iterator.remove, + * Set.remove, removeAll, retainAll, and + * clear operations. It does not support the add or + * addAll operations. + * + *
<p>
The view's iterator is a "weakly consistent" iterator that + * will never throw {@link ConcurrentModificationException}, and guarantees + * to traverse elements as they existed upon construction of the iterator, + * and may (but is not guaranteed to) reflect any modifications subsequent + * to construction. */ + @Override + public Set keySet() { + return new AbstractSet () { + @Override public void clear ( ) { NonBlockingHashMap.this.clear ( ); } + @Override public int size ( ) { return NonBlockingHashMap.this.size ( ); } + @Override public boolean contains( Object k ) { return NonBlockingHashMap.this.containsKey(k); } + @Override public boolean remove ( Object k ) { return NonBlockingHashMap.this.remove (k) != null; } + @Override public Iterator iterator() { return new SnapshotK(); } + }; + } + + + // --- entrySet ------------------------------------------------------------ + // Warning: Each call to 'next' in this iterator constructs a new NBHMEntry. + private class NBHMEntry extends AbstractEntry { + NBHMEntry( final TypeK k, final TypeV v ) { super(k,v); } + public TypeV setValue(final TypeV val) { + if( val == null ) throw new NullPointerException(); + _val = val; + return put(_key, val); + } + } + + private class SnapshotE implements Iterator> { + final SnapshotV _ss; + public SnapshotE() { _ss = new SnapshotV(); } + public void remove() { _ss.remove(); } + public Map.Entry next() { _ss.next(); return new NBHMEntry((TypeK)_ss._prevK,_ss._prevV); } + public boolean hasNext() { return _ss.hasNext(); } + } + + /** Returns a {@link Set} view of the mappings contained in this map. The + * set is backed by the map, so changes to the map are reflected in the + * set, and vice-versa. The set supports element removal, which removes + * the corresponding mapping from the map, via the + * Iterator.remove, Set.remove, removeAll, + * retainAll, and clear operations. It does not support + * the add or addAll operations. + * + *
<p>
The view's iterator is a "weakly consistent" iterator + * that will never throw {@link ConcurrentModificationException}, + * and guarantees to traverse elements as they existed upon + * construction of the iterator, and may (but is not guaranteed to) + * reflect any modifications subsequent to construction. + * + *
<p>
Warning: the iterator associated with this Set + * requires the creation of {@link java.util.Map.Entry} objects with each + * iteration. The {@link NonBlockingHashMap} does not normally create or + * using {@link java.util.Map.Entry} objects so they will be created soley + * to support this iteration. Iterating using {@link #keySet} or {@link + * #values} will be more efficient. + */ + @Override + public Set> entrySet() { + return new AbstractSet>() { + @Override public void clear ( ) { NonBlockingHashMap.this.clear( ); } + @Override public int size ( ) { return NonBlockingHashMap.this.size ( ); } + @Override public boolean remove( final Object o ) { + if( !(o instanceof Map.Entry)) return false; + final Map.Entry e = (Map.Entry)o; + return NonBlockingHashMap.this.remove(e.getKey(), e.getValue()); + } + @Override public boolean contains(final Object o) { + if( !(o instanceof Map.Entry)) return false; + final Map.Entry e = (Map.Entry)o; + TypeV v = get(e.getKey()); + return v.equals(e.getValue()); + } + @Override public Iterator> iterator() { return new SnapshotE(); } + }; + } + + // --- writeObject ------------------------------------------------------- + // Write a NBHM to a stream + private void writeObject(java.io.ObjectOutputStream s) throws IOException { + s.defaultWriteObject(); // Nothing to write + for( Object K : keySet() ) { + final Object V = get(K); // Do an official 'get' + s.writeObject(K); // Write the pair + s.writeObject(V); + } + s.writeObject(null); // Sentinel to indicate end-of-data + s.writeObject(null); + } + + // --- readObject -------------------------------------------------------- + // Read a CHM from a stream + private void readObject(java.io.ObjectInputStream s) throws IOException, ClassNotFoundException { + s.defaultReadObject(); // Read nothing + initialize(MIN_SIZE); + for(;;) { + final TypeK K = (TypeK) s.readObject(); + final TypeV V = (TypeV) s.readObject(); + if( K == null ) break; + put(K,V); // Insert with an offical put + } + } + +} // End NonBlockingHashMap class diff --git a/benchmark/cliffc-hashtable/simplified_cliffc_hashtable.cc b/benchmark/cliffc-hashtable/simplified_cliffc_hashtable.cc new file mode 100644 index 0000000..723310d --- /dev/null +++ b/benchmark/cliffc-hashtable/simplified_cliffc_hashtable.cc @@ -0,0 +1,57 @@ +#include + +#include "simplified_cliffc_hashtable.h" + +using namespace std; + +template +slot* const cliffc_hashtable::MATCH_ANY = new slot(false, NULL); + +template +slot* const cliffc_hashtable::NO_MATCH_OLD = new slot(false, NULL); + +template +slot* const cliffc_hashtable::TOMBPRIME = new slot(true, NULL); + +template +slot* const cliffc_hashtable::TOMBSTONE = new slot(false, NULL); + + +class IntWrapper { + private: + int _val; + public: + + IntWrapper(int val) : _val(val) {} + + IntWrapper() : _val(0) {} + + IntWrapper(IntWrapper& copy) : _val(copy._val) {} + + int get() { + return _val; + } + + int hashCode() { + return _val; + } + + bool equals(const shared_ptr another) { + if (another == NULL) + return false; + shared_ptr ptr = + static_pointer_cast(another); + return ptr->_val == _val; + } +}; + +int main(int argc, char *argv[]) { + cliffc_hashtable table; + IntWrapper k1(3), k2(4), v1(1), v2(2); + + + table.put(k1, v1); + table.put(k2, v2); + cout << table.get(k2)->get() << endl; + return 1; +} diff --git a/benchmark/cliffc-hashtable/simplified_cliffc_hashtable.h b/benchmark/cliffc-hashtable/simplified_cliffc_hashtable.h new file mode 100644 index 0000000..0e7031e --- /dev/null +++ 
b/benchmark/cliffc-hashtable/simplified_cliffc_hashtable.h @@ -0,0 +1,837 @@ +#ifndef SIMPLIFIED_CLIFFC_HASHTABLE_H +#define SIMPLIFIED_CLIFFC_HASHTABLE_H + +#include +#include +#include +#include + +using namespace std; + + + +/** + This header file declares and defines a simplified version of Cliff Click's + NonblockingHashMap. It contains all the necessary structrues and main + functions. In simplified_cliffc_hashtable.cc file, it has the definition for + the static fields. +*/ + +template +class cliffc_hashtable; + +/** + Corresponding the the Object[] array in Cliff Click's Java implementation. + It keeps the first two slots for CHM (Hashtable control unit) and the hash + records (an array of hash used for fast negative key-equality check). +*/ +struct kvs_data { + int _size; + atomic *_data; + + kvs_data(int sz) { + _size = sz; + int real_size = sizeof(atomic) * 2 + 2; + _data = new atomic[real_size]; + // The control block should be initialized in resize() + // Init the hash record array + int *hashes = new int[_size]; + int i; + for (i = 0; i < _size; i++) { + hashes[i] = 0; + } + _data[1].store(hashes, memory_order_relaxed); + // Init the data to Null slot + for (i = 2; i < real_size; i++) { + _data[i].store(NULL, memory_order_relaxed); + } + } + + ~kvs_data() { + int *hashes = (int*) _data[1].load(memory_order_relaxed); + delete hashes; + delete[] _data; + } +}; + +struct slot { + bool _prime; + shared_ptr _ptr; + + slot(bool prime, shared_ptr ptr) { + _prime = prime; + _ptr = ptr; + } + +}; + + +/** + TypeK must have defined function "int hashCode()" which return the hash + code for the its object, and "int equals(TypeK anotherKey)" which is + used to judge equality. + TypeK and TypeV should define their own copy constructor. + To make the memory management safe and similar to Cliff Click's Java + implementation, we use shared_ptr instead of normal pointer in terms of the + pointers that point to TypeK and TypeV. +*/ +template +class cliffc_hashtable { + /** + # The synchronization we have for the hashtable gives us the property of + # serializability, so we should have a sequential hashtable when we check the + # correctness. The key thing is to identify all the commit point. + + @Begin + @Global_define: + + spec_hashtable __map; + spec_hashtable __id_map; + Tag __tag; + + static bool _val_equals(TypeV *ptr1, TypeV *ptr2) { + // ... + } + + # Update the tag for the current key slot if the corresponding tag + # is NULL, otherwise just return that tag. It will update the next + # available tag too if it requires a new tag for that key slot. 
+ static Tag _getKeyTag(TypeK &key) { + if (__id_map.get(key) == NULL) { + Tag cur_tag = tag.current(); + __id_map.put(key, cur_tag); + __tag.next(); + return cur_tag; + } else { + return __id_map.get(key); + } + } + + @Interface_cluster: + Read_interface = { + Get, + PutIfAbsent, + RemoveAny, + RemoveIfMatch, + ReplaceAny, + ReplaceIfMatch + } + + Write_interface = { + Put, + PutIfAbsent(COND_PutIfAbsentSucc), + RemoveAny, + RemoveIfMatch(COND_RemoveIfMatchSucc), + ReplaceAny, + ReplaceIfMatch(COND_ReplaceIfMatchSucc) + } + @Happens_before: + Write_interface -> Read_interface + @End + */ + +friend class CHM; + /** + The control structure for the hashtable + */ + private: + class CHM { + friend class cliffc_hashtable; + private: + atomic _newkvs; + + // Size of active K,V pairs + atomic_int _size; + + // Count of used slots + atomic_int _slots; + + // The next part of the table to copy + atomic_int _copy_idx; + + // Work-done reporting + atomic_int _copy_done; + + public: + CHM(int size) { + _size.store(size, memory_order_relaxed); + _slots.store(0, memory_order_relaxed); + + _copy_idx.store(0, memory_order_relaxed); + _copy_done.store(0, memory_order_release); + } + + ~CHM() {} + + private: + + // Heuristic to decide if the table is too full + bool table_full(int reprobe_cnt, int len) { + return + reprobe_cnt >= REPROBE_LIMIT && + _slots.load(memory_order_relaxed) >= reprobe_limit(len); + } + + kvs_data* resize(cliffc_hashtable *topmap, kvs_data *kvs) { + kvs_data *newkvs = _newkvs.load(memory_order_acquire); + if (newkvs != NULL) + return newkvs; + + // No copy in-progress, start one; Only double the table size + int oldlen = kvs->_size; + int sz = _size.load(memory_order_relaxed); + int newsz = sz; + + // Just follow Cliff Click's heuristic to decide the new size + if (sz >= (oldlen >> 2)) { // If we are 25% full + newsz = oldlen << 1; // Double size + if (sz >= (oldlen >> 1)) + newsz = oldlen << 2; // Double double size + } + + // We do not record the record timestamp + if (newsz <= oldlen) newsz = oldlen << 1; + // Do not shrink ever + if (newsz < oldlen) newsz = oldlen; + + // Last check cause the 'new' below is expensive + newkvs = _newkvs.load(memory_order_acquire); + if (newkvs != NULL) return newkvs; + + newkvs = new kvs_data(newsz); + void *chm = (void*) new CHM(sz); + newkvs->_data[0].store(chm, memory_order_relaxed); + + kvs_data *cur_newkvs; + // Another check after the slow allocation + if ((cur_newkvs = _newkvs.load(memory_order_acquire)) != NULL) + return cur_newkvs; + // CAS the _newkvs to the allocated table + kvs_data *desired = (kvs_data*) NULL; + kvs_data *expected = (kvs_data*) newkvs; + if (!_newkvs.compare_exchange_strong(desired, expected, memory_order_release, + memory_order_release)) { + // Should clean the allocated area + delete newkvs; + newkvs = _newkvs.load(memory_order_acquire); + } + return newkvs; + } + + void help_copy_impl(cliffc_hashtable *topmap, kvs_data *oldkvs, + bool copy_all) { + assert (get_chm(oldkvs) == this); + kvs_data *newkvs = _newkvs.load(memory_order_acquire); + int oldlen = oldkvs->_size; + int min_copy_work = oldlen > 1024 ? 
1024 : oldlen; + + // Just follow Cliff Click's code here + int panic_start = -1; + int copyidx; + while (_copy_done.load(memory_order_acquire) < oldlen) { + copyidx = _copy_idx.load(memory_order_acquire); + if (panic_start == -1) { // No painc + copyidx = _copy_idx.load(memory_order_acquire); + while (copyidx < (oldlen << 1) && + !_copy_idx.compare_exchange_strong(copyidx, copyidx + + min_copy_work, memory_order_release, memory_order_release)) + copyidx = _copy_idx.load(memory_order_relaxed); + if (!(copyidx < (oldlen << 1))) + panic_start = copyidx; + } + + // Now copy the chunk of work we claimed + int workdone = 0; + for (int i = 0; i < min_copy_work; i++) + if (copy_slot(topmap, (copyidx + i) & (oldlen - 1), oldkvs, + newkvs)) + workdone++; + if (workdone > 0) + copy_check_and_promote(topmap, oldkvs, workdone); + + copyidx += min_copy_work; + if (!copy_all && panic_start == -1) + return; // We are done with the work we claim + } + copy_check_and_promote(topmap, oldkvs, 0); // See if we can promote + } + + kvs_data* copy_slot_and_check(cliffc_hashtable *topmap, kvs_data + *oldkvs, int idx, void *should_help) { + kvs_data *newkvs = _newkvs.load(memory_order_acquire); + // We're only here cause the caller saw a Prime + if (copy_slot(topmap, idx, oldkvs, _newkvs)) + copy_check_and_promote(topmap, oldkvs, 1); // Record the slot copied + return (should_help == NULL) ? newkvs : topmap->help_copy(newkvs); + } + + void copy_check_and_promote(cliffc_hashtable *topmap, kvs_data* + oldkvs, int workdone) { + int oldlen = oldkvs->_size; + int copyDone = _copy_done.load(memory_order_relaxed); + if (workdone > 0) { + while (true) { + copyDone = _copy_done.load(memory_order_relaxed); + if (_copy_done.compare_exchange_weak(copyDone, copyDone + + workdone, memory_order_relaxed, memory_order_relaxed)) + break; + } + } + + // Promote the new table to the current table + if (copyDone + workdone == oldlen && + topmap->_kvs.load(memory_order_acquire) == oldkvs) + topmap->_kvs.compare_exchange_strong(oldkvs, _newkvs, memory_order_release, + memory_order_release); + } + + bool copy_slot(cliffc_hashtable *topmap, int idx, kvs_data *oldkvs, + kvs_data *newkvs) { + slot *key_slot; + while ((key_slot = key(oldkvs, idx)) == NULL) + CAS_key(oldkvs, idx, NULL, TOMBSTONE); + + // First CAS old to Prime + slot *oldval = val(oldkvs, idx, NULL); + while (!is_prime(oldval)) { + slot *box = (oldval == NULL || oldval == TOMBSTONE) + ? 
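+			// Empty or deleted slots are boxed straight to TOMBPRIME; a live
+			// value gets a new Prime box around the same payload, which tells
+			// writers in the old table to retry against the new table.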
TOMBPRIME : new slot(true, oldval->_ptr); + if (CAS_val(oldkvs, idx, oldval, box)) { + if (box == TOMBPRIME) + return 1; // Copy done + // Otherwise we CAS'd the box + oldval = box; // Record updated oldval + break; + } + oldval = val(oldkvs, idx, NULL); // Else re-try + } + + if (oldval == TOMBPRIME) return false; // Copy already completed here + + slot *old_unboxed = new slot(false, oldval->_ptr); + int copied_into_new = (putIfMatch(topmap, newkvs, key_slot, old_unboxed, + NULL) == NULL); + + // Old value is exposed in the new table + while (!CAS_val(oldkvs, idx, oldval, TOMBPRIME)) + oldval = val(oldkvs, idx, NULL); + + return copied_into_new; + } + }; + + + + private: + static const int Default_Init_Size = 8; // Intial table size + + static slot* const MATCH_ANY; + static slot* const NO_MATCH_OLD; + + static slot* const TOMBPRIME; + static slot* const TOMBSTONE; + + static const int REPROBE_LIMIT = 10; // Forces a table-resize + + atomic _kvs; + + public: + cliffc_hashtable() { + // Should initialize the CHM for the construction of the table + // For other CHM in kvs_data, they should be initialzed in resize() + // because the size is determined dynamically + kvs_data *kvs = new kvs_data(Default_Init_Size); + void *chm = (void*) new CHM(0); + kvs->_data[0].store(chm, memory_order_relaxed); + _kvs.store(kvs, memory_order_release); + } + + cliffc_hashtable(int init_size) { + // Should initialize the CHM for the construction of the table + // For other CHM in kvs_data, they should be initialzed in resize() + // because the size is determined dynamically + kvs_data *kvs = new kvs_data(init_size); + void *chm = (void*) new CHM(0); + kvs->_data[0].store(chm, memory_order_relaxed); + _kvs.store(kvs, memory_order_release); + } + + /** + @Begin + @Interface: Get + @Commit_point_set: Read_Val_Point1 | Read_Val_Point2 | Read_Val_Point3 + @ID: _getKeyTag(key) + @Action: + @DefineVar: TypeV *_Old_Val = __map.get(key) + @Post_check: + _equals_val(_Old_Val, __RET__) + @End + */ + shared_ptr get(TypeK& key) { + void *key_ptr = (void*) new TypeK(key); + slot *key_slot = new slot(false, shared_ptr(key_ptr)); + int fullhash = hash(key_slot); + slot *V = get_impl(this, _kvs, key_slot, fullhash); + if (V == NULL) return NULL; + assert (!is_prime(V)); + return static_pointer_cast(V->_ptr); + } + + /** + @Begin + @Interface: Put + @Commit_point_set: Write_Val_Point + @ID: _getKeyTag(key) + @Action: + # Remember this old value at checking point + @DefineVar: TypeV *_Old_Val = __map.get(key) + @Code: __map.put(key, &val); + @Post_check: + _equals_val(__RET__, _Old_Val) + @End + */ + shared_ptr put(TypeK& key, TypeV& val) { + return putIfMatch(key, val, NO_MATCH_OLD); + } + + /** + @Begin + @Interface: PutIfAbsent + @Commit_point_set: + Write_Val_Point | PutIfAbsent_Fail_Point + @Condition: __map.get(key) == NULL + @HB_condition: + COND_PutIfAbsentSucc :: __RET__ == NULL + @ID: _getKeyTag(key) + @Action: + @DefineVar: TypeV *_Old_Val = __map.get(key) + @Code: + if (COND_SAT) + __map.put(key, &value); + @Post_check: + COND_SAT ? 
__RET__ == NULL : _equals_val(_Old_Val, __RET__) + @End + */ + shared_ptr putIfAbsent(TypeK& key, TypeV& value) { + return putIfMatch(key, val, TOMBSTONE); + } + + /** + @Begin + @Interface: RemoveAny + @Commit_point_set: Write_Val_Point + @ID: _getKeyTag(key) + @Action: + @DefineVar: TypeV *_Old_Val = __map.get(key) + @Code: __map.put(key, NULL); + @Post_check: + _equals_val(__RET__, _Old_Val) + @End + */ + shared_ptr remove(TypeK& key) { + return putIfMatch(key, TOMBSTONE, NO_MATCH_OLD); + } + + /** + @Begin + @Interface: RemoveIfMatch + @Commit_point_set: + Write_Val_Point | RemoveIfMatch_Fail_Point + @Condition: + _equals_val(__map.get(key), &val) + @HB_condition: + COND_RemoveIfMatchSucc :: __RET__ == true + @ID: _getKeyTag(key) + @Action: + @Code: + if (COND_SAT) + __map.put(key, NULL); + @Post_check: + COND_SAT ? __RET__ : !__RET__ + @End + */ + bool remove(TypeK& key, TypeV& val) { + slot *val_slot = val == NULL ? NULL : new slot(false, val); + return putIfMatch(key, TOMBSTONE, val) == val; + + } + + /** + @Begin + @Interface: ReplaceAny + @Commit_point_set: + Write_Val_Point + @ID: _getKeyTag(key) + @Action: + @DefineVar: TypeV *_Old_Val = __map.get(key) + @Post_check: + _equals_val(__RET__, _Old_Val) + @End + */ + shared_ptr replace(TypeK& key, TypeV& val) { + return putIfMatch(key, val, MATCH_ANY); + } + + /** + @Begin + @Interface: ReplaceIfMatch + @Commit_point_set: + Write_Val_Point | ReplaceIfMatch_Fail_Point + @Condition: + _equals_val(__map.get(key), &oldval) + @HB_condition: + COND_ReplaceIfMatchSucc :: __RET__ == true + @ID: _getKeyTag(key) + @Action: + @Code: + if (COND_SAT) + __map.put(key, &newval); + @Post_check: + COND_SAT ? __RET__ : !__RET__ + @End + */ + bool replace(TypeK& key, TypeV& oldval, TypeV& newval) { + return putIfMatch(key, newval, oldval) == oldval; + } + + private: + static CHM* get_chm(kvs_data* kvs) { + return (CHM*) kvs->_data[0].load(memory_order_relaxed); + } + + static int* get_hashes(kvs_data *kvs) { + return (int *) kvs->_data[1].load(memory_order_relaxed); + } + + // Preserve happens-before semantics on newly inserted keys + static inline slot* key(kvs_data *kvs, int idx) { + assert (idx >= 0 && idx < kvs->_size); + // Corresponding to the volatile read in get_impl() and putIfMatch in + // Cliff Click's Java implementation + return (slot*) kvs->_data[idx * 2 + 2].load(memory_order_acquire); + } + + /** + The atomic operation in val() function is a "potential" commit point, + which means in some case it is a real commit point while it is not for + some other cases. This so happens because the val() function is such a + fundamental function that many internal operation will call. Our + strategy is that we label any potential commit points and check if they + really are the commit points later. + */ + // Preserve happens-before semantics on newly inserted values + static inline slot* val(kvs_data *kvs, int idx) { + assert (idx >= 0 && idx < kvs->_size); + // Corresponding to the volatile read in get_impl() and putIfMatch in + // Cliff Click's Java implementation + slot *res = (slot*) kvs->_data[idx * 2 + 3].load(memory_order_acquire); + /** + @Begin + # This is a complicated potential commit point since many many functions are + # calling val(). 
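+		# Whether a given Read_Val_Point really commits is decided at the call
+		# site: e.g. in get_impl() it commits when the loaded value is NULL, or
+		# when the key matches and the value is not a Prime, per the
+		# @Commit_point_define conditions there.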
+ @Potential_commit_point_define: true + @Label: Read_Val_Point + @End + */ + return res; + + + } + + static int hash(slot *key_slot) { + assert(key_slot != NULL && key_slot->_ptr != NULL); + shared_ptr key = static_pointer_cast(key_slot->_ptr); + int h = key->hashCode(); + // Spread bits according to Cliff Click's code + h += (h << 15) ^ 0xffffcd7d; + h ^= (h >> 10); + h += (h << 3); + h ^= (h >> 6); + h += (h << 2) + (h << 14); + return h ^ (h >> 16); + } + + // Heuristic to decide if reprobed too many times. + // Be careful here: Running over the limit on a 'get' acts as a 'miss'; on a + // put it triggers a table resize. Several places MUST have exact agreement. + static int reprobe_limit(int len) { + return REPROBE_LIMIT + (len >> 2); + } + + static inline bool is_prime(slot *val) { + return (val != NULL) && val->_prime; + } + + // Check for key equality. Try direct pointer comparison first (fast + // negative teset) and then the full 'equals' call + static bool keyeq(slot *K, slot *key_slot, int *hashes, int hash, + int fullhash) { + // Caller should've checked this. + assert (K != NULL); + shared_ptr key_ptr = static_pointer_cast(key_slot->_ptr); + return + K == key_slot || + ((hashes[hash] == 0 || hashes[hash] == fullhash) && + K != TOMBSTONE && + key_ptr->equals(K->_ptr)); + } + + static bool valeq(slot *val_slot1, slot *val_slot2) { + assert (val_slot1 != NULL); + shared_ptr ptr1 = static_pointer_cast(val_slot1->_ptr); + if (val_slot2 == NULL || ptr1 == NULL) return false; + return ptr1->equals(val_slot2->_ptr); + } + + // Together with key() preserve the happens-before relationship on newly + // inserted keys + static inline bool CAS_key(kvs_data *kvs, int idx, void *expected, void *desired) { + return kvs->_data[2 * idx + 2].compare_exchange_strong(expected, + desired, memory_order_release, memory_order_release); + } + + /** + Same as the val() function, we only label the CAS operation as the + potential commit point. + */ + // Together with val() preserve the happens-before relationship on newly + // inserted values + static inline bool CAS_val(kvs_data *kvs, int idx, void *expected, void + *desired) { + bool res = kvs->_data[2 * idx + 3].compare_exchange_strong(expected, + desired, memory_order_release, memory_order_release); + /** + # If it is a successful put instead of a copy or any other internal + # operantions, expected != NULL + @Begin + @Potential_commit_point_define: __ATOMIC_RET__ == true + @Label: Write_Val_Point + @End + */ + return res; + } + + slot* get_impl(cliffc_hashtable *topmap, kvs_data *kvs, slot* key_slot, int + fullhash) { + int len = kvs->_size; + CHM *chm = get_chm(kvs); + int *hashes = get_hashes(kvs); + + int idx = fullhash & (len - 1); + int reprobe_cnt = 0; + while (true) { + slot *K = key(kvs, idx); + slot *V = val(kvs, idx); + /** + @Begin + @Commit_point_define: V == NULL + @Potential_commit_point_label: Read_Val_Point + @Label: Get_Success_Point_1 + @End + */ + + if (V == NULL) return NULL; // A miss + + if (keyeq(K, key_slot, hashes, idx, fullhash)) { + // Key hit! Check if table-resize in progress + if (!is_prime(V)) { + /** + @Begin + @Commit_point_define: true + @Potential_commit_point_label: Read_Val_Point + @Label: Get_Success_Point_2 + @End + */ + return (V == TOMBSTONE) ? 
NULL : V; // Return this value + } + // Otherwise, finish the copy & retry in the new table + return get_impl(topmap, chm->copy_slot_and_check(topmap, kvs, + idx, key_slot), key_slot, fullhash); + } + + if (++reprobe_cnt >= REPROBE_LIMIT || + key_slot == TOMBSTONE) { + // Retry in new table + // Atomic read (acquire) can be here + kvs_data *newkvs = chm->_newkvs.load(memory_order_acquire); + /** + @Begin + @Commit_point_define_check: newkvs == NULL + @Label: Get_Success_Point_3 + @End + */ + return newkvs == NULL ? NULL : get_impl(topmap, + topmap->help_copy(newkvs), key_slot, fullhash); + } + + idx = (idx + 1) & (len - 1); // Reprobe by 1 + } + } + + // A wrapper of the essential function putIfMatch() + shared_ptr putIfMatch(TypeK& key, TypeV& value, slot *old_val) { + // TODO: Should throw an exception rather return NULL + if (old_val == NULL) { + return NULL; + } + void *key_ptr = (void*) new TypeK(key); + slot *key_slot = new slot(false, shared_ptr(key_ptr)); + + void *val_ptr = (void*) new TypeV(value); + slot *value_slot = new slot(false, shared_ptr(val_ptr)); + slot *res = putIfMatch(this, _kvs, key_slot, value_slot, old_val); + // Only when copy_slot() call putIfMatch() will it return NULL + assert (res != NULL); + assert (!is_prime(res)); + return res == TOMBSTONE ? NULL : static_pointer_cast(res->_ptr); + } + + /** + Put, Remove, PutIfAbsent, etc will call this function. Return the old + value. If the returned value is equals to the expVal (or expVal is + NO_MATCH_OLD), then this function puts the val_slot to the table 'kvs'. + Only copy_slot will pass a NULL expVal, and putIfMatch only returns a + NULL if passed a NULL expVal. + */ + static slot* putIfMatch(cliffc_hashtable *topmap, kvs_data *kvs, slot + *key_slot, slot *val_slot, slot *expVal) { + assert (val_slot != NULL); + assert (!is_prime(val_slot)); + assert (!is_prime(expVal)); + + int fullhash = hash(key_slot); + int len = kvs->_size; + CHM *chm = get_chm(kvs); + int *hashes = get_hashes(kvs); + int idx = fullhash & (len - 1); + + // Claim a key slot + int reprobe_cnt = 0; + slot *K; + slot *V; + kvs_data *newkvs; + + while (true) { // Spin till we get a key slot + K = key(kvs, idx); + V = val(kvs, idx, NULL); + if (K == NULL) { // Get a free slot + if (val_slot == TOMBSTONE) return val_slot; + // Claim the null key-slot + if (CAS_key(kvs, idx, NULL, key_slot)) { + chm->_slots.fetch_add(1, memory_order_relaxed); // Inc key-slots-used count + hashes[idx] = fullhash; // Memorize full hash + break; + } + K = key(kvs, idx); // CAS failed, get updated value + assert (K != NULL); + } + + // Key slot not null, there exists a Key here + if (keyeq(K, key_slot, hashes, idx, fullhash)) + break; // Got it + + // Notice that the logic here should be consistent with that of get. + // The first predicate means too many reprobes means nothing in the + // old table. 
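+			// Illustrative numbers: with len == 32, reprobe_limit(len) is
+			// REPROBE_LIMIT + (32 >> 2) == 10 + 8 == 18, so after 18 failed
+			// probes we give up on this table, resize, and retry the put there.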
+ if (++reprobe_cnt >= reprobe_limit(len) || + K == TOMBSTONE) { // Found a Tombstone key, no more keys + newkvs = chm->resize(topmap, kvs); + // Help along an existing copy + if (expVal != NULL) topmap->help_copy(newkvs); + return putIfMatch(topmap, newkvs, key_slot, val_slot, expVal); + } + + idx = (idx + 1) & (len - 1); // Reprobe + } // End of spinning till we get a Key slot + + if (val_slot == V) return V; // Fast cutout for no-change + + // Here it tries to resize cause it doesn't want other threads to stop + // its progress (eagerly try to resize soon) + newkvs = chm->_newkvs.load(memory_order_acquire); + if (newkvs == NULL && + ((V == NULL && chm->table_full(reprobe_cnt, len)) || is_prime(V))) + newkvs = chm->resize(topmap, kvs); // Force the copy to start + + // Finish the copy and then put it in the new table + if (newkvs != NULL) + return putIfMatch(topmap, chm->copy_slot_and_check(topmap, kvs, idx, + expVal), key_slot, val_slot, expVal); + + // Decided to update the existing table + while (true) { + assert (!is_prime(V)); + + if (expVal != NO_MATCH_OLD && + V != expVal && + (expVal != MATCH_ANY || V == TOMBSTONE || V == NULL) && + !(V == NULL && expVal == TOMBSTONE) && + (expVal == NULL || !valeq(expVal, V))) { + /** + @Begin + @Commit_point_define: expVal == TOMBSTONE + @Potential_commit_point_label: Read_Val_Point + @Label: PutIfAbsent_Fail_Point + # This is a check for the PutIfAbsent() when the value + # is not absent + @End + */ + /** + @Begin + @Commit_point_define: expVal != NULL && val_slot == TOMBSTONE + @Potential_commit_point_label: Read_Val_Point + @Label: RemoveIfMatch_Fail_Point + @End + */ + /** + @Begin + @Commit_point_define: !valeq(expVal, V) + @Potential_commit_point_label: Read_Val_Point + @Label: ReplaceIfMatch_Fail_Point + @End + */ + return V; // Do not update! + } + + if (CAS_val(kvs, idx, V, val_slot)) { + /** + @Begin + # The only point where a successful put happens + @Commit_point_define: true + @Potential_commit_point_label: Write_Val_Point + @Label: Write_Success_Point + @End + */ + if (expVal != NULL) { // Not called by a table-copy + // CAS succeeded, should adjust size + // Both normal put's and table-copy calls putIfMatch, but + // table-copy does not increase the number of live K/V pairs + if ((V == NULL || V == TOMBSTONE) && + val_slot != TOMBSTONE) + chm->_size.fetch_add(1, memory_order_relaxed); + if (!(V == NULL || V == TOMBSTONE) && + val_slot == TOMBSTONE) + chm->_size.fetch_add(-1, memory_order_relaxed); + } + return (V == NULL && expVal != NULL) ? TOMBSTONE : V; + } + // Else CAS failed + V = val(kvs, idx, NULL); + if (is_prime(V)) + return putIfMatch(topmap, chm->copy_slot_and_check(topmap, kvs, + idx, expVal), key_slot, val_slot, expVal); + } + } + + // Help along an existing table-resize. This is a fast cut-out wrapper. 
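+	// If a copy is in progress it forwards to help_copy_impl() on the top-level
+	// table; either way it returns its argument, so callers can chain it, e.g.
+	// get_impl(topmap, topmap->help_copy(newkvs), key_slot, fullhash).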
+ kvs_data* help_copy(kvs_data *helper) { + kvs_data *topkvs = _kvs.load(memory_order_acquire); + CHM *topchm = get_chm(topkvs); + // No cpy in progress + if (topchm->_newkvs.load(memory_order_acquire) == NULL) return helper; + topchm->help_copy_impl(this, topkvs, false); + return helper; + } +}; + +#endif diff --git a/benchmark/linuxrwlocks/.gitignore b/benchmark/linuxrwlocks/.gitignore new file mode 100644 index 0000000..2fdb632 --- /dev/null +++ b/benchmark/linuxrwlocks/.gitignore @@ -0,0 +1 @@ +/linuxrwlocks diff --git a/benchmark/linuxrwlocks/Makefile b/benchmark/linuxrwlocks/Makefile new file mode 100644 index 0000000..90dafcf --- /dev/null +++ b/benchmark/linuxrwlocks/Makefile @@ -0,0 +1,11 @@ +include ../benchmarks.mk + +TESTNAME = linuxrwlocks + +all: $(TESTNAME) + +$(TESTNAME): $(TESTNAME).c + $(CC) -o $@ $< $(CFLAGS) $(LDFLAGS) + +clean: + rm -f $(TESTNAME) *.o diff --git a/benchmark/linuxrwlocks/linuxrwlocks.c b/benchmark/linuxrwlocks/linuxrwlocks.c new file mode 100644 index 0000000..7fb604c --- /dev/null +++ b/benchmark/linuxrwlocks/linuxrwlocks.c @@ -0,0 +1,292 @@ +#include +#include +#include + +#include "librace.h" + +#define RW_LOCK_BIAS 0x00100000 +#define WRITE_LOCK_CMP RW_LOCK_BIAS + +/** Example implementation of linux rw lock along with 2 thread test + * driver... */ + +/** + Properties to check: + 1. At most 1 thread can acquire the write lock, and at the same time, no + other threads can acquire any lock (including read/write lock). + 2. At most RW_LOCK_BIAS threads can successfully acquire the read lock. + 3. A read_unlock release 1 read lock, and a write_unlock release the write + lock. They can not release a lock that they don't acquire. + ### + 4. Read_lock and write_lock can not be grabbed at the same time. + 5. Happpens-before relationship should be checked and guaranteed, which + should be as the following: + a. read_unlock hb-> write_lock + b. write_unlock hb-> write_lock + c. write_unlock hb-> read_lock +*/ + +/** + Interesting point for locks: + a. If the users unlock() before any lock(), then the model checker will fail. + For this case, we can not say that the data structure is buggy, how can we + tell them from a real data structure bug??? + b. We should specify that for a specific thread, successful locks and + unlocks should always come in pairs. We could make this check as an + auxiliary check since it is an extra rule for how the interfaces should called. +*/ + +/** + @Begin + @Global_define: + bool __writer_lock_acquired = false; + int __reader_lock_cnt = 0; + + @Happens_before: + # Since commit_point_set has no ID attached, A -> B means that for any B, + # the previous A happens before B. 
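+	# e.g. "Read_Unlock -> Write_Lock" below means any committed Read_Unlock
+	# happens before a later Write_Lock; for the trylock variants the ordering
+	# only applies when the trylock succeeds (the HB_*_Succ conditions).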
+ Read_Unlock -> Write_Lock + Read_Unlock -> Write_Trylock(HB_Write_Trylock_Succ) + + Write_Unlock -> Write_Lock + Write_Unlock -> Write_Trylock(HB_Write_Trylock_Succ) + + Write_Unlock -> Read_Lock + Write_Unlock -> Read_Trylock(HB_Read_Trylock_Succ) + @End +*/ + +/** + */ + +typedef union { + atomic_int lock; +} rwlock_t; + +static inline int read_can_lock(rwlock_t *lock) +{ + return atomic_load_explicit(&lock->lock, memory_order_relaxed) > 0; +} + +static inline int write_can_lock(rwlock_t *lock) +{ + return atomic_load_explicit(&lock->lock, memory_order_relaxed) == RW_LOCK_BIAS; +} + + +/** + @Begin + @Interface: Read_Lock + @Commit_point_set: + Read_Lock_Success_1 | Read_Lock_Success_2 + @Check: + !__writer_lock_acquired + @Action: + @Code: + __reader_lock_cnt++; + @End +*/ +static inline void read_lock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + /** + @Begin + @Commit_point_define_check: __ATOMIC_RET__ > 0 + @Label:Read_Lock_Success_1 + @End + */ + while (priorvalue <= 0) { + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_relaxed); + while (atomic_load_explicit(&rw->lock, memory_order_relaxed) <= 0) { + thrd_yield(); + } + priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + /** + @Begin + @Commit_point_define_check: __ATOMIC_RET__ > 0 + @Label:Read_Lock_Success_2 + @End + */ + } +} + + +/** + @Begin + @Interface: Write_Lock + @Commit_point_set: + Write_Lock_Success_1 | Write_Lock_Success_2 + @Check: + !__writer_lock_acquired && __reader_lock_cnt == 0 + @Action: + @Code: + __writer_lock_acquired = true; + @End +*/ +static inline void write_lock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + /** + @Begin + @Commit_point_define_check: __ATOMIC_RET__ == RW_LOCK_BIAS + @Label: Write_Lock_Success_1 + @End + */ + while (priorvalue != RW_LOCK_BIAS) { + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_relaxed); + while (atomic_load_explicit(&rw->lock, memory_order_relaxed) != RW_LOCK_BIAS) { + thrd_yield(); + } + priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + /** + @Begin + @Commit_point_define_check: __ATOMIC_RET__ == RW_LOCK_BIAS + @Label: Write_Lock_Success_2 + @End + */ + } +} + +/** + @Begin + @Interface: Read_Trylock + @Commit_point_set: + Read_Trylock_Point + @Condition: + __writer_lock_acquired == false + @HB_condition: + HB_Read_Trylock_Succ :: __RET__ == 1 + @Action: + @Code: + if (COND_SAT) + __reader_lock_cnt++; + @Post_check: + COND_SAT ? __RET__ == 1 : __RET__ == 0 + @End +*/ +static inline int read_trylock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + /** + @Begin + @Commit_point_define_check: true + @Label:Read_Trylock_Point + @End + */ + if (priorvalue > 0) + return 1; + + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_relaxed); + return 0; +} + +/** + @Begin + @Interface: Write_Trylock + @Commit_point_set: + Write_Trylock_Point + @Condition: + !__writer_lock_acquired && __reader_lock_cnt == 0 + @HB_condition: + HB_Write_Trylock_Succ :: __RET__ == 1 + @Action: + @Code: + if (COND_SAT) + __writer_lock_acquired = true; + @Post_check: + COND_SAT ? 
__RET__ == 1 : __RET__ == 0 + @End +*/ +static inline int write_trylock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + /** + @Begin + @Commit_point_define_check: true + @Label: Write_Trylock_Point + @End + */ + if (priorvalue == RW_LOCK_BIAS) + return 1; + + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_relaxed); + return 0; +} + +/** + @Begin + @Interface: Read_Unlock + @Commit_point_set: Read_Unlock_Point + @Check: + __reader_lock_cnt > 0 && !__writer_lock_acquired + @Action: + @Code: + reader_lock_cnt--; + @End +*/ +static inline void read_unlock(rwlock_t *rw) +{ + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_release); + /** + @Begin + @Commit_point_define_check: true + @Label: Read_Unlock_Point + @End + */ +} + +/** + @Begin + @Interface: Write_Unlock + @Commit_point_set: Write_Unlock_Point + @Check: + reader_lock_cnt == 0 && writer_lock_acquired + @Action: + @Code: + __writer_lock_acquired = false; + @End +*/ + +static inline void write_unlock(rwlock_t *rw) +{ + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_release); + /** + @Begin + @Commit_point_define_check: true + @Label: Write_Unlock_Point + @End + */ +} + +rwlock_t mylock; +int shareddata; + +static void a(void *obj) +{ + int i; + for(i = 0; i < 2; i++) { + if ((i % 2) == 0) { + read_lock(&mylock); + load_32(&shareddata); + read_unlock(&mylock); + } else { + write_lock(&mylock); + store_32(&shareddata,(unsigned int)i); + write_unlock(&mylock); + } + } +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + atomic_init(&mylock.lock, RW_LOCK_BIAS); + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&a, NULL); + + thrd_join(t1); + thrd_join(t2); + + return 0; +} diff --git a/benchmark/ms-queue/.gitignore b/benchmark/ms-queue/.gitignore new file mode 100644 index 0000000..95811e0 --- /dev/null +++ b/benchmark/ms-queue/.gitignore @@ -0,0 +1 @@ +/main diff --git a/benchmark/ms-queue/Makefile b/benchmark/ms-queue/Makefile new file mode 100644 index 0000000..da3a0e4 --- /dev/null +++ b/benchmark/ms-queue/Makefile @@ -0,0 +1,17 @@ +include ../benchmarks.mk + +TESTNAME = main + +HEADERS = my_queue.h +OBJECTS = main.o my_queue.o + +all: $(TESTNAME) + +$(TESTNAME): $(HEADERS) $(OBJECTS) + $(CC) -o $@ $(OBJECTS) $(CFLAGS) $(LDFLAGS) + +%.o: %.c + $(CC) -c -o $@ $< $(CFLAGS) + +clean: + rm -f $(TESTNAME) *.o diff --git a/benchmark/ms-queue/main.c b/benchmark/ms-queue/main.c new file mode 100644 index 0000000..b541b01 --- /dev/null +++ b/benchmark/ms-queue/main.c @@ -0,0 +1,80 @@ +#include +#include +#include + +#include "my_queue.h" +#include "model-assert.h" + +static int procs = 2; +static queue_t *queue; +static thrd_t *threads; +static unsigned int *input; +static unsigned int *output; +static int num_threads; + +int get_thread_num() +{ + thrd_t curr = thrd_current(); + int i; + for (i = 0; i < num_threads; i++) + if (curr.priv == threads[i].priv) + return i; + MODEL_ASSERT(0); + return -1; +} + +static void main_task(void *param) +{ + unsigned int val; + int pid = *((int *)param); + + if (!pid) { + input[0] = 17; + enqueue(queue, input[0]); + output[0] = dequeue(queue); + } else { + input[1] = 37; + enqueue(queue, input[1]); + output[1] = dequeue(queue); + } +} + +int user_main(int argc, char **argv) +{ + int i; + int *param; + unsigned int in_sum = 0, out_sum = 0; + + queue = calloc(1, sizeof(*queue)); + MODEL_ASSERT(queue); + + num_threads = procs; + threads = malloc(num_threads * 
sizeof(thrd_t)); + param = malloc(num_threads * sizeof(*param)); + input = calloc(num_threads, sizeof(*input)); + output = calloc(num_threads, sizeof(*output)); + + init_queue(queue, num_threads); + for (i = 0; i < num_threads; i++) { + param[i] = i; + thrd_create(&threads[i], main_task, ¶m[i]); + } + for (i = 0; i < num_threads; i++) + thrd_join(threads[i]); + + for (i = 0; i < num_threads; i++) { + in_sum += input[i]; + out_sum += output[i]; + } + for (i = 0; i < num_threads; i++) + printf("input[%d] = %u\n", i, input[i]); + for (i = 0; i < num_threads; i++) + printf("output[%d] = %u\n", i, output[i]); + MODEL_ASSERT(in_sum == out_sum); + + free(param); + free(threads); + free(queue); + + return 0; +} diff --git a/benchmark/ms-queue/my_queue.c b/benchmark/ms-queue/my_queue.c new file mode 100644 index 0000000..fc8e02a --- /dev/null +++ b/benchmark/ms-queue/my_queue.c @@ -0,0 +1,221 @@ +#include +#include +#include "librace.h" +#include "model-assert.h" + +#include "my_queue.h" + +#define relaxed memory_order_relaxed +#define release memory_order_release +#define acquire memory_order_acquire + +#define MAX_FREELIST 4 /* Each thread can own up to MAX_FREELIST free nodes */ +#define INITIAL_FREE 2 /* Each thread starts with INITIAL_FREE free nodes */ + +#define POISON_IDX 0x666 + +static unsigned int (*free_lists)[MAX_FREELIST]; + +/* Search this thread's free list for a "new" node */ +static unsigned int new_node() +{ + int i; + int t = get_thread_num(); + for (i = 0; i < MAX_FREELIST; i++) { + unsigned int node = load_32(&free_lists[t][i]); + if (node) { + store_32(&free_lists[t][i], 0); + return node; + } + } + /* free_list is empty? */ + MODEL_ASSERT(0); + return 0; +} + +/* Place this node index back on this thread's free list */ +static void reclaim(unsigned int node) +{ + int i; + int t = get_thread_num(); + + /* Don't reclaim NULL node */ + MODEL_ASSERT(node); + + for (i = 0; i < MAX_FREELIST; i++) { + /* Should never race with our own thread here */ + unsigned int idx = load_32(&free_lists[t][i]); + + /* Found empty spot in free list */ + if (idx == 0) { + store_32(&free_lists[t][i], node); + return; + } + } + /* free list is full? */ + MODEL_ASSERT(0); +} + +void init_queue(queue_t *q, int num_threads) +{ + int i, j; + + /* Initialize each thread's free list with INITIAL_FREE pointers */ + /* The actual nodes are initialized with poison indexes */ + free_lists = malloc(num_threads * sizeof(*free_lists)); + for (i = 0; i < num_threads; i++) { + for (j = 0; j < INITIAL_FREE; j++) { + free_lists[i][j] = 2 + i * MAX_FREELIST + j; + atomic_init(&q->nodes[free_lists[i][j]].next, MAKE_POINTER(POISON_IDX, 0)); + } + } + + /* initialize queue */ + atomic_init(&q->head, MAKE_POINTER(1, 0)); + atomic_init(&q->tail, MAKE_POINTER(1, 0)); + atomic_init(&q->nodes[1].next, MAKE_POINTER(0, 0)); +} + +/** + @Begin + @Global_define: + typedef struct tag_elem { + Tag id; + unsigned int data; + + tag_elem(Tag _id, unsigned int _data) { + id = _id; + data = _data; + } + } tag_elem_t; + + spec_queue __queue; + Tag __tag; + @Happens_before: + # Only check the happens-before relationship according to the id of the + # commit_point_set. For commit_point_set that has same ID, A -> B means + # B happens after the previous A. 
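+	# e.g. the Dequeue that returns the element tagged t is ordered after the
+	# Enqueue that inserted that element, since the two calls share the same ID.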
+ Enqueue -> Dequeue + @End +*/ + +/** + @Begin + @Interface: Enqueue + @Commit_point_set: Enqueue_Success_Point + @ID: __tag.getCurAndInc() + @Action: + # __ID__ is an internal macro that refers to the id of the current + # interface call + @Code: + __queue.enqueue(tag_elem_t(__ID__, val)); + @End +*/ +void enqueue(queue_t *q, unsigned int val) +{ + int success = 0; + unsigned int node; + pointer tail; + pointer next; + pointer tmp; + + node = new_node(); + store_32(&q->nodes[node].value, val); + tmp = atomic_load_explicit(&q->nodes[node].next, relaxed); + set_ptr(&tmp, 0); // NULL + atomic_store_explicit(&q->nodes[node].next, tmp, relaxed); + + while (!success) { + tail = atomic_load_explicit(&q->tail, acquire); + next = atomic_load_explicit(&q->nodes[get_ptr(tail)].next, acquire); + if (tail == atomic_load_explicit(&q->tail, relaxed)) { + + /* Check for uninitialized 'next' */ + MODEL_ASSERT(get_ptr(next) != POISON_IDX); + + if (get_ptr(next) == 0) { // == NULL + pointer value = MAKE_POINTER(node, get_count(next) + 1); + success = atomic_compare_exchange_strong_explicit(&q->nodes[get_ptr(tail)].next, + &next, value, release, release); + } + if (!success) { + unsigned int ptr = get_ptr(atomic_load_explicit(&q->nodes[get_ptr(tail)].next, acquire)); + pointer value = MAKE_POINTER(ptr, + get_count(tail) + 1); + int commit_success = 0; + commit_success = atomic_compare_exchange_strong_explicit(&q->tail, + &tail, value, release, release); + /** + @Begin + @Commit_point_define_check: __ATOMIC_RET__ == true + @Label: Enqueue_Success_Point + @End + */ + thrd_yield(); + } + } + } + atomic_compare_exchange_strong_explicit(&q->tail, + &tail, + MAKE_POINTER(node, get_count(tail) + 1), + release, release); +} + +/** + @Begin + @Interface: Dequeue + @Commit_point_set: Dequeue_Success_Point + @ID: __queue.peak().tag + @Action: + @Code: + unsigned int _Old_Val = __queue.dequeue().data; + @Post_check: + _Old_Val == __RET__ + @End +*/ +unsigned int dequeue(queue_t *q) +{ + unsigned int value; + int success = 0; + pointer head; + pointer tail; + pointer next; + + while (!success) { + head = atomic_load_explicit(&q->head, acquire); + tail = atomic_load_explicit(&q->tail, relaxed); + next = atomic_load_explicit(&q->nodes[get_ptr(head)].next, acquire); + if (atomic_load_explicit(&q->head, relaxed) == head) { + if (get_ptr(head) == get_ptr(tail)) { + + /* Check for uninitialized 'next' */ + MODEL_ASSERT(get_ptr(next) != POISON_IDX); + + if (get_ptr(next) == 0) { // NULL + return 0; // NULL + } + atomic_compare_exchange_strong_explicit(&q->tail, + &tail, + MAKE_POINTER(get_ptr(next), get_count(tail) + 1), + release, release); + thrd_yield(); + } else { + value = load_32(&q->nodes[get_ptr(next)].value); + success = atomic_compare_exchange_strong_explicit(&q->head, + &head, + MAKE_POINTER(get_ptr(next), get_count(head) + 1), + release, release); + /** + @Begin + @Commit_point_define_check: __ATOMIC_RET__ == true + @Label: Dequeue_Success_Point + @End + */ + if (!success) + thrd_yield(); + } + } + } + reclaim(get_ptr(head)); + return value; +} diff --git a/benchmark/ms-queue/my_queue.h b/benchmark/ms-queue/my_queue.h new file mode 100644 index 0000000..c92e420 --- /dev/null +++ b/benchmark/ms-queue/my_queue.h @@ -0,0 +1,31 @@ +#include + +#define MAX_NODES 0xf + +typedef unsigned long long pointer; +typedef atomic_ullong pointer_t; + +#define MAKE_POINTER(ptr, count) ((((pointer)count) << 32) | ptr) +#define PTR_MASK 0xffffffffLL +#define COUNT_MASK (0xffffffffLL << 32) + +static inline void set_count(pointer *p, 
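+/* Packed-pointer layout (as the macros above suggest): the low 32 bits hold a
+ * node index into queue_t.nodes and the high 32 bits hold a modification
+ * count; CAS callers store get_count(old) + 1 so that a recycled index is not
+ * mistaken for the old value (the usual ABA guard). */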
unsigned int val) { *p = (*p & ~COUNT_MASK) | ((pointer)val << 32); } +static inline void set_ptr(pointer *p, unsigned int val) { *p = (*p & ~PTR_MASK) | val; } +static inline unsigned int get_count(pointer p) { return (p & COUNT_MASK) >> 32; } +static inline unsigned int get_ptr(pointer p) { return p & PTR_MASK; } + +typedef struct node { + unsigned int value; + pointer_t next; +} node_t; + +typedef struct { + pointer_t head; + pointer_t tail; + node_t nodes[MAX_NODES + 1]; +} queue_t; + +void init_queue(queue_t *q, int num_threads); +void enqueue(queue_t *q, unsigned int val); +unsigned int dequeue(queue_t *q); +int get_thread_num(); diff --git a/src/edu/uci/eecs/specCompiler/codeGenerator/CodeGenerator.java b/src/edu/uci/eecs/specCompiler/codeGenerator/CodeGenerator.java index cbd8d7f..b53b482 100644 --- a/src/edu/uci/eecs/specCompiler/codeGenerator/CodeGenerator.java +++ b/src/edu/uci/eecs/specCompiler/codeGenerator/CodeGenerator.java @@ -8,7 +8,12 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import edu.uci.eecs.specCompiler.specExtraction.CPDefineCheckConstruct; +import edu.uci.eecs.specCompiler.specExtraction.CPDefineConstruct; +import edu.uci.eecs.specCompiler.specExtraction.Construct; import edu.uci.eecs.specCompiler.specExtraction.GlobalConstruct; +import edu.uci.eecs.specCompiler.specExtraction.InterfaceConstruct; +import edu.uci.eecs.specCompiler.specExtraction.PotentialCPDefineConstruct; import edu.uci.eecs.specCompiler.specExtraction.SpecConstruct; import edu.uci.eecs.specCompiler.specExtraction.SpecExtractor; import edu.uci.eecs.specCompiler.specExtraction.SpecNotMatchException; @@ -101,6 +106,9 @@ public class CodeGenerator { end++; } + // Generate code from the DefineVar and __COND_SAT__ + + CodeAddition addition = new CodeAddition(lineNum, newCode); if (!codeAdditions.containsKey(inst.file)) { codeAdditions.put(inst.file, new ArrayList()); @@ -161,7 +169,21 @@ public class CodeGenerator { } public void generateCode() { - + for (int i = 0; i < _semantics.constructs.size(); i++) { + SpecConstruct inst = _semantics.constructs.get(i); + Construct construct = inst.construct; + if (construct instanceof GlobalConstruct) { + globalConstruct2Code(inst); + } else if (construct instanceof InterfaceConstruct) { + interface2Code(inst); + } else if (construct instanceof PotentialCPDefineConstruct) { + potentialCP2Code(inst); + } else if (construct instanceof CPDefineConstruct) { + CPDefine2Code(inst); + } else if (construct instanceof CPDefineCheckConstruct) { + CPDefineCheck2Code(inst); + } + } } public static void main(String[] argvs) { -- 2.34.1