From: Peizhao Ou
Date: Tue, 8 Oct 2013 17:11:19 +0000 (-0700)
Subject: Merge branch 'master' of ssh://demsky.eecs.uci.edu/home/git/model-checker-priv
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=cdsspec-compiler.git;a=commitdiff_plain;h=2e63f889abede3b1d257e2c87f8604dd0a3613b3;hp=bfa4ff3a8ecef9c1e060921ea19d4cb6ec8c3b03

Merge branch 'master' of ssh://demsky.eecs.uci.edu/home/git/model-checker-priv
---
diff --git a/.dir-locals.el b/.dir-locals.el
new file mode 100644
index 0000000..ce85e5f
--- /dev/null
+++ b/.dir-locals.el
@@ -0,0 +1 @@
+((nil . ((indent-tabs-mode . t))))
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8df9862
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+# generic types
+*.o
+*.swp
+*.swo
+*.so
+*~
+*.dot
+.*.d
+*.pdf
+
+# files in this directory
+/tags
+/doc/docs
+/benchmarks
+/README.html
diff --git a/DEBUGGINGNOTES.txt b/DEBUGGINGNOTES.txt
new file mode 100644
index 0000000..70cbba6
--- /dev/null
+++ b/DEBUGGINGNOTES.txt
@@ -0,0 +1,6 @@
+To run inside MacOS under gdb you need:
+set dont-handle-bad-access 1
+handle SIGBUS nostop noprint
+
+To run in Linux under gdb, use:
+handle SIGSEGV nostop noprint
diff --git a/Doxyfile b/Doxyfile
new file mode 100644
index 0000000..ed9f000
--- /dev/null
+++ b/Doxyfile
@@ -0,0 +1,1801 @@
+# Doxyfile 1.8.0
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or sequence of words) that should
+# identify the project. Note that if you do not use Doxywizard you need
+# to put quotes around the project name if it contains spaces.
+
+PROJECT_NAME = "CDSChecker: A Model Checker for C11/C++11 Atomics"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF =
+
+# With the PROJECT_LOGO tag one can specify an logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. 
Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 5 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. 
+# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all +# comments according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you +# can mix doxygen, HTML, and XML commands with Markdown formatting. +# Disable only in case of backward compatibilities issues. + +MARKDOWN_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. 
+ +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields will be shown inline in the documentation +# of the scope in which they are defined (i.e. file, namespace, or group +# documentation), provided this scope is documented. If set to NO (the default), +# structs, classes, and unions are shown on a separate page (for HTML and Man +# pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +SYMBOL_CACHE_SIZE = 0 + +# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be +# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given +# their name and scope. Since this can be an expensive process and often the +# same symbol appear multiple times in the code, doxygen keeps a cache of +# pre-resolved symbols. If the cache is too small doxygen will become slower. +# If the cache is too large, memory is wasted. The cache size is given by this +# formula: 2^(16+LOOKUP_CACHE_SIZE). 
The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = YES + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal scope will be included in the documentation. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. 
If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. 
This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command , where is the value of +# the FILE_VERSION_FILTER tag, and is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. The create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files +# containing the references data. This must be a list of .bib files. The +# .bib extension is automatically appended if omitted. Using this command +# requires the bibtex tool to be installed. See also +# http://en.wikipedia.org/wiki/BibTeX for more info. 
For LaTeX the style +# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this +# feature you need bibtex and perl available in the search path. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = . include/ include/atomic include/condition_variable include/cstdatomic include/mutex + + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. 
If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = malloc.c + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = . + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command , where +# is the value of the INPUT_FILTER tag, and is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. 
If FILTER_PATTERNS is empty or if +# non of the patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled. + +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = docs + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# style sheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the style sheet and background images +# according to this color. 
Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. 
+ +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# +# Qt Help Project / Custom Filters. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. 
+ +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. Since the tabs have the same information as the +# navigation tree you can set this option to NO if you already set +# GENERATE_TREEVIEW to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. +# Since the tree basically has the same information as the tab index you +# could consider to set DISABLE_INDEX to NO when enabling this option. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. 
+ +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want to formulas look prettier in the HTML +# output. When enabled you may also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = NO + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to +# the MathJax Content Delivery Network so you can quickly see the result without +# installing MathJax. +# However, it is strongly recommended to install a local +# copy of MathJax from http://www.mathjax.org before deployment. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension +# names that should be enabled during MathJax rendering. + +MATHJAX_EXTENSIONS = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. 
+ +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. 
+ +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load style sheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. 
Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. 
+ +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. For each +# tag file the location of the external documentation should be added. The +# format of a tag file without this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths +# or URLs. Note that each tag file must have a unique name (where the name does +# NOT include the path). If a tag file is not located in the directory in which +# doxygen is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. 
+ +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside +# the class node. If there are many fields or methods and many nodes the +# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS +# threshold limits the number of items for each type to make the size more +# managable. Set this to 0 for no limit. Note that the threshold may be +# exceeded by 50% before the limit is enforced. + +UML_LIMIT_NUM_FIELDS = 10 + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. 
+ +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. + +INTERACTIVE_SVG = NO + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. 
+ +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. 
You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f44a895 --- /dev/null +++ b/Makefile @@ -0,0 +1,85 @@ +include common.mk + +OBJECTS := libthreads.o schedule.o model.o threads.o librace.o action.o \ + nodestack.o clockvector.o main.o snapshot-interface.o cyclegraph.o \ + datarace.o impatomic.o cmodelint.o \ + snapshot.o malloc.o mymemory.o common.o mutex.o promise.o conditionvariable.o \ + context.o scanalysis.o execution.o plugins.o libannotate.o + +CPPFLAGS += -Iinclude -I. 
+LDFLAGS := -ldl -lrt -rdynamic +SHARED := -shared + +# Mac OSX options +ifeq ($(UNAME), Darwin) +LDFLAGS := -ldl +SHARED := -Wl,-undefined,dynamic_lookup -dynamiclib +endif + +TESTS_DIR := test + +MARKDOWN := doc/Markdown/Markdown.pl + +all: $(LIB_SO) tests README.html + +debug: CPPFLAGS += -DCONFIG_DEBUG +debug: all + +PHONY += docs +docs: *.c *.cc *.h README.html + doxygen + +README.html: README.md + $(MARKDOWN) $< > $@ + +$(LIB_SO): $(OBJECTS) + $(CXX) $(SHARED) -o $(LIB_SO) $+ $(LDFLAGS) + +malloc.o: malloc.c + $(CC) -fPIC -c malloc.c -DMSPACES -DONLY_MSPACES -DHAVE_MMAP=0 $(CPPFLAGS) -Wno-unused-variable + +%.o: %.cc + $(CXX) -MMD -MF .$@.d -fPIC -c $< $(CPPFLAGS) + +%.pdf: %.dot + dot -Tpdf $< -o $@ + +-include $(OBJECTS:%=.%.d) + +PHONY += clean +clean: + rm -f *.o *.so .*.d *.pdf *.dot + $(MAKE) -C $(TESTS_DIR) clean + +PHONY += mrclean +mrclean: clean + rm -rf docs + +PHONY += tags +tags: + ctags -R + +PHONY += tests +tests: $(LIB_SO) + $(MAKE) -C $(TESTS_DIR) + +BENCH_DIR := benchmarks + +PHONY += benchmarks +benchmarks: $(LIB_SO) + @if ! test -d $(BENCH_DIR); then \ + echo "Directory $(BENCH_DIR) does not exist" && \ + echo "Please clone the benchmarks repository" && \ + echo && \ + exit 1; \ + fi + $(MAKE) -C $(BENCH_DIR) + +PHONY += pdfs +pdfs: $(patsubst %.dot,%.pdf,$(wildcard *.dot)) + +.PHONY: $(PHONY) + +# A 1-inch margin PDF generated by 'pandoc' +%.pdf: %.md + pandoc -o $@ $< -V header-includes='\usepackage[margin=1in]{geometry}' diff --git a/README.md b/README.md new file mode 100644 index 0000000..ba64e18 --- /dev/null +++ b/README.md @@ -0,0 +1,410 @@ +CDSChecker: A Model Checker for C11 and C++11 Atomics +===================================================== + +CDSChecker is a model checker for C11/C++11 which exhaustively explores the +behaviors of code under the C/C++ memory model. It uses partial order reduction +as well as a few other novel techniques to eliminate time spent on redundant +execution behaviors and to significantly shrink the state space. The model +checking algorithm is described in more detail in this paper (published in +OOPSLA '13): + +> + +It is designed to support unit tests on concurrent data structure written using +C/C++ atomics. + +CDSChecker is constructed as a dynamically-linked shared library which +implements the C and C++ atomic types and portions of the other thread-support +libraries of C/C++ (e.g., std::atomic, std::mutex, etc.). Notably, we only +support the C version of threads (i.e., `thrd_t` and similar, from ``), +because C++ threads require features which are only available to a C++11 +compiler (and we want to support others, at least for now). + +CDSChecker should compile on Linux and Mac OSX with no dependencies and has been +tested with LLVM (clang/clang++) and GCC. It likely can be ported to other \*NIX +flavors. We have not attempted to port to Windows. + + +Getting Started +--------------- + +If you haven't done so already, you may download CDSChecker using +[git](http://git-scm.com/): + + git clone git://demsky.eecs.uci.edu/model-checker.git + +Source code can also be downloaded via the snapshot links on Gitweb (found in +the __See Also__ section). 
+
+Get the benchmarks (not required; distributed separately), placing them as a
+subdirectory under the `model-checker` directory:
+
+    cd model-checker
+    git clone git://demsky.eecs.uci.edu/model-checker-benchmarks.git benchmarks
+
+Compile the model checker:
+
+    make
+
+Compile the benchmarks:
+
+    make benchmarks
+
+Run a simple example (the `run.sh` script does some very minimal processing for
+you):
+
+    ./run.sh test/userprog.o
+
+To see the help message on how to run CDSChecker, execute:
+
+    ./run.sh -h
+
+
+Useful Options
+--------------
+
+`-m num`
+
+  > Controls the liveness of the memory system. Note that multithreaded programs
+  > often rely on memory liveness for termination, so this parameter is
+  > necessary for such programs.
+  >
+  > Liveness is controlled by `num`: the number of times a load is allowed to
+  > see the same store when a newer store exists---one that is ordered later in
+  > the modification order.
+
+`-y`
+
+  > Turns on CHESS-like yield-based fairness support (requires `thrd_yield()`
+  > instrumentation in the test program).
+
+`-f num`
+
+  > Turns on alternative fairness support (less desirable than `-y`). A
+  > necessary alternative for some programs that do not support yield-based
+  > fairness properly.
+
+`-v`
+
+  > Verbose: show all executions and not just buggy ones.
+
+`-s num`
+
+  > Constrain how long we will keep running to wait for a future value past the
+  > point when it is expected.
+
+`-u num`
+
+  > Value to provide to atomic loads from uninitialized memory locations. The
+  > default is 0, but this may cause some programs to throw exceptions
+  > (segfault) before the model checker prints a trace.
+
+Suggested options:
+
+> -m 2 -y
+
+or
+
+> -m 2 -f 10
+
+
+Benchmarks
+----------
+
+Many simple tests are located in the `test/` directory. You may also want to
+try the larger benchmarks (distributed separately), which can be placed under
+the `benchmarks/` directory. After building CDSChecker, you can build and run
+the benchmarks as follows:
+
+>     make benchmarks
+>     cd benchmarks
+>
+>     # run barrier test with fairness/memory liveness
+>     ./run.sh barrier/barrier -y -m 2
+>
+>     # Linux reader/writer lock test with fairness/memory liveness
+>     ./run.sh linuxrwlocks/linuxrwlocks -y -m 2
+>
+>     # run all benchmarks and provide timing results
+>     ./bench.sh
+
+
+Running your own code
+---------------------
+
+You likely want to test your own code, not just our simple tests. To do so, you
+need to perform a few steps.
+
+First, because CDSChecker executes your program dozens (if not hundreds or
+thousands) of times, you will have the most success if your code is written as a
+unit test and not as a full-blown program.
+
+Second, because CDSChecker must be able to manage your program for you, your
+program should declare its main entry point as `user_main(int, char**)` rather
+than `main(int, char**)`.
+
+Third, test programs must use the standard C11/C++11 library headers (see below
+for supported APIs) and must compile against the versions provided in
+CDSChecker's `include/` directory. Notably, we only support C11 thread syntax
+(`thrd_t`, etc. from `<threads.h>`).
+
+Test programs may also use our included happens-before race detector by
+including `<librace.h>` and utilizing the appropriate functions
+(`store_{8,16,32,64}()` and `load_{8,16,32,64}()`) for storing/loading data
+to/from non-atomic shared memory.
+
+CDSChecker can also check boolean assertions in your test programs. Just
+include `<model-assert.h>` and use the `MODEL_ASSERT()` macro in your test program.
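+
+As a rough illustration, such a unit test might look like the following sketch
+(illustrative code, not shipped with CDSChecker; it assumes the `<threads.h>`,
+`<stdatomic.h>`, and `<model-assert.h>` headers provided in `include/`):
+
+    #include <threads.h>
+    #include <stdatomic.h>
+    #include "model-assert.h"
+
+    static atomic_int flag;
+
+    /* Worker thread: publish a value with a relaxed atomic store */
+    static void worker(void *arg)
+    {
+        atomic_store_explicit(&flag, 1, memory_order_relaxed);
+    }
+
+    /* CDSChecker calls user_main() instead of main() */
+    int user_main(int argc, char **argv)
+    {
+        thrd_t t;
+
+        atomic_init(&flag, 0);
+        thrd_create(&t, (thrd_start_t)&worker, NULL);
+        thrd_join(t);
+
+        /* Checked in every execution CDSChecker explores; joining the worker
+         * makes its store visible here, so this assertion should never fail */
+        MODEL_ASSERT(atomic_load_explicit(&flag, memory_order_relaxed) == 1);
+        return 0;
+    }
+
+One convenient way to build such a test is to place the source file in the
+`test/` directory and run `make`, then execute it with `run.sh` (assuming the
+`test/` Makefile picks up new source files automatically).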
+CDSChecker will report a bug in any possible execution in which the argument to +`MODEL_ASSERT()` evaluates to false (that is, 0). + +Test programs should be compiled against our shared library (libmodel.so) using +the headers in the `include/` directory. Then the shared library must be made +available to the dynamic linker, using the `LD_LIBRARY_PATH` environment +variable, for instance. + + +### Supported C11/C++11 APIs ### + +To model-check multithreaded code properly, CDSChecker needs to instrument any +concurrency-related API calls made in your code. Currently, we support parts of +the following thread-support libraries. The C versions can be used in either C +or C++. + +* ``, ``, `` +* `` +* `` +* `` + +Because we want to extend support to legacy (i.e., non-C++11) compilers, we do +not support some new C++11 features that can't be implemented in C++03 (e.g., +C++ ``). + +Reading an execution trace +-------------------------- + +When CDSChecker detects a bug in your program (or when run with the `--verbose` +flag), it prints the output of the program run (STDOUT) along with some summary +trace information for the execution in question. The trace is given as a +sequence of lines, where each line represents an operation in the execution +trace. These lines are ordered by the order in which they were run by CDSChecker +(i.e., the "execution order"), which does not necessarily align with the "order" +of the values observed (i.e., the modification order or the reads-from +relation). + +The following list describes each of the columns in the execution trace output: + + * \#: The sequence number within the execution. That is, sequence number "9" + means the operation was the 9th operation executed by CDSChecker. Note that + this represents the execution order, not necessarily any other order (e.g., + modification order or reads-from). + + * t: The thread number + + * Action type: The type of operation performed + + * MO: The memory-order for this operation (i.e., `memory_order_XXX`, where `XXX` is + `relaxed`, `release`, `acquire`, `rel_acq`, or `seq_cst`) + + * Location: The memory location on which this operation is operating. This is + well-defined for atomic write/read/RMW, but other operations are subject to + CDSChecker implementation details. + + * Value: For reads/writes/RMW, the value returned by the operation. Note that + for RMW, this is the value that is *read*, not the value that was *written*. + For other operations, 'value' may have some CDSChecker-internal meaning, or + it may simply be a don't-care (such as `0xdeadbeef`). + + * Rf: For reads, the sequence number of the operation from which it reads. + [Note: If the execution is a partial, infeasible trace (labeled INFEASIBLE), + as printed during `--verbose` execution, reads may not be resolved and so may + have Rf=? or Rf=Px, where x is a promised future value.] + + * CV: The clock vector, encapsulating the happens-before relation (see our + paper, or the C/C++ memory model itself). We use a Lamport-style clock vector + similar to [1]. The "clock" is just the sequence number (#). The clock vector + can be read as follows: + + Each entry is indexed as CV[i], where + + i = 0, 1, 2, ..., + + So for any thread i, we say CV[i] is the sequence number of the most recent + operation in thread i such that operation i happens-before this operation. + Notably, thread 0 is reserved as a dummy thread for certain CDSChecker + operations. 
+ +See the following example trace: + + ------------------------------------------------------------------------------------ + # t Action type MO Location Value Rf CV + ------------------------------------------------------------------------------------ + 1 1 thread start seq_cst 0x7f68ff11e7c0 0xdeadbeef ( 0, 1) + 2 1 init atomic relaxed 0x601068 0 ( 0, 2) + 3 1 init atomic relaxed 0x60106c 0 ( 0, 3) + 4 1 thread create seq_cst 0x7f68fe51c710 0x7f68fe51c6e0 ( 0, 4) + 5 2 thread start seq_cst 0x7f68ff11ebc0 0xdeadbeef ( 0, 4, 5) + 6 2 atomic read relaxed 0x60106c 0 3 ( 0, 4, 6) + 7 1 thread create seq_cst 0x7f68fe51c720 0x7f68fe51c6e0 ( 0, 7) + 8 3 thread start seq_cst 0x7f68ff11efc0 0xdeadbeef ( 0, 7, 0, 8) + 9 2 atomic write relaxed 0x601068 0 ( 0, 4, 9) + 10 3 atomic read relaxed 0x601068 0 2 ( 0, 7, 0, 10) + 11 2 thread finish seq_cst 0x7f68ff11ebc0 0xdeadbeef ( 0, 4, 11) + 12 3 atomic write relaxed 0x60106c 0x2a ( 0, 7, 0, 12) + 13 1 thread join seq_cst 0x7f68ff11ebc0 0x2 ( 0, 13, 11) + 14 3 thread finish seq_cst 0x7f68ff11efc0 0xdeadbeef ( 0, 7, 0, 14) + 15 1 thread join seq_cst 0x7f68ff11efc0 0x3 ( 0, 15, 11, 14) + 16 1 thread finish seq_cst 0x7f68ff11e7c0 0xdeadbeef ( 0, 16, 11, 14) + HASH 4073708854 + ------------------------------------------------------------------------------------ + +Now consider, for example, operation 10: + +This is the 10th operation in the execution order. It is an atomic read-relaxed +operation performed by thread 3 at memory address `0x601068`. It reads the value +"0", which was written by the 2nd operation in the execution order. Its clock +vector consists of the following values: + + CV[0] = 0, CV[1] = 7, CV[2] = 0, CV[3] = 10 + +End of Execution Summary +------------------------ + +CDSChecker prints summary statistics at the end of each execution. These +summaries are based off of a few different properties of an execution, which we +will break down here: + +* An _infeasible_ execution is an execution which is not consistent with the + memory model. Such an execution can be considered overhead for the + model-checker, since it should never appear in practice. + +* A _buggy_ execution is an execution in which CDSChecker has found a real + bug: a data race, a deadlock, failure of a user-provided assertion, or an + uninitialized load, for instance. CDSChecker will only report bugs in feasible + executions. + +* A _redundant_ execution is a feasible execution that is exploring the same + state space explored by a previous feasible execution. Such exploration is + another instance of overhead, so CDSChecker terminates these executions as + soon as they are detected. CDSChecker is mostly able to avoid such executions + but may encounter them if a fairness option is enabled. + +Now, we can examine the end-of-execution summary of one test program: + + $ ./run.sh test/rmwprog.o + + test/rmwprog.o + ******* Model-checking complete: ******* + Number of complete, bug-free executions: 6 + Number of redundant executions: 0 + Number of buggy executions: 0 + Number of infeasible executions: 29 + Total executions: 35 + +* _Number of complete, bug-free executions:_ these are feasible, non-buggy, and + non-redundant executions. They each represent different, legal behaviors you + can expect to see in practice. + +* _Number of redundant executions:_ these are feasible but redundant executions + that were terminated as soon as CDSChecker noticed the redundancy. + +* _Number of buggy executions:_ these are feasible, buggy executions. 
These are + the trouble spots where your program is triggering a bug or assertion. + Ideally, this number should be 0. + +* _Number of infeasible executions:_ these are infeasible executions, + representing some of the overhead of model-checking. + +* _Total executions:_ the total number of executions explored by CDSChecker. + Should be the sum of the above categories, since they are mutually exclusive. + + +Other Notes and Pitfalls +------------------------ + +* Many programs require some form of fairness in order to terminate in a finite + amount of time. CDSChecker supports the `-y num` and `-f num` flags for these + cases. The `-y` option (yield-based fairness) is preferable, but it requires + careful usage of yields (i.e., `thrd_yield()`) in the test program. For + programs without proper `thrd_yield()`, you may consider using `-f` instead. + +* Deadlock detection: CDSChecker can detect deadlocks. For instance, try the + following test program. + + > ./run.sh test/deadlock.o + + Deadlock detection currently detects when a thread is about to step into a + deadlock, without actually including the final step in the trace. But you can + examine the program to see the next step. + +* CDSChecker has to speculatively explore many execution behaviors due to the + relaxed memory model, and many of these turn out to be infeasible (that is, + they cannot be legally produced by the memory model). CDSChecker discards + these executions as soon as it identifies them (see the "Number of infeasible + executions" statistic); however, the speculation can occasionally cause + CDSChecker to hit unexpected parts of the unit test program (causing a + division by 0, for instance). In such programs, you might consider running + CDSChecker with the `-u num` option. + +* Related to the previous point, CDSChecker may report more than one bug for a + particular candidate execution. This is because some bugs may not be + reportable until CDSChecker has explored more of the program, and in the + time between initial discovery and final assessment of the bug, CDSChecker may + discover another bug. + +* Data races may be reported as multiple bugs, one for each byte-address of the + data race in question. See, for example, this run: + + $ ./run.sh test/releaseseq.o + ... + Bug report: 4 bugs detected + [BUG] Data race detected @ address 0x601078: + Access 1: write in thread 2 @ clock 4 + Access 2: read in thread 3 @ clock 9 + [BUG] Data race detected @ address 0x601079: + Access 1: write in thread 2 @ clock 4 + Access 2: read in thread 3 @ clock 9 + [BUG] Data race detected @ address 0x60107a: + Access 1: write in thread 2 @ clock 4 + Access 2: read in thread 3 @ clock 9 + [BUG] Data race detected @ address 0x60107b: + Access 1: write in thread 2 @ clock 4 + Access 2: read in thread 3 @ clock 9 + + +See Also +-------- + +The CDSChecker project page: + +> + +The CDSChecker source and accompanying benchmarks on Gitweb: + +> +> +> + + +Contact +------- + +Please feel free to contact us for more information. Bug reports are welcome, +and we are happy to hear from our users. We are also very interested to know if +CDSChecker catches bugs in your programs. + +Contact Brian Norris at or Brian Demsky at . + + +Copyright +--------- + +Copyright © 2013 Regents of the University of California. All rights reserved. + +CDSChecker is distributed under the GPL v2. See the LICENSE file for details. + + +References +---------- + +[1] L. Lamport. Time, clocks, and the ordering of events in a distributed + system. 
CACM, 21(7):558-565, July 1978. diff --git a/action.cc b/action.cc new file mode 100644 index 0000000..2010a0b --- /dev/null +++ b/action.cc @@ -0,0 +1,658 @@ +#include +#define __STDC_FORMAT_MACROS +#include +#include + +#include "model.h" +#include "action.h" +#include "clockvector.h" +#include "common.h" +#include "threads-model.h" +#include "nodestack.h" + +#define ACTION_INITIAL_CLOCK 0 + +/** @brief A special value to represent a successful trylock */ +#define VALUE_TRYSUCCESS 1 + +/** @brief A special value to represent a failed trylock */ +#define VALUE_TRYFAILED 0 + +/** + * @brief Construct a new ModelAction + * + * @param type The type of action + * @param order The memory order of this action. A "don't care" for non-ATOMIC + * actions (e.g., THREAD_* or MODEL_* actions). + * @param loc The location that this action acts upon + * @param value (optional) A value associated with the action (e.g., the value + * read or written). Defaults to a given macro constant, for debugging purposes. + * @param thread (optional) The Thread in which this action occurred. If NULL + * (default), then a Thread is assigned according to the scheduler. + */ +ModelAction::ModelAction(action_type_t type, memory_order order, void *loc, + uint64_t value, Thread *thread) : + type(type), + order(order), + location(loc), + value(value), + reads_from(NULL), + reads_from_promise(NULL), + last_fence_release(NULL), + node(NULL), + seq_number(ACTION_INITIAL_CLOCK), + cv(NULL), + sleep_flag(false) +{ + /* References to NULL atomic variables can end up here */ + ASSERT(loc || type == ATOMIC_FENCE || type == MODEL_FIXUP_RELSEQ); + + Thread *t = thread ? thread : thread_current(); + this->tid = t->get_id(); +} + +/** @brief ModelAction destructor */ +ModelAction::~ModelAction() +{ + /** + * We can't free the clock vector: + * Clock vectors are snapshotting state. When we delete model actions, + * they are at the end of the node list and have invalid old clock + * vectors which have already been rolled back to an unallocated state. 
+ */ + + /* + if (cv) + delete cv; */ +} + +void ModelAction::copy_from_new(ModelAction *newaction) +{ + seq_number = newaction->seq_number; +} + +void ModelAction::set_seq_number(modelclock_t num) +{ + /* ATOMIC_UNINIT actions should never have non-zero clock */ + ASSERT(!is_uninitialized()); + ASSERT(seq_number == ACTION_INITIAL_CLOCK); + seq_number = num; +} + +bool ModelAction::is_thread_start() const +{ + return type == THREAD_START; +} + +bool ModelAction::is_thread_join() const +{ + return type == THREAD_JOIN; +} + +bool ModelAction::is_relseq_fixup() const +{ + return type == MODEL_FIXUP_RELSEQ; +} + +bool ModelAction::is_mutex_op() const +{ + return type == ATOMIC_LOCK || type == ATOMIC_TRYLOCK || type == ATOMIC_UNLOCK || type == ATOMIC_WAIT || type == ATOMIC_NOTIFY_ONE || type == ATOMIC_NOTIFY_ALL; +} + +bool ModelAction::is_lock() const +{ + return type == ATOMIC_LOCK; +} + +bool ModelAction::is_wait() const { + return type == ATOMIC_WAIT; +} + +bool ModelAction::is_notify() const { + return type == ATOMIC_NOTIFY_ONE || type == ATOMIC_NOTIFY_ALL; +} + +bool ModelAction::is_notify_one() const { + return type == ATOMIC_NOTIFY_ONE; +} + +bool ModelAction::is_unlock() const +{ + return type == ATOMIC_UNLOCK; +} + +bool ModelAction::is_trylock() const +{ + return type == ATOMIC_TRYLOCK; +} + +bool ModelAction::is_success_lock() const +{ + return type == ATOMIC_LOCK || (type == ATOMIC_TRYLOCK && value == VALUE_TRYSUCCESS); +} + +bool ModelAction::is_failed_trylock() const +{ + return (type == ATOMIC_TRYLOCK && value == VALUE_TRYFAILED); +} + +/** @return True if this operation is performed on a C/C++ atomic variable */ +bool ModelAction::is_atomic_var() const +{ + return is_read() || could_be_write(); +} + +bool ModelAction::is_uninitialized() const +{ + return type == ATOMIC_UNINIT; +} + +bool ModelAction::is_read() const +{ + return type == ATOMIC_READ || type == ATOMIC_RMWR || type == ATOMIC_RMW; +} + +bool ModelAction::is_write() const +{ + return type == ATOMIC_WRITE || type == ATOMIC_RMW || type == ATOMIC_INIT || type == ATOMIC_UNINIT; +} + +bool ModelAction::could_be_write() const +{ + return is_write() || is_rmwr(); +} + +bool ModelAction::is_yield() const +{ + return type == THREAD_YIELD; +} + +bool ModelAction::is_rmwr() const +{ + return type == ATOMIC_RMWR; +} + +bool ModelAction::is_rmw() const +{ + return type == ATOMIC_RMW; +} + +bool ModelAction::is_rmwc() const +{ + return type == ATOMIC_RMWC; +} + +bool ModelAction::is_fence() const +{ + return type == ATOMIC_FENCE; +} + +bool ModelAction::is_initialization() const +{ + return type == ATOMIC_INIT; +} + +bool ModelAction::is_annotation() const +{ + return type == ATOMIC_ANNOTATION; +} + +bool ModelAction::is_relaxed() const +{ + return order == std::memory_order_relaxed; +} + +bool ModelAction::is_acquire() const +{ + switch (order) { + case std::memory_order_acquire: + case std::memory_order_acq_rel: + case std::memory_order_seq_cst: + return true; + default: + return false; + } +} + +bool ModelAction::is_release() const +{ + switch (order) { + case std::memory_order_release: + case std::memory_order_acq_rel: + case std::memory_order_seq_cst: + return true; + default: + return false; + } +} + +bool ModelAction::is_seqcst() const +{ + return order == std::memory_order_seq_cst; +} + +bool ModelAction::same_var(const ModelAction *act) const +{ + if (act->is_wait() || is_wait()) { + if (act->is_wait() && is_wait()) { + if (((void *)value) == ((void *)act->value)) + return true; + } else if (is_wait()) { + if (((void 
*)value) == act->location) + return true; + } else if (act->is_wait()) { + if (location == ((void *)act->value)) + return true; + } + } + + return location == act->location; +} + +bool ModelAction::same_thread(const ModelAction *act) const +{ + return tid == act->tid; +} + +void ModelAction::copy_typeandorder(ModelAction * act) +{ + this->type = act->type; + this->order = act->order; +} + +/** + * Get the Thread which is the operand of this action. This is only valid for + * THREAD_* operations (currently only for THREAD_CREATE and THREAD_JOIN). Note + * that this provides a central place for determining the conventions of Thread + * storage in ModelAction, where we generally aren't very type-safe (e.g., we + * store object references in a (void *) address. + * + * For THREAD_CREATE, this yields the Thread which is created. + * For THREAD_JOIN, this yields the Thread we are joining with. + * + * @return The Thread which this action acts on, if exists; otherwise NULL + */ +Thread * ModelAction::get_thread_operand() const +{ + if (type == THREAD_CREATE) { + /* THREAD_CREATE stores its (Thread *) in a thrd_t::priv */ + thrd_t *thrd = (thrd_t *)get_location(); + return thrd->priv; + } else if (type == THREAD_JOIN) + /* THREAD_JOIN uses (Thread *) for location */ + return (Thread *)get_location(); + else + return NULL; +} + +/** + * @brief Convert the read portion of an RMW + * + * Changes an existing read part of an RMW action into either: + * -# a full RMW action in case of the completed write or + * -# a READ action in case a failed action. + * + * @todo If the memory_order changes, we may potentially need to update our + * clock vector. + * + * @param act The second half of the RMW (either RMWC or RMW) + */ +void ModelAction::process_rmw(ModelAction *act) +{ + this->order = act->order; + if (act->is_rmwc()) + this->type = ATOMIC_READ; + else if (act->is_rmw()) { + this->type = ATOMIC_RMW; + this->value = act->value; + } +} + +/** + * @brief Check if this action should be backtracked with another, due to + * potential synchronization + * + * The is_synchronizing method should only explore interleavings if: + * -# the operations are seq_cst and don't commute or + * -# the reordering may establish or break a synchronization relation. + * + * Other memory operations will be dealt with by using the reads_from relation. 
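+ *
+ * A hedged usage sketch (x, t1 and t2 are hypothetical; real actions are
+ * built by the instrumented program, not by hand):
+ * @code
+ *   ModelAction *w = new ModelAction(ATOMIC_WRITE, std::memory_order_seq_cst, &x, 1, t1);
+ *   ModelAction *r = new ModelAction(ATOMIC_READ, std::memory_order_seq_cst, &x, VALUE_NONE, t2);
+ *   r->could_synchronize_with(w); // true: same location, both seq_cst, different threads
+ * @endcode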
+ * + * @param act The action to consider exploring a reordering + * @return True, if we have to explore a reordering; otherwise false + */ +bool ModelAction::could_synchronize_with(const ModelAction *act) const +{ + // Same thread can't be reordered + if (same_thread(act)) + return false; + + // Different locations commute + if (!same_var(act)) + return false; + + // Explore interleavings of seqcst writes/fences to guarantee total + // order of seq_cst operations that don't commute + if ((could_be_write() || act->could_be_write() || is_fence() || act->is_fence()) && is_seqcst() && act->is_seqcst()) + return true; + + // Explore synchronizing read/write pairs + if (is_acquire() && act->is_release() && is_read() && act->could_be_write()) + return true; + + // lock just released...we can grab lock + if ((is_lock() || is_trylock()) && (act->is_unlock() || act->is_wait())) + return true; + + // lock just acquired...we can fail to grab lock + if (is_trylock() && act->is_success_lock()) + return true; + + // other thread stalling on lock...we can release lock + if (is_unlock() && (act->is_trylock() || act->is_lock())) + return true; + + if (is_trylock() && (act->is_unlock() || act->is_wait())) + return true; + + if (is_notify() && act->is_wait()) + return true; + + if (is_wait() && act->is_notify()) + return true; + + // Otherwise handle by reads_from relation + return false; +} + +bool ModelAction::is_conflicting_lock(const ModelAction *act) const +{ + // Must be different threads to reorder + if (same_thread(act)) + return false; + + // Try to reorder a lock past a successful lock + if (act->is_success_lock()) + return true; + + // Try to push a successful trylock past an unlock + if (act->is_unlock() && is_trylock() && value == VALUE_TRYSUCCESS) + return true; + + // Try to push a successful trylock past a wait + if (act->is_wait() && is_trylock() && value == VALUE_TRYSUCCESS) + return true; + + return false; +} + +/** + * Create a new clock vector for this action. Note that this function allows a + * user to clobber (and leak) a ModelAction's existing clock vector. A user + * should ensure that the vector has already either been rolled back + * (effectively "freed") or freed. + * + * @param parent A ModelAction from which to inherit a ClockVector + */ +void ModelAction::create_cv(const ModelAction *parent) +{ + if (parent) + cv = new ClockVector(parent->cv, this); + else + cv = new ClockVector(NULL, this); +} + +void ModelAction::set_try_lock(bool obtainedlock) +{ + value = obtainedlock ? VALUE_TRYSUCCESS : VALUE_TRYFAILED; +} + +/** + * @brief Get the value read by this load + * + * We differentiate this function from ModelAction::get_write_value and + * ModelAction::get_value for the purpose of RMW's, which may have both a + * 'read' and a 'write' value. + * + * Note: 'this' must be a load. + * + * @return The value read by this load + */ +uint64_t ModelAction::get_reads_from_value() const +{ + ASSERT(is_read()); + if (reads_from) + return reads_from->get_write_value(); + else if (reads_from_promise) + return reads_from_promise->get_value(); + return VALUE_NONE; /* Only for new actions with no reads-from */ +} + +/** + * @brief Get the value written by this store + * + * We differentiate this function from ModelAction::get_reads_from_value and + * ModelAction::get_value for the purpose of RMW's, which may have both a + * 'read' and a 'write' value. + * + * Note: 'this' must be a store. 
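+ *
+ * For an RMW that, say, read 1 and wrote 2 (a hypothetical increment), the
+ * accessors differ as sketched below:
+ * @code
+ *   rmw->get_reads_from_value(); // 1: the value loaded
+ *   rmw->get_write_value();      // 2: the value stored
+ *   rmw->get_return_value();     // 1: an RMW counts as a read here
+ * @endcode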
+ * + * @return The value written by this store + */ +uint64_t ModelAction::get_write_value() const +{ + ASSERT(is_write()); + return value; +} + +/** + * @brief Get the value returned by this action + * + * For atomic reads (including RMW), an operation returns the value it read. + * For atomic writes, an operation returns the value it wrote. For other + * operations, the return value varies (sometimes is a "don't care"), but the + * value is simply stored in the "value" field. + * + * @return This action's return value + */ +uint64_t ModelAction::get_return_value() const +{ + if (is_read()) + return get_reads_from_value(); + else if (is_write()) + return get_write_value(); + else + return value; +} + +/** @return The Node associated with this ModelAction */ +Node * ModelAction::get_node() const +{ + /* UNINIT actions do not have a Node */ + ASSERT(!is_uninitialized()); + return node; +} + +/** + * Update the model action's read_from action + * @param act The action to read from; should be a write + */ +void ModelAction::set_read_from(const ModelAction *act) +{ + ASSERT(act); + reads_from = act; + reads_from_promise = NULL; + if (act->is_uninitialized()) + model->assert_bug("May read from uninitialized atomic:\n" + " action %d, thread %d, location %p (%s, %s)", + seq_number, id_to_int(tid), location, + get_type_str(), get_mo_str()); +} + +/** + * Set this action's read-from promise + * @param promise The promise to read from + */ +void ModelAction::set_read_from_promise(Promise *promise) +{ + ASSERT(is_read()); + reads_from_promise = promise; + reads_from = NULL; +} + +/** + * Synchronize the current thread with the thread corresponding to the + * ModelAction parameter. + * @param act The ModelAction to synchronize with + * @return True if this is a valid synchronization; false otherwise + */ +bool ModelAction::synchronize_with(const ModelAction *act) +{ + if (*this < *act) + return false; + cv->merge(act->cv); + return true; +} + +bool ModelAction::has_synchronized_with(const ModelAction *act) const +{ + return cv->synchronized_since(act); +} + +/** + * Check whether 'this' happens before act, according to the memory-model's + * happens before relation. This is checked via the ClockVector constructs. + * @return true if this action's thread has synchronized with act's thread + * since the execution of act, false otherwise. 
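+ *
+ * A hedged sketch (rel and acq are hypothetical release/acquire actions):
+ * @code
+ *   if (acq->synchronize_with(rel))       // merges rel's clock vector into acq's
+ *       ASSERT(rel->happens_before(acq)); // now holds by construction
+ * @endcode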
+ */ +bool ModelAction::happens_before(const ModelAction *act) const +{ + return act->cv->synchronized_since(this); +} + +const char * ModelAction::get_type_str() const +{ + switch (this->type) { + case MODEL_FIXUP_RELSEQ: return "relseq fixup"; + case THREAD_CREATE: return "thread create"; + case THREAD_START: return "thread start"; + case THREAD_YIELD: return "thread yield"; + case THREAD_JOIN: return "thread join"; + case THREAD_FINISH: return "thread finish"; + case ATOMIC_UNINIT: return "uninitialized"; + case ATOMIC_READ: return "atomic read"; + case ATOMIC_WRITE: return "atomic write"; + case ATOMIC_RMW: return "atomic rmw"; + case ATOMIC_FENCE: return "fence"; + case ATOMIC_RMWR: return "atomic rmwr"; + case ATOMIC_RMWC: return "atomic rmwc"; + case ATOMIC_INIT: return "init atomic"; + case ATOMIC_LOCK: return "lock"; + case ATOMIC_UNLOCK: return "unlock"; + case ATOMIC_TRYLOCK: return "trylock"; + case ATOMIC_WAIT: return "wait"; + case ATOMIC_NOTIFY_ONE: return "notify one"; + case ATOMIC_NOTIFY_ALL: return "notify all"; + case ATOMIC_ANNOTATION: return "atomic annotation"; + default: return "unknown type"; + }; +} + +const char * ModelAction::get_mo_str() const +{ + switch (this->order) { + case std::memory_order_relaxed: return "relaxed"; + case std::memory_order_acquire: return "acquire"; + case std::memory_order_release: return "release"; + case std::memory_order_acq_rel: return "acq_rel"; + case std::memory_order_seq_cst: return "seq_cst"; + default: return "unknown"; + } +} + +/** @brief Print nicely-formatted info about this ModelAction */ +void ModelAction::print() const +{ + const char *type_str = get_type_str(), *mo_str = get_mo_str(); + + model_print("%-4d %-2d %-13s %7s %14p %-#18" PRIx64, + seq_number, id_to_int(tid), type_str, mo_str, location, get_return_value()); + if (is_read()) { + if (reads_from) + model_print(" %-3d", reads_from->get_seq_number()); + else if (reads_from_promise) { + int idx = reads_from_promise->get_index(); + if (idx >= 0) + model_print(" P%-2d", idx); + else + model_print(" P? "); + } else + model_print(" ? 
"); + } + if (cv) { + if (is_read()) + model_print(" "); + else + model_print(" "); + cv->print(); + } else + model_print("\n"); +} + +/** @brief Get a (likely) unique hash for this ModelAction */ +unsigned int ModelAction::hash() const +{ + unsigned int hash = (unsigned int)this->type; + hash ^= ((unsigned int)this->order) << 3; + hash ^= seq_number << 5; + hash ^= id_to_int(tid) << 6; + + if (is_read()) { + if (reads_from) + hash ^= reads_from->get_seq_number(); + else if (reads_from_promise) + hash ^= reads_from_promise->get_index(); + hash ^= get_reads_from_value(); + } + return hash; +} + +/** + * @brief Checks the NodeStack to see if a ModelAction is in our may-read-from set + * @param write The ModelAction to check for + * @return True if the ModelAction is found; false otherwise + */ +bool ModelAction::may_read_from(const ModelAction *write) const +{ + for (int i = 0; i < node->get_read_from_past_size(); i++) + if (node->get_read_from_past(i) == write) + return true; + return false; +} + +/** + * @brief Checks the NodeStack to see if a Promise is in our may-read-from set + * @param promise The Promise to check for + * @return True if the Promise is found; false otherwise + */ +bool ModelAction::may_read_from(const Promise *promise) const +{ + for (int i = 0; i < node->get_read_from_promise_size(); i++) + if (node->get_read_from_promise(i) == promise) + return true; + return false; +} + +/** + * Only valid for LOCK, TRY_LOCK, UNLOCK, and WAIT operations. + * @return The mutex operated on by this action, if any; otherwise NULL + */ +std::mutex * ModelAction::get_mutex() const +{ + if (is_trylock() || is_lock() || is_unlock()) + return (std::mutex *)get_location(); + else if (is_wait()) + return (std::mutex *)get_value(); + else + return NULL; +} diff --git a/action.h b/action.h new file mode 100644 index 0000000..ad3b828 --- /dev/null +++ b/action.h @@ -0,0 +1,251 @@ +/** @file action.h + * @brief Models actions taken by threads. + */ + +#ifndef __ACTION_H__ +#define __ACTION_H__ + +#include +#include + +#include "mymemory.h" +#include "memoryorder.h" +#include "modeltypes.h" + +/* Forward declarations */ +class ClockVector; +class Thread; +class Promise; + +namespace std { + class mutex; +} + +using std::memory_order; +using std::memory_order_relaxed; +using std::memory_order_acquire; +using std::memory_order_release; +using std::memory_order_acq_rel; +using std::memory_order_seq_cst; + +/** + * @brief A recognizable don't-care value for use in the ModelAction::value + * field + * + * Note that this value can be legitimately used by a program, and hence by + * iteself does not indicate no value. + */ +#define VALUE_NONE 0xdeadbeef + +/** + * @brief The "location" at which a fence occurs + * + * We need a non-zero memory location to associate with fences, since our hash + * tables don't handle NULL-pointer keys. HACK: Hopefully this doesn't collide + * with any legitimate memory locations. 
+ */ +#define FENCE_LOCATION ((void *)0x7) + +/** @brief Represents an action type, identifying one of several types of + * ModelAction */ +typedef enum action_type { + MODEL_FIXUP_RELSEQ, /**< Special ModelAction: finalize a release + * sequence */ + THREAD_CREATE, /**< A thread creation action */ + THREAD_START, /**< First action in each thread */ + THREAD_YIELD, /**< A thread yield action */ + THREAD_JOIN, /**< A thread join action */ + THREAD_FINISH, /**< A thread completion action */ + ATOMIC_UNINIT, /**< Represents an uninitialized atomic */ + ATOMIC_READ, /**< An atomic read action */ + ATOMIC_WRITE, /**< An atomic write action */ + ATOMIC_RMWR, /**< The read part of an atomic RMW action */ + ATOMIC_RMW, /**< The write part of an atomic RMW action */ + ATOMIC_RMWC, /**< Convert an atomic RMW action into a READ */ + ATOMIC_INIT, /**< Initialization of an atomic object (e.g., + * atomic_init()) */ + ATOMIC_FENCE, /**< A fence action */ + ATOMIC_LOCK, /**< A lock action */ + ATOMIC_TRYLOCK, /**< A trylock action */ + ATOMIC_UNLOCK, /**< An unlock action */ + ATOMIC_NOTIFY_ONE, /**< A notify_one action */ + ATOMIC_NOTIFY_ALL, /**< A notify all action */ + ATOMIC_WAIT, /**< A wait action */ + ATOMIC_ANNOTATION /**< An annotation action to pass information + to a trace analysis */ +} action_type_t; + +/* Forward declaration */ +class Node; +class ClockVector; + +/** + * @brief Represents a single atomic action + * + * A ModelAction is always allocated as non-snapshotting, because it is used in + * multiple executions during backtracking. Except for fake uninitialized + * (ATOMIC_UNINIT) ModelActions, each action is assigned a unique sequence + * number. + */ +class ModelAction { +public: + ModelAction(action_type_t type, memory_order order, void *loc, uint64_t value = VALUE_NONE, Thread *thread = NULL); + ~ModelAction(); + void print() const; + + thread_id_t get_tid() const { return tid; } + action_type get_type() const { return type; } + memory_order get_mo() const { return order; } + void * get_location() const { return location; } + modelclock_t get_seq_number() const { return seq_number; } + uint64_t get_value() const { return value; } + uint64_t get_reads_from_value() const; + uint64_t get_write_value() const; + uint64_t get_return_value() const; + const ModelAction * get_reads_from() const { return reads_from; } + Promise * get_reads_from_promise() const { return reads_from_promise; } + std::mutex * get_mutex() const; + + Node * get_node() const; + void set_node(Node *n) { node = n; } + + void set_read_from(const ModelAction *act); + void set_read_from_promise(Promise *promise); + + /** Store the most recent fence-release from the same thread + * @param fence The fence-release that occured prior to this */ + void set_last_fence_release(const ModelAction *fence) { last_fence_release = fence; } + /** @return The most recent fence-release from the same thread */ + const ModelAction * get_last_fence_release() const { return last_fence_release; } + + void copy_from_new(ModelAction *newaction); + void set_seq_number(modelclock_t num); + void set_try_lock(bool obtainedlock); + bool is_thread_start() const; + bool is_thread_join() const; + bool is_relseq_fixup() const; + bool is_mutex_op() const; + bool is_lock() const; + bool is_trylock() const; + bool is_unlock() const; + bool is_wait() const; + bool is_notify() const; + bool is_notify_one() const; + bool is_success_lock() const; + bool is_failed_trylock() const; + bool is_atomic_var() const; + bool is_uninitialized() const; + bool 
is_read() const; + bool is_write() const; + bool is_yield() const; + bool could_be_write() const; + bool is_rmwr() const; + bool is_rmwc() const; + bool is_rmw() const; + bool is_fence() const; + bool is_initialization() const; + bool is_annotation() const; + bool is_relaxed() const; + bool is_acquire() const; + bool is_release() const; + bool is_seqcst() const; + bool same_var(const ModelAction *act) const; + bool same_thread(const ModelAction *act) const; + bool is_conflicting_lock(const ModelAction *act) const; + bool could_synchronize_with(const ModelAction *act) const; + + Thread * get_thread_operand() const; + + void create_cv(const ModelAction *parent = NULL); + ClockVector * get_cv() const { return cv; } + bool synchronize_with(const ModelAction *act); + + bool has_synchronized_with(const ModelAction *act) const; + bool happens_before(const ModelAction *act) const; + + inline bool operator <(const ModelAction& act) const { + return get_seq_number() < act.get_seq_number(); + } + inline bool operator >(const ModelAction& act) const { + return get_seq_number() > act.get_seq_number(); + } + + void process_rmw(ModelAction * act); + void copy_typeandorder(ModelAction * act); + + void set_sleep_flag() { sleep_flag=true; } + bool get_sleep_flag() { return sleep_flag; } + unsigned int hash() const; + + bool equals(const ModelAction *x) const { return this == x; } + bool equals(const Promise *x) const { return false; } + + bool may_read_from(const ModelAction *write) const; + bool may_read_from(const Promise *promise) const; + MEMALLOC +private: + + const char * get_type_str() const; + const char * get_mo_str() const; + + /** @brief Type of action (read, write, RMW, fence, thread create, etc.) */ + action_type type; + + /** @brief The memory order for this operation. */ + memory_order order; + + /** @brief A pointer to the memory location for this action. */ + void *location; + + /** @brief The thread id that performed this action. */ + thread_id_t tid; + + /** @brief The value written (for write or RMW; undefined for read) */ + uint64_t value; + + /** + * @brief The store that this action reads from + * + * Only valid for reads + */ + const ModelAction *reads_from; + + /** + * @brief The promise that this action reads from + * + * Only valid for reads + */ + Promise *reads_from_promise; + + /** @brief The last fence release from the same thread */ + const ModelAction *last_fence_release; + + /** + * @brief A back reference to a Node in NodeStack + * + * Only set if this ModelAction is saved on the NodeStack. (A + * ModelAction can be thrown away before it ever enters the NodeStack.) + */ + Node *node; + + /** + * @brief The sequence number of this action + * + * Except for ATOMIC_UNINIT actions, this number should be unique and + * should represent the action's position in the execution order. + */ + modelclock_t seq_number; + + /** + * @brief The clock vector for this operation + * + * Technically, this is only needed for potentially synchronizing + * (e.g., non-relaxed) operations, but it is very handy to have these + * vectors for all operations. 
+ */ + ClockVector *cv; + + bool sleep_flag; +}; + +#endif /* __ACTION_H__ */ diff --git a/bugmessage.h b/bugmessage.h new file mode 100644 index 0000000..bd7d0b6 --- /dev/null +++ b/bugmessage.h @@ -0,0 +1,21 @@ +#ifndef __BUGMESSAGE_H__ +#define __BUGMESSAGE_H__ + +#include "common.h" +#include "mymemory.h" + +struct bug_message { + bug_message(const char *str) { + const char *fmt = " [BUG] %s\n"; + msg = (char *)snapshot_malloc(strlen(fmt) + strlen(str)); + sprintf(msg, fmt, str); + } + ~bug_message() { if (msg) snapshot_free(msg); } + + char *msg; + void print() { model_print("%s", msg); } + + SNAPSHOTALLOC +}; + +#endif /* __BUGMESSAGE_H__ */ diff --git a/clockvector.cc b/clockvector.cc new file mode 100644 index 0000000..5f068e9 --- /dev/null +++ b/clockvector.cc @@ -0,0 +1,101 @@ +#include +#include + +#include "action.h" +#include "clockvector.h" +#include "common.h" +#include "threads-model.h" + +/** + * Constructs a new ClockVector, given a parent ClockVector and a first + * ModelAction. This constructor can assign appropriate default settings if no + * parent and/or action is supplied. + * @param parent is the previous ClockVector to inherit (i.e., clock from the + * same thread or the parent that created this thread) + * @param act is an action with which to update the ClockVector + */ +ClockVector::ClockVector(ClockVector *parent, ModelAction *act) +{ + ASSERT(act); + num_threads = int_to_id(act->get_tid()) + 1; + if (parent && parent->num_threads > num_threads) + num_threads = parent->num_threads; + + clock = (modelclock_t *)snapshot_calloc(num_threads, sizeof(int)); + if (parent) + std::memcpy(clock, parent->clock, parent->num_threads * sizeof(modelclock_t)); + + clock[id_to_int(act->get_tid())] = act->get_seq_number(); +} + +/** @brief Destructor */ +ClockVector::~ClockVector() +{ + snapshot_free(clock); +} + +/** + * Merge a clock vector into this vector, using a pairwise comparison. The + * resulting vector length will be the maximum length of the two being merged. + * @param cv is the ClockVector being merged into this vector. + */ +bool ClockVector::merge(const ClockVector *cv) +{ + ASSERT(cv != NULL); + bool changed = false; + if (cv->num_threads > num_threads) { + clock = (modelclock_t *)snapshot_realloc(clock, cv->num_threads * sizeof(modelclock_t)); + for (int i = num_threads; i < cv->num_threads; i++) + clock[i] = 0; + num_threads = cv->num_threads; + } + + /* Element-wise maximum */ + for (int i = 0; i < cv->num_threads; i++) + if (cv->clock[i] > clock[i]) { + clock[i] = cv->clock[i]; + changed = true; + } + + return changed; +} + +/** + * Check whether this vector's thread has synchronized with another action's + * thread. This effectively checks the happens-before relation (or actually, + * happens after), but it's easier to compare two ModelAction events directly, + * using ModelAction::happens_before. + * + * @see ModelAction::happens_before + * + * @return true if this ClockVector's thread has synchronized with act's + * thread, false otherwise. That is, this function returns: + *
act <= cv[act->tid] + */ +bool ClockVector::synchronized_since(const ModelAction *act) const +{ + int i = id_to_int(act->get_tid()); + + if (i < num_threads) + return act->get_seq_number() <= clock[i]; + return false; +} + +/** Gets the clock corresponding to a given thread id from the clock vector. */ +modelclock_t ClockVector::getClock(thread_id_t thread) { + int threadid = id_to_int(thread); + + if (threadid < num_threads) + return clock[threadid]; + else + return 0; +} + +/** @brief Formats and prints this ClockVector's data. */ +void ClockVector::print() const +{ + int i; + model_print("("); + for (i = 0; i < num_threads; i++) + model_print("%2u%s", clock[i], (i == num_threads - 1) ? ")\n" : ", "); +} diff --git a/clockvector.h b/clockvector.h new file mode 100644 index 0000000..e19a211 --- /dev/null +++ b/clockvector.h @@ -0,0 +1,33 @@ +/** @file clockvector.h + * @brief Implements a clock vector. + */ + +#ifndef __CLOCKVECTOR_H__ +#define __CLOCKVECTOR_H__ + +#include "mymemory.h" +#include "modeltypes.h" + +/* Forward declaration */ +class ModelAction; + +class ClockVector { +public: + ClockVector(ClockVector *parent = NULL, ModelAction *act = NULL); + ~ClockVector(); + bool merge(const ClockVector *cv); + bool synchronized_since(const ModelAction *act) const; + + void print() const; + modelclock_t getClock(thread_id_t thread); + + SNAPSHOTALLOC +private: + /** @brief Holds the actual clock data, as an array. */ + modelclock_t *clock; + + /** @brief The number of threads recorded in clock (i.e., its length). */ + int num_threads; +}; + +#endif /* __CLOCKVECTOR_H__ */ diff --git a/cmodelint.cc b/cmodelint.cc new file mode 100644 index 0000000..1632581 --- /dev/null +++ b/cmodelint.cc @@ -0,0 +1,43 @@ +#include "model.h" +#include "action.h" +#include "cmodelint.h" +#include "threads-model.h" + +/** Performs a read action.*/ +uint64_t model_read_action(void * obj, memory_order ord) { + return model->switch_to_master(new ModelAction(ATOMIC_READ, ord, obj)); +} + +/** Performs a write action.*/ +void model_write_action(void * obj, memory_order ord, uint64_t val) { + model->switch_to_master(new ModelAction(ATOMIC_WRITE, ord, obj, val)); +} + +/** Performs an init action. */ +void model_init_action(void * obj, uint64_t val) { + model->switch_to_master(new ModelAction(ATOMIC_INIT, memory_order_relaxed, obj, val)); +} + +/** + * Performs the read part of a RMW action. The next action must either be the + * write part of the RMW action or an explicit close out of the RMW action w/o + * a write. + */ +uint64_t model_rmwr_action(void *obj, memory_order ord) { + return model->switch_to_master(new ModelAction(ATOMIC_RMWR, ord, obj)); +} + +/** Performs the write part of a RMW action. */ +void model_rmw_action(void *obj, memory_order ord, uint64_t val) { + model->switch_to_master(new ModelAction(ATOMIC_RMW, ord, obj, val)); +} + +/** Closes out a RMW action without doing a write. */ +void model_rmwc_action(void *obj, memory_order ord) { + model->switch_to_master(new ModelAction(ATOMIC_RMWC, ord, obj)); +} + +/** Issues a fence operation. 
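+ *
+ * These C entry points are meant to be called by instrumented code in place
+ * of plain atomic operations. A hedged sketch of an instrumented seq_cst
+ * store followed by a fence (x is a hypothetical shared variable):
+ * @code
+ *   model_write_action(&x, memory_order_seq_cst, 1);
+ *   model_fence_action(memory_order_seq_cst);
+ * @endcode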
*/ +void model_fence_action(memory_order ord) { + model->switch_to_master(new ModelAction(ATOMIC_FENCE, ord, FENCE_LOCATION)); +} diff --git a/common.cc b/common.cc new file mode 100644 index 0000000..26f6d5d --- /dev/null +++ b/common.cc @@ -0,0 +1,173 @@ +#include +#include +#include +#include +#include +#include + +#include + +#include "common.h" +#include "model.h" +#include "stacktrace.h" +#include "output.h" + +#define MAX_TRACE_LEN 100 + +/** @brief Model-checker output file descriptor; default to stdout until redirected */ +int model_out = STDOUT_FILENO; + +#define CONFIG_STACKTRACE +/** Print a backtrace of the current program state. */ +void print_trace(void) +{ +#ifdef CONFIG_STACKTRACE + print_stacktrace(model_out); +#else + void *array[MAX_TRACE_LEN]; + char **strings; + int size, i; + + size = backtrace(array, MAX_TRACE_LEN); + strings = backtrace_symbols(array, size); + + model_print("\nDumping stack trace (%d frames):\n", size); + + for (i = 0; i < size; i++) + model_print("\t%s\n", strings[i]); + + free(strings); +#endif /* CONFIG_STACKTRACE */ +} + +void assert_hook(void) +{ + model_print("Add breakpoint to line %u in file %s.\n", __LINE__, __FILE__); +} + +void model_assert(bool expr, const char *file, int line) +{ + if (!expr) { + char msg[100]; + sprintf(msg, "Program has hit assertion in file %s at line %d\n", + file, line); + model->assert_user_bug(msg); + } +} + +#ifndef CONFIG_DEBUG + +static int fd_user_out; /**< @brief File descriptor from which to read user program output */ + +/** + * @brief Setup output redirecting + * + * Redirects user program's stdout to a pipe so that we can dump it + * selectively, when displaying bugs, etc. + * Also connects a file descriptor 'model_out' directly to stdout, for printing + * data when needed. + * + * The model-checker can selectively choose to print/hide the user program + * output. + * @see clear_program_output + * @see print_program_output + * + * Note that the user program's pipe has limited memory, so if a program will + * output much data, we will need to buffer it in user-space during execution. + * This also means that if ModelChecker decides not to print an execution, it + * should promptly clear the pipe. + * + * This function should only be called once. + */ +void redirect_output() +{ + /* Save stdout for later use */ + model_out = dup(STDOUT_FILENO); + if (model_out < 0) { + perror("dup"); + exit(EXIT_FAILURE); + } + + /* Redirect program output to a pipe */ + int pipefd[2]; + if (pipe(pipefd) < 0) { + perror("pipe"); + exit(EXIT_FAILURE); + } + if (dup2(pipefd[1], STDOUT_FILENO) < 0) { + perror("dup2"); + exit(EXIT_FAILURE); + } + close(pipefd[1]); + + /* Save the "read" side of the pipe for use later */ + if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) < 0) { + perror("fcntl"); + exit(EXIT_FAILURE); + } + fd_user_out = pipefd[0]; +} + +/** + * @brief Wrapper for reading data to buffer + * + * Besides a simple read, this handles the subtleties of EOF and nonblocking + * input (if fd is O_NONBLOCK). + * + * @param fd The file descriptor to read. + * @param buf Buffer to read to. + * @param maxlen Maximum data to read to buffer + * @return The length of data read. 
If zero, then we hit EOF or ran out of data + * (non-blocking) + */ +static ssize_t read_to_buf(int fd, char *buf, size_t maxlen) +{ + ssize_t ret = read(fd, buf, maxlen); + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } else { + perror("read"); + exit(EXIT_FAILURE); + } + } + return ret; +} + +/** @brief Dump any pending program output without printing */ +void clear_program_output() +{ + fflush(stdout); + char buf[200]; + while (read_to_buf(fd_user_out, buf, sizeof(buf))); +} + +/** @brief Print out any pending program output */ +void print_program_output() +{ + char buf[200]; + + model_print("---- BEGIN PROGRAM OUTPUT ----\n"); + + /* Gather all program output */ + fflush(stdout); + + /* Read program output pipe and write to (real) stdout */ + ssize_t ret; + while (1) { + ret = read_to_buf(fd_user_out, buf, sizeof(buf)); + if (!ret) + break; + while (ret > 0) { + ssize_t res = write(model_out, buf, ret); + if (res < 0) { + perror("write"); + exit(EXIT_FAILURE); + } + ret -= res; + } + } + + model_print("---- END PROGRAM OUTPUT ----\n"); +} +#endif /* ! CONFIG_DEBUG */ diff --git a/common.h b/common.h new file mode 100644 index 0000000..62c16f4 --- /dev/null +++ b/common.h @@ -0,0 +1,45 @@ +/** @file common.h + * @brief General purpose macros. + */ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include +#include "config.h" + +extern int model_out; + +#define model_print(fmt, ...) do { dprintf(model_out, fmt, ##__VA_ARGS__); } while (0) + +#ifdef CONFIG_DEBUG +#define DEBUG(fmt, ...) do { model_print("*** %15s:%-4d %25s() *** " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__); } while (0) +#define DBG() DEBUG("\n") +#define DBG_ENABLED() (1) +#else +#define DEBUG(fmt, ...) +#define DBG() +#define DBG_ENABLED() (0) +#endif + +void assert_hook(void); + +#ifdef CONFIG_ASSERT +#define ASSERT(expr) \ +do { \ + if (!(expr)) { \ + fprintf(stderr, "Error: assertion failed in %s at line %d\n", __FILE__, __LINE__); \ + /* print_trace(); // Trace printing may cause dynamic memory allocation */ \ + assert_hook(); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) +#else +#define ASSERT(expr) \ + do { } while (0) +#endif /* CONFIG_ASSERT */ + +#define error_msg(...) 
fprintf(stderr, "Error: " __VA_ARGS__) + +void print_trace(void); +#endif /* __COMMON_H__ */ diff --git a/common.mk b/common.mk new file mode 100644 index 0000000..bc068df --- /dev/null +++ b/common.mk @@ -0,0 +1,16 @@ +# A few common Makefile items + +CC := gcc +CXX := g++ + +UNAME := $(shell uname) + +LIB_NAME := model +LIB_SO := lib$(LIB_NAME).so + +CPPFLAGS += -Wall -g -O3 + +# Mac OSX options +ifeq ($(UNAME), Darwin) +CPPFLAGS += -D_XOPEN_SOURCE -DMAC +endif diff --git a/conditionvariable.cc b/conditionvariable.cc new file mode 100644 index 0000000..75af879 --- /dev/null +++ b/conditionvariable.cc @@ -0,0 +1,30 @@ +#include +#include "model.h" +#include +#include "action.h" + +namespace std { + +condition_variable::condition_variable() { + +} + +condition_variable::~condition_variable() { + +} + +void condition_variable::notify_one() { + model->switch_to_master(new ModelAction(ATOMIC_NOTIFY_ONE, std::memory_order_seq_cst, this)); +} + +void condition_variable::notify_all() { + model->switch_to_master(new ModelAction(ATOMIC_NOTIFY_ALL, std::memory_order_seq_cst, this)); +} + +void condition_variable::wait(mutex& lock) { + model->switch_to_master(new ModelAction(ATOMIC_WAIT, std::memory_order_seq_cst, this, (uint64_t) &lock)); + //relock as a second action + lock.lock(); +} +} + diff --git a/config.h b/config.h new file mode 100644 index 0000000..891dfd7 --- /dev/null +++ b/config.h @@ -0,0 +1,57 @@ +/** @file config.h + * @brief Configuration file. + */ + +#ifndef CONFIG_H +#define CONFIG_H + +/** Turn on debugging. */ +/* #ifndef CONFIG_DEBUG + #define CONFIG_DEBUG + #endif + + #ifndef CONFIG_ASSERT + #define CONFIG_ASSERT + #endif +*/ + +/** Turn on support for dumping cyclegraphs as dot files at each + * printed summary.*/ +#define SUPPORT_MOD_ORDER_DUMP 0 + +/** Do we have a 48 bit virtual address (64 bit machine) or 32 bit addresses. + * Set to 1 for 48-bit, 0 for 32-bit. */ +#ifndef BIT48 +#ifdef _LP64 +#define BIT48 1 +#else +#define BIT48 0 +#endif +#endif /* BIT48 */ + +/** Snapshotting configurables */ + +/** + * If USE_MPROTECT_SNAPSHOT=2, then snapshot by tuned mmap() algorithm + * If USE_MPROTECT_SNAPSHOT=1, then snapshot by using mmap() and mprotect() + * If USE_MPROTECT_SNAPSHOT=0, then snapshot by using fork() */ +#define USE_MPROTECT_SNAPSHOT 2 + +/** Size of signal stack */ +#define SIGSTACKSIZE 65536 + +/** Page size configuration */ +#define PAGESIZE 4096 + +/** Thread parameters */ + +/* Size of stack to allocate for a thread. */ +#define STACK_SIZE (1024 * 1024) + +/** How many shadow tables of memory to preallocate for data race detector. */ +#define SHADOWBASETABLES 4 + +/** Enable debugging assertions (via ASSERT()) */ +#define CONFIG_ASSERT + +#endif diff --git a/context.cc b/context.cc new file mode 100644 index 0000000..b5ae0ba --- /dev/null +++ b/context.cc @@ -0,0 +1,27 @@ +#include "context.h" + +#ifdef MAC + +int model_swapcontext(ucontext_t *oucp, ucontext_t *ucp) +{ + /* + * Mac OSX swapcontext() clobbers some registers, so use a hand-rolled + * version with {get,set}context(). 
We can avoid the same problem + * (where optimizations can break the following code) because we don't + * statically link with the C library + */ + + /* volatile, so that 'i' doesn't get promoted to a register */ + volatile int i = 0; + + getcontext(oucp); + + if (i == 0) { + i = 1; + setcontext(ucp); + } + + return 0; +} + +#endif /* MAC */ diff --git a/context.h b/context.h new file mode 100644 index 0000000..ea32d2f --- /dev/null +++ b/context.h @@ -0,0 +1,24 @@ +/** + * @file context.h + * @brief ucontext header, since Mac OSX swapcontext() is broken + */ + +#ifndef __CONTEXT_H__ +#define __CONTEXT_H__ + +#include + +#ifdef MAC + +int model_swapcontext(ucontext_t *oucp, ucontext_t *ucp); + +#else /* !MAC */ + +static inline int model_swapcontext(ucontext_t *oucp, ucontext_t *ucp) +{ + return swapcontext(oucp, ucp); +} + +#endif /* !MAC */ + +#endif /* __CONTEXT_H__ */ diff --git a/cyclegraph.cc b/cyclegraph.cc new file mode 100644 index 0000000..7e5e956 --- /dev/null +++ b/cyclegraph.cc @@ -0,0 +1,659 @@ +#include "cyclegraph.h" +#include "action.h" +#include "common.h" +#include "promise.h" +#include "threads-model.h" + +/** Initializes a CycleGraph object. */ +CycleGraph::CycleGraph() : + discovered(new HashTable(16)), + queue(new ModelVector()), + hasCycles(false), + oldCycles(false) +{ +} + +/** CycleGraph destructor */ +CycleGraph::~CycleGraph() +{ + delete queue; + delete discovered; +} + +/** + * Add a CycleNode to the graph, corresponding to a store ModelAction + * @param act The write action that should be added + * @param node The CycleNode that corresponds to the store + */ +void CycleGraph::putNode(const ModelAction *act, CycleNode *node) +{ + actionToNode.put(act, node); +#if SUPPORT_MOD_ORDER_DUMP + nodeList.push_back(node); +#endif +} + +/** + * Add a CycleNode to the graph, corresponding to a Promise + * @param promise The Promise that should be added + * @param node The CycleNode that corresponds to the Promise + */ +void CycleGraph::putNode(const Promise *promise, CycleNode *node) +{ + promiseToNode.put(promise, node); +#if SUPPORT_MOD_ORDER_DUMP + nodeList.push_back(node); +#endif +} + +/** + * @brief Remove the Promise node from the graph + * @param promise The promise to remove from the graph + */ +void CycleGraph::erasePromiseNode(const Promise *promise) +{ + promiseToNode.put(promise, NULL); +#if SUPPORT_MOD_ORDER_DUMP + /* Remove the promise node from nodeList */ + CycleNode *node = getNode_noCreate(promise); + for (unsigned int i = 0; i < nodeList.size(); ) + if (nodeList[i] == node) + nodeList.erase(nodeList.begin() + i); + else + i++; +#endif +} + +/** @return The corresponding CycleNode, if exists; otherwise NULL */ +CycleNode * CycleGraph::getNode_noCreate(const ModelAction *act) const +{ + return actionToNode.get(act); +} + +/** @return The corresponding CycleNode, if exists; otherwise NULL */ +CycleNode * CycleGraph::getNode_noCreate(const Promise *promise) const +{ + return promiseToNode.get(promise); +} + +/** + * @brief Returns the CycleNode corresponding to a given ModelAction + * + * Gets (or creates, if none exist) a CycleNode corresponding to a ModelAction + * + * @param action The ModelAction to find a node for + * @return The CycleNode paired with this action + */ +CycleNode * CycleGraph::getNode(const ModelAction *action) +{ + CycleNode *node = getNode_noCreate(action); + if (node == NULL) { + node = new CycleNode(action); + putNode(action, node); + } + return node; +} + +/** + * @brief Returns a CycleNode corresponding to a promise + * + * Gets 
(or creates, if none exist) a CycleNode corresponding to a promised + * value. + * + * @param promise The Promise generated by a reader + * @return The CycleNode corresponding to the Promise + */ +CycleNode * CycleGraph::getNode(const Promise *promise) +{ + CycleNode *node = getNode_noCreate(promise); + if (node == NULL) { + node = new CycleNode(promise); + putNode(promise, node); + } + return node; +} + +/** + * Resolve/satisfy a Promise with a particular store ModelAction, taking care + * of the CycleGraph cleanups, including merging any necessary CycleNodes. + * + * @param promise The Promise to resolve + * @param writer The store that will resolve this Promise + * @return false if the resolution results in a cycle (or fails in some other + * way); true otherwise + */ +bool CycleGraph::resolvePromise(const Promise *promise, ModelAction *writer) +{ + CycleNode *promise_node = promiseToNode.get(promise); + CycleNode *w_node = actionToNode.get(writer); + ASSERT(promise_node); + + if (w_node) + return mergeNodes(w_node, promise_node); + /* No existing write-node; just convert the promise-node */ + promise_node->resolvePromise(writer); + erasePromiseNode(promise_node->getPromise()); + putNode(writer, promise_node); + return true; +} + +/** + * @brief Merge two CycleNodes that represent the same write + * + * Note that this operation cannot be rolled back. + * + * @param w_node The write ModelAction node with which to merge + * @param p_node The Promise node to merge. Will be destroyed after this + * function. + * + * @return false if the merge cannot succeed; true otherwise + */ +bool CycleGraph::mergeNodes(CycleNode *w_node, CycleNode *p_node) +{ + ASSERT(!w_node->is_promise()); + ASSERT(p_node->is_promise()); + + const Promise *promise = p_node->getPromise(); + if (!promise->is_compatible(w_node->getAction()) || + !promise->same_value(w_node->getAction())) + return false; + + /* Transfer the RMW */ + CycleNode *promise_rmw = p_node->getRMW(); + if (promise_rmw && promise_rmw != w_node->getRMW() && w_node->setRMW(promise_rmw)) + return false; + + /* Transfer back edges to w_node */ + while (p_node->getNumBackEdges() > 0) { + CycleNode *back = p_node->removeBackEdge(); + if (back == w_node) + continue; + addNodeEdge(back, w_node); + if (hasCycles) + return false; + } + + /* Transfer forward edges to w_node */ + while (p_node->getNumEdges() > 0) { + CycleNode *forward = p_node->removeEdge(); + if (forward == w_node) + continue; + addNodeEdge(w_node, forward); + if (hasCycles) + return false; + } + + erasePromiseNode(promise); + /* Not deleting p_node, to maintain consistency if mergeNodes() fails */ + + return !hasCycles; +} + +/** + * Adds an edge between two CycleNodes. 
+ * @param fromnode The edge comes from this CycleNode + * @param tonode The edge points to this CycleNode + * @return True, if new edge(s) are added; otherwise false + */ +bool CycleGraph::addNodeEdge(CycleNode *fromnode, CycleNode *tonode) +{ + if (fromnode->addEdge(tonode)) { + rollbackvector.push_back(fromnode); + if (!hasCycles) + hasCycles = checkReachable(tonode, fromnode); + } else + return false; /* No new edge */ + + /* + * If the fromnode has a rmwnode that is not the tonode, we should + * follow its RMW chain to add an edge at the end, unless we encounter + * tonode along the way + */ + CycleNode *rmwnode = fromnode->getRMW(); + if (rmwnode) { + while (rmwnode != tonode && rmwnode->getRMW()) + rmwnode = rmwnode->getRMW(); + + if (rmwnode != tonode) { + if (rmwnode->addEdge(tonode)) { + if (!hasCycles) + hasCycles = checkReachable(tonode, rmwnode); + + rollbackvector.push_back(rmwnode); + } + } + } + return true; +} + +/** + * @brief Add an edge between a write and the RMW which reads from it + * + * Handles special case of a RMW action, where the ModelAction rmw reads from + * the ModelAction/Promise from. The key differences are: + * -# No write can occur in between the @a rmw and @a from actions. + * -# Only one RMW action can read from a given write. + * + * @param from The edge comes from this ModelAction/Promise + * @param rmw The edge points to this ModelAction; this action must read from + * the ModelAction/Promise from + */ +template +void CycleGraph::addRMWEdge(const T *from, const ModelAction *rmw) +{ + ASSERT(from); + ASSERT(rmw); + + CycleNode *fromnode = getNode(from); + CycleNode *rmwnode = getNode(rmw); + + /* We assume that this RMW has no RMW reading from it yet */ + ASSERT(!rmwnode->getRMW()); + + /* Two RMW actions cannot read from the same write. */ + if (fromnode->setRMW(rmwnode)) + hasCycles = true; + else + rmwrollbackvector.push_back(fromnode); + + /* Transfer all outgoing edges from the from node to the rmw node */ + /* This process should not add a cycle because either: + * (1) The rmw should not have any incoming edges yet if it is the + * new node or + * (2) the fromnode is the new node and therefore it should not + * have any outgoing edges. + */ + for (unsigned int i = 0; i < fromnode->getNumEdges(); i++) { + CycleNode *tonode = fromnode->getEdge(i); + if (tonode != rmwnode) { + if (rmwnode->addEdge(tonode)) + rollbackvector.push_back(rmwnode); + } + } + + addNodeEdge(fromnode, rmwnode); +} +/* Instantiate two forms of CycleGraph::addRMWEdge */ +template void CycleGraph::addRMWEdge(const ModelAction *from, const ModelAction *rmw); +template void CycleGraph::addRMWEdge(const Promise *from, const ModelAction *rmw); + +/** + * @brief Adds an edge between objects + * + * This function will add an edge between any two objects which can be + * associated with a CycleNode. That is, if they have a CycleGraph::getNode + * implementation. + * + * The object to is ordered after the object from. 
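+ *
+ * A hedged usage sketch (graph, w1 and w2 are hypothetical; w1 and w2 are
+ * stores to the same location, with w1 first in modification order):
+ * @code
+ *   graph->startChanges();
+ *   graph->addEdge(w1, w2);          // order w2 after w1
+ *   if (graph->checkForCycles())
+ *       graph->rollbackChanges();    // ordering is inconsistent; undo
+ *   else
+ *       graph->commitChanges();
+ * @endcode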
+ * + * @param to The edge points to this object, of type T + * @param from The edge comes from this object, of type U + * @return True, if new edge(s) are added; otherwise false + */ +template +bool CycleGraph::addEdge(const T *from, const U *to) +{ + ASSERT(from); + ASSERT(to); + + CycleNode *fromnode = getNode(from); + CycleNode *tonode = getNode(to); + + return addNodeEdge(fromnode, tonode); +} +/* Instantiate four forms of CycleGraph::addEdge */ +template bool CycleGraph::addEdge(const ModelAction *from, const ModelAction *to); +template bool CycleGraph::addEdge(const ModelAction *from, const Promise *to); +template bool CycleGraph::addEdge(const Promise *from, const ModelAction *to); +template bool CycleGraph::addEdge(const Promise *from, const Promise *to); + +#if SUPPORT_MOD_ORDER_DUMP + +static void print_node(FILE *file, const CycleNode *node, int label) +{ + if (node->is_promise()) { + const Promise *promise = node->getPromise(); + int idx = promise->get_index(); + fprintf(file, "P%u", idx); + if (label) { + int first = 1; + fprintf(file, " [label=\"P%d, T", idx); + for (unsigned int i = 0 ; i < promise->max_available_thread_idx(); i++) + if (promise->thread_is_available(int_to_id(i))) { + fprintf(file, "%s%u", first ? "": ",", i); + first = 0; + } + fprintf(file, "\"]"); + } + } else { + const ModelAction *act = node->getAction(); + modelclock_t idx = act->get_seq_number(); + fprintf(file, "N%u", idx); + if (label) + fprintf(file, " [label=\"N%u, T%u\"]", idx, act->get_tid()); + } +} + +static void print_edge(FILE *file, const CycleNode *from, const CycleNode *to, const char *prop) +{ + print_node(file, from, 0); + fprintf(file, " -> "); + print_node(file, to, 0); + if (prop && strlen(prop)) + fprintf(file, " [%s]", prop); + fprintf(file, ";\n"); +} + +void CycleGraph::dot_print_node(FILE *file, const ModelAction *act) +{ + print_node(file, getNode(act), 1); +} + +template +void CycleGraph::dot_print_edge(FILE *file, const T *from, const U *to, const char *prop) +{ + CycleNode *fromnode = getNode(from); + CycleNode *tonode = getNode(to); + + print_edge(file, fromnode, tonode, prop); +} +/* Instantiate two forms of CycleGraph::dot_print_edge */ +template void CycleGraph::dot_print_edge(FILE *file, const Promise *from, const ModelAction *to, const char *prop); +template void CycleGraph::dot_print_edge(FILE *file, const ModelAction *from, const ModelAction *to, const char *prop); + +void CycleGraph::dumpNodes(FILE *file) const +{ + for (unsigned int i = 0; i < nodeList.size(); i++) { + CycleNode *n = nodeList[i]; + print_node(file, n, 1); + fprintf(file, ";\n"); + if (n->getRMW()) + print_edge(file, n, n->getRMW(), "style=dotted"); + for (unsigned int j = 0; j < n->getNumEdges(); j++) + print_edge(file, n, n->getEdge(j), NULL); + } +} + +void CycleGraph::dumpGraphToFile(const char *filename) const +{ + char buffer[200]; + sprintf(buffer, "%s.dot", filename); + FILE *file = fopen(buffer, "w"); + fprintf(file, "digraph %s {\n", filename); + dumpNodes(file); + fprintf(file, "}\n"); + fclose(file); +} +#endif + +/** + * Checks whether one CycleNode can reach another. 
+ * @param from The CycleNode from which to begin exploration + * @param to The CycleNode to reach + * @return True, @a from can reach @a to; otherwise, false + */ +bool CycleGraph::checkReachable(const CycleNode *from, const CycleNode *to) const +{ + discovered->reset(); + queue->clear(); + queue->push_back(from); + discovered->put(from, from); + while (!queue->empty()) { + const CycleNode *node = queue->back(); + queue->pop_back(); + if (node == to) + return true; + for (unsigned int i = 0; i < node->getNumEdges(); i++) { + CycleNode *next = node->getEdge(i); + if (!discovered->contains(next)) { + discovered->put(next, next); + queue->push_back(next); + } + } + } + return false; +} + +/** + * Checks whether one ModelAction/Promise can reach another ModelAction/Promise + * @param from The ModelAction or Promise from which to begin exploration + * @param to The ModelAction or Promise to reach + * @return True, @a from can reach @a to; otherwise, false + */ +template +bool CycleGraph::checkReachable(const T *from, const U *to) const +{ + CycleNode *fromnode = getNode_noCreate(from); + CycleNode *tonode = getNode_noCreate(to); + + if (!fromnode || !tonode) + return false; + + return checkReachable(fromnode, tonode); +} +/* Instantiate four forms of CycleGraph::checkReachable */ +template bool CycleGraph::checkReachable(const ModelAction *from, + const ModelAction *to) const; +template bool CycleGraph::checkReachable(const ModelAction *from, + const Promise *to) const; +template bool CycleGraph::checkReachable(const Promise *from, + const ModelAction *to) const; +template bool CycleGraph::checkReachable(const Promise *from, + const Promise *to) const; + +/** @return True, if the promise has failed; false otherwise */ +bool CycleGraph::checkPromise(const ModelAction *fromact, Promise *promise) const +{ + discovered->reset(); + queue->clear(); + CycleNode *from = actionToNode.get(fromact); + + queue->push_back(from); + discovered->put(from, from); + while (!queue->empty()) { + const CycleNode *node = queue->back(); + queue->pop_back(); + + if (node->getPromise() == promise) + return true; + if (!node->is_promise() && + promise->eliminate_thread(node->getAction()->get_tid())) + return true; + + for (unsigned int i = 0; i < node->getNumEdges(); i++) { + CycleNode *next = node->getEdge(i); + if (!discovered->contains(next)) { + discovered->put(next, next); + queue->push_back(next); + } + } + } + return false; +} + +/** @brief Begin a new sequence of graph additions which can be rolled back */ +void CycleGraph::startChanges() +{ + ASSERT(rollbackvector.empty()); + ASSERT(rmwrollbackvector.empty()); + ASSERT(oldCycles == hasCycles); +} + +/** Commit changes to the cyclegraph. */ +void CycleGraph::commitChanges() +{ + rollbackvector.clear(); + rmwrollbackvector.clear(); + oldCycles = hasCycles; +} + +/** Rollback changes to the previous commit. */ +void CycleGraph::rollbackChanges() +{ + for (unsigned int i = 0; i < rollbackvector.size(); i++) + rollbackvector[i]->removeEdge(); + + for (unsigned int i = 0; i < rmwrollbackvector.size(); i++) + rmwrollbackvector[i]->clearRMW(); + + hasCycles = oldCycles; + rollbackvector.clear(); + rmwrollbackvector.clear(); +} + +/** @returns whether a CycleGraph contains cycles. 
*/ +bool CycleGraph::checkForCycles() const +{ + return hasCycles; +} + +/** + * @brief Constructor for a CycleNode + * @param act The ModelAction for this node + */ +CycleNode::CycleNode(const ModelAction *act) : + action(act), + promise(NULL), + hasRMW(NULL) +{ +} + +/** + * @brief Constructor for a Promise CycleNode + * @param promise The Promise which was generated + */ +CycleNode::CycleNode(const Promise *promise) : + action(NULL), + promise(promise), + hasRMW(NULL) +{ +} + +/** + * @param i The index of the edge to return + * @returns The CycleNode edge indexed by i + */ +CycleNode * CycleNode::getEdge(unsigned int i) const +{ + return edges[i]; +} + +/** @returns The number of edges leaving this CycleNode */ +unsigned int CycleNode::getNumEdges() const +{ + return edges.size(); +} + +/** + * @param i The index of the back edge to return + * @returns The CycleNode back-edge indexed by i + */ +CycleNode * CycleNode::getBackEdge(unsigned int i) const +{ + return back_edges[i]; +} + +/** @returns The number of edges entering this CycleNode */ +unsigned int CycleNode::getNumBackEdges() const +{ + return back_edges.size(); +} + +/** + * @brief Remove an element from a vector + * @param v The vector + * @param n The element to remove + * @return True if the element was found; false otherwise + */ +template +static bool vector_remove_node(SnapVector& v, const T n) +{ + for (unsigned int i = 0; i < v.size(); i++) { + if (v[i] == n) { + v.erase(v.begin() + i); + return true; + } + } + return false; +} + +/** + * @brief Remove a (forward) edge from this CycleNode + * @return The CycleNode which was popped, if one exists; otherwise NULL + */ +CycleNode * CycleNode::removeEdge() +{ + if (edges.empty()) + return NULL; + + CycleNode *ret = edges.back(); + edges.pop_back(); + vector_remove_node(ret->back_edges, this); + return ret; +} + +/** + * @brief Remove a (back) edge from this CycleNode + * @return The CycleNode which was popped, if one exists; otherwise NULL + */ +CycleNode * CycleNode::removeBackEdge() +{ + if (back_edges.empty()) + return NULL; + + CycleNode *ret = back_edges.back(); + back_edges.pop_back(); + vector_remove_node(ret->edges, this); + return ret; +} + +/** + * Adds an edge from this CycleNode to another CycleNode. + * @param node The node to which we add a directed edge + * @return True if this edge is a new edge; false otherwise + */ +bool CycleNode::addEdge(CycleNode *node) +{ + for (unsigned int i = 0; i < edges.size(); i++) + if (edges[i] == node) + return false; + edges.push_back(node); + node->back_edges.push_back(this); + return true; +} + +/** @returns the RMW CycleNode that reads from the current CycleNode */ +CycleNode * CycleNode::getRMW() const +{ + return hasRMW; +} + +/** + * Set a RMW action node that reads from the current CycleNode. + * @param node The RMW that reads from the current node + * @return True, if this node already was read by another RMW; false otherwise + * @see CycleGraph::addRMWEdge + */ +bool CycleNode::setRMW(CycleNode *node) +{ + if (hasRMW != NULL) + return true; + hasRMW = node; + return false; +} + +/** + * Convert a Promise CycleNode into a concrete-valued CycleNode. Should only be + * used when there's no existing ModelAction CycleNode for this write. 
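+ *
+ * (CycleGraph::resolvePromise() falls back to this in-place conversion only
+ * when the resolving writer has no CycleNode of its own; otherwise the two
+ * nodes are combined via mergeNodes().)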
+ * + * @param writer The ModelAction which wrote the future value represented by + * this CycleNode + */ +void CycleNode::resolvePromise(const ModelAction *writer) +{ + ASSERT(is_promise()); + ASSERT(promise->is_compatible(writer)); + action = writer; + promise = NULL; + ASSERT(!is_promise()); +} diff --git a/cyclegraph.h b/cyclegraph.h new file mode 100644 index 0000000..7e7d180 --- /dev/null +++ b/cyclegraph.h @@ -0,0 +1,136 @@ +/** + * @file cyclegraph.h + * @brief Data structure to track ordering constraints on modification order + * + * Used to determine whether a total order exists that satisfies the ordering + * constraints. + */ + +#ifndef __CYCLEGRAPH_H__ +#define __CYCLEGRAPH_H__ + +#include +#include + +#include "hashtable.h" +#include "config.h" +#include "mymemory.h" +#include "stl-model.h" + +class Promise; +class CycleNode; +class ModelAction; + +/** @brief A graph of Model Actions for tracking cycles. */ +class CycleGraph { + public: + CycleGraph(); + ~CycleGraph(); + + template + bool addEdge(const T *from, const U *to); + + template + void addRMWEdge(const T *from, const ModelAction *rmw); + + bool checkForCycles() const; + bool checkPromise(const ModelAction *from, Promise *p) const; + + template + bool checkReachable(const T *from, const U *to) const; + + void startChanges(); + void commitChanges(); + void rollbackChanges(); +#if SUPPORT_MOD_ORDER_DUMP + void dumpNodes(FILE *file) const; + void dumpGraphToFile(const char *filename) const; + + void dot_print_node(FILE *file, const ModelAction *act); + template + void dot_print_edge(FILE *file, const T *from, const U *to, const char *prop); +#endif + + bool resolvePromise(const Promise *promise, ModelAction *writer); + + SNAPSHOTALLOC + private: + bool addNodeEdge(CycleNode *fromnode, CycleNode *tonode); + void putNode(const ModelAction *act, CycleNode *node); + void putNode(const Promise *promise, CycleNode *node); + void erasePromiseNode(const Promise *promise); + CycleNode * getNode(const ModelAction *act); + CycleNode * getNode(const Promise *promise); + CycleNode * getNode_noCreate(const ModelAction *act) const; + CycleNode * getNode_noCreate(const Promise *promise) const; + bool mergeNodes(CycleNode *node1, CycleNode *node2); + + HashTable *discovered; + ModelVector * queue; + + + /** @brief A table for mapping ModelActions to CycleNodes */ + HashTable actionToNode; + /** @brief A table for mapping Promises to CycleNodes */ + HashTable promiseToNode; + +#if SUPPORT_MOD_ORDER_DUMP + SnapVector nodeList; +#endif + + bool checkReachable(const CycleNode *from, const CycleNode *to) const; + + /** @brief A flag: true if this graph contains cycles */ + bool hasCycles; + /** @brief The previous value of CycleGraph::hasCycles, for rollback */ + bool oldCycles; + + SnapVector rollbackvector; + SnapVector rmwrollbackvector; +}; + +/** + * @brief A node within a CycleGraph; corresponds either to one ModelAction or + * to a promised future value + */ +class CycleNode { + public: + CycleNode(const ModelAction *act); + CycleNode(const Promise *promise); + bool addEdge(CycleNode *node); + CycleNode * getEdge(unsigned int i) const; + unsigned int getNumEdges() const; + CycleNode * getBackEdge(unsigned int i) const; + unsigned int getNumBackEdges() const; + CycleNode * removeEdge(); + CycleNode * removeBackEdge(); + + bool setRMW(CycleNode *); + CycleNode * getRMW() const; + void clearRMW() { hasRMW = NULL; } + const ModelAction * getAction() const { return action; } + const Promise * getPromise() const { return promise; } + bool 
is_promise() const { return !action; } + void resolvePromise(const ModelAction *writer); + + SNAPSHOTALLOC + private: + /** @brief The ModelAction that this node represents */ + const ModelAction *action; + + /** @brief The promise represented by this node; only valid when action + * is NULL */ + const Promise *promise; + + /** @brief The edges leading out from this node */ + SnapVector edges; + + /** @brief The edges leading into this node */ + SnapVector back_edges; + + /** Pointer to a RMW node that reads from this node, or NULL, if none + * exists */ + CycleNode *hasRMW; +}; + +#endif /* __CYCLEGRAPH_H__ */ diff --git a/datarace.cc b/datarace.cc new file mode 100644 index 0000000..653039b --- /dev/null +++ b/datarace.cc @@ -0,0 +1,363 @@ +#include "datarace.h" +#include "model.h" +#include "threads-model.h" +#include +#include +#include "mymemory.h" +#include "clockvector.h" +#include "config.h" +#include "action.h" +#include "execution.h" +#include "stl-model.h" + +static struct ShadowTable *root; +static SnapVector *unrealizedraces; +static void *memory_base; +static void *memory_top; + +static const ModelExecution * get_execution() +{ + return model->get_execution(); +} + +/** This function initialized the data race detector. */ +void initRaceDetector() +{ + root = (struct ShadowTable *)snapshot_calloc(sizeof(struct ShadowTable), 1); + memory_base = snapshot_calloc(sizeof(struct ShadowBaseTable) * SHADOWBASETABLES, 1); + memory_top = ((char *)memory_base) + sizeof(struct ShadowBaseTable) * SHADOWBASETABLES; + unrealizedraces = new SnapVector(); +} + +void * table_calloc(size_t size) +{ + if ((((char *)memory_base) + size) > memory_top) { + return snapshot_calloc(size, 1); + } else { + void *tmp = memory_base; + memory_base = ((char *)memory_base) + size; + return tmp; + } +} + +/** This function looks up the entry in the shadow table corresponding to a + * given address.*/ +static uint64_t * lookupAddressEntry(const void *address) +{ + struct ShadowTable *currtable = root; +#if BIT48 + currtable = (struct ShadowTable *) currtable->array[(((uintptr_t)address) >> 32) & MASK16BIT]; + if (currtable == NULL) { + currtable = (struct ShadowTable *)(root->array[(((uintptr_t)address) >> 32) & MASK16BIT] = table_calloc(sizeof(struct ShadowTable))); + } +#endif + + struct ShadowBaseTable *basetable = (struct ShadowBaseTable *)currtable->array[(((uintptr_t)address) >> 16) & MASK16BIT]; + if (basetable == NULL) { + basetable = (struct ShadowBaseTable *)(currtable->array[(((uintptr_t)address) >> 16) & MASK16BIT] = table_calloc(sizeof(struct ShadowBaseTable))); + } + return &basetable->array[((uintptr_t)address) & MASK16BIT]; +} + +/** + * Compares a current clock-vector/thread-ID pair with a clock/thread-ID pair + * to check the potential for a data race. + * @param clock1 The current clock vector + * @param tid1 The current thread; paired with clock1 + * @param clock2 The clock value for the potentially-racing action + * @param tid2 The thread ID for the potentially-racing action + * @return true if the current clock allows a race with the event at clock2/tid2 + */ +static bool clock_may_race(ClockVector *clock1, thread_id_t tid1, + modelclock_t clock2, thread_id_t tid2) +{ + return tid1 != tid2 && clock2 != 0 && clock1->getClock(tid2) <= clock2; +} + +/** + * Expands a record from the compact form to the full form. This is + * necessary for multiple readers or for very large thread ids or time + * stamps. 
*/ +static void expandRecord(uint64_t *shadow) +{ + uint64_t shadowval = *shadow; + + modelclock_t readClock = READVECTOR(shadowval); + thread_id_t readThread = int_to_id(RDTHREADID(shadowval)); + modelclock_t writeClock = WRITEVECTOR(shadowval); + thread_id_t writeThread = int_to_id(WRTHREADID(shadowval)); + + struct RaceRecord *record = (struct RaceRecord *)snapshot_calloc(1, sizeof(struct RaceRecord)); + record->writeThread = writeThread; + record->writeClock = writeClock; + + if (readClock != 0) { + record->capacity = INITCAPACITY; + record->thread = (thread_id_t *)snapshot_malloc(sizeof(thread_id_t) * record->capacity); + record->readClock = (modelclock_t *)snapshot_malloc(sizeof(modelclock_t) * record->capacity); + record->numReads = 1; + record->thread[0] = readThread; + record->readClock[0] = readClock; + } + *shadow = (uint64_t) record; +} + +/** This function is called when we detect a data race.*/ +static void reportDataRace(thread_id_t oldthread, modelclock_t oldclock, bool isoldwrite, ModelAction *newaction, bool isnewwrite, const void *address) +{ + struct DataRace *race = (struct DataRace *)snapshot_malloc(sizeof(struct DataRace)); + race->oldthread = oldthread; + race->oldclock = oldclock; + race->isoldwrite = isoldwrite; + race->newaction = newaction; + race->isnewwrite = isnewwrite; + race->address = address; + unrealizedraces->push_back(race); + + /* If the race is realized, bail out now. */ + if (checkDataRaces()) + model->switch_to_master(NULL); +} + +/** + * @brief Check and report data races + * + * If the trace is feasible (a feasible prefix), clear out the list of + * unrealized data races, asserting any realized ones as execution bugs so that + * the model-checker will end the execution. + * + * @return True if any data races were realized + */ +bool checkDataRaces() +{ + if (get_execution()->isfeasibleprefix()) { + bool race_asserted = false; + /* Prune the non-racing unrealized dataraces */ + for (unsigned i = 0; i < unrealizedraces->size(); i++) { + struct DataRace *race = (*unrealizedraces)[i]; + if (clock_may_race(race->newaction->get_cv(), race->newaction->get_tid(), race->oldclock, race->oldthread)) { + assert_race(race); + race_asserted = true; + } + snapshot_free(race); + } + unrealizedraces->clear(); + return race_asserted; + } + return false; +} + +/** + * @brief Assert a data race + * + * Asserts a data race which is currently realized, causing the execution to + * end and stashing a message in the model-checker's bug list + * + * @param race The race to report + */ +void assert_race(struct DataRace *race) +{ + model->assert_bug( + "Data race detected @ address %p:\n" + " Access 1: %5s in thread %2d @ clock %3u\n" + " Access 2: %5s in thread %2d @ clock %3u", + race->address, + race->isoldwrite ? "write" : "read", + id_to_int(race->oldthread), + race->oldclock, + race->isnewwrite ? "write" : "read", + id_to_int(race->newaction->get_tid()), + race->newaction->get_seq_number() + ); +} + +/** This function does race detection for a write on an expanded record. */ +void fullRaceCheckWrite(thread_id_t thread, void *location, uint64_t *shadow, ClockVector *currClock) +{ + struct RaceRecord *record = (struct RaceRecord *)(*shadow); + + /* Check for datarace against last read. */ + + for (int i = 0; i < record->numReads; i++) { + modelclock_t readClock = record->readClock[i]; + thread_id_t readThread = record->thread[i]; + + /* Note that readClock can't actuall be zero here, so it could be + optimized. 
*/ + + if (clock_may_race(currClock, thread, readClock, readThread)) { + /* We have a datarace */ + reportDataRace(readThread, readClock, false, get_execution()->get_parent_action(thread), true, location); + } + } + + /* Check for datarace against last write. */ + + modelclock_t writeClock = record->writeClock; + thread_id_t writeThread = record->writeThread; + + if (clock_may_race(currClock, thread, writeClock, writeThread)) { + /* We have a datarace */ + reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), true, location); + } + + record->numReads = 0; + record->writeThread = thread; + modelclock_t ourClock = currClock->getClock(thread); + record->writeClock = ourClock; +} + +/** This function does race detection on a write. */ +void raceCheckWrite(thread_id_t thread, void *location) +{ + uint64_t *shadow = lookupAddressEntry(location); + uint64_t shadowval = *shadow; + ClockVector *currClock = get_execution()->get_cv(thread); + + /* Do full record */ + if (shadowval != 0 && !ISSHORTRECORD(shadowval)) { + fullRaceCheckWrite(thread, location, shadow, currClock); + return; + } + + int threadid = id_to_int(thread); + modelclock_t ourClock = currClock->getClock(thread); + + /* Thread ID is too large or clock is too large. */ + if (threadid > MAXTHREADID || ourClock > MAXWRITEVECTOR) { + expandRecord(shadow); + fullRaceCheckWrite(thread, location, shadow, currClock); + return; + } + + /* Check for datarace against last read. */ + + modelclock_t readClock = READVECTOR(shadowval); + thread_id_t readThread = int_to_id(RDTHREADID(shadowval)); + + if (clock_may_race(currClock, thread, readClock, readThread)) { + /* We have a datarace */ + reportDataRace(readThread, readClock, false, get_execution()->get_parent_action(thread), true, location); + } + + /* Check for datarace against last write. */ + + modelclock_t writeClock = WRITEVECTOR(shadowval); + thread_id_t writeThread = int_to_id(WRTHREADID(shadowval)); + + if (clock_may_race(currClock, thread, writeClock, writeThread)) { + /* We have a datarace */ + reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), true, location); + } + *shadow = ENCODEOP(0, 0, threadid, ourClock); +} + +/** This function does race detection on a read for an expanded record. */ +void fullRaceCheckRead(thread_id_t thread, const void *location, uint64_t *shadow, ClockVector *currClock) +{ + struct RaceRecord *record = (struct RaceRecord *) (*shadow); + + /* Check for datarace against last write. */ + + modelclock_t writeClock = record->writeClock; + thread_id_t writeThread = record->writeThread; + + if (clock_may_race(currClock, thread, writeClock, writeThread)) { + /* We have a datarace */ + reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), false, location); + } + + /* Shorten vector when possible */ + + int copytoindex = 0; + + for (int i = 0; i < record->numReads; i++) { + modelclock_t readClock = record->readClock[i]; + thread_id_t readThread = record->thread[i]; + + /* Note that is not really a datarace check as reads cannott + actually race. It is just determining that this read subsumes + another in the sense that either this read races or neither + read races. Note that readClock can't actually be zero, so it + could be optimized. 
*/ + + if (clock_may_race(currClock, thread, readClock, readThread)) { + /* Still need this read in vector */ + if (copytoindex != i) { + record->readClock[copytoindex] = record->readClock[i]; + record->thread[copytoindex] = record->thread[i]; + } + copytoindex++; + } + } + + if (copytoindex >= record->capacity) { + int newCapacity = record->capacity * 2; + thread_id_t *newthread = (thread_id_t *)snapshot_malloc(sizeof(thread_id_t) * newCapacity); + modelclock_t *newreadClock = (modelclock_t *)snapshot_malloc(sizeof(modelclock_t) * newCapacity); + std::memcpy(newthread, record->thread, record->capacity * sizeof(thread_id_t)); + std::memcpy(newreadClock, record->readClock, record->capacity * sizeof(modelclock_t)); + snapshot_free(record->readClock); + snapshot_free(record->thread); + record->readClock = newreadClock; + record->thread = newthread; + record->capacity = newCapacity; + } + + modelclock_t ourClock = currClock->getClock(thread); + + record->thread[copytoindex] = thread; + record->readClock[copytoindex] = ourClock; + record->numReads = copytoindex + 1; +} + +/** This function does race detection on a read. */ +void raceCheckRead(thread_id_t thread, const void *location) +{ + uint64_t *shadow = lookupAddressEntry(location); + uint64_t shadowval = *shadow; + ClockVector *currClock = get_execution()->get_cv(thread); + + /* Do full record */ + if (shadowval != 0 && !ISSHORTRECORD(shadowval)) { + fullRaceCheckRead(thread, location, shadow, currClock); + return; + } + + int threadid = id_to_int(thread); + modelclock_t ourClock = currClock->getClock(thread); + + /* Thread ID is too large or clock is too large. */ + if (threadid > MAXTHREADID || ourClock > MAXWRITEVECTOR) { + expandRecord(shadow); + fullRaceCheckRead(thread, location, shadow, currClock); + return; + } + + /* Check for datarace against last write. */ + + modelclock_t writeClock = WRITEVECTOR(shadowval); + thread_id_t writeThread = int_to_id(WRTHREADID(shadowval)); + + if (clock_may_race(currClock, thread, writeClock, writeThread)) { + /* We have a datarace */ + reportDataRace(writeThread, writeClock, true, get_execution()->get_parent_action(thread), false, location); + } + + modelclock_t readClock = READVECTOR(shadowval); + thread_id_t readThread = int_to_id(RDTHREADID(shadowval)); + + if (clock_may_race(currClock, thread, readClock, readThread)) { + /* We don't subsume this read... Have to expand record. */ + expandRecord(shadow); + fullRaceCheckRead(thread, location, shadow, currClock); + return; + } + + *shadow = ENCODEOP(threadid, ourClock, id_to_int(writeThread), writeClock); +} + +bool haveUnrealizedRaces() +{ + return !unrealizedraces->empty(); +} diff --git a/datarace.h b/datarace.h new file mode 100644 index 0000000..737a6d6 --- /dev/null +++ b/datarace.h @@ -0,0 +1,94 @@ +/** @file datarace.h + * @brief Data race detection code. + */ + +#ifndef __DATARACE_H__ +#define __DATARACE_H__ + +#include "config.h" +#include +#include "modeltypes.h" + +/* Forward declaration */ +class ModelAction; + +struct ShadowTable { + void * array[65536]; +}; + +struct ShadowBaseTable { + uint64_t array[65536]; +}; + +struct DataRace { + /* Clock and thread associated with first action. This won't change in + response to synchronization. */ + + thread_id_t oldthread; + modelclock_t oldclock; + /* Record whether this is a write, so we can tell the user. */ + bool isoldwrite; + + /* Model action associated with second action. This could change as + a result of synchronization. 
*/ + ModelAction *newaction; + /* Record whether this is a write, so we can tell the user. */ + bool isnewwrite; + + /* Address of data race. */ + const void *address; +}; + +#define MASK16BIT 0xffff + +void initRaceDetector(); +void raceCheckWrite(thread_id_t thread, void *location); +void raceCheckRead(thread_id_t thread, const void *location); +bool checkDataRaces(); +void assert_race(struct DataRace *race); +bool haveUnrealizedRaces(); + +/** + * @brief A record of information for detecting data races + */ +struct RaceRecord { + modelclock_t *readClock; + thread_id_t *thread; + int capacity; + int numReads; + thread_id_t writeThread; + modelclock_t writeClock; +}; + +#define INITCAPACITY 4 + +#define ISSHORTRECORD(x) ((x)&0x1) + +#define THREADMASK 0xff +#define RDTHREADID(x) (((x)>>1)&THREADMASK) +#define READMASK 0x07fffff +#define READVECTOR(x) (((x)>>9)&READMASK) + +#define WRTHREADID(x) (((x)>>32)&THREADMASK) + +#define WRITEMASK READMASK +#define WRITEVECTOR(x) (((x)>>40)&WRITEMASK) + +/** + * The basic encoding idea is that (void *) either: + * -# points to a full record (RaceRecord) or + * -# encodes the information in a 64 bit word. Encoding is as + * follows: + * - lowest bit set to 1 + * - next 8 bits are read thread id + * - next 23 bits are read clock vector + * - next 8 bits are write thread id + * - next 23 bits are write clock vector + */ +#define ENCODEOP(rdthread, rdtime, wrthread, wrtime) (0x1ULL | ((rdthread)<<1) | ((rdtime) << 9) | (((uint64_t)wrthread)<<32) | (((uint64_t)wrtime)<<40)) + +#define MAXTHREADID (THREADMASK-1) +#define MAXREADVECTOR (READMASK-1) +#define MAXWRITEVECTOR (WRITEMASK-1) + +#endif /* __DATARACE_H__ */ diff --git a/doc/Markdown/License.text b/doc/Markdown/License.text new file mode 100644 index 0000000..6d76506 --- /dev/null +++ b/doc/Markdown/License.text @@ -0,0 +1,30 @@ +Copyright (c) 2004, John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. 
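As a quick illustration of the compact shadow-word layout defined in datarace.h above (this example is not part of the patch; the thread ids and clock values are made up), the following self-contained snippet encodes one read/write pair with ENCODEOP and decodes it again with the same macros:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Copied from datarace.h: bit 0 is the short-record flag, bits 1-8/9-31
	   hold the last read (thread id / clock), bits 32-39/40-62 the last write. */
	#define THREADMASK 0xff
	#define RDTHREADID(x) (((x)>>1)&THREADMASK)
	#define READMASK 0x07fffff
	#define READVECTOR(x) (((x)>>9)&READMASK)
	#define WRTHREADID(x) (((x)>>32)&THREADMASK)
	#define WRITEMASK READMASK
	#define WRITEVECTOR(x) (((x)>>40)&WRITEMASK)
	#define ENCODEOP(rdthread, rdtime, wrthread, wrtime) (0x1ULL | ((rdthread)<<1) | ((rdtime) << 9) | (((uint64_t)wrthread)<<32) | (((uint64_t)wrtime)<<40))

	int main()
	{
		/* Hypothetical history: thread 3 read at clock 17, thread 5 wrote at clock 42 */
		uint64_t shadow = ENCODEOP(3, 17, 5, 42);

		assert(shadow & 0x1);            /* still a short (in-word) record */
		assert(RDTHREADID(shadow) == 3 && READVECTOR(shadow) == 17);
		assert(WRTHREADID(shadow) == 5 && WRITEVECTOR(shadow) == 42);
		printf("shadow word: 0x%016llx\n", (unsigned long long)shadow);
		return 0;
	}

Once a thread id exceeds MAXTHREADID or a clock exceeds MAXWRITEVECTOR (or a second reader must be remembered), this in-word form no longer fits and expandRecord() in datarace.cc promotes the entry to a full RaceRecord.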
diff --git a/doc/Markdown/Markdown Readme.text b/doc/Markdown/Markdown Readme.text new file mode 100644 index 0000000..6fbb95f --- /dev/null +++ b/doc/Markdown/Markdown Readme.text @@ -0,0 +1,341 @@ +Markdown +======== + +Version 1.0.1 - Tue 14 Dec 2004 + +by John Gruber + + + +Introduction +------------ + +Markdown is a text-to-HTML conversion tool for web writers. Markdown +allows you to write using an easy-to-read, easy-to-write plain text +format, then convert it to structurally valid XHTML (or HTML). + +Thus, "Markdown" is two things: a plain text markup syntax, and a +software tool, written in Perl, that converts the plain text markup +to HTML. + +Markdown works both as a Movable Type plug-in and as a standalone Perl +script -- which means it can also be used as a text filter in BBEdit +(or any other application that supporst filters written in Perl). + +Full documentation of Markdown's syntax and configuration options is +available on the web: . +(Note: this readme file is formatted in Markdown.) + + + +Installation and Requirements +----------------------------- + +Markdown requires Perl 5.6.0 or later. Welcome to the 21st Century. +Markdown also requires the standard Perl library module `Digest::MD5`. + + +### Movable Type ### + +Markdown works with Movable Type version 2.6 or later (including +MT 3.0 or later). + +1. Copy the "Markdown.pl" file into your Movable Type "plugins" + directory. The "plugins" directory should be in the same directory + as "mt.cgi"; if the "plugins" directory doesn't already exist, use + your FTP program to create it. Your installation should look like + this: + + (mt home)/plugins/Markdown.pl + +2. Once installed, Markdown will appear as an option in Movable Type's + Text Formatting pop-up menu. This is selectable on a per-post basis. + Markdown translates your posts to HTML when you publish; the posts + themselves are stored in your MT database in Markdown format. + +3. If you also install SmartyPants 1.5 (or later), Markdown will offer + a second text formatting option: "Markdown with SmartyPants". This + option is the same as the regular "Markdown" formatter, except that + automatically uses SmartyPants to create typographically correct + curly quotes, em-dashes, and ellipses. See the SmartyPants web page + for more information: + +4. To make Markdown (or "Markdown with SmartyPants") your default + text formatting option for new posts, go to Weblog Config -> + Preferences. + +Note that by default, Markdown produces XHTML output. To configure +Markdown to produce HTML 4 output, see "Configuration", below. + + +### Blosxom ### + +Markdown works with Blosxom version 2.x. + +1. Rename the "Markdown.pl" plug-in to "Markdown" (case is + important). Movable Type requires plug-ins to have a ".pl" + extension; Blosxom forbids it. + +2. Copy the "Markdown" plug-in file to your Blosxom plug-ins folder. + If you're not sure where your Blosxom plug-ins folder is, see the + Blosxom documentation for information. + +3. That's it. The entries in your weblog will now automatically be + processed by Markdown. + +4. If you'd like to apply Markdown formatting only to certain posts, + rather than all of them, see Jason Clark's instructions for using + Markdown in conjunction with Blosxom's Meta plugin: + + + + +### BBEdit ### + +Markdown works with BBEdit 6.1 or later on Mac OS X. (It also works +with BBEdit 5.1 or later and MacPerl 5.6.1 on Mac OS 8.6 or later.) + +1. Copy the "Markdown.pl" file to appropriate filters folder in your + "BBEdit Support" folder. 
On Mac OS X, this should be: + + BBEdit Support/Unix Support/Unix Filters/ + + See the BBEdit documentation for more details on the location of + these folders. + + You can rename "Markdown.pl" to whatever you wish. + +2. That's it. To use Markdown, select some text in a BBEdit document, + then choose Markdown from the Filters sub-menu in the "#!" menu, or + the Filters floating palette + + + +Configuration +------------- + +By default, Markdown produces XHTML output for tags with empty elements. +E.g.: + +
+    <br />
+
+Markdown can be configured to produce HTML-style tags; e.g.:
+
+    <br>
+ + +### Movable Type ### + +You need to use a special `MTMarkdownOptions` container tag in each +Movable Type template where you want HTML 4-style output: + + + ... put your entry content here ... + + +The easiest way to use MTMarkdownOptions is probably to put the +opening tag right after your `` tag, and the closing tag right +before ``. + +To suppress Markdown processing in a particular template, i.e. to +publish the raw Markdown-formatted text without translation into +(X)HTML, set the `output` attribute to 'raw': + + + ... put your entry content here ... + + + +### Command-Line ### + +Use the `--html4tags` command-line switch to produce HTML output from a +Unix-style command line. E.g.: + + % perl Markdown.pl --html4tags foo.text + +Type `perldoc Markdown.pl`, or read the POD documentation within the +Markdown.pl source code for more information. + + + +Bugs +---- + +To file bug reports or feature requests please send email to: +. + + + +Version History +--------------- + +1.0.1 (14 Dec 2004): + ++ Changed the syntax rules for code blocks and spans. Previously, + backslash escapes for special Markdown characters were processed + everywhere other than within inline HTML tags. Now, the contents + of code blocks and spans are no longer processed for backslash + escapes. This means that code blocks and spans are now treated + literally, with no special rules to worry about regarding + backslashes. + + **NOTE**: This changes the syntax from all previous versions of + Markdown. Code blocks and spans involving backslash characters + will now generate different output than before. + ++ Tweaked the rules for link definitions so that they must occur + within three spaces of the left margin. Thus if you indent a link + definition by four spaces or a tab, it will now be a code block. + + [a]: /url/ "Indented 3 spaces, this is a link def" + + [b]: /url/ "Indented 4 spaces, this is a code block" + + **IMPORTANT**: This may affect existing Markdown content if it + contains link definitions indented by 4 or more spaces. + ++ Added `>`, `+`, and `-` to the list of backslash-escapable + characters. These should have been done when these characters + were added as unordered list item markers. + ++ Trailing spaces and tabs following HTML comments and `
` tags + are now ignored. + ++ Inline links using `<` and `>` URL delimiters weren't working: + + like [this]() + ++ Added a bit of tolerance for trailing spaces and tabs after + Markdown hr's. + ++ Fixed bug where auto-links were being processed within code spans: + + like this: `` + ++ Sort-of fixed a bug where lines in the middle of hard-wrapped + paragraphs, which lines look like the start of a list item, + would accidentally trigger the creation of a list. E.g. a + paragraph that looked like this: + + I recommend upgrading to version + 8. Oops, now this line is treated + as a sub-list. + + This is fixed for top-level lists, but it can still happen for + sub-lists. E.g., the following list item will not be parsed + properly: + + + I recommend upgrading to version + 8. Oops, now this line is treated + as a sub-list. + + Given Markdown's list-creation rules, I'm not sure this can + be fixed. + ++ Standalone HTML comments are now handled; previously, they'd get + wrapped in a spurious `

` tag. + ++ Fix for horizontal rules preceded by 2 or 3 spaces. + ++ `


` HTML tags in must occur within three spaces of left + margin. (With 4 spaces or a tab, they should be code blocks, but + weren't before this fix.) + ++ Capitalized "With" in "Markdown With SmartyPants" for + consistency with the same string label in SmartyPants.pl. + (This fix is specific to the MT plug-in interface.) + ++ Auto-linked email address can now optionally contain + a 'mailto:' protocol. I.e. these are equivalent: + + + + ++ Fixed annoying bug where nested lists would wind up with + spurious (and invalid) `

` tags. + ++ You can now write empty links: + + [like this]() + + and they'll be turned into anchor tags with empty href attributes. + This should have worked before, but didn't. + ++ `***this***` and `___this___` are now turned into + + this + + Instead of + + this + + which isn't valid. (Thanks to Michel Fortin for the fix.) + ++ Added a new substitution in `_EncodeCode()`: s/\$/$/g; This + is only for the benefit of Blosxom users, because Blosxom + (sometimes?) interpolates Perl scalars in your article bodies. + ++ Fixed problem for links defined with urls that include parens, e.g.: + + [1]: http://sources.wikipedia.org/wiki/Middle_East_Policy_(Chomsky) + + "Chomsky" was being erroneously treated as the URL's title. + ++ At some point during 1.0's beta cycle, I changed every sub's + argument fetching from this idiom: + + my $text = shift; + + to: + + my $text = shift || return ''; + + The idea was to keep Markdown from doing any work in a sub + if the input was empty. This introduced a bug, though: + if the input to any function was the single-character string + "0", it would also evaluate as false and return immediately. + How silly. Now fixed. + + + +Donations +--------- + +Donations to support Markdown's development are happily accepted. See: + for details. + + + +Copyright and License +--------------------- + +Copyright (c) 2003-2004 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. 
diff --git a/doc/Markdown/Markdown.pl b/doc/Markdown/Markdown.pl new file mode 100755 index 0000000..e4c8469 --- /dev/null +++ b/doc/Markdown/Markdown.pl @@ -0,0 +1,1450 @@ +#!/usr/bin/perl + +# +# Markdown -- A text-to-HTML conversion tool for web writers +# +# Copyright (c) 2004 John Gruber +# +# + + +package Markdown; +require 5.006_000; +use strict; +use warnings; + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION); +$VERSION = '1.0.1'; +# Tue 14 Dec 2004 + +## Disabled; causes problems under Perl 5.6.1: +# use utf8; +# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + + +# +# Global default settings: +# +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_tab_width = 4; + + +# +# Globals: +# + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +my $g_nested_brackets; +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_html_blocks; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "Markdown", + description => "A plain-text-to-HTML formatting plugin. 
(Version: $VERSION)", + doc_link => 'http://daringfireball.net/projects/markdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('markdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('markdown' => { + label => 'Markdown', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('markdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('markdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? $text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('markdown_with_smartypants' => { + label => 'Markdown With SmartyPants', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n"; + print "Copyright 2004 John Gruber\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print Markdown($text); + } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialChars(), so that any *'s or _'s in the +# and tags get encoded. +# + my $text = shift; + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. 
an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); + + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = _Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); + + # Strip link definitions, store in hashes. + $text = _StripLinkDefinitions($text); + + $text = _RunBlockGamut($text); + + $text = _UnescapeSpecialChars($text); + + return $text . "\n"; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + } + {}mx) { + $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; + } + } + + return $text; +} + + +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap

<p>s around
+	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+	# phrase emphasis, and spans. The list of tags we're looking for is
+	# hard-coded:
+	my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
+	my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
+
+	# First, look for nested blocks, e.g.:
+	#

+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
` and stop at the first `
`. + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_a) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + + + # + # Now match more liberally, simply from `\n` to `\n` + # + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_b) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + # Special case just for
. It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoHeaders($text); + + # Do Horizontal Rules: + $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags. + $text = _HashHTMLBlocks($text); + + $text = _FormParagraphs($text); + + return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoCodeSpans($text); + + $text = _EscapeSpecialChars($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = _DoImages($text); + $text = _DoAnchors($text); + + # Make links out of things like `` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](). + $text = _DoAutoLinks($text); + + $text = _EncodeAmpsAndAngles($text); + + $text = _DoItalicsAndBold($text); + + # Do hard breaks: + $text =~ s/ {2,}\n/ or tags. +# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; + + foreach my $cur_token (@$tokens) { + if ($cur_token->[0] eq "tag") { + # Within tags, encode * and _ so they don't conflict + # with their use in Markdown for italics and strong. + # We're replacing each such character with its + # corresponding MD5 checksum value; this is likely + # overkill, but it should prevent us from colliding + # with the escape values by accident. + $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; + $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; + $text .= $cur_token->[1]; + } else { + my $t = $cur_token->[1]; + $t = _EncodeBackslashEscapes($t); + $text .= $t; + } + } + return $text; +} + + +sub _DoAnchors { +# +# Turn Markdown link shortcuts into XHTML
tags. +# + my $text = shift; + + # + # First, handle reference-style links: [link text] [id] + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $link_text; # for shortcut links like [this][]. + } + + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "? # href = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # Title = $6 + \5 # matching quote + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $url = $3; + my $title = $6; + + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = " tags. +# + my $text = shift; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "\"$alt_text\"";? # src url = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $url = $3; + my $title = ''; + if (defined($6)) { + $title = $6; + } + + $alt_text =~ s/"/"/g; + $title =~ s/"/"/g; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "\"$alt_text\"";" . _RunSpanGamut($1) . "\n\n"; + }egmx; + + $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ + "

" . _RunSpanGamut($1) . "

\n\n"; + }egmx; + + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text =~ s{ + ^(\#{1,6}) # $1 = string of #'s + [ \t]* + (.+?) # $2 = Header text + [ \t]* + \#* # optional closing #'s (not counted) + \n+ + }{ + my $h_level = length($1); + "" . _RunSpanGamut($2) . "\n\n"; + }egmx; + + return $text; +} + + +sub _DoLists { +# +# Form HTML ordered (numbered) and unordered (bulleted) lists. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Re-usable patterns to match list item bullets and number markers: + my $marker_ul = qr/[*+-]/; + my $marker_ol = qr/\d+[.]/; + my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + + # Re-usable pattern to match any entirel ul or ol list: + my $whole_list = qr{ + ( # $1 = whole list + ( # $2 + [ ]{0,$less_than_tab} + (${marker_any}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + ${marker_any}[ \t]+ + ) + ) + ) + }mx; + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code to that uses two + # static s/// patterns rather than one conditional pattern. + + if ($g_list_level) { + $text =~ s{ + ^ + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n"; + $result; + }egmx; + } + else { + $text =~ s{ + (?:(?<=\n\n)|\A\n?) + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n"; + $result; + }egmx; + } + + + return $text; +} + + +sub _ProcessListItems { +# +# Process the contents of a single ordered or unordered list, splitting it +# into individual list items. +# + + my $list_str = shift; + my $marker_any = shift; + + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. 
+ # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". + + $g_list_level++; + + # trim trailing blank lines: + $list_str =~ s/\n{2,}\z/\n/; + + + $list_str =~ s{ + (\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ($marker_any) [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 ($marker_any) [ \t]+)) + }{ + my $item = $4; + my $leading_line = $1; + my $leading_space = $2; + + if ($leading_line or ($item =~ m/\n{2,}/)) { + $item = _RunBlockGamut(_Outdent($item)); + } + else { + # Recursion for sub-lists: + $item = _DoLists(_Outdent($item)); + chomp $item; + $item = _RunSpanGamut($item); + } + + "
<li>" . $item . "
</li>\n";
+	}egmx;
+
+	$g_list_level--;
+	return $list_str;
+}
+
+
+
+sub _DoCodeBlocks {
+#
+#	Process Markdown `<pre><code>
    ` blocks.
    +#	
    +
    +	my $text = shift;
    +
    +	$text =~ s{
    +			(?:\n\n|\A)
    +			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    +			  (?:
    +			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    +			    .*\n+
    +			  )+
    +			)
    +			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    +		}{
    +			my $codeblock = $1;
    +			my $result; # return value
    +
    +			$codeblock = _EncodeCode(_Outdent($codeblock));
    +			$codeblock = _Detab($codeblock);
    +			$codeblock =~ s/\A\n+//; # trim leading newlines
    +			$codeblock =~ s/\s+\z//; # trim trailing whitespace
    +
+			$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>
    \n\n"; + + $result; + }egmx; + + return $text; +} + + +sub _DoCodeSpans { +# +# * Backtick quotes are used for spans. +# +# * You can use multiple backticks as the delimiters if you want to +# include literal backticks in the code span. So, this input: +# +# Just type ``foo `bar` baz`` at the prompt. +# +# Will translate to: +# +#

Just type <code>foo `bar` baz</code> at the prompt.

    +# +# There's no arbitrary limit to the number of backticks you +# can use as delimters. If you need three consecutive backticks +# in your code, use four for delimiters, etc. +# +# * You can use spaces to get literal backticks at the edges: +# +# ... type `` `bar` `` ... +# +# Turns to: +# +# ... type `bar` ... +# + + my $text = shift; + + $text =~ s@ + (`+) # $1 = Opening run of ` + (.+?) # $2 = The code block + (?$c
    "; + @egsx; + + return $text; +} + + +sub _EncodeCode { +# +# Encode/escape certain characters inside Markdown code runs. +# The point is that in code, these characters are literals, +# and lose their special Markdown meanings. +# + local $_ = shift; + + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + s/&/&/g; + + # Encode $'s, but only if we're running under Blosxom. + # (Blosxom interpolates Perl variables in article bodies.) + { + no warnings 'once'; + if (defined($blosxom::version)) { + s/\$/$/g; + } + } + + + # Do the angle bracket song and dance: + s! < !<!gx; + s! > !>!gx; + + # Now, escape characters that are magic in Markdown: + s! \* !$g_escape_table{'*'}!gx; + s! _ !$g_escape_table{'_'}!gx; + s! { !$g_escape_table{'{'}!gx; + s! } !$g_escape_table{'}'}!gx; + s! \[ !$g_escape_table{'['}!gx; + s! \] !$g_escape_table{']'}!gx; + s! \\ !$g_escape_table{'\\'}!gx; + + return $_; +} + + +sub _DoItalicsAndBold { + my $text = shift; + + # must go first: + $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } + {$2}gsx; + + $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } + {$2}gsx; + + return $text; +} + + +sub _DoBlockQuotes { + my $text = shift; + + $text =~ s{ + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + }{ + my $bq = $1; + $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting + $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = _RunBlockGamut($bq); # recurse + + $bq =~ s/^/ /g; + # These leading spaces screw with
<pre> content, so we need to fix that:
    +			$bq =~ s{
    +					(\s*
<pre>.+?</pre>
    ) + }{ + my $pre = $1; + $pre =~ s/^ //mg; + $pre; + }egsx; + + "
<blockquote>\n$bq\n</blockquote>
    \n\n"; + }egmx; + + + return $text; +} + + +sub _FormParagraphs { +# +# Params: +# $text - string to process with html

    tags +# + my $text = shift; + + # Strip leading and trailing lines: + $text =~ s/\A\n+//; + $text =~ s/\n+\z//; + + my @grafs = split(/\n{2,}/, $text); + + # + # Wrap

    tags. + # + foreach (@grafs) { + unless (defined( $g_html_blocks{$_} )) { + $_ = _RunSpanGamut($_); + s/^([ \t]*)/

    /; + $_ .= "

    "; + } + } + + # + # Unhashify HTML blocks + # + foreach (@grafs) { + if (defined( $g_html_blocks{$_} )) { + $_ = $g_html_blocks{$_}; + } + } + + return join "\n\n", @grafs; +} + + +sub _EncodeAmpsAndAngles { +# Smart processing for ampersands and angle brackets that need to be encoded. + + my $text = shift; + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + + # Encode naked <'s + $text =~ s{<(?![a-z/?\$!])}{<}gi; + + return $text; +} + + +sub _EncodeBackslashEscapes { +# +# Parameter: String. +# Returns: The string, with after processing the following backslash +# escape sequences. +# + local $_ = shift; + + s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. + s! \\` !$g_escape_table{'`'}!gx; + s! \\\* !$g_escape_table{'*'}!gx; + s! \\_ !$g_escape_table{'_'}!gx; + s! \\\{ !$g_escape_table{'{'}!gx; + s! \\\} !$g_escape_table{'}'}!gx; + s! \\\[ !$g_escape_table{'['}!gx; + s! \\\] !$g_escape_table{']'}!gx; + s! \\\( !$g_escape_table{'('}!gx; + s! \\\) !$g_escape_table{')'}!gx; + s! \\> !$g_escape_table{'>'}!gx; + s! \\\# !$g_escape_table{'#'}!gx; + s! \\\+ !$g_escape_table{'+'}!gx; + s! \\\- !$g_escape_table{'-'}!gx; + s! \\\. !$g_escape_table{'.'}!gx; + s{ \\! }{$g_escape_table{'!'}}gx; + + return $_; +} + + +sub _DoAutoLinks { + my $text = shift; + + $text =~ s{<((https?|ftp):[^'">\s]+)>}{
    $1}gi; + + # Email addresses: + $text =~ s{ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }{ + _EncodeEmailAddress( _UnescapeSpecialChars($1) ); + }egix; + + return $text; +} + + +sub _EncodeEmailAddress { +# +# Input: an email address, e.g. "foo@example.com" +# +# Output: the email address as a mailto link, with each character +# of the address encoded as either a decimal or hex entity, in +# the hopes of foiling most address harvesting spam bots. E.g.: +# +# foo +# @example.com +# +# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +# mailing list: +# + + my $addr = shift; + + srand; + my @encode = ( + sub { '&#' . ord(shift) . ';' }, + sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, + sub { shift }, + ); + + $addr = "mailto:" . $addr; + + $addr =~ s{(.)}{ + my $char = $1; + if ( $char eq '@' ) { + # this *must* be encoded. I insist. + $char = $encode[int rand 1]->($char); + } elsif ( $char ne ':' ) { + # leave ':' alone (to spot mailto: later) + my $r = rand; + # roughly 10% raw, 45% hex, 45% dec + $char = ( + $r > .9 ? $encode[2]->($char) : + $r < .45 ? $encode[1]->($char) : + $encode[0]->($char) + ); + } + $char; + }gex; + + $addr = qq{$addr}; + $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + + return $addr; +} + + +sub _UnescapeSpecialChars { +# +# Swap back in all the special characters we've hidden. +# + my $text = shift; + + while( my($char, $hash) = each(%g_escape_table) ) { + $text =~ s/$hash/$char/g; + } + return $text; +} + + +sub _TokenizeHTML { +# +# Parameter: String containing HTML markup. +# Returns: Reference to an array of the tokens comprising the input +# string. Each token is either a tag (possibly with nested, +# tags contained therein, such as , or a +# run of text between tags. Each element of the array is a +# two-element array; the first is either 'tag' or 'text'; +# the second is the actual value. +# +# +# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. +# +# + + my $str = shift; + my $pos = 0; + my $len = length $str; + my @tokens; + + my $depth = 6; + my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); + my $match = qr/(?s: ) | # comment + (?s: <\? .*? \?> ) | # processing instruction + $nested_tags/ix; # nested tags + + while ($str =~ m/($match)/g) { + my $whole_tag = $1; + my $sec_start = pos $str; + my $tag_start = $sec_start - length $whole_tag; + if ($pos < $tag_start) { + push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; + } + push @tokens, ['tag', $whole_tag]; + $pos = pos $str; + } + push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; + \@tokens; +} + + +sub _Outdent { +# +# Remove one level of line-leading tabs or spaces +# + my $text = shift; + + $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; + return $text; +} + + +sub _Detab { +# +# Cribbed from a post by Bart Lateur: +# +# + my $text = shift; + + $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; + return $text; +} + + +1; + +__END__ + + +=pod + +=head1 NAME + +B + + +=head1 SYNOPSIS + +B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] + [ I ... ] + + +=head1 DESCRIPTION + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. 
+ +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like
    and as well). + +For more information about Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + + Markdown.pl -- -z + +=over 4 + + +=item B<--html4tags> + +Use HTML 4 style for empty element tags, e.g.: + +
    + +instead of Markdown's default XHTML style tags, e.g.: + +
    + + +=item B<-v>, B<--version> + +Display Markdown's version number and copyright information. + + +=item B<-s>, B<--shortversion> + +Display the short-form version number. + + +=back + + + +=head1 BUGS + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: + + support@daringfireball.net + +Please include with your report: (1) the example input; (2) the output +you expected; (3) the output Markdown actually produced. + + +=head1 VERSION HISTORY + +See the readme file for detailed release notes for this version. + +1.0.1 - 14 Dec 2004 + +1.0 - 28 Aug 2004 + + +=head1 AUTHOR + + John Gruber + http://daringfireball.net + + PHP port and other contributions by Michel Fortin + http://michelf.com + + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2003-2004 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +=cut diff --git a/doc/notes/fence.txt b/doc/notes/fence.txt new file mode 100644 index 0000000..8256735 --- /dev/null +++ b/doc/notes/fence.txt @@ -0,0 +1,308 @@ +------------------------------------------------------------------------------- + Fence support: +------------------------------------------------------------------------------- + +Fences provide a few new modification order constraints as well as an +interesting extension to release sequences, detailed in 29.3 (p4-p7) and 29.8 +(p2-p4). The specifications are pasted here in Appendix A and are applied to our +model-checker in these notes. + +Section 29.3 details the modification order constraints established by +sequentially-consistent fences. + +Section 29.8 details the behavior of release and acquire fences (note that +memory_order_seq_cst is both release and acquire). + +The text of these rules are provided at the end of this document for reference. + +******************************* + Backtracking requirements +******************************* + +Because we maintain the seq-cst order as consistent with the execution order, +seq-cst fences cannot commute with each other, with seq-cst loads, nor with +seq-cst stores; we backtrack at all such pairs. 
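+
+As a concrete illustration (a minimal sketch, not taken from this repository;
+the variable and function names are made up), consider the store-buffering
+idiom below, with a seq-cst fence in each thread:
+
+	#include <atomic>
+	#include <thread>
+	#include <cassert>
+
+	std::atomic<int> x(0), y(0);
+	int r1, r2;
+
+	void thread_a() {
+		x.store(1, std::memory_order_relaxed);
+		std::atomic_thread_fence(std::memory_order_seq_cst);	// fence F1
+		r1 = y.load(std::memory_order_relaxed);
+	}
+
+	void thread_b() {
+		y.store(1, std::memory_order_relaxed);
+		std::atomic_thread_fence(std::memory_order_seq_cst);	// fence F2
+		r2 = x.load(std::memory_order_relaxed);
+	}
+
+	int main() {
+		std::thread t1(thread_a), t2(thread_b);
+		t1.join(); t2.join();
+		assert(r1 == 1 || r2 == 1);	// r1 == 0 && r2 == 0 is forbidden
+	}
+
+In any execution, one of the two fences precedes the other in the seq-cst order
+S; 29.3p6 then forces the relaxed load on the other side to observe the earlier
+store (e.g., F1 --sc-> F2 implies r2 == 1). Verifying that r1 == 0 && r2 == 0 is
+impossible therefore requires exploring both relative orderings of F1 and F2,
+which is exactly the backtracking requirement described above.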
+
+Fences extend release/acquire synchronization beyond just
+store-release/load-acquire. We must backtrack with potentially-synchronizing
+fences: that is, with any pair of store- or fence-release and load- or
+fence-acquire, where the release comes after the acquire in the execution order
+(the other ordering is OK, as we will explore both behaviors: the one where the
+pair synchronizes and the one where it doesn't).
+
+Note that, for instance, a fence-release may synchronize with a fence-acquire
+only in the presence of an appropriate load/store pair (29.8p2); but the
+synchronization still occurs between the fences, so the backtracking
+requirements are only placed on the release/acquire fences themselves.
+
+*******************************
+ Seq-cst MO constraints (29.3 p4-p7)
+*******************************
+
+The statements given in the specification regarding sequentially consistent
+fences can be transformed into the following 4 modification order constraints.
+
+29.3p4
+
+If
+  is_write(A) && is_read(B) && is_write(W) && is_fence(X) &&
+  is_seqcst(W) && is_seqcst(X) && A != W &&
+  same_loc(W, A, B) &&
+  A --rf-> B &&
+  W --sc-> X --sb-> B
+then
+  W --mo-> A
+
+Intuition/Implementation:
+ * We may (but don't currently) limit our consideration of W to only the most
+   recent (in the SC order) store to the same location as A and B prior to X
+   (note that all prior writes will be ordered prior to W in both SC and MO)
+ * We should consider the "most recent" seq-cst fence X that precedes B
+ * This search can be combined with the r_modification_order search, since we
+   already iterate through the necessary stores W
+
+29.3p5
+
+If
+  is_write(A) && is_read(B) && is_write(W) && is_fence(X) &&
+  is_seqcst(B) && is_seqcst(X) &&
+  same_loc(W, A, B) &&
+  A != W &&
+  A --rf-> B &&
+  W --sb-> X --sc-> B
+then
+  W --mo-> A
+
+Intuition/Implementation:
+ * We only need to examine the "most recent" seq-cst fence X from each thread
+ * We only need to examine the "most recent" qualifying store W that precedes X;
+   all other W will provide a weaker MO constraint
+ * This search can be combined with the r_modification_order search, since we
+   already iterate through the necessary stores W
+
+29.3p6
+
+If
+  is_write(A) && is_read(B) && is_write(W) && is_fence(X) && is_fence(Y) &&
+  is_seqcst(X) && is_seqcst(Y) &&
+  same_loc(W, A, B) &&
+  A != W &&
+  A --rf-> B &&
+  W --sb-> X --sc-> Y --sb-> B
+then
+  W --mo-> A
+
+Intuition/Implementation:
+ * We should consider only the "most recent" fence Y in the same thread as B
+   (prior fences may only yield the same or weaker constraints)
+ * We may then consider the "most recent" seq-cst fence X prior to Y (in SC order)
+   from each thread (prior fences may only yield the same or weaker constraints)
+ * We should consider only the "most recent" store W (to the same location as A,
+   B) in the same thread as X (prior stores may only yield the same or weaker
+   constraints)
+ * This search can be combined with the r_modification_order search, since we
+   already iterate through the necessary stores W
+
+29.3p7
+
+If
+  is_write(A) && is_write(B) && is_fence(X) && is_fence(Y) &&
+  is_seqcst(X) && is_seqcst(Y) &&
+  same_loc(A, B) &&
+  A --sb-> X --sc-> Y --sb-> B
+then
+  A --mo-> B
+
+Intuition/Implementation:
+ * (Similar to the 29.3p6 rules, except using A/B write/write) only search for the
+   most recent fence Y in the same thread; search for the most recent (prior to
+   Y) fence X from each thread; search for the most recent store A prior to X
+ * This search can be combined with the w_modification_order search, since we
+   already iterate through the necessary stores A
+
+**********************************************************************
+ Release/acquire synchronization: extended to fences (29.8 p2-p4)
+**********************************************************************
+
+The C++ specification statements regarding release and acquire fences make
+extensions to release sequences, using what they call "hypothetical release
+sequences." These hypothetical release sequences are the same as normal release
+sequences, except that the "head" doesn't have to be a release: it can have any
+memory order (e.g., relaxed). This change actually simplifies our release
+sequences (for the fence case), as we don't actually have to establish a
+contiguous modification order all the way to a release operation; we just need
+to reach the same thread (via an RMW chain, for instance).
+
+The statements given in the specification regarding release and acquire fences
+do not result in totally separable conditions, so I will write down my
+semi-formal notation here along with some simple notes, then present my
+implementation notes at the end.
+
+Note that we will use A --rs-> B to denote that B is in the release sequence
+headed by A (we allow A = B, unless otherwise stated). The hypothetical release
+sequence will be similarly denoted A --hrs-> B.
+
+29.8p2
+
+If
+  is_fence(A) && is_write(X) && is_write(W) && is_read(Y) && is_fence(B) &&
+  is_release(A) && is_acquire(B) &&
+  A --sb-> X --hrs-> W --rf-> Y --sb-> B
+then
+  A --sw-> B
+
+Notes:
+ * The fence-release A does not result in any action on its own (i.e., when it
+   is first explored); it only affects later release operations, at which point
+   we may need to associate store X with A. Thus, for every store X, we eagerly
+   record the most recent fence-release, then this record can be utilized during
+   later (hypothetical) release sequence checks.
+ * The fence-acquire B is more troublesome, since there may be many qualifying
+   loads Y (loads from different locations; loads which read from different
+   threads; etc.). Each Y may read from different hypothetical release
+   sequences, ending in a different release A with which B should synchronize.
+   It is difficult (but not impossible) to find good stopping conditions at
+   which we should terminate our search for Y. However, we at least know we only
+   need to consider Y such that:
+       V --sb-> Y --sb-> B
+   where V is a previous fence-acquire.
+
+29.8p3
+
+If
+  is_fence(A) && is_write(X) && is_write(W) && is_read(B) &&
+  is_release(A) && is_acquire(B) &&
+  A --sb-> X --hrs-> W --rf-> B
+then
+  A --sw-> B
+
+Notes:
+ * See the note for fence-release A in 29.8p2
+
+29.8p4
+
+If
+  is_write(A) && is_write(W) && is_read(X) && is_fence(B) &&
+  is_release(A) && is_acquire(B) &&
+  A --rs-> W --rf-> X --sb-> B
+then
+  A --sw-> B
+
+Notes:
+ * See the note for fence-acquire B in 29.8p2. The A, Y, and B in 29.8p2
+   correspond to A, X, and B in this rule (29.8p4).
+
+Summary notes:
+
+Now, noting the overlap in implementation notes between 29.8p2,3,4 and the
+similarity between release sequences and hypothetical release sequences, I can
+extend our release sequence support to provide easy facilities for
+release/acquire fence support.
+
+I extend release sequence support to include fences first by distinguishing the
+'acquire' from the 'load'; previously, release sequence searches were always
+triggered by a load-acquire operation.
+Now, we may have a *fence*-acquire which finds a previous load-*relaxed*, then
+follows any chain to a release sequence (29.8p4). Any release heads found by
+our existing release sequence support must synchronize with the fence-acquire.
+Any uncertain release sequences can be stashed (along with both the
+fence-acquire and the load-relaxed) as "pending" in the existing lists.
+
+Next I extend the release sequence support to include hypothetical release
+sequences. Note that any search for a release sequence can also search for a
+hypothetical release sequence with little additional effort (and even saving
+effort in cases where a fence-release hides a prior store-release, whose release
+sequence may be harder to establish eagerly). Because the "most recent"
+fence-release is stashed in each ModelAction (see the fence-release note in
+29.8p2), release sequence searches can easily add the most recent fence-release
+to the release_heads vector as they explore an RMW chain. Then, once a search
+reaches a thread in which it looks for a store-release, it can perform this
+interesting optimization: if the most recent store-release is sequenced before
+the most recent fence-release, then we can ignore the store-release and simply
+synchronize with the fence-release. This avoids a "contiguous MO" computation.
+
+So, with hypothetical release sequences seamlessly integrated into the release
+sequence code, we easily cover 29.8p3 (fence-release --sw-> load-acquire). Then,
+it's a simple extension to see how 29.8p2 is just a combination of the rules
+described for 29.8p3 and 29.8p4: a fence-acquire triggers a search for loads in
+its own thread; these loads then launch a series of release sequence
+searches--hypothetical (29.8p2) or real (29.8p4)--and the fence-acquire
+synchronizes with all the release heads that are found.
+
+The best part about all of the preceding explanations: the lazy fixups, etc.,
+can simply be re-used from the existing release sequence code, with slight
+adjustments for dealing with the presence of a fence-acquire preceded by a
+load-relaxed.
+
+*******************************
+ Miscellaneous Notes
+*******************************
+
+fence(memory_order_consume) acts like memory_order_acquire (29.8p5), so if we
+ever support consume, we must alias consume -> acquire
+
+fence(memory_order_relaxed) is a no-op
+
+**************************************************
+ Appendix A: From the C++11 specification (N3337)
+**************************************************
+
+-------------
+Section 29.3
+-------------
+
+29.3p4
+
+For an atomic operation B that reads the value of an atomic object M, if there
+is a memory_order_seq_cst fence X sequenced before B, then B observes either
+the last memory_order_seq_cst modification of M preceding X in the total order
+S or a later modification of M in its modification order.
+
+29.3p5
+
+For atomic operations A and B on an atomic object M, where A modifies M and B
+takes its value, if there is a memory_order_seq_cst fence X such that A is
+sequenced before X and B follows X in S, then B observes either the effects of
+A or a later modification of M in its modification order.
+
+29.3p6
+
+For atomic operations A and B on an atomic object M, where A modifies M and B
+takes its value, if there are memory_order_seq_cst fences X and Y such that A
+is sequenced before X, Y is sequenced before B, and X precedes Y in S, then B
+observes either the effects of A or a later modification of M in its
+modification order.
+ +29.3p7 + +For atomic operations A and B on an atomic object M, if there are +memory_order_seq_cst fences X and Y such that A is sequenced before X, Y is +sequenced before B, and X precedes Y in S, then B occurs later than A in the +modification order of M. + +------------- +Section 29.8 +------------- + +29.8p2 + +A release fence A synchronizes with an acquire fence B if there exist atomic +operations X and Y, both operating on some atomic object M, such that A is +sequenced before X, X modifies M, Y is sequenced before B, and Y reads the value +written by X or a value written by any side effect in the hypothetical release +sequence X would head if it were a release operation. + +29.8p3 + +A release fence A synchronizes with an atomic operation B that performs an +acquire operation on an atomic object M if there exists an atomic operation X +such that A is sequenced before X, X modifies M, and B reads the value written +by X or a value written by any side effect in the hypothetical release sequence +X would head if it were a release operation. + +29.8p4 + +An atomic operation A that is a release operation on an atomic object M +synchronizes with an acquire fence B if there exists some atomic operation X on +M such that X is sequenced before B and reads the value written by A or a value +written by any side effect in the release sequence headed by A. diff --git a/doc/notes/release-sequence.txt b/doc/notes/release-sequence.txt new file mode 100644 index 0000000..85e8c0c --- /dev/null +++ b/doc/notes/release-sequence.txt @@ -0,0 +1,99 @@ +------------------------------------------------------------------------------- + Release sequence support: +------------------------------------------------------------------------------- + +******************************* + From the C++11 specification +******************************* + +1.10.7 + +A release sequence from a release operation A on an atomic object M is a +maximal contiguous sub-sequence of side effects in the modification order of +M, where the first operation is A, and every subsequent operation + +- is performed by the same thread that performed A, or +- is an atomic read-modify-write operation. + +29.3.2 + +An atomic operation A that performs a release operation on an atomic object M +synchronizes with an atomic operation B that performs an acquire operation on +M and takes its value from any side effect in the release sequence headed by +A. + +******************************* + My Notes +******************************* + +The specification allows for a single acquire to synchronize with more than +one release operation, as its "reads from" value might be part of more than +one release sequence. + +******************************* + Approximate Algorithm +******************************* + +Check read-write chain... 
+
+Given:
+current action = curr
+read from = rf
+Cases:
+* rf is NULL: return uncertain
+* rf is RMW:
+  - if rf is release:
+        add rf to release heads
+  - if rf is rel_acq:
+        return certain [note: we don't need to extend the release sequence
+        further, because this acquire will have synchronized already]
+    else:
+        return (recursively) "get release sequence head of rf"
+* if rf is release:
+      add rf to release heads
+      return certain
+* else, rf is a relaxed write (NOT RMW):
+  - check same thread
+
+*******************************
+"check same thread"
+*******************************
+
+let release = max{act in S | samethread(act, rf) && isrelease(act) && act <= rf}
+let t = thread(rf) // == thread(release)
+for all threads t_j != t
+    if exists c in S | c !--mo--> release, rf !--mo--> c, c is write, thread(c) == t_j then
+        return uncertain ("cannot determine")
+        [ note: need to check "future ordered" condition ]
+add release to release heads
+return certain;
+
+*******************************
+General fixup steps:
+*******************************
+
+1. process action, find read_from
+2. add initial mo_graph edges
+3. assign read_from, calc initial "get_release_seq_heads()"
+4. perform synchronization with all release heads
+
+synchronization  => check for new mo_graph edges
+                 => check for resolved release sequences
+                 => check for failed promises
+mo_graph edges   => check for resolved release sequences
+
+*******************************
+Other notes
+*******************************
+
+"cannot determine" means we need to lazily check conditions in the future
+  - check when future writes satisfy "promises"
+
+Read from future? We require that all release heads are "in the past", so that
+we don't form synchronization against the ordering of the program trace. We
+ensure that some execution is explored in which they are ordered the other way,
+so we declare this execution "infeasible."
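+
+As a small worked example (a minimal sketch, not taken from this repository;
+the names are made up), the store-release below heads a release sequence, the
+relaxed fetch_add is an RMW and so stays in that sequence, and an acquire load
+that reads the RMW's value still synchronizes with the original store-release:
+
+	#include <atomic>
+	#include <thread>
+	#include <cassert>
+
+	std::atomic<int> flag(0);
+	int data = 0;
+
+	void writer() {
+		data = 42;
+		flag.store(1, std::memory_order_release);	// head of the release sequence
+	}
+
+	void updater() {
+		flag.fetch_add(1, std::memory_order_relaxed);	// RMW: extends the sequence
+	}
+
+	void reader() {
+		if (flag.load(std::memory_order_acquire) == 2)	// must have read from the RMW
+			assert(data == 42);			// synchronizes with writer()'s release
+	}
+
+	int main() {
+		std::thread t1(writer), t2(updater), t3(reader);
+		t1.join(); t2.join(); t3.join();
+	}
+
+Reading the value 2 means the acquire load read from the fetch_add; the
+fetch_add read the release store's value, so it sits immediately after that
+store in flag's modification order and the release sequence is contiguous.
+Hence the acquire synchronizes with the release and the assertion cannot fail.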
+ +=> If we *do* establish a synchronization after the fact: + - need to recurse through the execution trace and update clock vectors + - more diff --git a/execution.cc b/execution.cc new file mode 100644 index 0000000..53aa521 --- /dev/null +++ b/execution.cc @@ -0,0 +1,2874 @@ +#include +#include +#include +#include +#include + +#include "model.h" +#include "execution.h" +#include "action.h" +#include "nodestack.h" +#include "schedule.h" +#include "common.h" +#include "clockvector.h" +#include "cyclegraph.h" +#include "promise.h" +#include "datarace.h" +#include "threads-model.h" +#include "bugmessage.h" + +#define INITIAL_THREAD_ID 0 + +/** + * Structure for holding small ModelChecker members that should be snapshotted + */ +struct model_snapshot_members { + model_snapshot_members() : + /* First thread created will have id INITIAL_THREAD_ID */ + next_thread_id(INITIAL_THREAD_ID), + used_sequence_numbers(0), + next_backtrack(NULL), + bugs(), + failed_promise(false), + too_many_reads(false), + no_valid_reads(false), + bad_synchronization(false), + asserted(false) + { } + + ~model_snapshot_members() { + for (unsigned int i = 0; i < bugs.size(); i++) + delete bugs[i]; + bugs.clear(); + } + + unsigned int next_thread_id; + modelclock_t used_sequence_numbers; + ModelAction *next_backtrack; + SnapVector bugs; + bool failed_promise; + bool too_many_reads; + bool no_valid_reads; + /** @brief Incorrectly-ordered synchronization was made */ + bool bad_synchronization; + bool asserted; + + SNAPSHOTALLOC +}; + +/** @brief Constructor */ +ModelExecution::ModelExecution(ModelChecker *m, + const struct model_params *params, + Scheduler *scheduler, + NodeStack *node_stack) : + model(m), + params(params), + scheduler(scheduler), + action_trace(), + thread_map(2), /* We'll always need at least 2 threads */ + obj_map(), + condvar_waiters_map(), + obj_thrd_map(), + promises(), + futurevalues(), + pending_rel_seqs(), + thrd_last_action(1), + thrd_last_fence_release(), + node_stack(node_stack), + priv(new struct model_snapshot_members()), + mo_graph(new CycleGraph()) +{ + /* Initialize a model-checker thread, for special ModelActions */ + model_thread = new Thread(get_next_id()); + add_thread(model_thread); + scheduler->register_engine(this); + node_stack->register_engine(this); +} + +/** @brief Destructor */ +ModelExecution::~ModelExecution() +{ + for (unsigned int i = 0; i < get_num_threads(); i++) + delete get_thread(int_to_id(i)); + + for (unsigned int i = 0; i < promises.size(); i++) + delete promises[i]; + + delete mo_graph; + delete priv; +} + +int ModelExecution::get_execution_number() const +{ + return model->get_execution_number(); +} + +static action_list_t * get_safe_ptr_action(HashTable * hash, void * ptr) +{ + action_list_t *tmp = hash->get(ptr); + if (tmp == NULL) { + tmp = new action_list_t(); + hash->put(ptr, tmp); + } + return tmp; +} + +static SnapVector * get_safe_ptr_vect_action(HashTable *, uintptr_t, 4> * hash, void * ptr) +{ + SnapVector *tmp = hash->get(ptr); + if (tmp == NULL) { + tmp = new SnapVector(); + hash->put(ptr, tmp); + } + return tmp; +} + +action_list_t * ModelExecution::get_actions_on_obj(void * obj, thread_id_t tid) const +{ + SnapVector *wrv = obj_thrd_map.get(obj); + if (wrv==NULL) + return NULL; + unsigned int thread=id_to_int(tid); + if (thread < wrv->size()) + return &(*wrv)[thread]; + else + return NULL; +} + +/** @return a thread ID for a new Thread */ +thread_id_t ModelExecution::get_next_id() +{ + return priv->next_thread_id++; +} + +/** @return the number of 
user threads created during this execution */ +unsigned int ModelExecution::get_num_threads() const +{ + return priv->next_thread_id; +} + +/** @return a sequence number for a new ModelAction */ +modelclock_t ModelExecution::get_next_seq_num() +{ + return ++priv->used_sequence_numbers; +} + +/** + * @brief Should the current action wake up a given thread? + * + * @param curr The current action + * @param thread The thread that we might wake up + * @return True, if we should wake up the sleeping thread; false otherwise + */ +bool ModelExecution::should_wake_up(const ModelAction *curr, const Thread *thread) const +{ + const ModelAction *asleep = thread->get_pending(); + /* Don't allow partial RMW to wake anyone up */ + if (curr->is_rmwr()) + return false; + /* Synchronizing actions may have been backtracked */ + if (asleep->could_synchronize_with(curr)) + return true; + /* All acquire/release fences and fence-acquire/store-release */ + if (asleep->is_fence() && asleep->is_acquire() && curr->is_release()) + return true; + /* Fence-release + store can awake load-acquire on the same location */ + if (asleep->is_read() && asleep->is_acquire() && curr->same_var(asleep) && curr->is_write()) { + ModelAction *fence_release = get_last_fence_release(curr->get_tid()); + if (fence_release && *(get_last_action(thread->get_id())) < *fence_release) + return true; + } + return false; +} + +void ModelExecution::wake_up_sleeping_actions(ModelAction *curr) +{ + for (unsigned int i = 0; i < get_num_threads(); i++) { + Thread *thr = get_thread(int_to_id(i)); + if (scheduler->is_sleep_set(thr)) { + if (should_wake_up(curr, thr)) + /* Remove this thread from sleep set */ + scheduler->remove_sleep(thr); + } + } +} + +/** @brief Alert the model-checker that an incorrectly-ordered + * synchronization was made */ +void ModelExecution::set_bad_synchronization() +{ + priv->bad_synchronization = true; +} + +bool ModelExecution::assert_bug(const char *msg) +{ + priv->bugs.push_back(new bug_message(msg)); + + if (isfeasibleprefix()) { + set_assert(); + return true; + } + return false; +} + +/** @return True, if any bugs have been reported for this execution */ +bool ModelExecution::have_bug_reports() const +{ + return priv->bugs.size() != 0; +} + +SnapVector * ModelExecution::get_bugs() const +{ + return &priv->bugs; +} + +/** + * Check whether the current trace has triggered an assertion which should halt + * its execution. + * + * @return True, if the execution should be aborted; false otherwise + */ +bool ModelExecution::has_asserted() const +{ + return priv->asserted; +} + +/** + * Trigger a trace assertion which should cause this execution to be halted. + * This can be due to a detected bug or due to an infeasibility that should + * halt ASAP. + */ +void ModelExecution::set_assert() +{ + priv->asserted = true; +} + +/** + * Check if we are in a deadlock. Should only be called at the end of an + * execution, although it should not give false positives in the middle of an + * execution (there should be some ENABLED thread). 
+ * + * @return True if program is in a deadlock; false otherwise + */ +bool ModelExecution::is_deadlocked() const +{ + bool blocking_threads = false; + for (unsigned int i = 0; i < get_num_threads(); i++) { + thread_id_t tid = int_to_id(i); + if (is_enabled(tid)) + return false; + Thread *t = get_thread(tid); + if (!t->is_model_thread() && t->get_pending()) + blocking_threads = true; + } + return blocking_threads; +} + +/** + * @brief Check if we are yield-blocked + * + * A program can be "yield-blocked" if all threads are ready to execute a + * yield. + * + * @return True if the program is yield-blocked; false otherwise + */ +bool ModelExecution::is_yieldblocked() const +{ + if (!params->yieldblock) + return false; + + for (unsigned int i = 0; i < get_num_threads(); i++) { + thread_id_t tid = int_to_id(i); + Thread *t = get_thread(tid); + if (t->get_pending() && t->get_pending()->is_yield()) + return true; + } + return false; +} + +/** + * Check if this is a complete execution. That is, have all thread completed + * execution (rather than exiting because sleep sets have forced a redundant + * execution). + * + * @return True if the execution is complete. + */ +bool ModelExecution::is_complete_execution() const +{ + if (is_yieldblocked()) + return false; + for (unsigned int i = 0; i < get_num_threads(); i++) + if (is_enabled(int_to_id(i))) + return false; + return true; +} + +/** + * @brief Find the last fence-related backtracking conflict for a ModelAction + * + * This function performs the search for the most recent conflicting action + * against which we should perform backtracking, as affected by fence + * operations. This includes pairs of potentially-synchronizing actions which + * occur due to fence-acquire or fence-release, and hence should be explored in + * the opposite execution order. 
+ * + * @param act The current action + * @return The most recent action which conflicts with act due to fences + */ +ModelAction * ModelExecution::get_last_fence_conflict(ModelAction *act) const +{ + /* Only perform release/acquire fence backtracking for stores */ + if (!act->is_write()) + return NULL; + + /* Find a fence-release (or, act is a release) */ + ModelAction *last_release; + if (act->is_release()) + last_release = act; + else + last_release = get_last_fence_release(act->get_tid()); + if (!last_release) + return NULL; + + /* Skip past the release */ + const action_list_t *list = &action_trace; + action_list_t::const_reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) + if (*rit == last_release) + break; + ASSERT(rit != list->rend()); + + /* Find a prior: + * load-acquire + * or + * load --sb-> fence-acquire */ + ModelVector acquire_fences(get_num_threads(), NULL); + ModelVector prior_loads(get_num_threads(), NULL); + bool found_acquire_fences = false; + for ( ; rit != list->rend(); rit++) { + ModelAction *prev = *rit; + if (act->same_thread(prev)) + continue; + + int tid = id_to_int(prev->get_tid()); + + if (prev->is_read() && act->same_var(prev)) { + if (prev->is_acquire()) { + /* Found most recent load-acquire, don't need + * to search for more fences */ + if (!found_acquire_fences) + return NULL; + } else { + prior_loads[tid] = prev; + } + } + if (prev->is_acquire() && prev->is_fence() && !acquire_fences[tid]) { + found_acquire_fences = true; + acquire_fences[tid] = prev; + } + } + + ModelAction *latest_backtrack = NULL; + for (unsigned int i = 0; i < acquire_fences.size(); i++) + if (acquire_fences[i] && prior_loads[i]) + if (!latest_backtrack || *latest_backtrack < *acquire_fences[i]) + latest_backtrack = acquire_fences[i]; + return latest_backtrack; +} + +/** + * @brief Find the last backtracking conflict for a ModelAction + * + * This function performs the search for the most recent conflicting action + * against which we should perform backtracking. This primary includes pairs of + * synchronizing actions which should be explored in the opposite execution + * order. 
+ * + * @param act The current action + * @return The most recent action which conflicts with act + */ +ModelAction * ModelExecution::get_last_conflict(ModelAction *act) const +{ + switch (act->get_type()) { + case ATOMIC_FENCE: + /* Only seq-cst fences can (directly) cause backtracking */ + if (!act->is_seqcst()) + break; + case ATOMIC_READ: + case ATOMIC_WRITE: + case ATOMIC_RMW: { + ModelAction *ret = NULL; + + /* linear search: from most recent to oldest */ + action_list_t *list = obj_map.get(act->get_location()); + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *prev = *rit; + if (prev == act) + continue; + if (prev->could_synchronize_with(act)) { + ret = prev; + break; + } + } + + ModelAction *ret2 = get_last_fence_conflict(act); + if (!ret2) + return ret; + if (!ret) + return ret2; + if (*ret < *ret2) + return ret2; + return ret; + } + case ATOMIC_LOCK: + case ATOMIC_TRYLOCK: { + /* linear search: from most recent to oldest */ + action_list_t *list = obj_map.get(act->get_location()); + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *prev = *rit; + if (act->is_conflicting_lock(prev)) + return prev; + } + break; + } + case ATOMIC_UNLOCK: { + /* linear search: from most recent to oldest */ + action_list_t *list = obj_map.get(act->get_location()); + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *prev = *rit; + if (!act->same_thread(prev) && prev->is_failed_trylock()) + return prev; + } + break; + } + case ATOMIC_WAIT: { + /* linear search: from most recent to oldest */ + action_list_t *list = obj_map.get(act->get_location()); + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *prev = *rit; + if (!act->same_thread(prev) && prev->is_failed_trylock()) + return prev; + if (!act->same_thread(prev) && prev->is_notify()) + return prev; + } + break; + } + + case ATOMIC_NOTIFY_ALL: + case ATOMIC_NOTIFY_ONE: { + /* linear search: from most recent to oldest */ + action_list_t *list = obj_map.get(act->get_location()); + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *prev = *rit; + if (!act->same_thread(prev) && prev->is_wait()) + return prev; + } + break; + } + default: + break; + } + return NULL; +} + +/** This method finds backtracking points where we should try to + * reorder the parameter ModelAction against. + * + * @param the ModelAction to find backtracking points for. + */ +void ModelExecution::set_backtracking(ModelAction *act) +{ + Thread *t = get_thread(act); + ModelAction *prev = get_last_conflict(act); + if (prev == NULL) + return; + + Node *node = prev->get_node()->get_parent(); + + /* See Dynamic Partial Order Reduction (addendum), POPL '05 */ + int low_tid, high_tid; + if (node->enabled_status(t->get_id()) == THREAD_ENABLED) { + low_tid = id_to_int(act->get_tid()); + high_tid = low_tid + 1; + } else { + low_tid = 0; + high_tid = get_num_threads(); + } + + for (int i = low_tid; i < high_tid; i++) { + thread_id_t tid = int_to_id(i); + + /* Make sure this thread can be enabled here. */ + if (i >= node->get_num_threads()) + break; + + /* See Dynamic Partial Order Reduction (addendum), POPL '05 */ + /* Don't backtrack into a point where the thread is disabled or sleeping. 
*/ + if (node->enabled_status(tid) != THREAD_ENABLED) + continue; + + /* Check if this has been explored already */ + if (node->has_been_explored(tid)) + continue; + + /* See if fairness allows */ + if (params->fairwindow != 0 && !node->has_priority(tid)) { + bool unfair = false; + for (int t = 0; t < node->get_num_threads(); t++) { + thread_id_t tother = int_to_id(t); + if (node->is_enabled(tother) && node->has_priority(tother)) { + unfair = true; + break; + } + } + if (unfair) + continue; + } + + /* See if CHESS-like yield fairness allows */ + if (params->yieldon) { + bool unfair = false; + for (int t = 0; t < node->get_num_threads(); t++) { + thread_id_t tother = int_to_id(t); + if (node->is_enabled(tother) && node->has_priority_over(tid, tother)) { + unfair = true; + break; + } + } + if (unfair) + continue; + } + + /* Cache the latest backtracking point */ + set_latest_backtrack(prev); + + /* If this is a new backtracking point, mark the tree */ + if (!node->set_backtrack(tid)) + continue; + DEBUG("Setting backtrack: conflict = %d, instead tid = %d\n", + id_to_int(prev->get_tid()), + id_to_int(t->get_id())); + if (DBG_ENABLED()) { + prev->print(); + act->print(); + } + } +} + +/** + * @brief Cache the a backtracking point as the "most recent", if eligible + * + * Note that this does not prepare the NodeStack for this backtracking + * operation, it only caches the action on a per-execution basis + * + * @param act The operation at which we should explore a different next action + * (i.e., backtracking point) + * @return True, if this action is now the most recent backtracking point; + * false otherwise + */ +bool ModelExecution::set_latest_backtrack(ModelAction *act) +{ + if (!priv->next_backtrack || *act > *priv->next_backtrack) { + priv->next_backtrack = act; + return true; + } + return false; +} + +/** + * Returns last backtracking point. The model checker will explore a different + * path for this point in the next execution. + * @return The ModelAction at which the next execution should diverge. + */ +ModelAction * ModelExecution::get_next_backtrack() +{ + ModelAction *next = priv->next_backtrack; + priv->next_backtrack = NULL; + return next; +} + +/** + * Processes a read model action. + * @param curr is the read model action to process. + * @return True if processing this read updates the mo_graph. 
+ */ +bool ModelExecution::process_read(ModelAction *curr) +{ + Node *node = curr->get_node(); + while (true) { + bool updated = false; + switch (node->get_read_from_status()) { + case READ_FROM_PAST: { + const ModelAction *rf = node->get_read_from_past(); + ASSERT(rf); + + mo_graph->startChanges(); + + ASSERT(!is_infeasible()); + if (!check_recency(curr, rf)) { + if (node->increment_read_from()) { + mo_graph->rollbackChanges(); + continue; + } else { + priv->too_many_reads = true; + } + } + + updated = r_modification_order(curr, rf); + read_from(curr, rf); + mo_graph->commitChanges(); + mo_check_promises(curr, true); + break; + } + case READ_FROM_PROMISE: { + Promise *promise = curr->get_node()->get_read_from_promise(); + if (promise->add_reader(curr)) + priv->failed_promise = true; + curr->set_read_from_promise(promise); + mo_graph->startChanges(); + if (!check_recency(curr, promise)) + priv->too_many_reads = true; + updated = r_modification_order(curr, promise); + mo_graph->commitChanges(); + break; + } + case READ_FROM_FUTURE: { + /* Read from future value */ + struct future_value fv = node->get_future_value(); + Promise *promise = new Promise(this, curr, fv); + curr->set_read_from_promise(promise); + promises.push_back(promise); + mo_graph->startChanges(); + updated = r_modification_order(curr, promise); + mo_graph->commitChanges(); + break; + } + default: + ASSERT(false); + } + get_thread(curr)->set_return_value(curr->get_return_value()); + return updated; + } +} + +/** + * Processes a lock, trylock, or unlock model action. @param curr is + * the read model action to process. + * + * The try lock operation checks whether the lock is taken. If not, + * it falls to the normal lock operation case. If so, it returns + * fail. + * + * The lock operation has already been checked that it is enabled, so + * it just grabs the lock and synchronizes with the previous unlock. + * + * The unlock operation has to re-enable all of the threads that are + * waiting on the lock. + * + * @return True if synchronization was updated; false otherwise + */ +bool ModelExecution::process_mutex(ModelAction *curr) +{ + std::mutex *mutex = curr->get_mutex(); + struct std::mutex_state *state = NULL; + + if (mutex) + state = mutex->get_state(); + + switch (curr->get_type()) { + case ATOMIC_TRYLOCK: { + bool success = !state->locked; + curr->set_try_lock(success); + if (!success) { + get_thread(curr)->set_return_value(0); + break; + } + get_thread(curr)->set_return_value(1); + } + //otherwise fall into the lock case + case ATOMIC_LOCK: { + if (curr->get_cv()->getClock(state->alloc_tid) <= state->alloc_clock) + assert_bug("Lock access before initialization"); + state->locked = get_thread(curr); + ModelAction *unlock = get_last_unlock(curr); + //synchronize with the previous unlock statement + if (unlock != NULL) { + synchronize(unlock, curr); + return true; + } + break; + } + case ATOMIC_WAIT: + case ATOMIC_UNLOCK: { + /* wake up the other threads */ + for (unsigned int i = 0; i < get_num_threads(); i++) { + Thread *t = get_thread(int_to_id(i)); + Thread *curr_thrd = get_thread(curr); + if (t->waiting_on() == curr_thrd && t->get_pending()->is_lock()) + scheduler->wake(t); + } + + /* unlock the lock - after checking who was waiting on it */ + state->locked = NULL; + + if (!curr->is_wait()) + break; /* The rest is only for ATOMIC_WAIT */ + + /* Should we go to sleep? 
(simulate spurious failures) */ + if (curr->get_node()->get_misc() == 0) { + get_safe_ptr_action(&condvar_waiters_map, curr->get_location())->push_back(curr); + /* disable us */ + scheduler->sleep(get_thread(curr)); + } + break; + } + case ATOMIC_NOTIFY_ALL: { + action_list_t *waiters = get_safe_ptr_action(&condvar_waiters_map, curr->get_location()); + //activate all the waiting threads + for (action_list_t::iterator rit = waiters->begin(); rit != waiters->end(); rit++) { + scheduler->wake(get_thread(*rit)); + } + waiters->clear(); + break; + } + case ATOMIC_NOTIFY_ONE: { + action_list_t *waiters = get_safe_ptr_action(&condvar_waiters_map, curr->get_location()); + int wakeupthread = curr->get_node()->get_misc(); + action_list_t::iterator it = waiters->begin(); + advance(it, wakeupthread); + scheduler->wake(get_thread(*it)); + waiters->erase(it); + break; + } + + default: + ASSERT(0); + } + return false; +} + +/** + * @brief Check if the current pending promises allow a future value to be sent + * + * It is unsafe to pass a future value back if there exists a pending promise Pr + * such that: + * + * reader --exec-> Pr --exec-> writer + * + * If such Pr exists, we must save the pending future value until Pr is + * resolved. + * + * @param writer The operation which sends the future value. Must be a write. + * @param reader The operation which will observe the value. Must be a read. + * @return True if the future value can be sent now; false if it must wait. + */ +bool ModelExecution::promises_may_allow(const ModelAction *writer, + const ModelAction *reader) const +{ + for (int i = promises.size() - 1; i >= 0; i--) { + ModelAction *pr = promises[i]->get_reader(0); + //reader is after promise...doesn't cross any promise + if (*reader > *pr) + return true; + //writer is after promise, reader before...bad... + if (*writer > *pr) + return false; + } + return true; +} + +/** + * @brief Add a future value to a reader + * + * This function performs a few additional checks to ensure that the future + * value can be feasibly observed by the reader + * + * @param writer The operation whose value is sent. Must be a write. + * @param reader The read operation which may read the future value. Must be a read. 
+ */ +void ModelExecution::add_future_value(const ModelAction *writer, ModelAction *reader) +{ + /* Do more ambitious checks now that mo is more complete */ + if (!mo_may_allow(writer, reader)) + return; + + Node *node = reader->get_node(); + + /* Find an ancestor thread which exists at the time of the reader */ + Thread *write_thread = get_thread(writer); + while (id_to_int(write_thread->get_id()) >= node->get_num_threads()) + write_thread = write_thread->get_parent(); + + struct future_value fv = { + writer->get_write_value(), + writer->get_seq_number() + params->maxfuturedelay, + write_thread->get_id(), + }; + if (node->add_future_value(fv)) + set_latest_backtrack(reader); +} + +/** + * Process a write ModelAction + * @param curr The ModelAction to process + * @param work The work queue, for adding fixup work + * @return True if the mo_graph was updated or promises were resolved + */ +bool ModelExecution::process_write(ModelAction *curr, work_queue_t *work) +{ + /* Readers to which we may send our future value */ + ModelVector send_fv; + + const ModelAction *earliest_promise_reader; + bool updated_promises = false; + + bool updated_mod_order = w_modification_order(curr, &send_fv); + Promise *promise = pop_promise_to_resolve(curr); + + if (promise) { + earliest_promise_reader = promise->get_reader(0); + updated_promises = resolve_promise(curr, promise, work); + } else + earliest_promise_reader = NULL; + + for (unsigned int i = 0; i < send_fv.size(); i++) { + ModelAction *read = send_fv[i]; + + /* Don't send future values to reads after the Promise we resolve */ + if (!earliest_promise_reader || *read < *earliest_promise_reader) { + /* Check if future value can be sent immediately */ + if (promises_may_allow(curr, read)) { + add_future_value(curr, read); + } else { + futurevalues.push_back(PendingFutureValue(curr, read)); + } + } + } + + /* Check the pending future values */ + for (int i = (int)futurevalues.size() - 1; i >= 0; i--) { + struct PendingFutureValue pfv = futurevalues[i]; + if (promises_may_allow(pfv.writer, pfv.reader)) { + add_future_value(pfv.writer, pfv.reader); + futurevalues.erase(futurevalues.begin() + i); + } + } + + mo_graph->commitChanges(); + mo_check_promises(curr, false); + + get_thread(curr)->set_return_value(VALUE_NONE); + return updated_mod_order || updated_promises; +} + +/** + * Process a fence ModelAction + * @param curr The ModelAction to process + * @return True if synchronization was updated + */ +bool ModelExecution::process_fence(ModelAction *curr) +{ + /* + * fence-relaxed: no-op + * fence-release: only log the occurence (not in this function), for + * use in later synchronization + * fence-acquire (this function): search for hypothetical release + * sequences + * fence-seq-cst: MO constraints formed in {r,w}_modification_order + */ + bool updated = false; + if (curr->is_acquire()) { + action_list_t *list = &action_trace; + action_list_t::reverse_iterator rit; + /* Find X : is_read(X) && X --sb-> curr */ + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *act = *rit; + if (act == curr) + continue; + if (act->get_tid() != curr->get_tid()) + continue; + /* Stop at the beginning of the thread */ + if (act->is_thread_start()) + break; + /* Stop once we reach a prior fence-acquire */ + if (act->is_fence() && act->is_acquire()) + break; + if (!act->is_read()) + continue; + /* read-acquire will find its own release sequences */ + if (act->is_acquire()) + continue; + + /* Establish hypothetical release sequences */ + rel_heads_list_t 
release_heads; + get_release_seq_heads(curr, act, &release_heads); + for (unsigned int i = 0; i < release_heads.size(); i++) + synchronize(release_heads[i], curr); + if (release_heads.size() != 0) + updated = true; + } + } + return updated; +} + +/** + * @brief Process the current action for thread-related activity + * + * Performs current-action processing for a THREAD_* ModelAction. Proccesses + * may include setting Thread status, completing THREAD_FINISH/THREAD_JOIN + * synchronization, etc. This function is a no-op for non-THREAD actions + * (e.g., ATOMIC_{READ,WRITE,RMW,LOCK}, etc.) + * + * @param curr The current action + * @return True if synchronization was updated or a thread completed + */ +bool ModelExecution::process_thread_action(ModelAction *curr) +{ + bool updated = false; + + switch (curr->get_type()) { + case THREAD_CREATE: { + thrd_t *thrd = (thrd_t *)curr->get_location(); + struct thread_params *params = (struct thread_params *)curr->get_value(); + Thread *th = new Thread(get_next_id(), thrd, params->func, params->arg, get_thread(curr)); + add_thread(th); + th->set_creation(curr); + /* Promises can be satisfied by children */ + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + if (promise->thread_is_available(curr->get_tid())) + promise->add_thread(th->get_id()); + } + break; + } + case THREAD_JOIN: { + Thread *blocking = curr->get_thread_operand(); + ModelAction *act = get_last_action(blocking->get_id()); + synchronize(act, curr); + updated = true; /* trigger rel-seq checks */ + break; + } + case THREAD_FINISH: { + Thread *th = get_thread(curr); + /* Wake up any joining threads */ + for (unsigned int i = 0; i < get_num_threads(); i++) { + Thread *waiting = get_thread(int_to_id(i)); + if (waiting->waiting_on() == th && + waiting->get_pending()->is_thread_join()) + scheduler->wake(waiting); + } + th->complete(); + /* Completed thread can't satisfy promises */ + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + if (promise->thread_is_available(th->get_id())) + if (promise->eliminate_thread(th->get_id())) + priv->failed_promise = true; + } + updated = true; /* trigger rel-seq checks */ + break; + } + case THREAD_START: { + check_promises(curr->get_tid(), NULL, curr->get_cv()); + break; + } + default: + break; + } + + return updated; +} + +/** + * @brief Process the current action for release sequence fixup activity + * + * Performs model-checker release sequence fixups for the current action, + * forcing a single pending release sequence to break (with a given, potential + * "loose" write) or to complete (i.e., synchronize). If a pending release + * sequence forms a complete release sequence, then we must perform the fixup + * synchronization, mo_graph additions, etc. 
+ * + * @param curr The current action; must be a release sequence fixup action + * @param work_queue The work queue to which to add work items as they are + * generated + */ +void ModelExecution::process_relseq_fixup(ModelAction *curr, work_queue_t *work_queue) +{ + const ModelAction *write = curr->get_node()->get_relseq_break(); + struct release_seq *sequence = pending_rel_seqs.back(); + pending_rel_seqs.pop_back(); + ASSERT(sequence); + ModelAction *acquire = sequence->acquire; + const ModelAction *rf = sequence->rf; + const ModelAction *release = sequence->release; + ASSERT(acquire); + ASSERT(release); + ASSERT(rf); + ASSERT(release->same_thread(rf)); + + if (write == NULL) { + /** + * @todo Forcing a synchronization requires that we set + * modification order constraints. For instance, we can't allow + * a fixup sequence in which two separate read-acquire + * operations read from the same sequence, where the first one + * synchronizes and the other doesn't. Essentially, we can't + * allow any writes to insert themselves between 'release' and + * 'rf' + */ + + /* Must synchronize */ + if (!synchronize(release, acquire)) + return; + + /* Propagate the changed clock vector */ + propagate_clockvector(acquire, work_queue); + } else { + /* Break release sequence with new edges: + * release --mo--> write --mo--> rf */ + mo_graph->addEdge(release, write); + mo_graph->addEdge(write, rf); + } + + /* See if we have realized a data race */ + checkDataRaces(); +} + +/** + * Initialize the current action by performing one or more of the following + * actions, as appropriate: merging RMWR and RMWC/RMW actions, stepping forward + * in the NodeStack, manipulating backtracking sets, allocating and + * initializing clock vectors, and computing the promises to fulfill. 
+ * + * @param curr The current action, as passed from the user context; may be + * freed/invalidated after the execution of this function, with a different + * action "returned" its place (pass-by-reference) + * @return True if curr is a newly-explored action; false otherwise + */ +bool ModelExecution::initialize_curr_action(ModelAction **curr) +{ + ModelAction *newcurr; + + if ((*curr)->is_rmwc() || (*curr)->is_rmw()) { + newcurr = process_rmw(*curr); + delete *curr; + + if (newcurr->is_rmw()) + compute_promises(newcurr); + + *curr = newcurr; + return false; + } + + (*curr)->set_seq_number(get_next_seq_num()); + + newcurr = node_stack->explore_action(*curr, scheduler->get_enabled_array()); + if (newcurr) { + /* First restore type and order in case of RMW operation */ + if ((*curr)->is_rmwr()) + newcurr->copy_typeandorder(*curr); + + ASSERT((*curr)->get_location() == newcurr->get_location()); + newcurr->copy_from_new(*curr); + + /* Discard duplicate ModelAction; use action from NodeStack */ + delete *curr; + + /* Always compute new clock vector */ + newcurr->create_cv(get_parent_action(newcurr->get_tid())); + + *curr = newcurr; + return false; /* Action was explored previously */ + } else { + newcurr = *curr; + + /* Always compute new clock vector */ + newcurr->create_cv(get_parent_action(newcurr->get_tid())); + + /* Assign most recent release fence */ + newcurr->set_last_fence_release(get_last_fence_release(newcurr->get_tid())); + + /* + * Perform one-time actions when pushing new ModelAction onto + * NodeStack + */ + if (newcurr->is_write()) + compute_promises(newcurr); + else if (newcurr->is_relseq_fixup()) + compute_relseq_breakwrites(newcurr); + else if (newcurr->is_wait()) + newcurr->get_node()->set_misc_max(2); + else if (newcurr->is_notify_one()) { + newcurr->get_node()->set_misc_max(get_safe_ptr_action(&condvar_waiters_map, newcurr->get_location())->size()); + } + return true; /* This was a new ModelAction */ + } +} + +/** + * @brief Establish reads-from relation between two actions + * + * Perform basic operations involved with establishing a concrete rf relation, + * including setting the ModelAction data and checking for release sequences. + * + * @param act The action that is reading (must be a read) + * @param rf The action from which we are reading (must be a write) + * + * @return True if this read established synchronization + */ +bool ModelExecution::read_from(ModelAction *act, const ModelAction *rf) +{ + ASSERT(rf); + ASSERT(rf->is_write()); + + act->set_read_from(rf); + if (act->is_acquire()) { + rel_heads_list_t release_heads; + get_release_seq_heads(act, act, &release_heads); + int num_heads = release_heads.size(); + for (unsigned int i = 0; i < release_heads.size(); i++) + if (!synchronize(release_heads[i], act)) + num_heads--; + return num_heads > 0; + } + return false; +} + +/** + * @brief Synchronizes two actions + * + * When A synchronizes with B (or A --sw-> B), B inherits A's clock vector. + * This function performs the synchronization as well as providing other hooks + * for other checks along with synchronization. 
+ * + * @param first The left-hand side of the synchronizes-with relation + * @param second The right-hand side of the synchronizes-with relation + * @return True if the synchronization was successful (i.e., was consistent + * with the execution order); false otherwise + */ +bool ModelExecution::synchronize(const ModelAction *first, ModelAction *second) +{ + if (*second < *first) { + set_bad_synchronization(); + return false; + } + check_promises(first->get_tid(), second->get_cv(), first->get_cv()); + return second->synchronize_with(first); +} + +/** + * Check promises and eliminate potentially-satisfying threads when a thread is + * blocked (e.g., join, lock). A thread which is waiting on another thread can + * no longer satisfy a promise generated from that thread. + * + * @param blocker The thread on which a thread is waiting + * @param waiting The waiting thread + */ +void ModelExecution::thread_blocking_check_promises(Thread *blocker, Thread *waiting) +{ + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + if (!promise->thread_is_available(waiting->get_id())) + continue; + for (unsigned int j = 0; j < promise->get_num_readers(); j++) { + ModelAction *reader = promise->get_reader(j); + if (reader->get_tid() != blocker->get_id()) + continue; + if (promise->eliminate_thread(waiting->get_id())) { + /* Promise has failed */ + priv->failed_promise = true; + } else { + /* Only eliminate the 'waiting' thread once */ + return; + } + } + } +} + +/** + * @brief Check whether a model action is enabled. + * + * Checks whether an operation would be successful (i.e., is a lock already + * locked, or is the joined thread already complete). + * + * For yield-blocking, yields are never enabled. + * + * @param curr is the ModelAction to check whether it is enabled. + * @return a bool that indicates whether the action is enabled. + */ +bool ModelExecution::check_action_enabled(ModelAction *curr) { + if (curr->is_lock()) { + std::mutex *lock = curr->get_mutex(); + struct std::mutex_state *state = lock->get_state(); + if (state->locked) + return false; + } else if (curr->is_thread_join()) { + Thread *blocking = curr->get_thread_operand(); + if (!blocking->is_complete()) { + thread_blocking_check_promises(blocking, get_thread(curr)); + return false; + } + } else if (params->yieldblock && curr->is_yield()) { + return false; + } + + return true; +} + +/** + * This is the heart of the model checker routine. It performs model-checking + * actions corresponding to a given "current action." Among other processes, it + * calculates reads-from relationships, updates synchronization clock vectors, + * forms a memory_order constraints graph, and handles replay/backtrack + * execution when running permutations of previously-observed executions. 
+ * + * @param curr The current action to process + * @return The ModelAction that is actually executed; may be different than + * curr + */ +ModelAction * ModelExecution::check_current_action(ModelAction *curr) +{ + ASSERT(curr); + bool second_part_of_rmw = curr->is_rmwc() || curr->is_rmw(); + bool newly_explored = initialize_curr_action(&curr); + + DBG(); + + wake_up_sleeping_actions(curr); + + /* Compute fairness information for CHESS yield algorithm */ + if (params->yieldon) { + curr->get_node()->update_yield(scheduler); + } + + /* Add the action to lists before any other model-checking tasks */ + if (!second_part_of_rmw) + add_action_to_lists(curr); + + /* Build may_read_from set for newly-created actions */ + if (newly_explored && curr->is_read()) + build_may_read_from(curr); + + /* Initialize work_queue with the "current action" work */ + work_queue_t work_queue(1, CheckCurrWorkEntry(curr)); + while (!work_queue.empty() && !has_asserted()) { + WorkQueueEntry work = work_queue.front(); + work_queue.pop_front(); + + switch (work.type) { + case WORK_CHECK_CURR_ACTION: { + ModelAction *act = work.action; + bool update = false; /* update this location's release seq's */ + bool update_all = false; /* update all release seq's */ + + if (process_thread_action(curr)) + update_all = true; + + if (act->is_read() && !second_part_of_rmw && process_read(act)) + update = true; + + if (act->is_write() && process_write(act, &work_queue)) + update = true; + + if (act->is_fence() && process_fence(act)) + update_all = true; + + if (act->is_mutex_op() && process_mutex(act)) + update_all = true; + + if (act->is_relseq_fixup()) + process_relseq_fixup(curr, &work_queue); + + if (update_all) + work_queue.push_back(CheckRelSeqWorkEntry(NULL)); + else if (update) + work_queue.push_back(CheckRelSeqWorkEntry(act->get_location())); + break; + } + case WORK_CHECK_RELEASE_SEQ: + resolve_release_sequences(work.location, &work_queue); + break; + case WORK_CHECK_MO_EDGES: { + /** @todo Complete verification of work_queue */ + ModelAction *act = work.action; + bool updated = false; + + if (act->is_read()) { + const ModelAction *rf = act->get_reads_from(); + const Promise *promise = act->get_reads_from_promise(); + if (rf) { + if (r_modification_order(act, rf)) + updated = true; + } else if (promise) { + if (r_modification_order(act, promise)) + updated = true; + } + } + if (act->is_write()) { + if (w_modification_order(act, NULL)) + updated = true; + } + mo_graph->commitChanges(); + + if (updated) + work_queue.push_back(CheckRelSeqWorkEntry(act->get_location())); + break; + } + default: + ASSERT(false); + break; + } + } + + check_curr_backtracking(curr); + set_backtracking(curr); + return curr; +} + +void ModelExecution::check_curr_backtracking(ModelAction *curr) +{ + Node *currnode = curr->get_node(); + Node *parnode = currnode->get_parent(); + + if ((parnode && !parnode->backtrack_empty()) || + !currnode->misc_empty() || + !currnode->read_from_empty() || + !currnode->promise_empty() || + !currnode->relseq_break_empty()) { + set_latest_backtrack(curr); + } +} + +bool ModelExecution::promises_expired() const +{ + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + if (promise->get_expiration() < priv->used_sequence_numbers) + return true; + } + return false; +} + +/** + * This is the strongest feasibility check available. + * @return whether the current trace (partial or complete) must be a prefix of + * a feasible trace. 
+ */ +bool ModelExecution::isfeasibleprefix() const +{ + return pending_rel_seqs.size() == 0 && is_feasible_prefix_ignore_relseq(); +} + +/** + * Print disagnostic information about an infeasible execution + * @param prefix A string to prefix the output with; if NULL, then a default + * message prefix will be provided + */ +void ModelExecution::print_infeasibility(const char *prefix) const +{ + char buf[100]; + char *ptr = buf; + if (mo_graph->checkForCycles()) + ptr += sprintf(ptr, "[mo cycle]"); + if (priv->failed_promise) + ptr += sprintf(ptr, "[failed promise]"); + if (priv->too_many_reads) + ptr += sprintf(ptr, "[too many reads]"); + if (priv->no_valid_reads) + ptr += sprintf(ptr, "[no valid reads-from]"); + if (priv->bad_synchronization) + ptr += sprintf(ptr, "[bad sw ordering]"); + if (promises_expired()) + ptr += sprintf(ptr, "[promise expired]"); + if (promises.size() != 0) + ptr += sprintf(ptr, "[unresolved promise]"); + if (ptr != buf) + model_print("%s: %s", prefix ? prefix : "Infeasible", buf); +} + +/** + * Returns whether the current completed trace is feasible, except for pending + * release sequences. + */ +bool ModelExecution::is_feasible_prefix_ignore_relseq() const +{ + return !is_infeasible() && promises.size() == 0; +} + +/** + * Check if the current partial trace is infeasible. Does not check any + * end-of-execution flags, which might rule out the execution. Thus, this is + * useful only for ruling an execution as infeasible. + * @return whether the current partial trace is infeasible. + */ +bool ModelExecution::is_infeasible() const +{ + return mo_graph->checkForCycles() || + priv->no_valid_reads || + priv->failed_promise || + priv->too_many_reads || + priv->bad_synchronization || + promises_expired(); +} + +/** Close out a RMWR by converting previous RMWR into a RMW or READ. */ +ModelAction * ModelExecution::process_rmw(ModelAction *act) { + ModelAction *lastread = get_last_action(act->get_tid()); + lastread->process_rmw(act); + if (act->is_rmw()) { + if (lastread->get_reads_from()) + mo_graph->addRMWEdge(lastread->get_reads_from(), lastread); + else + mo_graph->addRMWEdge(lastread->get_reads_from_promise(), lastread); + mo_graph->commitChanges(); + } + return lastread; +} + +/** + * A helper function for ModelExecution::check_recency, to check if the current + * thread is able to read from a different write/promise for 'params.maxreads' + * number of steps and if that write/promise should become visible (i.e., is + * ordered later in the modification order). This helps model memory liveness. + * + * @param curr The current action. Must be a read. + * @param rf The write/promise from which we plan to read + * @param other_rf The write/promise from which we may read + * @return True if we were able to read from other_rf for params.maxreads steps + */ +template +bool ModelExecution::should_read_instead(const ModelAction *curr, const T *rf, const U *other_rf) const +{ + /* Need a different write/promise */ + if (other_rf->equals(rf)) + return false; + + /* Only look for "newer" writes/promises */ + if (!mo_graph->checkReachable(rf, other_rf)) + return false; + + SnapVector *thrd_lists = obj_thrd_map.get(curr->get_location()); + action_list_t *list = &(*thrd_lists)[id_to_int(curr->get_tid())]; + action_list_t::reverse_iterator rit = list->rbegin(); + ASSERT((*rit) == curr); + /* Skip past curr */ + rit++; + + /* Does this write/promise work for everyone? 
*/ + for (int i = 0; i < params->maxreads; i++, rit++) { + ModelAction *act = *rit; + if (!act->may_read_from(other_rf)) + return false; + } + return true; +} + +/** + * Checks whether a thread has read from the same write or Promise for too many + * times without seeing the effects of a later write/Promise. + * + * Basic idea: + * 1) there must a different write/promise that we could read from, + * 2) we must have read from the same write/promise in excess of maxreads times, + * 3) that other write/promise must have been in the reads_from set for maxreads times, and + * 4) that other write/promise must be mod-ordered after the write/promise we are reading. + * + * If so, we decide that the execution is no longer feasible. + * + * @param curr The current action. Must be a read. + * @param rf The ModelAction/Promise from which we might read. + * @return True if the read should succeed; false otherwise + */ +template +bool ModelExecution::check_recency(ModelAction *curr, const T *rf) const +{ + if (!params->maxreads) + return true; + + //NOTE: Next check is just optimization, not really necessary.... + if (curr->get_node()->get_read_from_past_size() + + curr->get_node()->get_read_from_promise_size() <= 1) + return true; + + SnapVector *thrd_lists = obj_thrd_map.get(curr->get_location()); + int tid = id_to_int(curr->get_tid()); + ASSERT(tid < (int)thrd_lists->size()); + action_list_t *list = &(*thrd_lists)[tid]; + action_list_t::reverse_iterator rit = list->rbegin(); + ASSERT((*rit) == curr); + /* Skip past curr */ + rit++; + + action_list_t::reverse_iterator ritcopy = rit; + /* See if we have enough reads from the same value */ + for (int count = 0; count < params->maxreads; ritcopy++, count++) { + if (ritcopy == list->rend()) + return true; + ModelAction *act = *ritcopy; + if (!act->is_read()) + return true; + if (act->get_reads_from_promise() && !act->get_reads_from_promise()->equals(rf)) + return true; + if (act->get_reads_from() && !act->get_reads_from()->equals(rf)) + return true; + if (act->get_node()->get_read_from_past_size() + + act->get_node()->get_read_from_promise_size() <= 1) + return true; + } + for (int i = 0; i < curr->get_node()->get_read_from_past_size(); i++) { + const ModelAction *write = curr->get_node()->get_read_from_past(i); + if (should_read_instead(curr, rf, write)) + return false; /* liveness failure */ + } + for (int i = 0; i < curr->get_node()->get_read_from_promise_size(); i++) { + const Promise *promise = curr->get_node()->get_read_from_promise(i); + if (should_read_instead(curr, rf, promise)) + return false; /* liveness failure */ + } + return true; +} + +/** + * @brief Updates the mo_graph with the constraints imposed from the current + * read. + * + * Basic idea is the following: Go through each other thread and find + * the last action that happened before our read. Two cases: + * + * -# The action is a write: that write must either occur before + * the write we read from or be the write we read from. + * -# The action is a read: the write that that action read from + * must occur before the write we read from or be the same write. + * + * @param curr The current action. Must be a read. + * @param rf The ModelAction or Promise that curr reads from. Must be a write. 
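The situation check_recency guards against is easiest to see in a spin loop. In the hypothetical program below, no individual relaxed load is ever forced to observe the store, so an unbounded search could keep exploring executions in which the loop reads the stale value forever; with a maxreads bound, such executions are eventually pruned as liveness failures:

    #include <atomic>
    #include <thread>

    std::atomic<bool> done(false);

    static void spinner()
    {
        /* Each iteration is a separate relaxed load; the memory model never
         * obliges any particular one of them to see the store below. */
        while (!done.load(std::memory_order_relaxed))
            ;   /* spin */
    }

    static void finisher()
    {
        done.store(true, std::memory_order_relaxed);
    }

    int main()
    {
        std::thread t1(spinner), t2(finisher);
        t1.join();
        t2.join();
        return 0;
    }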
+ * @return True if modification order edges were added; false otherwise + */ +template +bool ModelExecution::r_modification_order(ModelAction *curr, const rf_type *rf) +{ + SnapVector *thrd_lists = obj_thrd_map.get(curr->get_location()); + unsigned int i; + bool added = false; + ASSERT(curr->is_read()); + + /* Last SC fence in the current thread */ + ModelAction *last_sc_fence_local = get_last_seq_cst_fence(curr->get_tid(), NULL); + ModelAction *last_sc_write = NULL; + if (curr->is_seqcst()) + last_sc_write = get_last_seq_cst_write(curr); + + /* Iterate over all threads */ + for (i = 0; i < thrd_lists->size(); i++) { + /* Last SC fence in thread i */ + ModelAction *last_sc_fence_thread_local = NULL; + if (int_to_id((int)i) != curr->get_tid()) + last_sc_fence_thread_local = get_last_seq_cst_fence(int_to_id(i), NULL); + + /* Last SC fence in thread i, before last SC fence in current thread */ + ModelAction *last_sc_fence_thread_before = NULL; + if (last_sc_fence_local) + last_sc_fence_thread_before = get_last_seq_cst_fence(int_to_id(i), last_sc_fence_local); + + /* Iterate over actions in thread, starting from most recent */ + action_list_t *list = &(*thrd_lists)[i]; + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *act = *rit; + + /* Skip curr */ + if (act == curr) + continue; + /* Don't want to add reflexive edges on 'rf' */ + if (act->equals(rf)) { + if (act->happens_before(curr)) + break; + else + continue; + } + + if (act->is_write()) { + /* C++, Section 29.3 statement 5 */ + if (curr->is_seqcst() && last_sc_fence_thread_local && + *act < *last_sc_fence_thread_local) { + added = mo_graph->addEdge(act, rf) || added; + break; + } + /* C++, Section 29.3 statement 4 */ + else if (act->is_seqcst() && last_sc_fence_local && + *act < *last_sc_fence_local) { + added = mo_graph->addEdge(act, rf) || added; + break; + } + /* C++, Section 29.3 statement 6 */ + else if (last_sc_fence_thread_before && + *act < *last_sc_fence_thread_before) { + added = mo_graph->addEdge(act, rf) || added; + break; + } + } + + /* C++, Section 29.3 statement 3 (second subpoint) */ + if (curr->is_seqcst() && last_sc_write && act == last_sc_write) { + added = mo_graph->addEdge(act, rf) || added; + break; + } + + /* + * Include at most one act per-thread that "happens + * before" curr + */ + if (act->happens_before(curr)) { + if (act->is_write()) { + added = mo_graph->addEdge(act, rf) || added; + } else { + const ModelAction *prevrf = act->get_reads_from(); + const Promise *prevrf_promise = act->get_reads_from_promise(); + if (prevrf) { + if (!prevrf->equals(rf)) + added = mo_graph->addEdge(prevrf, rf) || added; + } else if (!prevrf_promise->equals(rf)) { + added = mo_graph->addEdge(prevrf_promise, rf) || added; + } + } + break; + } + } + } + + /* + * All compatible, thread-exclusive promises must be ordered after any + * concrete loads from the same thread + */ + for (unsigned int i = 0; i < promises.size(); i++) + if (promises[i]->is_compatible_exclusive(curr)) + added = mo_graph->addEdge(rf, promises[i]) || added; + + return added; +} + +/** + * Updates the mo_graph with the constraints imposed from the current write. + * + * Basic idea is the following: Go through each other thread and find + * the lastest action that happened before our write. Two cases: + * + * (1) The action is a write => that write must occur before + * the current write + * + * (2) The action is a read => the write that that action read from + * must occur before the current write. 
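A concrete, hypothetical example of these constraints at work: once the acquire load of flag below synchronizes with the release store, the earlier store to x happens before the load of x, so the rule documented for r_modification_order (a write that happens before the read must be modification-ordered before, or be, the write read from) forces x.store(1) to precede whatever write that load reads from. Reading the initial value of x would therefore demand an mo edge from x.store(1) back to a write that is already ordered before it, i.e. a cycle, and that execution is discarded as infeasible:

    #include <atomic>
    #include <thread>

    std::atomic<int> x(0), flag(0);

    static void writer()
    {
        x.store(1, std::memory_order_relaxed);
        flag.store(1, std::memory_order_release);
    }

    static void reader()
    {
        if (flag.load(std::memory_order_acquire) == 1) {
            /* The only feasible outcome here is r == 1. */
            int r = x.load(std::memory_order_relaxed);
            (void)r;
        }
    }

    int main()
    {
        std::thread t1(writer), t2(reader);
        t1.join();
        t2.join();
        return 0;
    }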
+ * + * This method also handles two other issues: + * + * (I) Sequential Consistency: Making sure that if the current write is + * seq_cst, that it occurs after the previous seq_cst write. + * + * (II) Sending the write back to non-synchronizing reads. + * + * @param curr The current action. Must be a write. + * @param send_fv A vector for stashing reads to which we may pass our future + * value. If NULL, then don't record any future values. + * @return True if modification order edges were added; false otherwise + */ +bool ModelExecution::w_modification_order(ModelAction *curr, ModelVector *send_fv) +{ + SnapVector *thrd_lists = obj_thrd_map.get(curr->get_location()); + unsigned int i; + bool added = false; + ASSERT(curr->is_write()); + + if (curr->is_seqcst()) { + /* We have to at least see the last sequentially consistent write, + so we are initialized. */ + ModelAction *last_seq_cst = get_last_seq_cst_write(curr); + if (last_seq_cst != NULL) { + added = mo_graph->addEdge(last_seq_cst, curr) || added; + } + } + + /* Last SC fence in the current thread */ + ModelAction *last_sc_fence_local = get_last_seq_cst_fence(curr->get_tid(), NULL); + + /* Iterate over all threads */ + for (i = 0; i < thrd_lists->size(); i++) { + /* Last SC fence in thread i, before last SC fence in current thread */ + ModelAction *last_sc_fence_thread_before = NULL; + if (last_sc_fence_local && int_to_id((int)i) != curr->get_tid()) + last_sc_fence_thread_before = get_last_seq_cst_fence(int_to_id(i), last_sc_fence_local); + + /* Iterate over actions in thread, starting from most recent */ + action_list_t *list = &(*thrd_lists)[i]; + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *act = *rit; + if (act == curr) { + /* + * 1) If RMW and it actually read from something, then we + * already have all relevant edges, so just skip to next + * thread. + * + * 2) If RMW and it didn't read from anything, we should + * whatever edge we can get to speed up convergence. + * + * 3) If normal write, we need to look at earlier actions, so + * continue processing list. + */ + if (curr->is_rmw()) { + if (curr->get_reads_from() != NULL) + break; + else + continue; + } else + continue; + } + + /* C++, Section 29.3 statement 7 */ + if (last_sc_fence_thread_before && act->is_write() && + *act < *last_sc_fence_thread_before) { + added = mo_graph->addEdge(act, curr) || added; + break; + } + + /* + * Include at most one act per-thread that "happens + * before" curr + */ + if (act->happens_before(curr)) { + /* + * Note: if act is RMW, just add edge: + * act --mo--> curr + * The following edge should be handled elsewhere: + * readfrom(act) --mo--> act + */ + if (act->is_write()) + added = mo_graph->addEdge(act, curr) || added; + else if (act->is_read()) { + //if previous read accessed a null, just keep going + if (act->get_reads_from() == NULL) + continue; + added = mo_graph->addEdge(act->get_reads_from(), curr) || added; + } + break; + } else if (act->is_read() && !act->could_synchronize_with(curr) && + !act->same_thread(curr)) { + /* We have an action that: + (1) did not happen before us + (2) is a read and we are a write + (3) cannot synchronize with us + (4) is in a different thread + => + that read could potentially read from our write. Note that + these checks are overly conservative at this point, we'll + do more checks before actually removing the + pendingfuturevalue. 
+ + */ + if (send_fv && thin_air_constraint_may_allow(curr, act)) { + if (!is_infeasible()) + send_fv->push_back(act); + else if (curr->is_rmw() && act->is_rmw() && curr->get_reads_from() && curr->get_reads_from() == act->get_reads_from()) + add_future_value(curr, act); + } + } + } + } + + /* + * All compatible, thread-exclusive promises must be ordered after any + * concrete stores to the same thread, or else they can be merged with + * this store later + */ + for (unsigned int i = 0; i < promises.size(); i++) + if (promises[i]->is_compatible_exclusive(curr)) + added = mo_graph->addEdge(curr, promises[i]) || added; + + return added; +} + +/** Arbitrary reads from the future are not allowed. Section 29.3 + * part 9 places some constraints. This method checks one result of constraint + * constraint. Others require compiler support. */ +bool ModelExecution::thin_air_constraint_may_allow(const ModelAction *writer, const ModelAction *reader) const +{ + if (!writer->is_rmw()) + return true; + + if (!reader->is_rmw()) + return true; + + for (const ModelAction *search = writer->get_reads_from(); search != NULL; search = search->get_reads_from()) { + if (search == reader) + return false; + if (search->get_tid() == reader->get_tid() && + search->happens_before(reader)) + break; + } + + return true; +} + +/** + * Arbitrary reads from the future are not allowed. Section 29.3 part 9 places + * some constraints. This method checks one the following constraint (others + * require compiler support): + * + * If X --hb-> Y --mo-> Z, then X should not read from Z. + * If X --hb-> Y, A --rf-> Y, and A --mo-> Z, then X should not read from Z. + */ +bool ModelExecution::mo_may_allow(const ModelAction *writer, const ModelAction *reader) +{ + SnapVector *thrd_lists = obj_thrd_map.get(reader->get_location()); + unsigned int i; + /* Iterate over all threads */ + for (i = 0; i < thrd_lists->size(); i++) { + const ModelAction *write_after_read = NULL; + + /* Iterate over actions in thread, starting from most recent */ + action_list_t *list = &(*thrd_lists)[i]; + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *act = *rit; + + /* Don't disallow due to act == reader */ + if (!reader->happens_before(act) || reader == act) + break; + else if (act->is_write()) + write_after_read = act; + else if (act->is_read() && act->get_reads_from() != NULL) + write_after_read = act->get_reads_from(); + } + + if (write_after_read && write_after_read != writer && mo_graph->checkReachable(write_after_read, writer)) + return false; + } + return true; +} + +/** + * Finds the head(s) of the release sequence(s) containing a given ModelAction. + * The ModelAction under consideration is expected to be taking part in + * release/acquire synchronization as an object of the "reads from" relation. + * Note that this can only provide release sequence support for RMW chains + * which do not read from the future, as those actions cannot be traced until + * their "promise" is fulfilled. Similarly, we may not even establish the + * presence of a release sequence with certainty, as some modification order + * constraints may be decided further in the future. Thus, this function + * "returns" two pieces of data: a pass-by-reference vector of @a release_heads + * and a boolean representing certainty. + * + * @param rf The action that might be part of a release sequence. Must be a + * write. + * @param release_heads A pass-by-reference style return parameter. 
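The canonical example of what these constraints exist to rule out is the out-of-thin-air pattern (a standard textbook example, not taken from this code base). Without some restriction on reading from the future, each relaxed load below could justify the other's value:

    #include <atomic>
    #include <thread>

    std::atomic<int> x(0), y(0);

    static void thread1()
    {
        int r1 = x.load(std::memory_order_relaxed);
        y.store(r1, std::memory_order_relaxed);
    }

    static void thread2()
    {
        int r2 = y.load(std::memory_order_relaxed);
        x.store(r2, std::memory_order_relaxed);
    }

    int main()
    {
        std::thread t1(thread1), t2(thread2);
        t1.join();
        t2.join();
        return 0;
    }

An outcome such as r1 == r2 == 42 would be "out of thin air": each load would read a value that exists only because the other load read it. Section 29.3 part 9 forbids such cycles, and thin_air_constraint_may_allow and mo_may_allow prune a subset of the reads-from candidates that would lead to them.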
After + * execution of this function, release_heads will contain the heads of all the + * relevant release sequences, if any exists with certainty + * @param pending A pass-by-reference style return parameter which is only used + * when returning false (i.e., uncertain). Returns most information regarding + * an uncertain release sequence, including any write operations that might + * break the sequence. + * @return true, if the ModelExecution is certain that release_heads is complete; + * false otherwise + */ +bool ModelExecution::release_seq_heads(const ModelAction *rf, + rel_heads_list_t *release_heads, + struct release_seq *pending) const +{ + /* Only check for release sequences if there are no cycles */ + if (mo_graph->checkForCycles()) + return false; + + for ( ; rf != NULL; rf = rf->get_reads_from()) { + ASSERT(rf->is_write()); + + if (rf->is_release()) + release_heads->push_back(rf); + else if (rf->get_last_fence_release()) + release_heads->push_back(rf->get_last_fence_release()); + if (!rf->is_rmw()) + break; /* End of RMW chain */ + + /** @todo Need to be smarter here... In the linux lock + * example, this will run to the beginning of the program for + * every acquire. */ + /** @todo The way to be smarter here is to keep going until 1 + * thread has a release preceded by an acquire and you've seen + * both. */ + + /* acq_rel RMW is a sufficient stopping condition */ + if (rf->is_acquire() && rf->is_release()) + return true; /* complete */ + }; + if (!rf) { + /* read from future: need to settle this later */ + pending->rf = NULL; + return false; /* incomplete */ + } + + if (rf->is_release()) + return true; /* complete */ + + /* else relaxed write + * - check for fence-release in the same thread (29.8, stmt. 3) + * - check modification order for contiguous subsequence + * -> rf must be same thread as release */ + + const ModelAction *fence_release = rf->get_last_fence_release(); + /* Synchronize with a fence-release unconditionally; we don't need to + * find any more "contiguous subsequence..." for it */ + if (fence_release) + release_heads->push_back(fence_release); + + int tid = id_to_int(rf->get_tid()); + SnapVector *thrd_lists = obj_thrd_map.get(rf->get_location()); + action_list_t *list = &(*thrd_lists)[tid]; + action_list_t::const_reverse_iterator rit; + + /* Find rf in the thread list */ + rit = std::find(list->rbegin(), list->rend(), rf); + ASSERT(rit != list->rend()); + + /* Find the last {write,fence}-release */ + for (; rit != list->rend(); rit++) { + if (fence_release && *(*rit) < *fence_release) + break; + if ((*rit)->is_release()) + break; + } + if (rit == list->rend()) { + /* No write-release in this thread */ + return true; /* complete */ + } else if (fence_release && *(*rit) < *fence_release) { + /* The fence-release is more recent (and so, "stronger") than + * the most recent write-release */ + return true; /* complete */ + } /* else, need to establish contiguous release sequence */ + ModelAction *release = *rit; + + ASSERT(rf->same_thread(release)); + + pending->writes.clear(); + + bool certain = true; + for (unsigned int i = 0; i < thrd_lists->size(); i++) { + if (id_to_int(rf->get_tid()) == (int)i) + continue; + list = &(*thrd_lists)[i]; + + /* Can we ensure no future writes from this thread may break + * the release seq? 
*/ + bool future_ordered = false; + + ModelAction *last = get_last_action(int_to_id(i)); + Thread *th = get_thread(int_to_id(i)); + if ((last && rf->happens_before(last)) || + !is_enabled(th) || + th->is_complete()) + future_ordered = true; + + ASSERT(!th->is_model_thread() || future_ordered); + + for (rit = list->rbegin(); rit != list->rend(); rit++) { + const ModelAction *act = *rit; + /* Reach synchronization -> this thread is complete */ + if (act->happens_before(release)) + break; + if (rf->happens_before(act)) { + future_ordered = true; + continue; + } + + /* Only non-RMW writes can break release sequences */ + if (!act->is_write() || act->is_rmw()) + continue; + + /* Check modification order */ + if (mo_graph->checkReachable(rf, act)) { + /* rf --mo--> act */ + future_ordered = true; + continue; + } + if (mo_graph->checkReachable(act, release)) + /* act --mo--> release */ + break; + if (mo_graph->checkReachable(release, act) && + mo_graph->checkReachable(act, rf)) { + /* release --mo-> act --mo--> rf */ + return true; /* complete */ + } + /* act may break release sequence */ + pending->writes.push_back(act); + certain = false; + } + if (!future_ordered) + certain = false; /* This thread is uncertain */ + } + + if (certain) { + release_heads->push_back(release); + pending->writes.clear(); + } else { + pending->release = release; + pending->rf = rf; + } + return certain; +} + +/** + * An interface for getting the release sequence head(s) with which a + * given ModelAction must synchronize. This function only returns a non-empty + * result when it can locate a release sequence head with certainty. Otherwise, + * it may mark the internal state of the ModelExecution so that it will handle + * the release sequence at a later time, causing @a acquire to update its + * synchronization at some later point in execution. + * + * @param acquire The 'acquire' action that may synchronize with a release + * sequence + * @param read The read action that may read from a release sequence; this may + * be the same as acquire, or else an earlier action in the same thread (i.e., + * when 'acquire' is a fence-acquire) + * @param release_heads A pass-by-reference return parameter. Will be filled + * with the head(s) of the release sequence(s), if they exists with certainty. + * @see ModelExecution::release_seq_heads + */ +void ModelExecution::get_release_seq_heads(ModelAction *acquire, + ModelAction *read, rel_heads_list_t *release_heads) +{ + const ModelAction *rf = read->get_reads_from(); + struct release_seq *sequence = (struct release_seq *)snapshot_calloc(1, sizeof(struct release_seq)); + sequence->acquire = acquire; + sequence->read = read; + + if (!release_seq_heads(rf, release_heads, sequence)) { + /* add act to 'lazy checking' list */ + pending_rel_seqs.push_back(sequence); + } else { + snapshot_free(sequence); + } +} + +/** + * @brief Propagate a modified clock vector to actions later in the execution + * order + * + * After an acquire operation lazily completes a release-sequence + * synchronization, we must update all clock vectors for operations later than + * the acquire in the execution order. 
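A small hypothetical program showing why release sequences matter for this analysis: the acquire load reads from a relaxed RMW rather than from the release store itself, yet under C++11 that RMW continues the release sequence headed by the store, so the load still synchronizes with it and release_seq_heads must report the release store as the head:

    #include <atomic>
    #include <thread>
    #include <cassert>

    int data = 0;
    std::atomic<int> flag(0);

    static void writer()
    {
        data = 7;
        flag.store(1, std::memory_order_release);        /* head of the release sequence */
    }

    static void middle()
    {
        flag.fetch_add(1, std::memory_order_relaxed);     /* RMW: continues the sequence */
    }

    static void reader()
    {
        if (flag.load(std::memory_order_acquire) == 2) {  /* reads from the RMW */
            /* Synchronizes with writer()'s release store, so the plain
             * read of 'data' is ordered after 'data = 7'. */
            assert(data == 7);
        }
    }

    int main()
    {
        std::thread t1(writer), t2(middle), t3(reader);
        t1.join();
        t2.join();
        t3.join();
        return 0;
    }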
+ * + * @param acquire The ModelAction whose clock vector must be propagated + * @param work The work queue to which we can add work items, if this + * propagation triggers more updates (e.g., to the modification order) + */ +void ModelExecution::propagate_clockvector(ModelAction *acquire, work_queue_t *work) +{ + /* Re-check all pending release sequences */ + work->push_back(CheckRelSeqWorkEntry(NULL)); + /* Re-check read-acquire for mo_graph edges */ + work->push_back(MOEdgeWorkEntry(acquire)); + + /* propagate synchronization to later actions */ + action_list_t::reverse_iterator rit = action_trace.rbegin(); + for (; (*rit) != acquire; rit++) { + ModelAction *propagate = *rit; + if (acquire->happens_before(propagate)) { + synchronize(acquire, propagate); + /* Re-check 'propagate' for mo_graph edges */ + work->push_back(MOEdgeWorkEntry(propagate)); + } + } +} + +/** + * Attempt to resolve all stashed operations that might synchronize with a + * release sequence for a given location. This implements the "lazy" portion of + * determining whether or not a release sequence was contiguous, since not all + * modification order information is present at the time an action occurs. + * + * @param location The location/object that should be checked for release + * sequence resolutions. A NULL value means to check all locations. + * @param work_queue The work queue to which to add work items as they are + * generated + * @return True if any updates occurred (new synchronization, new mo_graph + * edges) + */ +bool ModelExecution::resolve_release_sequences(void *location, work_queue_t *work_queue) +{ + bool updated = false; + SnapVector::iterator it = pending_rel_seqs.begin(); + while (it != pending_rel_seqs.end()) { + struct release_seq *pending = *it; + ModelAction *acquire = pending->acquire; + const ModelAction *read = pending->read; + + /* Only resolve sequences on the given location, if provided */ + if (location && read->get_location() != location) { + it++; + continue; + } + + const ModelAction *rf = read->get_reads_from(); + rel_heads_list_t release_heads; + bool complete; + complete = release_seq_heads(rf, &release_heads, pending); + for (unsigned int i = 0; i < release_heads.size(); i++) + if (!acquire->has_synchronized_with(release_heads[i])) + if (synchronize(release_heads[i], acquire)) + updated = true; + + if (updated) { + /* Propagate the changed clock vector */ + propagate_clockvector(acquire, work_queue); + } + if (complete) { + it = pending_rel_seqs.erase(it); + snapshot_free(pending); + } else { + it++; + } + } + + // If we resolved promises or data races, see if we have realized a data race. + checkDataRaces(); + + return updated; +} + +/** + * Performs various bookkeeping operations for the current ModelAction. For + * instance, adds action to the per-object, per-thread action vector and to the + * action trace list of all thread actions. + * + * @param act is the ModelAction to add. 
+ */ +void ModelExecution::add_action_to_lists(ModelAction *act) +{ + int tid = id_to_int(act->get_tid()); + ModelAction *uninit = NULL; + int uninit_id = -1; + action_list_t *list = get_safe_ptr_action(&obj_map, act->get_location()); + if (list->empty() && act->is_atomic_var()) { + uninit = get_uninitialized_action(act); + uninit_id = id_to_int(uninit->get_tid()); + list->push_front(uninit); + } + list->push_back(act); + + action_trace.push_back(act); + if (uninit) + action_trace.push_front(uninit); + + SnapVector *vec = get_safe_ptr_vect_action(&obj_thrd_map, act->get_location()); + if (tid >= (int)vec->size()) + vec->resize(priv->next_thread_id); + (*vec)[tid].push_back(act); + if (uninit) + (*vec)[uninit_id].push_front(uninit); + + if ((int)thrd_last_action.size() <= tid) + thrd_last_action.resize(get_num_threads()); + thrd_last_action[tid] = act; + if (uninit) + thrd_last_action[uninit_id] = uninit; + + if (act->is_fence() && act->is_release()) { + if ((int)thrd_last_fence_release.size() <= tid) + thrd_last_fence_release.resize(get_num_threads()); + thrd_last_fence_release[tid] = act; + } + + if (act->is_wait()) { + void *mutex_loc = (void *) act->get_value(); + get_safe_ptr_action(&obj_map, mutex_loc)->push_back(act); + + SnapVector *vec = get_safe_ptr_vect_action(&obj_thrd_map, mutex_loc); + if (tid >= (int)vec->size()) + vec->resize(priv->next_thread_id); + (*vec)[tid].push_back(act); + } +} + +/** + * @brief Get the last action performed by a particular Thread + * @param tid The thread ID of the Thread in question + * @return The last action in the thread + */ +ModelAction * ModelExecution::get_last_action(thread_id_t tid) const +{ + int threadid = id_to_int(tid); + if (threadid < (int)thrd_last_action.size()) + return thrd_last_action[id_to_int(tid)]; + else + return NULL; +} + +/** + * @brief Get the last fence release performed by a particular Thread + * @param tid The thread ID of the Thread in question + * @return The last fence release in the thread, if one exists; NULL otherwise + */ +ModelAction * ModelExecution::get_last_fence_release(thread_id_t tid) const +{ + int threadid = id_to_int(tid); + if (threadid < (int)thrd_last_fence_release.size()) + return thrd_last_fence_release[id_to_int(tid)]; + else + return NULL; +} + +/** + * Gets the last memory_order_seq_cst write (in the total global sequence) + * performed on a particular object (i.e., memory location), not including the + * current action. + * @param curr The current ModelAction; also denotes the object location to + * check + * @return The last seq_cst write + */ +ModelAction * ModelExecution::get_last_seq_cst_write(ModelAction *curr) const +{ + void *location = curr->get_location(); + action_list_t *list = obj_map.get(location); + /* Find: max({i in dom(S) | seq_cst(t_i) && isWrite(t_i) && samevar(t_i, t)}) */ + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); (*rit) != curr; rit++) + ; + rit++; /* Skip past curr */ + for ( ; rit != list->rend(); rit++) + if ((*rit)->is_write() && (*rit)->is_seqcst()) + return *rit; + return NULL; +} + +/** + * Gets the last memory_order_seq_cst fence (in the total global sequence) + * performed in a particular thread, prior to a particular fence. + * @param tid The ID of the thread to check + * @param before_fence The fence from which to begin the search; if NULL, then + * search for the most recent fence in the thread. 
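The effect of get_last_seq_cst_write shows up when a seq_cst load is preceded by several seq_cst stores to the same location. In the hypothetical program below, once both stores have executed, only the most recent seq_cst store remains a reads-from candidate for the load; the earlier seq_cst store, and anything happening before the last seq_cst store, is filtered out of the may-read-from set built later in this file:

    #include <atomic>
    #include <thread>

    std::atomic<int> x(0);

    static void writer()
    {
        x.store(1, std::memory_order_seq_cst);
        x.store(2, std::memory_order_seq_cst);
    }

    static void reader()
    {
        /* In an execution where both stores precede this load, the store
         * of 1 is excluded as a reads-from candidate; only the store of 2
         * (the last seq_cst write) survives. */
        int r = x.load(std::memory_order_seq_cst);
        (void)r;
    }

    int main()
    {
        std::thread t1(writer), t2(reader);
        t1.join();
        t2.join();
        return 0;
    }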
+ * @return The last prior seq_cst fence in the thread, if exists; otherwise, NULL + */ +ModelAction * ModelExecution::get_last_seq_cst_fence(thread_id_t tid, const ModelAction *before_fence) const +{ + /* All fences should have location FENCE_LOCATION */ + action_list_t *list = obj_map.get(FENCE_LOCATION); + + if (!list) + return NULL; + + action_list_t::reverse_iterator rit = list->rbegin(); + + if (before_fence) { + for (; rit != list->rend(); rit++) + if (*rit == before_fence) + break; + + ASSERT(*rit == before_fence); + rit++; + } + + for (; rit != list->rend(); rit++) + if ((*rit)->is_fence() && (tid == (*rit)->get_tid()) && (*rit)->is_seqcst()) + return *rit; + return NULL; +} + +/** + * Gets the last unlock operation performed on a particular mutex (i.e., memory + * location). This function identifies the mutex according to the current + * action, which is presumed to perform on the same mutex. + * @param curr The current ModelAction; also denotes the object location to + * check + * @return The last unlock operation + */ +ModelAction * ModelExecution::get_last_unlock(ModelAction *curr) const +{ + void *location = curr->get_location(); + action_list_t *list = obj_map.get(location); + /* Find: max({i in dom(S) | isUnlock(t_i) && samevar(t_i, t)}) */ + action_list_t::reverse_iterator rit; + for (rit = list->rbegin(); rit != list->rend(); rit++) + if ((*rit)->is_unlock() || (*rit)->is_wait()) + return *rit; + return NULL; +} + +ModelAction * ModelExecution::get_parent_action(thread_id_t tid) const +{ + ModelAction *parent = get_last_action(tid); + if (!parent) + parent = get_thread(tid)->get_creation(); + return parent; +} + +/** + * Returns the clock vector for a given thread. + * @param tid The thread whose clock vector we want + * @return Desired clock vector + */ +ClockVector * ModelExecution::get_cv(thread_id_t tid) const +{ + return get_parent_action(tid)->get_cv(); +} + +/** + * @brief Find the promise (if any) to resolve for the current action and + * remove it from the pending promise vector + * @param curr The current ModelAction. Should be a write. + * @return The Promise to resolve, if any; otherwise NULL + */ +Promise * ModelExecution::pop_promise_to_resolve(const ModelAction *curr) +{ + for (unsigned int i = 0; i < promises.size(); i++) + if (curr->get_node()->get_promise(i)) { + Promise *ret = promises[i]; + promises.erase(promises.begin() + i); + return ret; + } + return NULL; +} + +/** + * Resolve a Promise with a current write. + * @param write The ModelAction that is fulfilling Promises + * @param promise The Promise to resolve + * @param work The work queue, for adding new fixup work + * @return True if the Promise was successfully resolved; false otherwise + */ +bool ModelExecution::resolve_promise(ModelAction *write, Promise *promise, + work_queue_t *work) +{ + ModelVector actions_to_check; + + for (unsigned int i = 0; i < promise->get_num_readers(); i++) { + ModelAction *read = promise->get_reader(i); + if (read_from(read, write)) { + /* Propagate the changed clock vector */ + propagate_clockvector(read, work); + } + actions_to_check.push_back(read); + } + /* Make sure the promise's value matches the write's value */ + ASSERT(promise->is_compatible(write) && promise->same_value(write)); + if (!mo_graph->resolvePromise(promise, write)) + priv->failed_promise = true; + + /** + * @todo It is possible to end up in an inconsistent state, where a + * "resolved" promise may still be referenced if + * CycleGraph::resolvePromise() failed, so don't delete 'promise'. 
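Promises exist because C11/C++11 relaxed atomics allow outcomes in which a load observes a store the checker has not yet executed in the trace it is building. The standard load-buffering example (hypothetical, not taken from this code base) is one scenario that exercises this machinery:

    #include <atomic>
    #include <thread>

    std::atomic<int> x(0), y(0);

    static void thread1()
    {
        int r1 = x.load(std::memory_order_relaxed);
        y.store(1, std::memory_order_relaxed);
        (void)r1;
    }

    static void thread2()
    {
        int r2 = y.load(std::memory_order_relaxed);
        x.store(1, std::memory_order_relaxed);
        (void)r2;
    }

    int main()
    {
        std::thread t1(thread1), t2(thread2);
        t1.join();
        t2.join();
        return 0;
    }

The outcome r1 == r2 == 1 is permitted, but whichever load the checker executes first must then read from a store that does not yet exist in the partial trace; that outstanding obligation is what a Promise records, and it is later matched against the concrete store by resolve_promise.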
+ * + * Note that the inconsistency only matters when dumping mo_graph to + * file. + * + * delete promise; + */ + + //Check whether reading these writes has made threads unable to + //resolve promises + for (unsigned int i = 0; i < actions_to_check.size(); i++) { + ModelAction *read = actions_to_check[i]; + mo_check_promises(read, true); + } + + return true; +} + +/** + * Compute the set of promises that could potentially be satisfied by this + * action. Note that the set computation actually appears in the Node, not in + * ModelExecution. + * @param curr The ModelAction that may satisfy promises + */ +void ModelExecution::compute_promises(ModelAction *curr) +{ + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + if (!promise->is_compatible(curr) || !promise->same_value(curr)) + continue; + + bool satisfy = true; + for (unsigned int j = 0; j < promise->get_num_readers(); j++) { + const ModelAction *act = promise->get_reader(j); + if (act->happens_before(curr) || + act->could_synchronize_with(curr)) { + satisfy = false; + break; + } + } + if (satisfy) + curr->get_node()->set_promise(i); + } +} + +/** Checks promises in response to change in ClockVector Threads. */ +void ModelExecution::check_promises(thread_id_t tid, ClockVector *old_cv, ClockVector *merge_cv) +{ + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + if (!promise->thread_is_available(tid)) + continue; + for (unsigned int j = 0; j < promise->get_num_readers(); j++) { + const ModelAction *act = promise->get_reader(j); + if ((!old_cv || !old_cv->synchronized_since(act)) && + merge_cv->synchronized_since(act)) { + if (promise->eliminate_thread(tid)) { + /* Promise has failed */ + priv->failed_promise = true; + return; + } + } + } + } +} + +void ModelExecution::check_promises_thread_disabled() +{ + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + if (promise->has_failed()) { + priv->failed_promise = true; + return; + } + } +} + +/** + * @brief Checks promises in response to addition to modification order for + * threads. + * + * We test whether threads are still available for satisfying promises after an + * addition to our modification order constraints. Those that are unavailable + * are "eliminated". Once all threads are eliminated from satisfying a promise, + * that promise has failed. + * + * @param act The ModelAction which updated the modification order + * @param is_read_check Should be true if act is a read and we must check for + * updates to the store from which it read (there is a distinction here for + * RMW's, which are both a load and a store) + */ +void ModelExecution::mo_check_promises(const ModelAction *act, bool is_read_check) +{ + const ModelAction *write = is_read_check ? act->get_reads_from() : act; + + for (unsigned int i = 0; i < promises.size(); i++) { + Promise *promise = promises[i]; + + // Is this promise on the same location? 
+		if (!promise->same_location(write))
+			continue;
+
+		for (unsigned int j = 0; j < promise->get_num_readers(); j++) {
+			const ModelAction *pread = promise->get_reader(j);
+			if (!pread->happens_before(act))
+				continue;
+			if (mo_graph->checkPromise(write, promise)) {
+				priv->failed_promise = true;
+				return;
+			}
+			break;
+		}
+
+		// Don't do any lookups twice for the same thread
+		if (!promise->thread_is_available(act->get_tid()))
+			continue;
+
+		if (mo_graph->checkReachable(promise, write)) {
+			if (mo_graph->checkPromise(write, promise)) {
+				priv->failed_promise = true;
+				return;
+			}
+		}
+	}
+}
+
+/**
+ * Compute the set of writes that may break the current pending release
+ * sequence. This information is extracted from previous release sequence
+ * calculations.
+ *
+ * @param curr The current ModelAction. Must be a release sequence fixup
+ * action.
+ */
+void ModelExecution::compute_relseq_breakwrites(ModelAction *curr)
+{
+	if (pending_rel_seqs.empty())
+		return;
+
+	struct release_seq *pending = pending_rel_seqs.back();
+	for (unsigned int i = 0; i < pending->writes.size(); i++) {
+		const ModelAction *write = pending->writes[i];
+		curr->get_node()->add_relseq_break(write);
+	}
+
+	/* NULL means don't break the sequence; just synchronize */
+	curr->get_node()->add_relseq_break(NULL);
+}
+
+/**
+ * Build up an initial set of all past writes that this 'read' action may read
+ * from, as well as any previously-observed future values that must still be valid.
+ *
+ * @param curr is the current ModelAction that we are exploring; it must be a
+ * 'read' operation.
+ */
+void ModelExecution::build_may_read_from(ModelAction *curr)
+{
+	SnapVector<action_list_t> *thrd_lists = obj_thrd_map.get(curr->get_location());
+	unsigned int i;
+	ASSERT(curr->is_read());
+
+	ModelAction *last_sc_write = NULL;
+
+	if (curr->is_seqcst())
+		last_sc_write = get_last_seq_cst_write(curr);
+
+	/* Iterate over all threads */
+	for (i = 0; i < thrd_lists->size(); i++) {
+		/* Iterate over actions in thread, starting from most recent */
+		action_list_t *list = &(*thrd_lists)[i];
+		action_list_t::reverse_iterator rit;
+		for (rit = list->rbegin(); rit != list->rend(); rit++) {
+			ModelAction *act = *rit;
+
+			/* Only consider 'write' actions */
+			if (!act->is_write() || act == curr)
+				continue;
+
+			/* Don't consider more than one seq_cst write if we are a seq_cst read.
*/ + bool allow_read = true; + + if (curr->is_seqcst() && (act->is_seqcst() || (last_sc_write != NULL && act->happens_before(last_sc_write))) && act != last_sc_write) + allow_read = false; + else if (curr->get_sleep_flag() && !curr->is_seqcst() && !sleep_can_read_from(curr, act)) + allow_read = false; + + if (allow_read) { + /* Only add feasible reads */ + mo_graph->startChanges(); + r_modification_order(curr, act); + if (!is_infeasible()) + curr->get_node()->add_read_from_past(act); + mo_graph->rollbackChanges(); + } + + /* Include at most one act per-thread that "happens before" curr */ + if (act->happens_before(curr)) + break; + } + } + + /* Inherit existing, promised future values */ + for (i = 0; i < promises.size(); i++) { + const Promise *promise = promises[i]; + const ModelAction *promise_read = promise->get_reader(0); + if (promise_read->same_var(curr)) { + /* Only add feasible future-values */ + mo_graph->startChanges(); + r_modification_order(curr, promise); + if (!is_infeasible()) + curr->get_node()->add_read_from_promise(promise_read); + mo_graph->rollbackChanges(); + } + } + + /* We may find no valid may-read-from only if the execution is doomed */ + if (!curr->get_node()->read_from_size()) { + priv->no_valid_reads = true; + set_assert(); + } + + if (DBG_ENABLED()) { + model_print("Reached read action:\n"); + curr->print(); + model_print("Printing read_from_past\n"); + curr->get_node()->print_read_from_past(); + model_print("End printing read_from_past\n"); + } +} + +bool ModelExecution::sleep_can_read_from(ModelAction *curr, const ModelAction *write) +{ + for ( ; write != NULL; write = write->get_reads_from()) { + /* UNINIT actions don't have a Node, and they never sleep */ + if (write->is_uninitialized()) + return true; + Node *prevnode = write->get_node()->get_parent(); + + bool thread_sleep = prevnode->enabled_status(curr->get_tid()) == THREAD_SLEEP_SET; + if (write->is_release() && thread_sleep) + return true; + if (!write->is_rmw()) + return false; + } + return true; +} + +/** + * @brief Get an action representing an uninitialized atomic + * + * This function may create a new one or try to retrieve one from the NodeStack + * + * @param curr The current action, which prompts the creation of an UNINIT action + * @return A pointer to the UNINIT ModelAction + */ +ModelAction * ModelExecution::get_uninitialized_action(const ModelAction *curr) const +{ + Node *node = curr->get_node(); + ModelAction *act = node->get_uninit_action(); + if (!act) { + act = new ModelAction(ATOMIC_UNINIT, std::memory_order_relaxed, curr->get_location(), params->uninitvalue, model_thread); + node->set_uninit_action(act); + } + act->create_cv(NULL); + return act; +} + +static void print_list(const action_list_t *list) +{ + action_list_t::const_iterator it; + + model_print("------------------------------------------------------------------------------------\n"); + model_print("# t Action type MO Location Value Rf CV\n"); + model_print("------------------------------------------------------------------------------------\n"); + + unsigned int hash = 0; + + for (it = list->begin(); it != list->end(); it++) { + const ModelAction *act = *it; + if (act->get_seq_number() > 0) + act->print(); + hash = hash^(hash<<3)^((*it)->hash()); + } + model_print("HASH %u\n", hash); + model_print("------------------------------------------------------------------------------------\n"); +} + +#if SUPPORT_MOD_ORDER_DUMP +void ModelExecution::dumpGraph(char *filename) const +{ + char buffer[200]; + sprintf(buffer, "%s.dot", 
filename); + FILE *file = fopen(buffer, "w"); + fprintf(file, "digraph %s {\n", filename); + mo_graph->dumpNodes(file); + ModelAction **thread_array = (ModelAction **)model_calloc(1, sizeof(ModelAction *) * get_num_threads()); + + for (action_list_t::const_iterator it = action_trace.begin(); it != action_trace.end(); it++) { + ModelAction *act = *it; + if (act->is_read()) { + mo_graph->dot_print_node(file, act); + if (act->get_reads_from()) + mo_graph->dot_print_edge(file, + act->get_reads_from(), + act, + "label=\"rf\", color=red, weight=2"); + else + mo_graph->dot_print_edge(file, + act->get_reads_from_promise(), + act, + "label=\"rf\", color=red"); + } + if (thread_array[act->get_tid()]) { + mo_graph->dot_print_edge(file, + thread_array[id_to_int(act->get_tid())], + act, + "label=\"sb\", color=blue, weight=400"); + } + + thread_array[act->get_tid()] = act; + } + fprintf(file, "}\n"); + model_free(thread_array); + fclose(file); +} +#endif + +/** @brief Prints an execution trace summary. */ +void ModelExecution::print_summary() const +{ +#if SUPPORT_MOD_ORDER_DUMP + char buffername[100]; + sprintf(buffername, "exec%04u", get_execution_number()); + mo_graph->dumpGraphToFile(buffername); + sprintf(buffername, "graph%04u", get_execution_number()); + dumpGraph(buffername); +#endif + + model_print("Execution trace %d:", get_execution_number()); + if (isfeasibleprefix()) { + if (is_yieldblocked()) + model_print(" YIELD BLOCKED"); + if (scheduler->all_threads_sleeping()) + model_print(" SLEEP-SET REDUNDANT"); + if (have_bug_reports()) + model_print(" DETECTED BUG(S)"); + } else + print_infeasibility(" INFEASIBLE"); + model_print("\n"); + + print_list(&action_trace); + model_print("\n"); + + if (!promises.empty()) { + model_print("Pending promises:\n"); + for (unsigned int i = 0; i < promises.size(); i++) { + model_print(" [P%u] ", i); + promises[i]->print(); + } + model_print("\n"); + } +} + +/** + * Add a Thread to the system for the first time. Should only be called once + * per thread. + * @param t The Thread to add + */ +void ModelExecution::add_thread(Thread *t) +{ + unsigned int i = id_to_int(t->get_id()); + if (i >= thread_map.size()) + thread_map.resize(i + 1); + thread_map[i] = t; + if (!t->is_model_thread()) + scheduler->add_thread(t); +} + +/** + * @brief Get a Thread reference by its ID + * @param tid The Thread's ID + * @return A Thread reference + */ +Thread * ModelExecution::get_thread(thread_id_t tid) const +{ + unsigned int i = id_to_int(tid); + if (i < thread_map.size()) + return thread_map[i]; + return NULL; +} + +/** + * @brief Get a reference to the Thread in which a ModelAction was executed + * @param act The ModelAction + * @return A Thread reference + */ +Thread * ModelExecution::get_thread(const ModelAction *act) const +{ + return get_thread(act->get_tid()); +} + +/** + * @brief Get a Promise's "promise number" + * + * A "promise number" is an index number that is unique to a promise, valid + * only for a specific snapshot of an execution trace. Promises may come and go + * as they are generated an resolved, so an index only retains meaning for the + * current snapshot. 
+ *
+ * @param promise The Promise to check
+ * @return The promise index, if the promise is still valid; otherwise -1
+ */
+int ModelExecution::get_promise_number(const Promise *promise) const
+{
+	for (unsigned int i = 0; i < promises.size(); i++)
+		if (promises[i] == promise)
+			return i;
+	/* Not found */
+	return -1;
+}
+
+/**
+ * @brief Check if a Thread is currently enabled
+ * @param t The Thread to check
+ * @return True if the Thread is currently enabled
+ */
+bool ModelExecution::is_enabled(Thread *t) const
+{
+	return scheduler->is_enabled(t);
+}
+
+/**
+ * @brief Check if a Thread is currently enabled
+ * @param tid The ID of the Thread to check
+ * @return True if the Thread is currently enabled
+ */
+bool ModelExecution::is_enabled(thread_id_t tid) const
+{
+	return scheduler->is_enabled(tid);
+}
+
+/**
+ * @brief Select the next thread to execute based on the current action
+ *
+ * RMW actions occur in two parts, and we cannot split them. And THREAD_CREATE
+ * actions should be followed by the execution of their child thread. In either
+ * case, the current action should determine the next thread schedule.
+ *
+ * @param curr The current action
+ * @return The next thread to run, if the current action will determine this
+ * selection; otherwise NULL
+ */
+Thread * ModelExecution::action_select_next_thread(const ModelAction *curr) const
+{
+	/* Do not split atomic RMW */
+	if (curr->is_rmwr())
+		return get_thread(curr);
+	/* Follow CREATE with the created thread */
+	if (curr->get_type() == THREAD_CREATE)
+		return curr->get_thread_operand();
+	return NULL;
+}
+
+/** @return True if the execution has taken too many steps */
+bool ModelExecution::too_many_steps() const
+{
+	return params->bound != 0 && priv->used_sequence_numbers > params->bound;
+}
+
+/**
+ * Takes the next step in the execution, if possible.
+ * @param curr The current step to take
+ * @return Returns the next Thread to run, if any; NULL if this execution
+ * should terminate
+ */
+Thread * ModelExecution::take_step(ModelAction *curr)
+{
+	Thread *curr_thrd = get_thread(curr);
+	ASSERT(curr_thrd->get_state() == THREAD_READY);
+
+	ASSERT(check_action_enabled(curr)); /* May have side effects?
*/ + curr = check_current_action(curr); + ASSERT(curr); + + if (curr_thrd->is_blocked() || curr_thrd->is_complete()) + scheduler->remove_thread(curr_thrd); + + return action_select_next_thread(curr); +} + +/** + * Launch end-of-execution release sequence fixups only when + * the execution is otherwise feasible AND there are: + * + * (1) pending release sequences + * (2) pending assertions that could be invalidated by a change + * in clock vectors (i.e., data races) + * (3) no pending promises + */ +void ModelExecution::fixup_release_sequences() +{ + while (!pending_rel_seqs.empty() && + is_feasible_prefix_ignore_relseq() && + haveUnrealizedRaces()) { + model_print("*** WARNING: release sequence fixup action " + "(%zu pending release seuqence(s)) ***\n", + pending_rel_seqs.size()); + ModelAction *fixup = new ModelAction(MODEL_FIXUP_RELSEQ, + std::memory_order_seq_cst, NULL, VALUE_NONE, + model_thread); + take_step(fixup); + }; +} diff --git a/execution.h b/execution.h new file mode 100644 index 0000000..9c9c1ca --- /dev/null +++ b/execution.h @@ -0,0 +1,254 @@ +/** @file execution.h + * @brief Model-checker core + */ + +#ifndef __EXECUTION_H__ +#define __EXECUTION_H__ + +#include +#include + +#include "mymemory.h" +#include "hashtable.h" +#include "workqueue.h" +#include "config.h" +#include "modeltypes.h" +#include "stl-model.h" +#include "params.h" + +/* Forward declaration */ +class Node; +class NodeStack; +class CycleGraph; +class Promise; +class Scheduler; +class Thread; +class ClockVector; +struct model_snapshot_members; +class ModelChecker; +struct bug_message; + +/** @brief Shorthand for a list of release sequence heads */ +typedef ModelVector rel_heads_list_t; +typedef SnapList action_list_t; + +struct PendingFutureValue { + PendingFutureValue(ModelAction *writer, ModelAction *reader) : + writer(writer), reader(reader) + { } + const ModelAction *writer; + ModelAction *reader; +}; + +/** @brief Records information regarding a single pending release sequence */ +struct release_seq { + /** @brief The acquire operation */ + ModelAction *acquire; + /** @brief The read operation that may read from a release sequence; + * may be the same as acquire, or else an earlier action in the same + * thread (i.e., when 'acquire' is a fence-acquire) */ + const ModelAction *read; + /** @brief The head of the RMW chain from which 'read' reads; may be + * equal to 'release' */ + const ModelAction *rf; + /** @brief The head of the potential longest release sequence chain */ + const ModelAction *release; + /** @brief The write(s) that may break the release sequence */ + SnapVector writes; +}; + +/** @brief The central structure for model-checking */ +class ModelExecution { +public: + ModelExecution(ModelChecker *m, + const struct model_params *params, + Scheduler *scheduler, + NodeStack *node_stack); + ~ModelExecution(); + + const struct model_params * get_params() const { return params; } + + Thread * take_step(ModelAction *curr); + void fixup_release_sequences(); + + void print_summary() const; +#if SUPPORT_MOD_ORDER_DUMP + void dumpGraph(char *filename) const; +#endif + + void add_thread(Thread *t); + Thread * get_thread(thread_id_t tid) const; + Thread * get_thread(const ModelAction *act) const; + int get_promise_number(const Promise *promise) const; + + bool is_enabled(Thread *t) const; + bool is_enabled(thread_id_t tid) const; + + thread_id_t get_next_id(); + unsigned int get_num_threads() const; + + ClockVector * get_cv(thread_id_t tid) const; + ModelAction * get_parent_action(thread_id_t tid) 
const; + void check_promises_thread_disabled(); + bool isfeasibleprefix() const; + + action_list_t * get_actions_on_obj(void * obj, thread_id_t tid) const; + ModelAction * get_last_action(thread_id_t tid) const; + + bool check_action_enabled(ModelAction *curr); + + bool assert_bug(const char *msg); + bool have_bug_reports() const; + SnapVector * get_bugs() const; + + bool has_asserted() const; + void set_assert(); + bool is_complete_execution() const; + + void print_infeasibility(const char *prefix) const; + bool is_feasible_prefix_ignore_relseq() const; + bool is_infeasible() const; + bool is_deadlocked() const; + bool is_yieldblocked() const; + bool too_many_steps() const; + + ModelAction * get_next_backtrack(); + + action_list_t * get_action_trace() { return &action_trace; } + + SNAPSHOTALLOC +private: + int get_execution_number() const; + + ModelChecker *model; + + const model_params * const params; + + /** The scheduler to use: tracks the running/ready Threads */ + Scheduler * const scheduler; + + bool sleep_can_read_from(ModelAction *curr, const ModelAction *write); + bool thin_air_constraint_may_allow(const ModelAction *writer, const ModelAction *reader) const; + bool mo_may_allow(const ModelAction *writer, const ModelAction *reader); + bool promises_may_allow(const ModelAction *writer, const ModelAction *reader) const; + void set_bad_synchronization(); + bool promises_expired() const; + bool should_wake_up(const ModelAction *curr, const Thread *thread) const; + void wake_up_sleeping_actions(ModelAction *curr); + modelclock_t get_next_seq_num(); + + bool next_execution(); + ModelAction * check_current_action(ModelAction *curr); + bool initialize_curr_action(ModelAction **curr); + bool process_read(ModelAction *curr); + bool process_write(ModelAction *curr, work_queue_t *work); + bool process_fence(ModelAction *curr); + bool process_mutex(ModelAction *curr); + bool process_thread_action(ModelAction *curr); + void process_relseq_fixup(ModelAction *curr, work_queue_t *work_queue); + bool read_from(ModelAction *act, const ModelAction *rf); + bool synchronize(const ModelAction *first, ModelAction *second); + + template + bool check_recency(ModelAction *curr, const T *rf) const; + + template + bool should_read_instead(const ModelAction *curr, const T *rf, const U *other_rf) const; + + ModelAction * get_last_fence_conflict(ModelAction *act) const; + ModelAction * get_last_conflict(ModelAction *act) const; + void set_backtracking(ModelAction *act); + bool set_latest_backtrack(ModelAction *act); + Promise * pop_promise_to_resolve(const ModelAction *curr); + bool resolve_promise(ModelAction *curr, Promise *promise, + work_queue_t *work); + void compute_promises(ModelAction *curr); + void compute_relseq_breakwrites(ModelAction *curr); + + void check_promises(thread_id_t tid, ClockVector *old_cv, ClockVector *merge_cv); + void mo_check_promises(const ModelAction *act, bool is_read_check); + void thread_blocking_check_promises(Thread *blocker, Thread *waiting); + + void check_curr_backtracking(ModelAction *curr); + void add_action_to_lists(ModelAction *act); + ModelAction * get_last_fence_release(thread_id_t tid) const; + ModelAction * get_last_seq_cst_write(ModelAction *curr) const; + ModelAction * get_last_seq_cst_fence(thread_id_t tid, const ModelAction *before_fence) const; + ModelAction * get_last_unlock(ModelAction *curr) const; + void build_may_read_from(ModelAction *curr); + ModelAction * process_rmw(ModelAction *curr); + + template + bool r_modification_order(ModelAction *curr, const 
rf_type *rf); + + bool w_modification_order(ModelAction *curr, ModelVector *send_fv); + void get_release_seq_heads(ModelAction *acquire, ModelAction *read, rel_heads_list_t *release_heads); + bool release_seq_heads(const ModelAction *rf, rel_heads_list_t *release_heads, struct release_seq *pending) const; + void propagate_clockvector(ModelAction *acquire, work_queue_t *work); + bool resolve_release_sequences(void *location, work_queue_t *work_queue); + void add_future_value(const ModelAction *writer, ModelAction *reader); + + ModelAction * get_uninitialized_action(const ModelAction *curr) const; + + action_list_t action_trace; + SnapVector thread_map; + + /** Per-object list of actions. Maps an object (i.e., memory location) + * to a trace of all actions performed on the object. */ + HashTable obj_map; + + /** Per-object list of actions. Maps an object (i.e., memory location) + * to a trace of all actions performed on the object. */ + HashTable condvar_waiters_map; + + HashTable *, uintptr_t, 4> obj_thrd_map; + + /** + * @brief List of currently-pending promises + * + * Promises are sorted by the execution order of the read(s) which + * created them + */ + SnapVector promises; + SnapVector futurevalues; + + /** + * List of pending release sequences. Release sequences might be + * determined lazily as promises are fulfilled and modification orders + * are established. Each entry in the list may only be partially + * filled, depending on its pending status. + */ + SnapVector pending_rel_seqs; + + SnapVector thrd_last_action; + SnapVector thrd_last_fence_release; + NodeStack * const node_stack; + + /** A special model-checker Thread; used for associating with + * model-checker-related ModelAcitons */ + Thread *model_thread; + + /** Private data members that should be snapshotted. They are grouped + * together for efficiency and maintainability. */ + struct model_snapshot_members * const priv; + + /** + * @brief The modification order graph + * + * A directed acyclic graph recording observations of the modification + * order on all the atomic objects in the system. This graph should + * never contain any cycles, as that represents a violation of the + * memory model (total ordering). This graph really consists of many + * disjoint (unconnected) subgraphs, each graph corresponding to a + * separate ordering on a distinct object. + * + * The edges in this graph represent the "ordered before" relation, + * such that a --> b means a was ordered before + * b. + */ + CycleGraph * const mo_graph; + + Thread * action_select_next_thread(const ModelAction *curr) const; +}; + +#endif /* __EXECUTION_H__ */ diff --git a/hashtable.h b/hashtable.h new file mode 100644 index 0000000..2802eab --- /dev/null +++ b/hashtable.h @@ -0,0 +1,222 @@ +/** @file hashtable.h + * @brief Hashtable. Standard chained bucket variety. + */ + +#ifndef __HASHTABLE_H__ +#define __HASHTABLE_H__ + +#include +#include +#include +#include "mymemory.h" +#include "common.h" + +/** + * @brief HashTable node + * + * @tparam _Key Type name for the key + * @tparam _Val Type name for the values to be stored + */ +template +struct hashlistnode { + _Key key; + _Val val; +}; + +/** + * @brief A simple, custom hash table + * + * By default it is snapshotting, but you can pass in your own allocation + * functions. Note that this table does not support the value 0 (NULL) used as + * a key and is designed primarily with pointer-based keys in mind. Other + * primitive key types are supported only for non-zero values. 
+ *
+ * @tparam _Key Type name for the key
+ * @tparam _Val Type name for the values to be stored
+ * @tparam _KeyInt Integer type that is at least as large as _Key. Used for key
+ * manipulation and storage.
+ * @tparam _Shift Logical shift to apply to all keys. Default 0.
+ * @tparam _malloc Provide your own 'malloc' for the table, or default to
+ * snapshotting.
+ * @tparam _calloc Provide your own 'calloc' for the table, or default to
+ * snapshotting.
+ * @tparam _free Provide your own 'free' for the table, or default to
+ * snapshotting.
+ */
+template<typename _Key, typename _Val, typename _KeyInt, int _Shift = 0,
+ void * (* _malloc)(size_t) = snapshot_malloc,
+ void * (* _calloc)(size_t, size_t) = snapshot_calloc,
+ void (*_free)(void *) = snapshot_free>
+class HashTable {
+ public:
+ /**
+ * @brief Hash table constructor
+ * @param initialcapacity Sets the initial capacity of the hash table.
+ * Default size 1024.
+ * @param factor Sets the percentage full before the hashtable is
+ * resized. Default ratio 0.5.
+ */
+ HashTable(unsigned int initialcapacity = 1024, double factor = 0.5) {
+ // Allocate space for the hash table
+ table = (struct hashlistnode<_Key, _Val> *)_calloc(initialcapacity, sizeof(struct hashlistnode<_Key, _Val>));
+ loadfactor = factor;
+ capacity = initialcapacity;
+ capacitymask = initialcapacity - 1;
+
+ threshold = (unsigned int)(initialcapacity * loadfactor);
+ size = 0; // Initial number of elements in the hash
+ }
+
+ /** @brief Hash table destructor */
+ ~HashTable() {
+ _free(table);
+ }
+
+ /** Override: new operator */
+ void * operator new(size_t size) {
+ return _malloc(size);
+ }
+
+ /** Override: delete operator */
+ void operator delete(void *p, size_t size) {
+ _free(p);
+ }
+
+ /** Override: new[] operator */
+ void * operator new[](size_t size) {
+ return _malloc(size);
+ }
+
+ /** Override: delete[] operator */
+ void operator delete[](void *p, size_t size) {
+ _free(p);
+ }
+
+ /** @brief Reset the table to its initial state.
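 *
 * Clears every stored entry and resets the element count to zero; the
 * already-allocated bucket array is kept, so the capacity is unchanged.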
*/ + void reset() { + memset(table, 0, capacity * sizeof(struct hashlistnode<_Key, _Val>)); + size = 0; + } + + /** + * @brief Put a key/value pair into the table + * @param key The key for the new value; must not be 0 or NULL + * @param val The value to store in the table + */ + void put(_Key key, _Val val) { + /* HashTable cannot handle 0 as a key */ + ASSERT(key); + + if (size > threshold) + resize(capacity << 1); + + struct hashlistnode<_Key, _Val> *search; + + unsigned int index = ((_KeyInt)key) >> _Shift; + do { + index &= capacitymask; + search = &table[index]; + if (search->key == key) { + search->val = val; + return; + } + index++; + } while (search->key); + + search->key = key; + search->val = val; + size++; + } + + /** + * @brief Lookup the corresponding value for the given key + * @param key The key for finding the value; must not be 0 or NULL + * @return The value in the table, if the key is found; otherwise 0 + */ + _Val get(_Key key) const { + struct hashlistnode<_Key, _Val> *search; + + /* HashTable cannot handle 0 as a key */ + ASSERT(key); + + unsigned int index = ((_KeyInt)key) >> _Shift; + do { + index &= capacitymask; + search = &table[index]; + if (search->key == key) + return search->val; + index++; + } while (search->key); + return (_Val)0; + } + + /** + * @brief Check whether the table contains a value for the given key + * @param key The key for finding the value; must not be 0 or NULL + * @return True, if the key is found; false otherwise + */ + bool contains(_Key key) const { + struct hashlistnode<_Key, _Val> *search; + + /* HashTable cannot handle 0 as a key */ + ASSERT(key); + + unsigned int index = ((_KeyInt)key) >> _Shift; + do { + index &= capacitymask; + search = &table[index]; + if (search->key == key) + return true; + index++; + } while (search->key); + return false; + } + + /** + * @brief Resize the table + * @param newsize The new size of the table + */ + void resize(unsigned int newsize) { + struct hashlistnode<_Key, _Val> *oldtable = table; + struct hashlistnode<_Key, _Val> *newtable; + unsigned int oldcapacity = capacity; + + if ((newtable = (struct hashlistnode<_Key, _Val> *)_calloc(newsize, sizeof(struct hashlistnode<_Key, _Val>))) == NULL) { + model_print("calloc error %s %d\n", __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + + table = newtable; // Update the global hashtable upon resize() + capacity = newsize; + capacitymask = newsize - 1; + + threshold = (unsigned int)(newsize * loadfactor); + + struct hashlistnode<_Key, _Val> *bin = &oldtable[0]; + struct hashlistnode<_Key, _Val> *lastbin = &oldtable[oldcapacity]; + for (; bin < lastbin; bin++) { + _Key key = bin->key; + + struct hashlistnode<_Key, _Val> *search; + + unsigned int index = ((_KeyInt)key) >> _Shift; + do { + index &= capacitymask; + search = &table[index]; + index++; + } while (search->key); + + search->key = key; + search->val = bin->val; + } + + _free(oldtable); // Free the memory of the old hash table + } + + private: + struct hashlistnode<_Key, _Val> *table; + unsigned int capacity; + unsigned int size; + unsigned int capacitymask; + unsigned int threshold; + double loadfactor; +}; + +#endif /* __HASHTABLE_H__ */ diff --git a/impatomic.cc b/impatomic.cc new file mode 100644 index 0000000..2d48989 --- /dev/null +++ b/impatomic.cc @@ -0,0 +1,36 @@ +#include "impatomic.h" +#include "common.h" +#include "model.h" +#include "threads-model.h" +#include "action.h" + +namespace std { + +bool atomic_flag_test_and_set_explicit ( volatile atomic_flag * __a__, memory_order __x__ ) { + 
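 /*
  * Test-and-set is modeled as a two-step RMW: the ATOMIC_RMWR action reads
  * the current flag value through the model checker, and the ATOMIC_RMW
  * action then writes 'true' back to the same location; switch_to_master()
  * yields to the model checker so it can record and order each action.
  */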
volatile bool * __p__ = &((__a__)->__f__);
+ bool result = (bool) model->switch_to_master(new ModelAction(ATOMIC_RMWR, __x__, (void *) __p__));
+ model->switch_to_master(new ModelAction(ATOMIC_RMW, __x__, (void *) __p__, true));
+ return result;
+}
+
+bool atomic_flag_test_and_set( volatile atomic_flag* __a__ )
+{ return atomic_flag_test_and_set_explicit( __a__, memory_order_seq_cst ); }
+
+void atomic_flag_clear_explicit
+( volatile atomic_flag* __a__, memory_order __x__ )
+{
+ volatile bool * __p__ = &((__a__)->__f__);
+ model->switch_to_master(new ModelAction(ATOMIC_WRITE, __x__, (void *) __p__, false));
+}
+
+void atomic_flag_clear( volatile atomic_flag* __a__ )
+{ atomic_flag_clear_explicit( __a__, memory_order_seq_cst ); }
+
+void __atomic_flag_wait__( volatile atomic_flag* __a__ )
+{ while ( atomic_flag_test_and_set( __a__ ) ); }
+
+void __atomic_flag_wait_explicit__( volatile atomic_flag* __a__,
+ memory_order __x__ )
+{ while ( atomic_flag_test_and_set_explicit( __a__, __x__ ) ); }
+
+}
diff --git a/include/atomic b/include/atomic
new file mode 100644
index 0000000..5984e72
--- /dev/null
+++ b/include/atomic
@@ -0,0 +1,11 @@
+/**
+ * @file atomic
+ * @brief C++11 atomic interface header
+ */
+
+#ifndef __CXX_ATOMIC__
+#define __CXX_ATOMIC__
+
+#include "impatomic.h"
+
+#endif /* __CXX_ATOMIC__ */
diff --git a/include/cdsannotate.h b/include/cdsannotate.h
new file mode 100644
index 0000000..bb6e3d6
--- /dev/null
+++ b/include/cdsannotate.h
@@ -0,0 +1,7 @@
+#ifndef CDS_ANNOTATE_H
+#define CDS_ANNOTATE_H
+#include <stdint.h>
+
+void cdsannotate(uint64_t analysistype, void *annotation);
+
+#endif
diff --git a/include/cmodelint.h b/include/cmodelint.h
new file mode 100644
index 0000000..24c5f6f
--- /dev/null
+++ b/include/cmodelint.h
@@ -0,0 +1,28 @@
+/** @file cmodelint.h
+ * @brief C interface to the model checker.
+ */
+
+#ifndef CMODELINT_H
+#define CMODELINT_H
+#include <inttypes.h>
+#include "memoryorder.h"
+
+#if __cplusplus
+using std::memory_order;
+extern "C" {
+#endif
+
+uint64_t model_read_action(void * obj, memory_order ord);
+void model_write_action(void * obj, memory_order ord, uint64_t val);
+void model_init_action(void * obj, uint64_t val);
+uint64_t model_rmwr_action(void *obj, memory_order ord);
+void model_rmw_action(void *obj, memory_order ord, uint64_t val);
+void model_rmwc_action(void *obj, memory_order ord);
+void model_fence_action(memory_order ord);
+
+
+#if __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/condition_variable b/include/condition_variable
new file mode 100644
index 0000000..2a7447b
--- /dev/null
+++ b/include/condition_variable
@@ -0,0 +1,24 @@
+#ifndef __CXX_CONDITION_VARIABLE__
+#define __CXX_CONDITION_VARIABLE__
+
+namespace std {
+ class mutex;
+
+ struct condition_variable_state {
+ int reserved;
+ };
+
+ class condition_variable {
+ public:
+ condition_variable();
+ ~condition_variable();
+ void notify_one();
+ void notify_all();
+ void wait(mutex& lock);
+
+ private:
+ struct condition_variable_state state;
+ };
+}
+
+#endif /* __CXX_CONDITION_VARIABLE__ */
diff --git a/include/cstdatomic b/include/cstdatomic
new file mode 100644
index 0000000..b441097
--- /dev/null
+++ b/include/cstdatomic
@@ -0,0 +1,7 @@
+/**
+ * @file cstdatomic
+ * @brief C11 atomic interface header
+ */
+
+#include "impatomic.h"
+
diff --git a/include/impatomic.h b/include/impatomic.h
new file mode 100644
index 0000000..1b9ce6b
--- /dev/null
+++ b/include/impatomic.h
@@ -0,0 +1,3923 @@
+/**
+ * @file impatomic.h
+ * @brief Common header for C11/C++11 atomics
+ *
+ * Note that some features are unavailable, as they require support from a true
+ * C11/C++11 compiler.
+ */
+
+#ifndef __IMPATOMIC_H__
+#define __IMPATOMIC_H__
+
+#include "memoryorder.h"
+#include "cmodelint.h"
+
+#ifdef __cplusplus
+namespace std {
+#else
+#include <stdbool.h>
+#endif
+
+#define CPP0X( feature )
+
+typedef struct atomic_flag
+{
+#ifdef __cplusplus
+ bool test_and_set( memory_order = memory_order_seq_cst ) volatile;
+ void clear( memory_order = memory_order_seq_cst ) volatile;
+
+ CPP0X( atomic_flag() = default; )
+ CPP0X( atomic_flag( const atomic_flag& ) = delete; )
+ atomic_flag& operator =( const atomic_flag& ) CPP0X(=delete);
+
+CPP0X(private:)
+#endif
+ bool __f__;
+} atomic_flag;
+
+#define ATOMIC_FLAG_INIT { false }
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern bool atomic_flag_test_and_set( volatile atomic_flag* );
+extern bool atomic_flag_test_and_set_explicit
+( volatile atomic_flag*, memory_order );
+extern void atomic_flag_clear( volatile atomic_flag* );
+extern void atomic_flag_clear_explicit
+( volatile atomic_flag*, memory_order );
+extern void __atomic_flag_wait__
+( volatile atomic_flag* );
+extern void __atomic_flag_wait_explicit__
+( volatile atomic_flag*, memory_order );
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+
+inline bool atomic_flag::test_and_set( memory_order __x__ ) volatile
+{ return atomic_flag_test_and_set_explicit( this, __x__ ); }
+
+inline void atomic_flag::clear( memory_order __x__ ) volatile
+{ atomic_flag_clear_explicit( this, __x__ ); }
+
+#endif
+
+
+/*
+ The remainder of the example implementation uses the following
+ macros. These macros exploit GNU extensions for value-returning
+ blocks (AKA statement expressions) and __typeof__.
+
+ The macros rely on data fields of atomic structs being named __f__.
+ Other symbols used are __a__=atomic, __e__=expected, __f__=field, + __g__=flag, __m__=modified, __o__=operation, __r__=result, + __p__=pointer to field, __v__=value (for single evaluation), + __x__=memory-ordering, and __y__=memory-ordering. +*/ + +#define _ATOMIC_LOAD_( __a__, __x__ ) \ + ({ volatile __typeof__((__a__)->__f__)* __p__ = & ((__a__)->__f__); \ + __typeof__((__a__)->__f__) __r__ = (__typeof__((__a__)->__f__))model_read_action((void *)__p__, __x__); \ + __r__; }) + +#define _ATOMIC_STORE_( __a__, __m__, __x__ ) \ + ({ volatile __typeof__((__a__)->__f__)* __p__ = & ((__a__)->__f__); \ + __typeof__(__m__) __v__ = (__m__); \ + model_write_action((void *) __p__, __x__, (uint64_t) __v__); \ + __v__ = __v__; /* Silence clang (-Wunused-value) */ \ + }) + + +#define _ATOMIC_INIT_( __a__, __m__ ) \ + ({ volatile __typeof__((__a__)->__f__)* __p__ = & ((__a__)->__f__); \ + __typeof__(__m__) __v__ = (__m__); \ + model_init_action((void *) __p__, (uint64_t) __v__); \ + __v__ = __v__; /* Silence clang (-Wunused-value) */ \ + }) + +#define _ATOMIC_MODIFY_( __a__, __o__, __m__, __x__ ) \ + ({ volatile __typeof__((__a__)->__f__)* __p__ = & ((__a__)->__f__); \ + __typeof__((__a__)->__f__) __old__=(__typeof__((__a__)->__f__)) model_rmwr_action((void *)__p__, __x__); \ + __typeof__(__m__) __v__ = (__m__); \ + __typeof__((__a__)->__f__) __copy__= __old__; \ + __copy__ __o__ __v__; \ + model_rmw_action((void *)__p__, __x__, (uint64_t) __copy__); \ + __old__ = __old__; /* Silence clang (-Wunused-value) */ \ + }) + +/* No spurious failure for now */ +#define _ATOMIC_CMPSWP_WEAK_ _ATOMIC_CMPSWP_ + +#define _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ) \ + ({ volatile __typeof__((__a__)->__f__)* __p__ = & ((__a__)->__f__); \ + __typeof__(__e__) __q__ = (__e__); \ + __typeof__(__m__) __v__ = (__m__); \ + bool __r__; \ + __typeof__((__a__)->__f__) __t__=(__typeof__((__a__)->__f__)) model_rmwr_action((void *)__p__, __x__); \ + if (__t__ == * __q__ ) { \ + model_rmw_action((void *)__p__, __x__, (uint64_t) __v__); __r__ = true; } \ + else { model_rmwc_action((void *)__p__, __x__); *__q__ = __t__; __r__ = false;} \ + __r__; }) + +#define _ATOMIC_FENCE_( __x__ ) \ + ({ model_fence_action(__x__);}) + + +#define ATOMIC_CHAR_LOCK_FREE 1 +#define ATOMIC_CHAR16_T_LOCK_FREE 1 +#define ATOMIC_CHAR32_T_LOCK_FREE 1 +#define ATOMIC_WCHAR_T_LOCK_FREE 1 +#define ATOMIC_SHORT_LOCK_FREE 1 +#define ATOMIC_INT_LOCK_FREE 1 +#define ATOMIC_LONG_LOCK_FREE 1 +#define ATOMIC_LLONG_LOCK_FREE 1 +#define ATOMIC_ADDRESS_LOCK_FREE 1 + +typedef struct atomic_bool +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( bool, memory_order = memory_order_seq_cst ) volatile; + bool load( memory_order = memory_order_seq_cst ) volatile; + bool exchange( bool, memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak ( bool&, bool, memory_order, memory_order ) volatile; + bool compare_exchange_strong ( bool&, bool, memory_order, memory_order ) volatile; + bool compare_exchange_weak ( bool&, bool, + memory_order = memory_order_seq_cst) volatile; + bool compare_exchange_strong ( bool&, bool, + memory_order = memory_order_seq_cst) volatile; + + CPP0X( atomic_bool() = delete; ) + CPP0X( constexpr explicit atomic_bool( bool __v__ ) : __f__( __v__ ) { } ) + CPP0X( atomic_bool( const atomic_bool& ) = delete; ) + atomic_bool& operator =( const atomic_bool& ) CPP0X(=delete); + + bool operator =( bool __v__ ) volatile + { store( __v__ ); return __v__; } + + friend void atomic_store_explicit( volatile 
atomic_bool*, bool, + memory_order ); + friend bool atomic_load_explicit( volatile atomic_bool*, memory_order ); + friend bool atomic_exchange_explicit( volatile atomic_bool*, bool, + memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_bool*, bool*, bool, + memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_bool*, bool*, bool, + memory_order, memory_order ); + +CPP0X(private:) +#endif + bool __f__; +} atomic_bool; + + +typedef struct atomic_address +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( void*, memory_order = memory_order_seq_cst ) volatile; + void* load( memory_order = memory_order_seq_cst ) volatile; + void* exchange( void*, memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( void*&, void*, memory_order, memory_order ) volatile; + bool compare_exchange_strong( void*&, void*, memory_order, memory_order ) volatile; + bool compare_exchange_weak( void*&, void*, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( void*&, void*, + memory_order = memory_order_seq_cst ) volatile; + void* fetch_add( ptrdiff_t, memory_order = memory_order_seq_cst ) volatile; + void* fetch_sub( ptrdiff_t, memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_address() = default; ) + CPP0X( constexpr explicit atomic_address( void* __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_address( const atomic_address& ) = delete; ) + atomic_address& operator =( const atomic_address & ) CPP0X(=delete); + + void* operator =( void* __v__ ) volatile + { store( __v__ ); return __v__; } + + void* operator +=( ptrdiff_t __v__ ) volatile + { return fetch_add( __v__ ); } + + void* operator -=( ptrdiff_t __v__ ) volatile + { return fetch_sub( __v__ ); } + + friend void atomic_store_explicit( volatile atomic_address*, void*, + memory_order ); + friend void* atomic_load_explicit( volatile atomic_address*, memory_order ); + friend void* atomic_exchange_explicit( volatile atomic_address*, void*, + memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_address*, + void**, void*, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_address*, + void**, void*, memory_order, memory_order ); + friend void* atomic_fetch_add_explicit( volatile atomic_address*, ptrdiff_t, + memory_order ); + friend void* atomic_fetch_sub_explicit( volatile atomic_address*, ptrdiff_t, + memory_order ); + +CPP0X(private:) +#endif + void* __f__; +} atomic_address; + + +typedef struct atomic_char +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( char, + memory_order = memory_order_seq_cst ) volatile; + char load( memory_order = memory_order_seq_cst ) volatile; + char exchange( char, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( char&, char, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( char&, char, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( char&, char, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( char&, char, + memory_order = memory_order_seq_cst ) volatile; + char fetch_add( char, + memory_order = memory_order_seq_cst ) volatile; + char fetch_sub( char, + memory_order = memory_order_seq_cst ) volatile; + char fetch_and( char, + memory_order = memory_order_seq_cst ) volatile; + char fetch_or( char, + memory_order = memory_order_seq_cst ) volatile; + char 
fetch_xor( char, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_char() = default; ) + CPP0X( constexpr atomic_char( char __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_char( const atomic_char& ) = delete; ) + atomic_char& operator =( const atomic_char& ) CPP0X(=delete); + + char operator =( char __v__ ) volatile + { store( __v__ ); return __v__; } + + char operator ++( int ) volatile + { return fetch_add( 1 ); } + + char operator --( int ) volatile + { return fetch_sub( 1 ); } + + char operator ++() volatile + { return fetch_add( 1 ) + 1; } + + char operator --() volatile + { return fetch_sub( 1 ) - 1; } + + char operator +=( char __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + char operator -=( char __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + char operator &=( char __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + char operator |=( char __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + char operator ^=( char __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_char*, char, + memory_order ); + friend char atomic_load_explicit( volatile atomic_char*, + memory_order ); + friend char atomic_exchange_explicit( volatile atomic_char*, + char, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_char*, + char*, char, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_char*, + char*, char, memory_order, memory_order ); + friend char atomic_fetch_add_explicit( volatile atomic_char*, + char, memory_order ); + friend char atomic_fetch_sub_explicit( volatile atomic_char*, + char, memory_order ); + friend char atomic_fetch_and_explicit( volatile atomic_char*, + char, memory_order ); + friend char atomic_fetch_or_explicit( volatile atomic_char*, + char, memory_order ); + friend char atomic_fetch_xor_explicit( volatile atomic_char*, + char, memory_order ); + +CPP0X(private:) +#endif + char __f__; +} atomic_char; + + +typedef struct atomic_schar +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( signed char, + memory_order = memory_order_seq_cst ) volatile; + signed char load( memory_order = memory_order_seq_cst ) volatile; + signed char exchange( signed char, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( signed char&, signed char, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( signed char&, signed char, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( signed char&, signed char, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( signed char&, signed char, + memory_order = memory_order_seq_cst ) volatile; + signed char fetch_add( signed char, + memory_order = memory_order_seq_cst ) volatile; + signed char fetch_sub( signed char, + memory_order = memory_order_seq_cst ) volatile; + signed char fetch_and( signed char, + memory_order = memory_order_seq_cst ) volatile; + signed char fetch_or( signed char, + memory_order = memory_order_seq_cst ) volatile; + signed char fetch_xor( signed char, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_schar() = default; ) + CPP0X( constexpr atomic_schar( signed char __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_schar( const atomic_schar& ) = delete; ) + atomic_schar& operator =( const atomic_schar& ) CPP0X(=delete); + + signed char operator =( signed char __v__ ) volatile + { store( __v__ ); return __v__; } 
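 /*
  * The operator overloads below follow the same pattern for every integral
  * atomic type in this header: the post-increment/decrement forms return the
  * old value fetched by fetch_add(1)/fetch_sub(1), while the pre- forms and
  * the compound assignments return the updated value (e.g. fetch_add(v) + v).
  */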
+ + signed char operator ++( int ) volatile + { return fetch_add( 1 ); } + + signed char operator --( int ) volatile + { return fetch_sub( 1 ); } + + signed char operator ++() volatile + { return fetch_add( 1 ) + 1; } + + signed char operator --() volatile + { return fetch_sub( 1 ) - 1; } + + signed char operator +=( signed char __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + signed char operator -=( signed char __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + signed char operator &=( signed char __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + signed char operator |=( signed char __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + signed char operator ^=( signed char __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_schar*, signed char, + memory_order ); + friend signed char atomic_load_explicit( volatile atomic_schar*, + memory_order ); + friend signed char atomic_exchange_explicit( volatile atomic_schar*, + signed char, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_schar*, + signed char*, signed char, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_schar*, + signed char*, signed char, memory_order, memory_order ); + friend signed char atomic_fetch_add_explicit( volatile atomic_schar*, + signed char, memory_order ); + friend signed char atomic_fetch_sub_explicit( volatile atomic_schar*, + signed char, memory_order ); + friend signed char atomic_fetch_and_explicit( volatile atomic_schar*, + signed char, memory_order ); + friend signed char atomic_fetch_or_explicit( volatile atomic_schar*, + signed char, memory_order ); + friend signed char atomic_fetch_xor_explicit( volatile atomic_schar*, + signed char, memory_order ); + +CPP0X(private:) +#endif + signed char __f__; +} atomic_schar; + + +typedef struct atomic_uchar +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( unsigned char, + memory_order = memory_order_seq_cst ) volatile; + unsigned char load( memory_order = memory_order_seq_cst ) volatile; + unsigned char exchange( unsigned char, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( unsigned char&, unsigned char, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( unsigned char&, unsigned char, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( unsigned char&, unsigned char, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( unsigned char&, unsigned char, + memory_order = memory_order_seq_cst ) volatile; + unsigned char fetch_add( unsigned char, + memory_order = memory_order_seq_cst ) volatile; + unsigned char fetch_sub( unsigned char, + memory_order = memory_order_seq_cst ) volatile; + unsigned char fetch_and( unsigned char, + memory_order = memory_order_seq_cst ) volatile; + unsigned char fetch_or( unsigned char, + memory_order = memory_order_seq_cst ) volatile; + unsigned char fetch_xor( unsigned char, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_uchar() = default; ) + CPP0X( constexpr atomic_uchar( unsigned char __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_uchar( const atomic_uchar& ) = delete; ) + atomic_uchar& operator =( const atomic_uchar& ) CPP0X(=delete); + + unsigned char operator =( unsigned char __v__ ) volatile + { store( __v__ ); return __v__; } + + unsigned char operator ++( int ) volatile + { return fetch_add( 1 ); } + 
+ unsigned char operator --( int ) volatile + { return fetch_sub( 1 ); } + + unsigned char operator ++() volatile + { return fetch_add( 1 ) + 1; } + + unsigned char operator --() volatile + { return fetch_sub( 1 ) - 1; } + + unsigned char operator +=( unsigned char __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + unsigned char operator -=( unsigned char __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + unsigned char operator &=( unsigned char __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + unsigned char operator |=( unsigned char __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + unsigned char operator ^=( unsigned char __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_uchar*, unsigned char, + memory_order ); + friend unsigned char atomic_load_explicit( volatile atomic_uchar*, + memory_order ); + friend unsigned char atomic_exchange_explicit( volatile atomic_uchar*, + unsigned char, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_uchar*, + unsigned char*, unsigned char, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_uchar*, + unsigned char*, unsigned char, memory_order, memory_order ); + friend unsigned char atomic_fetch_add_explicit( volatile atomic_uchar*, + unsigned char, memory_order ); + friend unsigned char atomic_fetch_sub_explicit( volatile atomic_uchar*, + unsigned char, memory_order ); + friend unsigned char atomic_fetch_and_explicit( volatile atomic_uchar*, + unsigned char, memory_order ); + friend unsigned char atomic_fetch_or_explicit( volatile atomic_uchar*, + unsigned char, memory_order ); + friend unsigned char atomic_fetch_xor_explicit( volatile atomic_uchar*, + unsigned char, memory_order ); + +CPP0X(private:) +#endif + unsigned char __f__; +} atomic_uchar; + + +typedef struct atomic_short +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( short, + memory_order = memory_order_seq_cst ) volatile; + short load( memory_order = memory_order_seq_cst ) volatile; + short exchange( short, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( short&, short, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( short&, short, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( short&, short, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( short&, short, + memory_order = memory_order_seq_cst ) volatile; + short fetch_add( short, + memory_order = memory_order_seq_cst ) volatile; + short fetch_sub( short, + memory_order = memory_order_seq_cst ) volatile; + short fetch_and( short, + memory_order = memory_order_seq_cst ) volatile; + short fetch_or( short, + memory_order = memory_order_seq_cst ) volatile; + short fetch_xor( short, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_short() = default; ) + CPP0X( constexpr atomic_short( short __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_short( const atomic_short& ) = delete; ) + atomic_short& operator =( const atomic_short& ) CPP0X(=delete); + + short operator =( short __v__ ) volatile + { store( __v__ ); return __v__; } + + short operator ++( int ) volatile + { return fetch_add( 1 ); } + + short operator --( int ) volatile + { return fetch_sub( 1 ); } + + short operator ++() volatile + { return fetch_add( 1 ) + 1; } + + short operator --() volatile + { return fetch_sub( 1 ) - 1; } + + short operator +=( 
short __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + short operator -=( short __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + short operator &=( short __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + short operator |=( short __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + short operator ^=( short __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_short*, short, + memory_order ); + friend short atomic_load_explicit( volatile atomic_short*, + memory_order ); + friend short atomic_exchange_explicit( volatile atomic_short*, + short, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_short*, + short*, short, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_short*, + short*, short, memory_order, memory_order ); + friend short atomic_fetch_add_explicit( volatile atomic_short*, + short, memory_order ); + friend short atomic_fetch_sub_explicit( volatile atomic_short*, + short, memory_order ); + friend short atomic_fetch_and_explicit( volatile atomic_short*, + short, memory_order ); + friend short atomic_fetch_or_explicit( volatile atomic_short*, + short, memory_order ); + friend short atomic_fetch_xor_explicit( volatile atomic_short*, + short, memory_order ); + +CPP0X(private:) +#endif + short __f__; +} atomic_short; + + +typedef struct atomic_ushort +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( unsigned short, + memory_order = memory_order_seq_cst ) volatile; + unsigned short load( memory_order = memory_order_seq_cst ) volatile; + unsigned short exchange( unsigned short, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( unsigned short&, unsigned short, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( unsigned short&, unsigned short, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( unsigned short&, unsigned short, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( unsigned short&, unsigned short, + memory_order = memory_order_seq_cst ) volatile; + unsigned short fetch_add( unsigned short, + memory_order = memory_order_seq_cst ) volatile; + unsigned short fetch_sub( unsigned short, + memory_order = memory_order_seq_cst ) volatile; + unsigned short fetch_and( unsigned short, + memory_order = memory_order_seq_cst ) volatile; + unsigned short fetch_or( unsigned short, + memory_order = memory_order_seq_cst ) volatile; + unsigned short fetch_xor( unsigned short, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_ushort() = default; ) + CPP0X( constexpr atomic_ushort( unsigned short __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_ushort( const atomic_ushort& ) = delete; ) + atomic_ushort& operator =( const atomic_ushort& ) CPP0X(=delete); + + unsigned short operator =( unsigned short __v__ ) volatile + { store( __v__ ); return __v__; } + + unsigned short operator ++( int ) volatile + { return fetch_add( 1 ); } + + unsigned short operator --( int ) volatile + { return fetch_sub( 1 ); } + + unsigned short operator ++() volatile + { return fetch_add( 1 ) + 1; } + + unsigned short operator --() volatile + { return fetch_sub( 1 ) - 1; } + + unsigned short operator +=( unsigned short __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + unsigned short operator -=( unsigned short __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + unsigned short 
operator &=( unsigned short __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + unsigned short operator |=( unsigned short __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + unsigned short operator ^=( unsigned short __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_ushort*, unsigned short, + memory_order ); + friend unsigned short atomic_load_explicit( volatile atomic_ushort*, + memory_order ); + friend unsigned short atomic_exchange_explicit( volatile atomic_ushort*, + unsigned short, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_ushort*, + unsigned short*, unsigned short, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_ushort*, + unsigned short*, unsigned short, memory_order, memory_order ); + friend unsigned short atomic_fetch_add_explicit( volatile atomic_ushort*, + unsigned short, memory_order ); + friend unsigned short atomic_fetch_sub_explicit( volatile atomic_ushort*, + unsigned short, memory_order ); + friend unsigned short atomic_fetch_and_explicit( volatile atomic_ushort*, + unsigned short, memory_order ); + friend unsigned short atomic_fetch_or_explicit( volatile atomic_ushort*, + unsigned short, memory_order ); + friend unsigned short atomic_fetch_xor_explicit( volatile atomic_ushort*, + unsigned short, memory_order ); + +CPP0X(private:) +#endif + unsigned short __f__; +} atomic_ushort; + + +typedef struct atomic_int +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( int, + memory_order = memory_order_seq_cst ) volatile; + int load( memory_order = memory_order_seq_cst ) volatile; + int exchange( int, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( int&, int, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( int&, int, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( int&, int, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( int&, int, + memory_order = memory_order_seq_cst ) volatile; + int fetch_add( int, + memory_order = memory_order_seq_cst ) volatile; + int fetch_sub( int, + memory_order = memory_order_seq_cst ) volatile; + int fetch_and( int, + memory_order = memory_order_seq_cst ) volatile; + int fetch_or( int, + memory_order = memory_order_seq_cst ) volatile; + int fetch_xor( int, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_int() = default; ) + CPP0X( constexpr atomic_int( int __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_int( const atomic_int& ) = delete; ) + atomic_int& operator =( const atomic_int& ) CPP0X(=delete); + + int operator =( int __v__ ) volatile + { store( __v__ ); return __v__; } + + int operator ++( int ) volatile + { return fetch_add( 1 ); } + + int operator --( int ) volatile + { return fetch_sub( 1 ); } + + int operator ++() volatile + { return fetch_add( 1 ) + 1; } + + int operator --() volatile + { return fetch_sub( 1 ) - 1; } + + int operator +=( int __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + int operator -=( int __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + int operator &=( int __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + int operator |=( int __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + int operator ^=( int __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_int*, int, + memory_order ); + 
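 /*
  * These _explicit free functions are declared friends so they can reach
  * __f__ directly even when CPP0X(private:) hides it; their inline
  * definitions later in this header expand to the _ATOMIC_LOAD_/_ATOMIC_STORE_/
  * _ATOMIC_MODIFY_/_ATOMIC_CMPSWP_ macros, which route every access through
  * the model checker via the model_*_action() calls declared in cmodelint.h.
  */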
friend int atomic_load_explicit( volatile atomic_int*, + memory_order ); + friend int atomic_exchange_explicit( volatile atomic_int*, + int, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_int*, + int*, int, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_int*, + int*, int, memory_order, memory_order ); + friend int atomic_fetch_add_explicit( volatile atomic_int*, + int, memory_order ); + friend int atomic_fetch_sub_explicit( volatile atomic_int*, + int, memory_order ); + friend int atomic_fetch_and_explicit( volatile atomic_int*, + int, memory_order ); + friend int atomic_fetch_or_explicit( volatile atomic_int*, + int, memory_order ); + friend int atomic_fetch_xor_explicit( volatile atomic_int*, + int, memory_order ); + +CPP0X(private:) +#endif + int __f__; +} atomic_int; + + +typedef struct atomic_uint +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( unsigned int, + memory_order = memory_order_seq_cst ) volatile; + unsigned int load( memory_order = memory_order_seq_cst ) volatile; + unsigned int exchange( unsigned int, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( unsigned int&, unsigned int, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( unsigned int&, unsigned int, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( unsigned int&, unsigned int, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( unsigned int&, unsigned int, + memory_order = memory_order_seq_cst ) volatile; + unsigned int fetch_add( unsigned int, + memory_order = memory_order_seq_cst ) volatile; + unsigned int fetch_sub( unsigned int, + memory_order = memory_order_seq_cst ) volatile; + unsigned int fetch_and( unsigned int, + memory_order = memory_order_seq_cst ) volatile; + unsigned int fetch_or( unsigned int, + memory_order = memory_order_seq_cst ) volatile; + unsigned int fetch_xor( unsigned int, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_uint() = default; ) + CPP0X( constexpr atomic_uint( unsigned int __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_uint( const atomic_uint& ) = delete; ) + atomic_uint& operator =( const atomic_uint& ) CPP0X(=delete); + + unsigned int operator =( unsigned int __v__ ) volatile + { store( __v__ ); return __v__; } + + unsigned int operator ++( int ) volatile + { return fetch_add( 1 ); } + + unsigned int operator --( int ) volatile + { return fetch_sub( 1 ); } + + unsigned int operator ++() volatile + { return fetch_add( 1 ) + 1; } + + unsigned int operator --() volatile + { return fetch_sub( 1 ) - 1; } + + unsigned int operator +=( unsigned int __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + unsigned int operator -=( unsigned int __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + unsigned int operator &=( unsigned int __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + unsigned int operator |=( unsigned int __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + unsigned int operator ^=( unsigned int __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_uint*, unsigned int, + memory_order ); + friend unsigned int atomic_load_explicit( volatile atomic_uint*, + memory_order ); + friend unsigned int atomic_exchange_explicit( volatile atomic_uint*, + unsigned int, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile 
atomic_uint*, + unsigned int*, unsigned int, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_uint*, + unsigned int*, unsigned int, memory_order, memory_order ); + friend unsigned int atomic_fetch_add_explicit( volatile atomic_uint*, + unsigned int, memory_order ); + friend unsigned int atomic_fetch_sub_explicit( volatile atomic_uint*, + unsigned int, memory_order ); + friend unsigned int atomic_fetch_and_explicit( volatile atomic_uint*, + unsigned int, memory_order ); + friend unsigned int atomic_fetch_or_explicit( volatile atomic_uint*, + unsigned int, memory_order ); + friend unsigned int atomic_fetch_xor_explicit( volatile atomic_uint*, + unsigned int, memory_order ); + +CPP0X(private:) +#endif + unsigned int __f__; +} atomic_uint; + + +typedef struct atomic_long +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( long, + memory_order = memory_order_seq_cst ) volatile; + long load( memory_order = memory_order_seq_cst ) volatile; + long exchange( long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( long&, long, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( long&, long, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( long&, long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( long&, long, + memory_order = memory_order_seq_cst ) volatile; + long fetch_add( long, + memory_order = memory_order_seq_cst ) volatile; + long fetch_sub( long, + memory_order = memory_order_seq_cst ) volatile; + long fetch_and( long, + memory_order = memory_order_seq_cst ) volatile; + long fetch_or( long, + memory_order = memory_order_seq_cst ) volatile; + long fetch_xor( long, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_long() = default; ) + CPP0X( constexpr atomic_long( long __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_long( const atomic_long& ) = delete; ) + atomic_long& operator =( const atomic_long& ) CPP0X(=delete); + + long operator =( long __v__ ) volatile + { store( __v__ ); return __v__; } + + long operator ++( int ) volatile + { return fetch_add( 1 ); } + + long operator --( int ) volatile + { return fetch_sub( 1 ); } + + long operator ++() volatile + { return fetch_add( 1 ) + 1; } + + long operator --() volatile + { return fetch_sub( 1 ) - 1; } + + long operator +=( long __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + long operator -=( long __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + long operator &=( long __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + long operator |=( long __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + long operator ^=( long __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_long*, long, + memory_order ); + friend long atomic_load_explicit( volatile atomic_long*, + memory_order ); + friend long atomic_exchange_explicit( volatile atomic_long*, + long, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_long*, + long*, long, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_long*, + long*, long, memory_order, memory_order ); + friend long atomic_fetch_add_explicit( volatile atomic_long*, + long, memory_order ); + friend long atomic_fetch_sub_explicit( volatile atomic_long*, + long, memory_order ); + friend long atomic_fetch_and_explicit( volatile atomic_long*, + long, 
memory_order ); + friend long atomic_fetch_or_explicit( volatile atomic_long*, + long, memory_order ); + friend long atomic_fetch_xor_explicit( volatile atomic_long*, + long, memory_order ); + +CPP0X(private:) +#endif + long __f__; +} atomic_long; + + +typedef struct atomic_ulong +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( unsigned long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long load( memory_order = memory_order_seq_cst ) volatile; + unsigned long exchange( unsigned long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( unsigned long&, unsigned long, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( unsigned long&, unsigned long, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( unsigned long&, unsigned long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( unsigned long&, unsigned long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long fetch_add( unsigned long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long fetch_sub( unsigned long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long fetch_and( unsigned long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long fetch_or( unsigned long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long fetch_xor( unsigned long, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_ulong() = default; ) + CPP0X( constexpr atomic_ulong( unsigned long __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_ulong( const atomic_ulong& ) = delete; ) + atomic_ulong& operator =( const atomic_ulong& ) CPP0X(=delete); + + unsigned long operator =( unsigned long __v__ ) volatile + { store( __v__ ); return __v__; } + + unsigned long operator ++( int ) volatile + { return fetch_add( 1 ); } + + unsigned long operator --( int ) volatile + { return fetch_sub( 1 ); } + + unsigned long operator ++() volatile + { return fetch_add( 1 ) + 1; } + + unsigned long operator --() volatile + { return fetch_sub( 1 ) - 1; } + + unsigned long operator +=( unsigned long __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + unsigned long operator -=( unsigned long __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + unsigned long operator &=( unsigned long __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + unsigned long operator |=( unsigned long __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + unsigned long operator ^=( unsigned long __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_ulong*, unsigned long, + memory_order ); + friend unsigned long atomic_load_explicit( volatile atomic_ulong*, + memory_order ); + friend unsigned long atomic_exchange_explicit( volatile atomic_ulong*, + unsigned long, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_ulong*, + unsigned long*, unsigned long, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_ulong*, + unsigned long*, unsigned long, memory_order, memory_order ); + friend unsigned long atomic_fetch_add_explicit( volatile atomic_ulong*, + unsigned long, memory_order ); + friend unsigned long atomic_fetch_sub_explicit( volatile atomic_ulong*, + unsigned long, memory_order ); + friend unsigned long atomic_fetch_and_explicit( volatile atomic_ulong*, + unsigned long, memory_order ); + friend unsigned long 
atomic_fetch_or_explicit( volatile atomic_ulong*, + unsigned long, memory_order ); + friend unsigned long atomic_fetch_xor_explicit( volatile atomic_ulong*, + unsigned long, memory_order ); + +CPP0X(private:) +#endif + unsigned long __f__; +} atomic_ulong; + + +typedef struct atomic_llong +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( long long, + memory_order = memory_order_seq_cst ) volatile; + long long load( memory_order = memory_order_seq_cst ) volatile; + long long exchange( long long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( long long&, long long, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( long long&, long long, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( long long&, long long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( long long&, long long, + memory_order = memory_order_seq_cst ) volatile; + long long fetch_add( long long, + memory_order = memory_order_seq_cst ) volatile; + long long fetch_sub( long long, + memory_order = memory_order_seq_cst ) volatile; + long long fetch_and( long long, + memory_order = memory_order_seq_cst ) volatile; + long long fetch_or( long long, + memory_order = memory_order_seq_cst ) volatile; + long long fetch_xor( long long, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_llong() = default; ) + CPP0X( constexpr atomic_llong( long long __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_llong( const atomic_llong& ) = delete; ) + atomic_llong& operator =( const atomic_llong& ) CPP0X(=delete); + + long long operator =( long long __v__ ) volatile + { store( __v__ ); return __v__; } + + long long operator ++( int ) volatile + { return fetch_add( 1 ); } + + long long operator --( int ) volatile + { return fetch_sub( 1 ); } + + long long operator ++() volatile + { return fetch_add( 1 ) + 1; } + + long long operator --() volatile + { return fetch_sub( 1 ) - 1; } + + long long operator +=( long long __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + long long operator -=( long long __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + long long operator &=( long long __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + long long operator |=( long long __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + long long operator ^=( long long __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_llong*, long long, + memory_order ); + friend long long atomic_load_explicit( volatile atomic_llong*, + memory_order ); + friend long long atomic_exchange_explicit( volatile atomic_llong*, + long long, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_llong*, + long long*, long long, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_llong*, + long long*, long long, memory_order, memory_order ); + friend long long atomic_fetch_add_explicit( volatile atomic_llong*, + long long, memory_order ); + friend long long atomic_fetch_sub_explicit( volatile atomic_llong*, + long long, memory_order ); + friend long long atomic_fetch_and_explicit( volatile atomic_llong*, + long long, memory_order ); + friend long long atomic_fetch_or_explicit( volatile atomic_llong*, + long long, memory_order ); + friend long long atomic_fetch_xor_explicit( volatile atomic_llong*, + long long, memory_order ); + +CPP0X(private:) +#endif + long long __f__; +} 
atomic_llong; + + +typedef struct atomic_ullong +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long long load( memory_order = memory_order_seq_cst ) volatile; + unsigned long long exchange( unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( unsigned long long&, unsigned long long, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( unsigned long long&, unsigned long long, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( unsigned long long&, unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( unsigned long long&, unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long long fetch_add( unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long long fetch_sub( unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long long fetch_and( unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long long fetch_or( unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + unsigned long long fetch_xor( unsigned long long, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_ullong() = default; ) + CPP0X( constexpr atomic_ullong( unsigned long long __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_ullong( const atomic_ullong& ) = delete; ) + atomic_ullong& operator =( const atomic_ullong& ) CPP0X(=delete); + + unsigned long long operator =( unsigned long long __v__ ) volatile + { store( __v__ ); return __v__; } + + unsigned long long operator ++( int ) volatile + { return fetch_add( 1 ); } + + unsigned long long operator --( int ) volatile + { return fetch_sub( 1 ); } + + unsigned long long operator ++() volatile + { return fetch_add( 1 ) + 1; } + + unsigned long long operator --() volatile + { return fetch_sub( 1 ) - 1; } + + unsigned long long operator +=( unsigned long long __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + unsigned long long operator -=( unsigned long long __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + unsigned long long operator &=( unsigned long long __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + unsigned long long operator |=( unsigned long long __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + unsigned long long operator ^=( unsigned long long __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_ullong*, unsigned long long, + memory_order ); + friend unsigned long long atomic_load_explicit( volatile atomic_ullong*, + memory_order ); + friend unsigned long long atomic_exchange_explicit( volatile atomic_ullong*, + unsigned long long, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_ullong*, + unsigned long long*, unsigned long long, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_ullong*, + unsigned long long*, unsigned long long, memory_order, memory_order ); + friend unsigned long long atomic_fetch_add_explicit( volatile atomic_ullong*, + unsigned long long, memory_order ); + friend unsigned long long atomic_fetch_sub_explicit( volatile atomic_ullong*, + unsigned long long, memory_order ); + friend unsigned long long atomic_fetch_and_explicit( volatile atomic_ullong*, + unsigned long long, memory_order 
); + friend unsigned long long atomic_fetch_or_explicit( volatile atomic_ullong*, + unsigned long long, memory_order ); + friend unsigned long long atomic_fetch_xor_explicit( volatile atomic_ullong*, + unsigned long long, memory_order ); + +CPP0X(private:) +#endif + unsigned long long __f__; +} atomic_ullong; + + +typedef atomic_schar atomic_int_least8_t; +typedef atomic_uchar atomic_uint_least8_t; +typedef atomic_short atomic_int_least16_t; +typedef atomic_ushort atomic_uint_least16_t; +typedef atomic_int atomic_int_least32_t; +typedef atomic_uint atomic_uint_least32_t; +typedef atomic_llong atomic_int_least64_t; +typedef atomic_ullong atomic_uint_least64_t; + +typedef atomic_schar atomic_int_fast8_t; +typedef atomic_uchar atomic_uint_fast8_t; +typedef atomic_short atomic_int_fast16_t; +typedef atomic_ushort atomic_uint_fast16_t; +typedef atomic_int atomic_int_fast32_t; +typedef atomic_uint atomic_uint_fast32_t; +typedef atomic_llong atomic_int_fast64_t; +typedef atomic_ullong atomic_uint_fast64_t; + +typedef atomic_long atomic_intptr_t; +typedef atomic_ulong atomic_uintptr_t; + +typedef atomic_long atomic_ssize_t; +typedef atomic_ulong atomic_size_t; + +typedef atomic_long atomic_ptrdiff_t; + +typedef atomic_llong atomic_intmax_t; +typedef atomic_ullong atomic_uintmax_t; + + +#ifdef __cplusplus + + +typedef struct atomic_wchar_t +{ +#ifdef __cplusplus + bool is_lock_free() const volatile; + void store( wchar_t, memory_order = memory_order_seq_cst ) volatile; + wchar_t load( memory_order = memory_order_seq_cst ) volatile; + wchar_t exchange( wchar_t, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( wchar_t&, wchar_t, + memory_order, memory_order ) volatile; + bool compare_exchange_strong( wchar_t&, wchar_t, + memory_order, memory_order ) volatile; + bool compare_exchange_weak( wchar_t&, wchar_t, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( wchar_t&, wchar_t, + memory_order = memory_order_seq_cst ) volatile; + wchar_t fetch_add( wchar_t, + memory_order = memory_order_seq_cst ) volatile; + wchar_t fetch_sub( wchar_t, + memory_order = memory_order_seq_cst ) volatile; + wchar_t fetch_and( wchar_t, + memory_order = memory_order_seq_cst ) volatile; + wchar_t fetch_or( wchar_t, + memory_order = memory_order_seq_cst ) volatile; + wchar_t fetch_xor( wchar_t, + memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic_wchar_t() = default; ) + CPP0X( constexpr atomic_wchar_t( wchar_t __v__ ) : __f__( __v__) { } ) + CPP0X( atomic_wchar_t( const atomic_wchar_t& ) = delete; ) + atomic_wchar_t& operator =( const atomic_wchar_t& ) CPP0X(=delete); + + wchar_t operator =( wchar_t __v__ ) volatile + { store( __v__ ); return __v__; } + + wchar_t operator ++( int ) volatile + { return fetch_add( 1 ); } + + wchar_t operator --( int ) volatile + { return fetch_sub( 1 ); } + + wchar_t operator ++() volatile + { return fetch_add( 1 ) + 1; } + + wchar_t operator --() volatile + { return fetch_sub( 1 ) - 1; } + + wchar_t operator +=( wchar_t __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + wchar_t operator -=( wchar_t __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } + + wchar_t operator &=( wchar_t __v__ ) volatile + { return fetch_and( __v__ ) & __v__; } + + wchar_t operator |=( wchar_t __v__ ) volatile + { return fetch_or( __v__ ) | __v__; } + + wchar_t operator ^=( wchar_t __v__ ) volatile + { return fetch_xor( __v__ ) ^ __v__; } + + friend void atomic_store_explicit( volatile atomic_wchar_t*, wchar_t, + 
memory_order ); + friend wchar_t atomic_load_explicit( volatile atomic_wchar_t*, + memory_order ); + friend wchar_t atomic_exchange_explicit( volatile atomic_wchar_t*, + wchar_t, memory_order ); + friend bool atomic_compare_exchange_weak_explicit( volatile atomic_wchar_t*, + wchar_t*, wchar_t, memory_order, memory_order ); + friend bool atomic_compare_exchange_strong_explicit( volatile atomic_wchar_t*, + wchar_t*, wchar_t, memory_order, memory_order ); + friend wchar_t atomic_fetch_add_explicit( volatile atomic_wchar_t*, + wchar_t, memory_order ); + friend wchar_t atomic_fetch_sub_explicit( volatile atomic_wchar_t*, + wchar_t, memory_order ); + friend wchar_t atomic_fetch_and_explicit( volatile atomic_wchar_t*, + wchar_t, memory_order ); + friend wchar_t atomic_fetch_or_explicit( volatile atomic_wchar_t*, + wchar_t, memory_order ); + friend wchar_t atomic_fetch_xor_explicit( volatile atomic_wchar_t*, + wchar_t, memory_order ); + +CPP0X(private:) +#endif + wchar_t __f__; +} atomic_wchar_t; + + +#else + +typedef atomic_int_least16_t atomic_char16_t; +typedef atomic_int_least32_t atomic_char32_t; +typedef atomic_int_least32_t atomic_wchar_t; + +#endif + + +#ifdef __cplusplus + +template< typename T > +struct atomic +{ +#ifdef __cplusplus + + bool is_lock_free() const volatile; + void store( T, memory_order = memory_order_seq_cst ) volatile; + T load( memory_order = memory_order_seq_cst ) volatile; + T exchange( T __v__, memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( T&, T, memory_order, memory_order ) volatile; + bool compare_exchange_strong( T&, T, memory_order, memory_order ) volatile; + bool compare_exchange_weak( T&, T, memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( T&, T, memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( T __v__ ) : __f__( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + T operator =( T __v__ ) volatile + { store( __v__ ); return __v__; } + +CPP0X(private:) +#endif + T __f__; +}; + +#endif + +#ifdef __cplusplus + +template struct atomic< T* > : atomic_address +{ + T* load( memory_order = memory_order_seq_cst ) volatile; + T* exchange( T*, memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_weak( T*&, T*, memory_order, memory_order ) volatile; + bool compare_exchange_strong( T*&, T*, memory_order, memory_order ) volatile; + bool compare_exchange_weak( T*&, T*, + memory_order = memory_order_seq_cst ) volatile; + bool compare_exchange_strong( T*&, T*, + memory_order = memory_order_seq_cst ) volatile; + T* fetch_add( ptrdiff_t, memory_order = memory_order_seq_cst ) volatile; + T* fetch_sub( ptrdiff_t, memory_order = memory_order_seq_cst ) volatile; + + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( T __v__ ) : atomic_address( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + T* operator =( T* __v__ ) volatile + { store( __v__ ); return __v__; } + + T* operator ++( int ) volatile + { return fetch_add( 1 ); } + + T* operator --( int ) volatile + { return fetch_sub( 1 ); } + + T* operator ++() volatile + { return fetch_add( 1 ) + 1; } + + T* operator --() volatile + { return fetch_sub( 1 ) - 1; } + + T* operator +=( T* __v__ ) volatile + { return fetch_add( __v__ ) + __v__; } + + T* operator -=( T* __v__ ) volatile + { return fetch_sub( __v__ ) - __v__; } +}; + +#endif + +#ifdef 
__cplusplus + + +template<> struct atomic< bool > : atomic_bool +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( bool __v__ ) + : atomic_bool( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + bool operator =( bool __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< void* > : atomic_address +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( void* __v__ ) + : atomic_address( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + void* operator =( void* __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< char > : atomic_char +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( char __v__ ) + : atomic_char( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + char operator =( char __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< signed char > : atomic_schar +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( signed char __v__ ) + : atomic_schar( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + signed char operator =( signed char __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< unsigned char > : atomic_uchar +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( unsigned char __v__ ) + : atomic_uchar( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + unsigned char operator =( unsigned char __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< short > : atomic_short +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( short __v__ ) + : atomic_short( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + short operator =( short __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< unsigned short > : atomic_ushort +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( unsigned short __v__ ) + : atomic_ushort( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + unsigned short operator =( unsigned short __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< int > : atomic_int +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( int __v__ ) + : atomic_int( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + int operator =( int __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< unsigned int > : atomic_uint +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( unsigned int __v__ ) + : atomic_uint( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + unsigned int operator =( unsigned int __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< long > : atomic_long +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( long __v__ ) + : atomic_long( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) 
CPP0X(=delete); + + long operator =( long __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< unsigned long > : atomic_ulong +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( unsigned long __v__ ) + : atomic_ulong( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + unsigned long operator =( unsigned long __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< long long > : atomic_llong +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( long long __v__ ) + : atomic_llong( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + long long operator =( long long __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< unsigned long long > : atomic_ullong +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( unsigned long long __v__ ) + : atomic_ullong( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + unsigned long long operator =( unsigned long long __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +template<> struct atomic< wchar_t > : atomic_wchar_t +{ + CPP0X( atomic() = default; ) + CPP0X( constexpr explicit atomic( wchar_t __v__ ) + : atomic_wchar_t( __v__ ) { } ) + CPP0X( atomic( const atomic& ) = delete; ) + atomic& operator =( const atomic& ) CPP0X(=delete); + + wchar_t operator =( wchar_t __v__ ) volatile + { store( __v__ ); return __v__; } +}; + + +#endif + + +#ifdef __cplusplus + + +inline bool atomic_is_lock_free +( const volatile atomic_bool* __a__ ) +{ return false; } + +inline bool atomic_load_explicit +( volatile atomic_bool* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline bool atomic_load +( volatile atomic_bool* __a__ ) { return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_bool* __a__, bool __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_bool* __a__, bool __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_bool* __a__, bool __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_exchange_explicit +( volatile atomic_bool* __a__, bool __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline bool atomic_exchange +( volatile atomic_bool* __a__, bool __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_bool* __a__, bool* __e__, bool __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_bool* __a__, bool* __e__, bool __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_bool* __a__, bool* __e__, bool __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_bool* __a__, bool* __e__, bool __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + 
memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_address* __a__ ) +{ return false; } + +inline void* atomic_load_explicit +( volatile atomic_address* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline void* atomic_load( volatile atomic_address* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_address* __a__, void* __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_address* __a__, void* __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_address* __a__, void* __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline void* atomic_exchange_explicit +( volatile atomic_address* __a__, void* __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline void* atomic_exchange +( volatile atomic_address* __a__, void* __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_address* __a__, void** __e__, void* __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_address* __a__, void** __e__, void* __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_address* __a__, void** __e__, void* __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_address* __a__, void** __e__, void* __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_char* __a__ ) +{ return false; } + +inline char atomic_load_explicit +( volatile atomic_char* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline char atomic_load( volatile atomic_char* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_char* __a__, char __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_char* __a__, char __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_char* __a__, char __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline char atomic_exchange_explicit +( volatile atomic_char* __a__, char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline char atomic_exchange +( volatile atomic_char* __a__, char __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_char* __a__, char* __e__, char __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_char* __a__, char* __e__, char __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool 
atomic_compare_exchange_weak +( volatile atomic_char* __a__, char* __e__, char __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_char* __a__, char* __e__, char __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_schar* __a__ ) +{ return false; } + +inline signed char atomic_load_explicit +( volatile atomic_schar* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline signed char atomic_load( volatile atomic_schar* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_schar* __a__, signed char __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_schar* __a__, signed char __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_schar* __a__, signed char __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline signed char atomic_exchange_explicit +( volatile atomic_schar* __a__, signed char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline signed char atomic_exchange +( volatile atomic_schar* __a__, signed char __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_schar* __a__, signed char* __e__, signed char __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_schar* __a__, signed char* __e__, signed char __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_schar* __a__, signed char* __e__, signed char __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_schar* __a__, signed char* __e__, signed char __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_uchar* __a__ ) +{ return false; } + +inline unsigned char atomic_load_explicit +( volatile atomic_uchar* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline unsigned char atomic_load( volatile atomic_uchar* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_uchar* __a__, unsigned char __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_uchar* __a__, unsigned char __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_uchar* __a__, unsigned char __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline unsigned char atomic_exchange_explicit +( volatile atomic_uchar* __a__, unsigned char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline unsigned char atomic_exchange +( volatile atomic_uchar* __a__, unsigned char 
__m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_uchar* __a__, unsigned char* __e__, unsigned char __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_uchar* __a__, unsigned char* __e__, unsigned char __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_uchar* __a__, unsigned char* __e__, unsigned char __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_uchar* __a__, unsigned char* __e__, unsigned char __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_short* __a__ ) +{ return false; } + +inline short atomic_load_explicit +( volatile atomic_short* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline short atomic_load( volatile atomic_short* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_short* __a__, short __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_short* __a__, short __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_short* __a__, short __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline short atomic_exchange_explicit +( volatile atomic_short* __a__, short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline short atomic_exchange +( volatile atomic_short* __a__, short __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_short* __a__, short* __e__, short __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_short* __a__, short* __e__, short __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_short* __a__, short* __e__, short __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_short* __a__, short* __e__, short __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_ushort* __a__ ) +{ return false; } + +inline unsigned short atomic_load_explicit +( volatile atomic_ushort* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline unsigned short atomic_load( volatile atomic_ushort* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_ushort* __a__, unsigned short __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile 
atomic_ushort* __a__, unsigned short __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_ushort* __a__, unsigned short __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline unsigned short atomic_exchange_explicit +( volatile atomic_ushort* __a__, unsigned short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline unsigned short atomic_exchange +( volatile atomic_ushort* __a__, unsigned short __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_ushort* __a__, unsigned short* __e__, unsigned short __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_ushort* __a__, unsigned short* __e__, unsigned short __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_ushort* __a__, unsigned short* __e__, unsigned short __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_ushort* __a__, unsigned short* __e__, unsigned short __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_int* __a__ ) +{ return false; } + +inline int atomic_load_explicit +( volatile atomic_int* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline int atomic_load( volatile atomic_int* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_int* __a__, int __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_int* __a__, int __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_int* __a__, int __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline int atomic_exchange_explicit +( volatile atomic_int* __a__, int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline int atomic_exchange +( volatile atomic_int* __a__, int __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_int* __a__, int* __e__, int __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_int* __a__, int* __e__, int __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_int* __a__, int* __e__, int __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_int* __a__, int* __e__, int __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_uint* 
__a__ ) +{ return false; } + +inline unsigned int atomic_load_explicit +( volatile atomic_uint* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline unsigned int atomic_load( volatile atomic_uint* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_uint* __a__, unsigned int __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline unsigned int atomic_exchange_explicit +( volatile atomic_uint* __a__, unsigned int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline unsigned int atomic_exchange +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_uint* __a__, unsigned int* __e__, unsigned int __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_uint* __a__, unsigned int* __e__, unsigned int __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_uint* __a__, unsigned int* __e__, unsigned int __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_uint* __a__, unsigned int* __e__, unsigned int __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_long* __a__ ) +{ return false; } + +inline long atomic_load_explicit +( volatile atomic_long* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline long atomic_load( volatile atomic_long* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_long* __a__, long __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_long* __a__, long __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_long* __a__, long __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline long atomic_exchange_explicit +( volatile atomic_long* __a__, long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline long atomic_exchange +( volatile atomic_long* __a__, long __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_long* __a__, long* __e__, long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_long* __a__, long* __e__, long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_long* 
__a__, long* __e__, long __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_long* __a__, long* __e__, long __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_ulong* __a__ ) +{ return false; } + +inline unsigned long atomic_load_explicit +( volatile atomic_ulong* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline unsigned long atomic_load( volatile atomic_ulong* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_ulong* __a__, unsigned long __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline unsigned long atomic_exchange_explicit +( volatile atomic_ulong* __a__, unsigned long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline unsigned long atomic_exchange +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_ulong* __a__, unsigned long* __e__, unsigned long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_ulong* __a__, unsigned long* __e__, unsigned long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_ulong* __a__, unsigned long* __e__, unsigned long __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_ulong* __a__, unsigned long* __e__, unsigned long __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_llong* __a__ ) +{ return false; } + +inline long long atomic_load_explicit +( volatile atomic_llong* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline long long atomic_load( volatile atomic_llong* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_llong* __a__, long long __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_llong* __a__, long long __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_llong* __a__, long long __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline long long atomic_exchange_explicit +( volatile atomic_llong* __a__, long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline long long atomic_exchange +( volatile atomic_llong* __a__, long long __m__ ) +{ return atomic_exchange_explicit( __a__, 
__m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_llong* __a__, long long* __e__, long long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_llong* __a__, long long* __e__, long long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_llong* __a__, long long* __e__, long long __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_llong* __a__, long long* __e__, long long __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_ullong* __a__ ) +{ return false; } + +inline unsigned long long atomic_load_explicit +( volatile atomic_ullong* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline unsigned long long atomic_load( volatile atomic_ullong* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_ullong* __a__, unsigned long long __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline unsigned long long atomic_exchange_explicit +( volatile atomic_ullong* __a__, unsigned long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline unsigned long long atomic_exchange +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_ullong* __a__, unsigned long long* __e__, unsigned long long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_ullong* __a__, unsigned long long* __e__, unsigned long long __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_ullong* __a__, unsigned long long* __e__, unsigned long long __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_ullong* __a__, unsigned long long* __e__, unsigned long long __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline bool atomic_is_lock_free( const volatile atomic_wchar_t* __a__ ) +{ return false; } + +inline wchar_t atomic_load_explicit +( volatile atomic_wchar_t* __a__, memory_order __x__ ) +{ return _ATOMIC_LOAD_( __a__, __x__ ); } + +inline wchar_t atomic_load( volatile atomic_wchar_t* __a__ ) +{ return atomic_load_explicit( __a__, memory_order_seq_cst ); } + +inline void atomic_init +( volatile 
atomic_wchar_t* __a__, wchar_t __m__ ) +{ _ATOMIC_INIT_( __a__, __m__ ); } + +inline void atomic_store_explicit +( volatile atomic_wchar_t* __a__, wchar_t __m__, memory_order __x__ ) +{ _ATOMIC_STORE_( __a__, __m__, __x__ ); } + +inline void atomic_store +( volatile atomic_wchar_t* __a__, wchar_t __m__ ) +{ atomic_store_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline wchar_t atomic_exchange_explicit +( volatile atomic_wchar_t* __a__, wchar_t __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, =, __m__, __x__ ); } + +inline wchar_t atomic_exchange +( volatile atomic_wchar_t* __a__, wchar_t __m__ ) +{ return atomic_exchange_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_weak_explicit +( volatile atomic_wchar_t* __a__, wchar_t* __e__, wchar_t __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_strong_explicit +( volatile atomic_wchar_t* __a__, wchar_t* __e__, wchar_t __m__, + memory_order __x__, memory_order __y__ ) +{ return _ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ); } + +inline bool atomic_compare_exchange_weak +( volatile atomic_wchar_t* __a__, wchar_t* __e__, wchar_t __m__ ) +{ return atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + +inline bool atomic_compare_exchange_strong +( volatile atomic_wchar_t* __a__, wchar_t* __e__, wchar_t __m__ ) +{ return atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, + memory_order_seq_cst, memory_order_seq_cst ); } + + +inline void* atomic_fetch_add_explicit +( volatile atomic_address* __a__, ptrdiff_t __m__, memory_order __x__ ) +{ + void* volatile* __p__ = &((__a__)->__f__); + void* __r__ = (void *) model_rmwr_action((void *)__p__, __x__); + model_rmw_action((void *)__p__, __x__, (uint64_t) ((char*)(*__p__) + __m__)); + return __r__; } + +inline void* atomic_fetch_add +( volatile atomic_address* __a__, ptrdiff_t __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline void* atomic_fetch_sub_explicit +( volatile atomic_address* __a__, ptrdiff_t __m__, memory_order __x__ ) +{ + void* volatile* __p__ = &((__a__)->__f__); + void* __r__ = (void *) model_rmwr_action((void *)__p__, __x__); + model_rmw_action((void *)__p__, __x__, (uint64_t)((char*)(*__p__) - __m__)); + return __r__; } + +inline void* atomic_fetch_sub +( volatile atomic_address* __a__, ptrdiff_t __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + +inline char atomic_fetch_add_explicit +( volatile atomic_char* __a__, char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline char atomic_fetch_add +( volatile atomic_char* __a__, char __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline char atomic_fetch_sub_explicit +( volatile atomic_char* __a__, char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline char atomic_fetch_sub +( volatile atomic_char* __a__, char __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline char atomic_fetch_and_explicit +( volatile atomic_char* __a__, char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline char atomic_fetch_and +( volatile atomic_char* __a__, char __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + 
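The C-style operations defined above all follow one pattern: each _explicit function takes an explicit memory_order and expands to the model checker's _ATOMIC_LOAD_/_ATOMIC_STORE_/_ATOMIC_MODIFY_/_ATOMIC_CMPSWP_ hooks, while the plain-named function simply forwards with memory_order_seq_cst. A minimal usage sketch of that API (illustrative only; flag, expected and try_set are hypothetical names, not part of this header):

  static atomic_int flag;

  void try_set(void)
  {
      atomic_init(&flag, 0);                                  /* unordered initialization */
      atomic_store_explicit(&flag, 1, memory_order_release);  /* release store */

      int expected = 1;
      /* strong CAS: writes 2 only if flag still holds 1; the plain form defaults to seq_cst */
      bool swapped = atomic_compare_exchange_strong(&flag, &expected, 2);

      int observed = atomic_load_explicit(&flag, memory_order_acquire);
      (void)swapped; (void)observed;
  }
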
+ +inline char atomic_fetch_or_explicit +( volatile atomic_char* __a__, char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline char atomic_fetch_or +( volatile atomic_char* __a__, char __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline char atomic_fetch_xor_explicit +( volatile atomic_char* __a__, char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline char atomic_fetch_xor +( volatile atomic_char* __a__, char __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline signed char atomic_fetch_add_explicit +( volatile atomic_schar* __a__, signed char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline signed char atomic_fetch_add +( volatile atomic_schar* __a__, signed char __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline signed char atomic_fetch_sub_explicit +( volatile atomic_schar* __a__, signed char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline signed char atomic_fetch_sub +( volatile atomic_schar* __a__, signed char __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline signed char atomic_fetch_and_explicit +( volatile atomic_schar* __a__, signed char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline signed char atomic_fetch_and +( volatile atomic_schar* __a__, signed char __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline signed char atomic_fetch_or_explicit +( volatile atomic_schar* __a__, signed char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline signed char atomic_fetch_or +( volatile atomic_schar* __a__, signed char __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline signed char atomic_fetch_xor_explicit +( volatile atomic_schar* __a__, signed char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline signed char atomic_fetch_xor +( volatile atomic_schar* __a__, signed char __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned char atomic_fetch_add_explicit +( volatile atomic_uchar* __a__, unsigned char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline unsigned char atomic_fetch_add +( volatile atomic_uchar* __a__, unsigned char __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned char atomic_fetch_sub_explicit +( volatile atomic_uchar* __a__, unsigned char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline unsigned char atomic_fetch_sub +( volatile atomic_uchar* __a__, unsigned char __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned char atomic_fetch_and_explicit +( volatile atomic_uchar* __a__, unsigned char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline unsigned char atomic_fetch_and +( volatile atomic_uchar* __a__, unsigned char __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned char atomic_fetch_or_explicit +( volatile atomic_uchar* __a__, unsigned char __m__, memory_order 
__x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline unsigned char atomic_fetch_or +( volatile atomic_uchar* __a__, unsigned char __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned char atomic_fetch_xor_explicit +( volatile atomic_uchar* __a__, unsigned char __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline unsigned char atomic_fetch_xor +( volatile atomic_uchar* __a__, unsigned char __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline short atomic_fetch_add_explicit +( volatile atomic_short* __a__, short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline short atomic_fetch_add +( volatile atomic_short* __a__, short __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline short atomic_fetch_sub_explicit +( volatile atomic_short* __a__, short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline short atomic_fetch_sub +( volatile atomic_short* __a__, short __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline short atomic_fetch_and_explicit +( volatile atomic_short* __a__, short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline short atomic_fetch_and +( volatile atomic_short* __a__, short __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline short atomic_fetch_or_explicit +( volatile atomic_short* __a__, short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline short atomic_fetch_or +( volatile atomic_short* __a__, short __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline short atomic_fetch_xor_explicit +( volatile atomic_short* __a__, short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline short atomic_fetch_xor +( volatile atomic_short* __a__, short __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned short atomic_fetch_add_explicit +( volatile atomic_ushort* __a__, unsigned short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline unsigned short atomic_fetch_add +( volatile atomic_ushort* __a__, unsigned short __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned short atomic_fetch_sub_explicit +( volatile atomic_ushort* __a__, unsigned short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline unsigned short atomic_fetch_sub +( volatile atomic_ushort* __a__, unsigned short __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned short atomic_fetch_and_explicit +( volatile atomic_ushort* __a__, unsigned short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline unsigned short atomic_fetch_and +( volatile atomic_ushort* __a__, unsigned short __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned short atomic_fetch_or_explicit +( volatile atomic_ushort* __a__, unsigned short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline unsigned short atomic_fetch_or +( volatile atomic_ushort* __a__, 
unsigned short __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned short atomic_fetch_xor_explicit +( volatile atomic_ushort* __a__, unsigned short __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline unsigned short atomic_fetch_xor +( volatile atomic_ushort* __a__, unsigned short __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline int atomic_fetch_add_explicit +( volatile atomic_int* __a__, int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline int atomic_fetch_add +( volatile atomic_int* __a__, int __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline int atomic_fetch_sub_explicit +( volatile atomic_int* __a__, int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline int atomic_fetch_sub +( volatile atomic_int* __a__, int __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline int atomic_fetch_and_explicit +( volatile atomic_int* __a__, int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline int atomic_fetch_and +( volatile atomic_int* __a__, int __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline int atomic_fetch_or_explicit +( volatile atomic_int* __a__, int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline int atomic_fetch_or +( volatile atomic_int* __a__, int __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline int atomic_fetch_xor_explicit +( volatile atomic_int* __a__, int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline int atomic_fetch_xor +( volatile atomic_int* __a__, int __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned int atomic_fetch_add_explicit +( volatile atomic_uint* __a__, unsigned int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline unsigned int atomic_fetch_add +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned int atomic_fetch_sub_explicit +( volatile atomic_uint* __a__, unsigned int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline unsigned int atomic_fetch_sub +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned int atomic_fetch_and_explicit +( volatile atomic_uint* __a__, unsigned int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline unsigned int atomic_fetch_and +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned int atomic_fetch_or_explicit +( volatile atomic_uint* __a__, unsigned int __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline unsigned int atomic_fetch_or +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned int atomic_fetch_xor_explicit +( volatile atomic_uint* __a__, unsigned int __m__, memory_order __x__ ) +{ return 
_ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline unsigned int atomic_fetch_xor +( volatile atomic_uint* __a__, unsigned int __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long atomic_fetch_add_explicit +( volatile atomic_long* __a__, long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline long atomic_fetch_add +( volatile atomic_long* __a__, long __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long atomic_fetch_sub_explicit +( volatile atomic_long* __a__, long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline long atomic_fetch_sub +( volatile atomic_long* __a__, long __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long atomic_fetch_and_explicit +( volatile atomic_long* __a__, long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline long atomic_fetch_and +( volatile atomic_long* __a__, long __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long atomic_fetch_or_explicit +( volatile atomic_long* __a__, long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline long atomic_fetch_or +( volatile atomic_long* __a__, long __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long atomic_fetch_xor_explicit +( volatile atomic_long* __a__, long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline long atomic_fetch_xor +( volatile atomic_long* __a__, long __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long atomic_fetch_add_explicit +( volatile atomic_ulong* __a__, unsigned long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline unsigned long atomic_fetch_add +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long atomic_fetch_sub_explicit +( volatile atomic_ulong* __a__, unsigned long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline unsigned long atomic_fetch_sub +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long atomic_fetch_and_explicit +( volatile atomic_ulong* __a__, unsigned long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline unsigned long atomic_fetch_and +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long atomic_fetch_or_explicit +( volatile atomic_ulong* __a__, unsigned long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline unsigned long atomic_fetch_or +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long atomic_fetch_xor_explicit +( volatile atomic_ulong* __a__, unsigned long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline unsigned long atomic_fetch_xor +( volatile atomic_ulong* __a__, unsigned long __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, 
memory_order_seq_cst ); } + + +inline long long atomic_fetch_add_explicit +( volatile atomic_llong* __a__, long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline long long atomic_fetch_add +( volatile atomic_llong* __a__, long long __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long long atomic_fetch_sub_explicit +( volatile atomic_llong* __a__, long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline long long atomic_fetch_sub +( volatile atomic_llong* __a__, long long __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long long atomic_fetch_and_explicit +( volatile atomic_llong* __a__, long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline long long atomic_fetch_and +( volatile atomic_llong* __a__, long long __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long long atomic_fetch_or_explicit +( volatile atomic_llong* __a__, long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline long long atomic_fetch_or +( volatile atomic_llong* __a__, long long __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline long long atomic_fetch_xor_explicit +( volatile atomic_llong* __a__, long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline long long atomic_fetch_xor +( volatile atomic_llong* __a__, long long __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long long atomic_fetch_add_explicit +( volatile atomic_ullong* __a__, unsigned long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline unsigned long long atomic_fetch_add +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long long atomic_fetch_sub_explicit +( volatile atomic_ullong* __a__, unsigned long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline unsigned long long atomic_fetch_sub +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long long atomic_fetch_and_explicit +( volatile atomic_ullong* __a__, unsigned long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline unsigned long long atomic_fetch_and +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long long atomic_fetch_or_explicit +( volatile atomic_ullong* __a__, unsigned long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline unsigned long long atomic_fetch_or +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline unsigned long long atomic_fetch_xor_explicit +( volatile atomic_ullong* __a__, unsigned long long __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline unsigned long long atomic_fetch_xor +( volatile atomic_ullong* __a__, unsigned long long __m__ ) +{ return 
atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline wchar_t atomic_fetch_add_explicit +( volatile atomic_wchar_t* __a__, wchar_t __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ); } + +inline wchar_t atomic_fetch_add +( volatile atomic_wchar_t* __a__, wchar_t __m__ ) +{ return atomic_fetch_add_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline wchar_t atomic_fetch_sub_explicit +( volatile atomic_wchar_t* __a__, wchar_t __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ); } + +inline wchar_t atomic_fetch_sub +( volatile atomic_wchar_t* __a__, wchar_t __m__ ) +{ return atomic_fetch_sub_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline wchar_t atomic_fetch_and_explicit +( volatile atomic_wchar_t* __a__, wchar_t __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ); } + +inline wchar_t atomic_fetch_and +( volatile atomic_wchar_t* __a__, wchar_t __m__ ) +{ return atomic_fetch_and_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline wchar_t atomic_fetch_or_explicit +( volatile atomic_wchar_t* __a__, wchar_t __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ); } + +inline wchar_t atomic_fetch_or +( volatile atomic_wchar_t* __a__, wchar_t __m__ ) +{ return atomic_fetch_or_explicit( __a__, __m__, memory_order_seq_cst ); } + + +inline wchar_t atomic_fetch_xor_explicit +( volatile atomic_wchar_t* __a__, wchar_t __m__, memory_order __x__ ) +{ return _ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ); } + +inline wchar_t atomic_fetch_xor +( volatile atomic_wchar_t* __a__, wchar_t __m__ ) +{ return atomic_fetch_xor_explicit( __a__, __m__, memory_order_seq_cst ); } + + +#else + + +#define atomic_is_lock_free( __a__ ) \ +false + +#define atomic_load( __a__ ) \ +_ATOMIC_LOAD_( __a__, memory_order_seq_cst ) + +#define atomic_load_explicit( __a__, __x__ ) \ +_ATOMIC_LOAD_( __a__, __x__ ) + +#define atomic_init( __a__, __m__ ) \ +_ATOMIC_INIT_( __a__, __m__ ) + +#define atomic_store( __a__, __m__ ) \ +_ATOMIC_STORE_( __a__, __m__, memory_order_seq_cst ) + +#define atomic_store_explicit( __a__, __m__, __x__ ) \ +_ATOMIC_STORE_( __a__, __m__, __x__ ) + +#define atomic_exchange( __a__, __m__ ) \ +_ATOMIC_MODIFY_( __a__, =, __m__, memory_order_seq_cst ) + +#define atomic_exchange_explicit( __a__, __m__, __x__ ) \ +_ATOMIC_MODIFY_( __a__, =, __m__, __x__ ) + +#define atomic_compare_exchange_weak( __a__, __e__, __m__ ) \ +_ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, memory_order_seq_cst ) + +#define atomic_compare_exchange_strong( __a__, __e__, __m__ ) \ +_ATOMIC_CMPSWP_( __a__, __e__, __m__, memory_order_seq_cst ) + +#define atomic_compare_exchange_weak_explicit( __a__, __e__, __m__, __x__, __y__ ) \ +_ATOMIC_CMPSWP_WEAK_( __a__, __e__, __m__, __x__ ) + +#define atomic_compare_exchange_strong_explicit( __a__, __e__, __m__, __x__, __y__ ) \ +_ATOMIC_CMPSWP_( __a__, __e__, __m__, __x__ ) + + +#define atomic_fetch_add_explicit( __a__, __m__, __x__ ) \ +_ATOMIC_MODIFY_( __a__, +=, __m__, __x__ ) + +#define atomic_fetch_add( __a__, __m__ ) \ +_ATOMIC_MODIFY_( __a__, +=, __m__, memory_order_seq_cst ) + + +#define atomic_fetch_sub_explicit( __a__, __m__, __x__ ) \ +_ATOMIC_MODIFY_( __a__, -=, __m__, __x__ ) + +#define atomic_fetch_sub( __a__, __m__ ) \ +_ATOMIC_MODIFY_( __a__, -=, __m__, memory_order_seq_cst ) + + +#define atomic_fetch_and_explicit( __a__, __m__, __x__ ) \ +_ATOMIC_MODIFY_( __a__, &=, __m__, __x__ ) + +#define atomic_fetch_and( __a__, __m__ ) 
\ +_ATOMIC_MODIFY_( __a__, &=, __m__, memory_order_seq_cst ) + + +#define atomic_fetch_or_explicit( __a__, __m__, __x__ ) \ +_ATOMIC_MODIFY_( __a__, |=, __m__, __x__ ) + +#define atomic_fetch_or( __a__, __m__ ) \ +_ATOMIC_MODIFY_( __a__, |=, __m__, memory_order_seq_cst ) + + +#define atomic_fetch_xor_explicit( __a__, __m__, __x__ ) \ +_ATOMIC_MODIFY_( __a__, ^=, __m__, __x__ ) + +#define atomic_fetch_xor( __a__, __m__ ) \ +_ATOMIC_MODIFY_( __a__, ^=, __m__, memory_order_seq_cst ) + + +#endif + + +#ifdef __cplusplus + + +inline bool atomic_bool::is_lock_free() const volatile +{ return false; } + +inline void atomic_bool::store +( bool __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline bool atomic_bool::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline bool atomic_bool::exchange +( bool __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_bool::compare_exchange_weak +( bool& __e__, bool __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_bool::compare_exchange_strong +( bool& __e__, bool __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_bool::compare_exchange_weak +( bool& __e__, bool __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_bool::compare_exchange_strong +( bool& __e__, bool __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_address::is_lock_free() const volatile +{ return false; } + +inline void atomic_address::store +( void* __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline void* atomic_address::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline void* atomic_address::exchange +( void* __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_address::compare_exchange_weak +( void*& __e__, void* __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_address::compare_exchange_strong +( void*& __e__, void* __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_address::compare_exchange_weak +( void*& __e__, void* __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_address::compare_exchange_strong +( void*& __e__, void* __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? 
memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_char::is_lock_free() const volatile +{ return false; } + +inline void atomic_char::store +( char __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline char atomic_char::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline char atomic_char::exchange +( char __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_char::compare_exchange_weak +( char& __e__, char __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_char::compare_exchange_strong +( char& __e__, char __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_char::compare_exchange_weak +( char& __e__, char __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_char::compare_exchange_strong +( char& __e__, char __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_schar::is_lock_free() const volatile +{ return false; } + +inline void atomic_schar::store +( signed char __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline signed char atomic_schar::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline signed char atomic_schar::exchange +( signed char __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_schar::compare_exchange_weak +( signed char& __e__, signed char __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_schar::compare_exchange_strong +( signed char& __e__, signed char __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_schar::compare_exchange_weak +( signed char& __e__, signed char __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_schar::compare_exchange_strong +( signed char& __e__, signed char __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? 
memory_order_relaxed : __x__ ); } + + +inline bool atomic_uchar::is_lock_free() const volatile +{ return false; } + +inline void atomic_uchar::store +( unsigned char __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline unsigned char atomic_uchar::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline unsigned char atomic_uchar::exchange +( unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_uchar::compare_exchange_weak +( unsigned char& __e__, unsigned char __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_uchar::compare_exchange_strong +( unsigned char& __e__, unsigned char __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_uchar::compare_exchange_weak +( unsigned char& __e__, unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_uchar::compare_exchange_strong +( unsigned char& __e__, unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_short::is_lock_free() const volatile +{ return false; } + +inline void atomic_short::store +( short __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline short atomic_short::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline short atomic_short::exchange +( short __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_short::compare_exchange_weak +( short& __e__, short __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_short::compare_exchange_strong +( short& __e__, short __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_short::compare_exchange_weak +( short& __e__, short __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_short::compare_exchange_strong +( short& __e__, short __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? 
memory_order_relaxed : __x__ ); } + + +inline bool atomic_ushort::is_lock_free() const volatile +{ return false; } + +inline void atomic_ushort::store +( unsigned short __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline unsigned short atomic_ushort::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline unsigned short atomic_ushort::exchange +( unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_ushort::compare_exchange_weak +( unsigned short& __e__, unsigned short __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_ushort::compare_exchange_strong +( unsigned short& __e__, unsigned short __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_ushort::compare_exchange_weak +( unsigned short& __e__, unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_ushort::compare_exchange_strong +( unsigned short& __e__, unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_int::is_lock_free() const volatile +{ return false; } + +inline void atomic_int::store +( int __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline int atomic_int::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline int atomic_int::exchange +( int __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_int::compare_exchange_weak +( int& __e__, int __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_int::compare_exchange_strong +( int& __e__, int __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_int::compare_exchange_weak +( int& __e__, int __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_int::compare_exchange_strong +( int& __e__, int __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? 
memory_order_relaxed : __x__ ); } + + +inline bool atomic_uint::is_lock_free() const volatile +{ return false; } + +inline void atomic_uint::store +( unsigned int __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline unsigned int atomic_uint::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline unsigned int atomic_uint::exchange +( unsigned int __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_uint::compare_exchange_weak +( unsigned int& __e__, unsigned int __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_uint::compare_exchange_strong +( unsigned int& __e__, unsigned int __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_uint::compare_exchange_weak +( unsigned int& __e__, unsigned int __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_uint::compare_exchange_strong +( unsigned int& __e__, unsigned int __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_long::is_lock_free() const volatile +{ return false; } + +inline void atomic_long::store +( long __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline long atomic_long::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline long atomic_long::exchange +( long __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_long::compare_exchange_weak +( long& __e__, long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_long::compare_exchange_strong +( long& __e__, long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_long::compare_exchange_weak +( long& __e__, long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_long::compare_exchange_strong +( long& __e__, long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? 
memory_order_relaxed : __x__ ); } + + +inline bool atomic_ulong::is_lock_free() const volatile +{ return false; } + +inline void atomic_ulong::store +( unsigned long __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline unsigned long atomic_ulong::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline unsigned long atomic_ulong::exchange +( unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_ulong::compare_exchange_weak +( unsigned long& __e__, unsigned long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_ulong::compare_exchange_strong +( unsigned long& __e__, unsigned long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_ulong::compare_exchange_weak +( unsigned long& __e__, unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_ulong::compare_exchange_strong +( unsigned long& __e__, unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_llong::is_lock_free() const volatile +{ return false; } + +inline void atomic_llong::store +( long long __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline long long atomic_llong::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline long long atomic_llong::exchange +( long long __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_llong::compare_exchange_weak +( long long& __e__, long long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_llong::compare_exchange_strong +( long long& __e__, long long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_llong::compare_exchange_weak +( long long& __e__, long long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_llong::compare_exchange_strong +( long long& __e__, long long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? 
memory_order_relaxed : __x__ ); } + + +inline bool atomic_ullong::is_lock_free() const volatile +{ return false; } + +inline void atomic_ullong::store +( unsigned long long __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline unsigned long long atomic_ullong::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline unsigned long long atomic_ullong::exchange +( unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_ullong::compare_exchange_weak +( unsigned long long& __e__, unsigned long long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_ullong::compare_exchange_strong +( unsigned long long& __e__, unsigned long long __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_ullong::compare_exchange_weak +( unsigned long long& __e__, unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_ullong::compare_exchange_strong +( unsigned long long& __e__, unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline bool atomic_wchar_t::is_lock_free() const volatile +{ return false; } + +inline void atomic_wchar_t::store +( wchar_t __m__, memory_order __x__ ) volatile +{ atomic_store_explicit( this, __m__, __x__ ); } + +inline wchar_t atomic_wchar_t::load +( memory_order __x__ ) volatile +{ return atomic_load_explicit( this, __x__ ); } + +inline wchar_t atomic_wchar_t::exchange +( wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_exchange_explicit( this, __m__, __x__ ); } + +inline bool atomic_wchar_t::compare_exchange_weak +( wchar_t& __e__, wchar_t __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_wchar_t::compare_exchange_strong +( wchar_t& __e__, wchar_t __m__, + memory_order __x__, memory_order __y__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, __y__ ); } + +inline bool atomic_wchar_t::compare_exchange_weak +( wchar_t& __e__, wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_weak_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +inline bool atomic_wchar_t::compare_exchange_strong +( wchar_t& __e__, wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_compare_exchange_strong_explicit( this, &__e__, __m__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? 
memory_order_relaxed : __x__ ); } + + +template< typename T > +inline bool atomic::is_lock_free() const volatile +{ return false; } + +template< typename T > +inline void atomic::store( T __v__, memory_order __x__ ) volatile +{ _ATOMIC_STORE_( this, __v__, __x__ ); } + +template< typename T > +inline T atomic::load( memory_order __x__ ) volatile +{ return _ATOMIC_LOAD_( this, __x__ ); } + +template< typename T > +inline T atomic::exchange( T __v__, memory_order __x__ ) volatile +{ return _ATOMIC_MODIFY_( this, =, __v__, __x__ ); } + +template< typename T > +inline bool atomic::compare_exchange_weak +( T& __r__, T __v__, memory_order __x__, memory_order __y__ ) volatile +{ return _ATOMIC_CMPSWP_WEAK_( this, &__r__, __v__, __x__ ); } + +template< typename T > +inline bool atomic::compare_exchange_strong +( T& __r__, T __v__, memory_order __x__, memory_order __y__ ) volatile +{ return _ATOMIC_CMPSWP_( this, &__r__, __v__, __x__ ); } + +template< typename T > +inline bool atomic::compare_exchange_weak +( T& __r__, T __v__, memory_order __x__ ) volatile +{ return compare_exchange_weak( __r__, __v__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +template< typename T > +inline bool atomic::compare_exchange_strong +( T& __r__, T __v__, memory_order __x__ ) volatile +{ return compare_exchange_strong( __r__, __v__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + + +inline void* atomic_address::fetch_add +( ptrdiff_t __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + +inline void* atomic_address::fetch_sub +( ptrdiff_t __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline char atomic_char::fetch_add +( char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline char atomic_char::fetch_sub +( char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline char atomic_char::fetch_and +( char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline char atomic_char::fetch_or +( char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline char atomic_char::fetch_xor +( char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline signed char atomic_schar::fetch_add +( signed char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline signed char atomic_schar::fetch_sub +( signed char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline signed char atomic_schar::fetch_and +( signed char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline signed char atomic_schar::fetch_or +( signed char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline signed char atomic_schar::fetch_xor +( signed char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline unsigned char atomic_uchar::fetch_add +( unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + 
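
The one-argument compare_exchange_{weak,strong} overloads above derive the failure ordering from the success ordering: memory_order_acq_rel weakens to memory_order_acquire and memory_order_release weakens to memory_order_relaxed, since a failed exchange performs only a load. A minimal C++ sketch of that mapping, with a helper name chosen here purely for illustration (the header itself simply inlines the ternary at each call site):

    #include "memoryorder.h"   // declares std::memory_order (added later in this patch)

    // Illustrative helper only -- not part of impatomic.h.
    static inline std::memory_order derive_failure_order(std::memory_order success)
    {
        return success == std::memory_order_acq_rel ? std::memory_order_acquire :
               success == std::memory_order_release ? std::memory_order_relaxed :
               success;
    }

    // e.g. compare_exchange_weak(e, v, order) behaves like
    // compare_exchange_weak(e, v, order, derive_failure_order(order)).
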
+ +inline unsigned char atomic_uchar::fetch_sub +( unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline unsigned char atomic_uchar::fetch_and +( unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline unsigned char atomic_uchar::fetch_or +( unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline unsigned char atomic_uchar::fetch_xor +( unsigned char __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline short atomic_short::fetch_add +( short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline short atomic_short::fetch_sub +( short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline short atomic_short::fetch_and +( short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline short atomic_short::fetch_or +( short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline short atomic_short::fetch_xor +( short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline unsigned short atomic_ushort::fetch_add +( unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline unsigned short atomic_ushort::fetch_sub +( unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline unsigned short atomic_ushort::fetch_and +( unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline unsigned short atomic_ushort::fetch_or +( unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline unsigned short atomic_ushort::fetch_xor +( unsigned short __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline int atomic_int::fetch_add +( int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline int atomic_int::fetch_sub +( int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline int atomic_int::fetch_and +( int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline int atomic_int::fetch_or +( int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline int atomic_int::fetch_xor +( int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline unsigned int atomic_uint::fetch_add +( unsigned int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline unsigned int atomic_uint::fetch_sub +( unsigned int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline unsigned int atomic_uint::fetch_and +( unsigned int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline unsigned int atomic_uint::fetch_or +( unsigned int __m__, memory_order __x__ ) volatile +{ return 
atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline unsigned int atomic_uint::fetch_xor +( unsigned int __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline long atomic_long::fetch_add +( long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline long atomic_long::fetch_sub +( long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline long atomic_long::fetch_and +( long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline long atomic_long::fetch_or +( long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline long atomic_long::fetch_xor +( long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline unsigned long atomic_ulong::fetch_add +( unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline unsigned long atomic_ulong::fetch_sub +( unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline unsigned long atomic_ulong::fetch_and +( unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline unsigned long atomic_ulong::fetch_or +( unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline unsigned long atomic_ulong::fetch_xor +( unsigned long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline long long atomic_llong::fetch_add +( long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline long long atomic_llong::fetch_sub +( long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline long long atomic_llong::fetch_and +( long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline long long atomic_llong::fetch_or +( long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline long long atomic_llong::fetch_xor +( long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline unsigned long long atomic_ullong::fetch_add +( unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, __m__, __x__ ); } + + +inline unsigned long long atomic_ullong::fetch_sub +( unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline unsigned long long atomic_ullong::fetch_and +( unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline unsigned long long atomic_ullong::fetch_or +( unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline unsigned long long atomic_ullong::fetch_xor +( unsigned long long __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +inline wchar_t atomic_wchar_t::fetch_add +( wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( 
this, __m__, __x__ ); } + + +inline wchar_t atomic_wchar_t::fetch_sub +( wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, __m__, __x__ ); } + + +inline wchar_t atomic_wchar_t::fetch_and +( wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_fetch_and_explicit( this, __m__, __x__ ); } + + +inline wchar_t atomic_wchar_t::fetch_or +( wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_fetch_or_explicit( this, __m__, __x__ ); } + + +inline wchar_t atomic_wchar_t::fetch_xor +( wchar_t __m__, memory_order __x__ ) volatile +{ return atomic_fetch_xor_explicit( this, __m__, __x__ ); } + + +template< typename T > +T* atomic::load( memory_order __x__ ) volatile +{ return static_cast( atomic_address::load( __x__ ) ); } + +template< typename T > +T* atomic::exchange( T* __v__, memory_order __x__ ) volatile +{ return static_cast( atomic_address::exchange( __v__, __x__ ) ); } + +template< typename T > +bool atomic::compare_exchange_weak +( T*& __r__, T* __v__, memory_order __x__, memory_order __y__) volatile +{ return atomic_address::compare_exchange_weak( *reinterpret_cast( &__r__ ), + static_cast( __v__ ), __x__, __y__ ); } +//{ return _ATOMIC_CMPSWP_WEAK_( this, &__r__, __v__, __x__ ); } + +template< typename T > +bool atomic::compare_exchange_strong +( T*& __r__, T* __v__, memory_order __x__, memory_order __y__) volatile +{ return atomic_address::compare_exchange_strong( *reinterpret_cast( &__r__ ), + static_cast( __v__ ), __x__, __y__ ); } +//{ return _ATOMIC_CMPSWP_( this, &__r__, __v__, __x__ ); } + +template< typename T > +bool atomic::compare_exchange_weak +( T*& __r__, T* __v__, memory_order __x__ ) volatile +{ return compare_exchange_weak( __r__, __v__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +template< typename T > +bool atomic::compare_exchange_strong +( T*& __r__, T* __v__, memory_order __x__ ) volatile +{ return compare_exchange_strong( __r__, __v__, __x__, + __x__ == memory_order_acq_rel ? memory_order_acquire : + __x__ == memory_order_release ? memory_order_relaxed : __x__ ); } + +template< typename T > +T* atomic::fetch_add( ptrdiff_t __v__, memory_order __x__ ) volatile +{ return atomic_fetch_add_explicit( this, sizeof(T) * __v__, __x__ ); } + +template< typename T > +T* atomic::fetch_sub( ptrdiff_t __v__, memory_order __x__ ) volatile +{ return atomic_fetch_sub_explicit( this, sizeof(T) * __v__, __x__ ); } + + +#endif + +#ifdef __cplusplus +extern "C" { +#endif +static inline void atomic_thread_fence(memory_order order) +{ _ATOMIC_FENCE_(order); } + +/** @todo Do we want to try to support a user's signal-handler? */ +static inline void atomic_signal_fence(memory_order order) +{ /* No-op? */ } +#ifdef __cplusplus +} +#endif + + +#ifdef __cplusplus +} // namespace std +#endif + +#endif /* __IMPATOMIC_H__ */ diff --git a/include/librace.h b/include/librace.h new file mode 100644 index 0000000..cabf066 --- /dev/null +++ b/include/librace.h @@ -0,0 +1,28 @@ +/** @file librace.h + * @brief Interface to check normal memory operations for data races. 
+ */ + +#ifndef __LIBRACE_H__ +#define __LIBRACE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + void store_8(void *addr, uint8_t val); + void store_16(void *addr, uint16_t val); + void store_32(void *addr, uint32_t val); + void store_64(void *addr, uint64_t val); + + uint8_t load_8(const void *addr); + uint16_t load_16(const void *addr); + uint32_t load_32(const void *addr); + uint64_t load_64(const void *addr); + +#ifdef __cplusplus +} +#endif + +#endif /* __LIBRACE_H__ */ diff --git a/include/memoryorder.h b/include/memoryorder.h new file mode 100644 index 0000000..ba0dafd --- /dev/null +++ b/include/memoryorder.h @@ -0,0 +1,28 @@ +/** + * @file memoryorder.h + * @brief C11/C++11 atomic memory order listings + */ + +#ifndef MEMORYORDER_H +#define MEMORYORDER_H +#ifdef __cplusplus +#include +namespace std { +#else +#include +#endif + + +typedef enum memory_order { + memory_order_relaxed, memory_order_acquire, memory_order_release, + memory_order_acq_rel, memory_order_seq_cst +} memory_order; + + +#ifdef __cplusplus +} +#endif + + + +#endif diff --git a/include/model-assert.h b/include/model-assert.h new file mode 100644 index 0000000..ddc4427 --- /dev/null +++ b/include/model-assert.h @@ -0,0 +1,17 @@ +#ifndef __MODEL_ASSERT_H__ +#define __MODEL_ASSERT_H__ + +#if __cplusplus +extern "C" { +#else +#include +#endif + +void model_assert(bool expr, const char *file, int line); +#define MODEL_ASSERT(expr) model_assert((expr), __FILE__, __LINE__) + +#if __cplusplus +} +#endif + +#endif /* __MODEL_ASSERT_H__ */ diff --git a/include/modeltypes.h b/include/modeltypes.h new file mode 100644 index 0000000..34525d2 --- /dev/null +++ b/include/modeltypes.h @@ -0,0 +1,28 @@ +/** + * @file modeltypes.h + * @brief Common typedefs for the model-checker + */ + +#ifndef __MODELTYPES_H__ +#define __MODELTYPES_H__ + +/** + * @brief Represents a unique ID for a Thread + * + * The space of unique IDs may need to become a non-compact + * or non-zero-indexed set of integers (or even some other + * type). So this typedef is used to help identify which is + * which, where a simple 'int' is meant to be a compact, + * zero-indexed set and a 'thread_id_t' may be another type + * entirely. 
+ * + * @see id_to_int + * @see int_to_id + */ +typedef int thread_id_t; + +#define THREAD_ID_T_NONE -1 + +typedef unsigned int modelclock_t; + +#endif /* __MODELTYPES_H__ */ diff --git a/include/mutex b/include/mutex new file mode 100644 index 0000000..bd65a78 --- /dev/null +++ b/include/mutex @@ -0,0 +1,31 @@ +/** + * @file mutex + * @brief C++11 mutex interface header + */ + +#ifndef __CXX_MUTEX__ +#define __CXX_MUTEX__ + +#include "modeltypes.h" + +namespace std { + struct mutex_state { + void *locked; /* Thread holding the lock */ + thread_id_t alloc_tid; + modelclock_t alloc_clock; + }; + + class mutex { + public: + mutex(); + ~mutex() {} + void lock(); + bool try_lock(); + void unlock(); + struct mutex_state * get_state() {return &state;} + + private: + struct mutex_state state; + }; +} +#endif /* __CXX_MUTEX__ */ diff --git a/include/stdatomic.h b/include/stdatomic.h new file mode 100644 index 0000000..d4d2198 --- /dev/null +++ b/include/stdatomic.h @@ -0,0 +1,72 @@ +/** + * @file stdatomic.h + * @brief C11 atomic interface header + */ + +#ifndef __STDATOMIC_H__ +#define __STDATOMIC_H__ + +#include "impatomic.h" + +#ifdef __cplusplus + + +using std::atomic_flag; + + +using std::atomic_bool; + + +using std::atomic_address; + + +using std::atomic_char; + + +using std::atomic_schar; + + +using std::atomic_uchar; + + +using std::atomic_short; + + +using std::atomic_ushort; + + +using std::atomic_int; + + +using std::atomic_uint; + + +using std::atomic_long; + + +using std::atomic_ulong; + + +using std::atomic_llong; + + +using std::atomic_ullong; + + +using std::atomic_wchar_t; + + +using std::atomic; +using std::memory_order; +using std::memory_order_relaxed; +using std::memory_order_acquire; +using std::memory_order_release; +using std::memory_order_acq_rel; +using std::memory_order_seq_cst; + +using std::atomic_thread_fence; +using std::atomic_signal_fence; + +#endif /* __cplusplus */ + +#endif /* __STDATOMIC_H__ */ diff --git a/include/threads.h b/include/threads.h new file mode 100644 index 0000000..f38be0a --- /dev/null +++ b/include/threads.h @@ -0,0 +1,37 @@ +/** @file threads.h + * @brief C11 Thread Library Functionality + */ + +#ifndef __THREADS_H__ +#define __THREADS_H__ + +/* Forward declaration */ +#ifdef __cplusplus +typedef class Thread *__thread_identifier; +#else +/* For C, we just need an opaque pointer */ +typedef void *__thread_identifier; +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + typedef void (*thrd_start_t)(void *); + + typedef struct { + __thread_identifier priv; + } thrd_t; + + int thrd_create(thrd_t *t, thrd_start_t start_routine, void *arg); + int thrd_join(thrd_t); + void thrd_yield(void); + thrd_t thrd_current(void); + + int user_main(int, char**); + +#ifdef __cplusplus +} +#endif + +#endif /* __THREADS_H__ */ diff --git a/libannotate.cc b/libannotate.cc new file mode 100644 index 0000000..b1fad8a --- /dev/null +++ b/libannotate.cc @@ -0,0 +1,14 @@ +#include +#include "common.h" +#include "action.h" +#include "model.h" + +/** Pass in an annotation that a trace analysis will use. The + * analysis type is a unique number that specifies which trace + * analysis needs the annotation. The reference is to a data + * structure that the trace understands. 
*/ + +void cdsannotate(uint64_t analysistype, void *annotation) { + /* seq_cst is just a 'don't care' parameter */ + model->switch_to_master(new ModelAction(ATOMIC_ANNOTATION, std::memory_order_seq_cst, annotation, analysistype)); +} diff --git a/librace.cc b/librace.cc new file mode 100644 index 0000000..2c36054 --- /dev/null +++ b/librace.cc @@ -0,0 +1,94 @@ +#define __STDC_FORMAT_MACROS +#include + +#include "librace.h" +#include "common.h" +#include "datarace.h" +#include "model.h" +#include "threads-model.h" + +void store_8(void *addr, uint8_t val) +{ + DEBUG("addr = %p, val = %" PRIu8 "\n", addr, val); + thread_id_t tid = thread_current()->get_id(); + raceCheckWrite(tid, addr); + (*(uint8_t *)addr) = val; +} + +void store_16(void *addr, uint16_t val) +{ + DEBUG("addr = %p, val = %" PRIu16 "\n", addr, val); + thread_id_t tid = thread_current()->get_id(); + raceCheckWrite(tid, addr); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 1)); + (*(uint16_t *)addr) = val; +} + +void store_32(void *addr, uint32_t val) +{ + DEBUG("addr = %p, val = %" PRIu32 "\n", addr, val); + thread_id_t tid = thread_current()->get_id(); + raceCheckWrite(tid, addr); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 1)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 2)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 3)); + (*(uint32_t *)addr) = val; +} + +void store_64(void *addr, uint64_t val) +{ + DEBUG("addr = %p, val = %" PRIu64 "\n", addr, val); + thread_id_t tid = thread_current()->get_id(); + raceCheckWrite(tid, addr); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 1)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 2)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 3)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 4)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 5)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 6)); + raceCheckWrite(tid, (void *)(((uintptr_t)addr) + 7)); + (*(uint64_t *)addr) = val; +} + +uint8_t load_8(const void *addr) +{ + DEBUG("addr = %p\n", addr); + thread_id_t tid = thread_current()->get_id(); + raceCheckRead(tid, addr); + return *((uint8_t *)addr); +} + +uint16_t load_16(const void *addr) +{ + DEBUG("addr = %p\n", addr); + thread_id_t tid = thread_current()->get_id(); + raceCheckRead(tid, addr); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 1)); + return *((uint16_t *)addr); +} + +uint32_t load_32(const void *addr) +{ + DEBUG("addr = %p\n", addr); + thread_id_t tid = thread_current()->get_id(); + raceCheckRead(tid, addr); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 1)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 2)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 3)); + return *((uint32_t *)addr); +} + +uint64_t load_64(const void *addr) +{ + DEBUG("addr = %p\n", addr); + thread_id_t tid = thread_current()->get_id(); + raceCheckRead(tid, addr); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 1)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 2)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 3)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 4)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 5)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 6)); + raceCheckRead(tid, (const void *)(((uintptr_t)addr) + 7)); + return *((uint64_t *)addr); +} diff --git a/libthreads.cc b/libthreads.cc new file mode 100644 index 0000000..75d1910 --- /dev/null +++ b/libthreads.cc @@ -0,0 +1,36 @@ +#include +#include "common.h" +#include "threads-model.h" 
+#include "action.h" + +/* global "model" object */ +#include "model.h" + +/* + * User program API functions + */ +int thrd_create(thrd_t *t, thrd_start_t start_routine, void *arg) +{ + struct thread_params params = { start_routine, arg }; + /* seq_cst is just a 'don't care' parameter */ + model->switch_to_master(new ModelAction(THREAD_CREATE, std::memory_order_seq_cst, t, (uint64_t)¶ms)); + return 0; +} + +int thrd_join(thrd_t t) +{ + Thread *th = t.priv; + model->switch_to_master(new ModelAction(THREAD_JOIN, std::memory_order_seq_cst, th, id_to_int(thrd_to_id(t)))); + return 0; +} + +/** A no-op, for now */ +void thrd_yield(void) +{ + model->switch_to_master(new ModelAction(THREAD_YIELD, std::memory_order_seq_cst, thread_current(), VALUE_NONE)); +} + +thrd_t thrd_current(void) +{ + return thread_current()->get_thrd_t(); +} diff --git a/main.cc b/main.cc new file mode 100644 index 0000000..d489d96 --- /dev/null +++ b/main.cc @@ -0,0 +1,267 @@ +/** @file main.cc + * @brief Entry point for the model checker. + */ + +#include +#include +#include + +#include "common.h" +#include "output.h" + +#include "datarace.h" + +/* global "model" object */ +#include "model.h" +#include "params.h" +#include "snapshot-interface.h" +#include "scanalysis.h" +#include "plugins.h" + +static void param_defaults(struct model_params *params) +{ + params->maxreads = 0; + params->maxfuturedelay = 6; + params->fairwindow = 0; + params->yieldon = false; + params->yieldblock = false; + params->enabledcount = 1; + params->bound = 0; + params->maxfuturevalues = 0; + params->expireslop = 4; + params->verbose = !!DBG_ENABLED(); + params->uninitvalue = 0; +} + +static void print_usage(const char *program_name, struct model_params *params) +{ + ModelVector * registeredanalysis=getRegisteredTraceAnalysis(); + /* Reset defaults before printing */ + param_defaults(params); + + model_print( +"Copyright (c) 2013 Regents of the University of California. All rights reserved.\n" +"Distributed under the GPLv2\n" +"Written by Brian Norris and Brian Demsky\n" +"\n" +"Usage: %s [MODEL-CHECKER OPTIONS] -- [PROGRAM ARGS]\n" +"\n" +"MODEL-CHECKER OPTIONS can be any of the model-checker options listed below. Arguments\n" +"provided after the `--' (the PROGRAM ARGS) are passed to the user program.\n" +"\n" +"Model-checker options:\n" +"-h, --help Display this help message and exit\n" +"-m, --liveness=NUM Maximum times a thread can read from the same write\n" +" while other writes exist.\n" +" Default: %d\n" +"-M, --maxfv=NUM Maximum number of future values that can be sent to\n" +" the same read.\n" +" Default: %d\n" +"-s, --maxfvdelay=NUM Maximum actions that the model checker will wait for\n" +" a write from the future past the expected number\n" +" of actions.\n" +" Default: %d\n" +"-S, --fvslop=NUM Future value expiration sloppiness.\n" +" Default: %u\n" +"-y, --yield Enable CHESS-like yield-based fairness support\n" +" (requires thrd_yield() in test program).\n" +" Default: %s\n" +"-Y, --yieldblock Prohibit an execution from running a yield.\n" +" Default: %s\n" +"-f, --fairness=WINDOW Specify a fairness window in which actions that are\n" +" enabled sufficiently many times should receive\n" +" priority for execution (not recommended).\n" +" Default: %d\n" +"-e, --enabled=COUNT Enabled count.\n" +" Default: %d\n" +"-b, --bound=MAX Upper length bound.\n" +" Default: %d\n" +"-v[NUM], --verbose[=NUM] Print verbose execution information. 
NUM is optional:\n" +" 0 is quiet; 1 is noisy; 2 is noisier.\n" +" Default: %d\n" +"-u, --uninitialized=VALUE Return VALUE any load which may read from an\n" +" uninitialized atomic.\n" +" Default: %u\n" +"-t, --analysis=NAME Use Analysis Plugin.\n" +"-o, --options=NAME Option for previous analysis plugin. \n" +" -o help for a list of options\n" +" -- Program arguments follow.\n\n", + program_name, + params->maxreads, + params->maxfuturevalues, + params->maxfuturedelay, + params->expireslop, + params->yieldon ? "enabled" : "disabled", + params->yieldblock ? "enabled" : "disabled", + params->fairwindow, + params->enabledcount, + params->bound, + params->verbose, + params->uninitvalue); + model_print("Analysis plugins:\n"); + for(unsigned int i=0;isize();i++) { + TraceAnalysis * analysis=(*registeredanalysis)[i]; + model_print("%s\n", analysis->name()); + } + exit(EXIT_SUCCESS); +} + +bool install_plugin(char * name) { + ModelVector * registeredanalysis=getRegisteredTraceAnalysis(); + ModelVector * installedanalysis=getInstalledTraceAnalysis(); + + for(unsigned int i=0;isize();i++) { + TraceAnalysis * analysis=(*registeredanalysis)[i]; + if (strcmp(name, analysis->name())==0) { + installedanalysis->push_back(analysis); + return false; + } + } + model_print("Analysis %s Not Found\n", name); + return true; +} + +static void parse_options(struct model_params *params, int argc, char **argv) +{ + const char *shortopts = "hyYt:o:m:M:s:S:f:e:b:u:v::"; + const struct option longopts[] = { + {"help", no_argument, NULL, 'h'}, + {"liveness", required_argument, NULL, 'm'}, + {"maxfv", required_argument, NULL, 'M'}, + {"maxfvdelay", required_argument, NULL, 's'}, + {"fvslop", required_argument, NULL, 'S'}, + {"fairness", required_argument, NULL, 'f'}, + {"yield", no_argument, NULL, 'y'}, + {"yieldblock", no_argument, NULL, 'Y'}, + {"enabled", required_argument, NULL, 'e'}, + {"bound", required_argument, NULL, 'b'}, + {"verbose", optional_argument, NULL, 'v'}, + {"uninitialized", optional_argument, NULL, 'u'}, + {"analysis", optional_argument, NULL, 't'}, + {"options", optional_argument, NULL, 'o'}, + {0, 0, 0, 0} /* Terminator */ + }; + int opt, longindex; + bool error = false; + while (!error && (opt = getopt_long(argc, argv, shortopts, longopts, &longindex)) != -1) { + switch (opt) { + case 'h': + print_usage(argv[0], params); + break; + case 's': + params->maxfuturedelay = atoi(optarg); + break; + case 'S': + params->expireslop = atoi(optarg); + break; + case 'f': + params->fairwindow = atoi(optarg); + break; + case 'e': + params->enabledcount = atoi(optarg); + break; + case 'b': + params->bound = atoi(optarg); + break; + case 'm': + params->maxreads = atoi(optarg); + break; + case 'M': + params->maxfuturevalues = atoi(optarg); + break; + case 'v': + params->verbose = optarg ? atoi(optarg) : 1; + break; + case 'u': + params->uninitvalue = atoi(optarg); + break; + case 'y': + params->yieldon = true; + break; + case 't': + if (install_plugin(optarg)) + error = true; + break; + case 'o': + { + ModelVector * analyses = getInstalledTraceAnalysis(); + if ( analyses->size() == 0 || (*analyses)[analyses->size()-1]->option(optarg)) + error = true; + } + break; + case 'Y': + params->yieldblock = true; + break; + default: /* '?' 
*/ + error = true; + break; + } + } + + /* Pass remaining arguments to user program */ + params->argc = argc - (optind - 1); + params->argv = argv + (optind - 1); + + /* Reset program name */ + params->argv[0] = argv[0]; + + /* Reset (global) optind for potential use by user program */ + optind = 1; + + if (error) + print_usage(argv[0], params); +} + +int main_argc; +char **main_argv; + +static void install_trace_analyses(ModelExecution *execution) +{ + ModelVector * installedanalysis=getInstalledTraceAnalysis(); + for(unsigned int i=0;isize();i++) { + TraceAnalysis * ta=(*installedanalysis)[i]; + ta->setExecution(execution); + model->add_trace_analysis(ta); + } +} + +/** The model_main function contains the main model checking loop. */ +static void model_main() +{ + struct model_params params; + + param_defaults(¶ms); + register_plugins(); + + parse_options(¶ms, main_argc, main_argv); + + //Initialize race detector + initRaceDetector(); + + snapshot_stack_init(); + + model = new ModelChecker(params); + install_trace_analyses(model->get_execution()); + + snapshot_record(0); + model->run(); + delete model; + + DEBUG("Exiting\n"); +} + +/** + * Main function. Just initializes snapshotting library and the + * snapshotting library calls the model_main function. + */ +int main(int argc, char **argv) +{ + main_argc = argc; + main_argv = argv; + + /* Configure output redirection for the model-checker */ + redirect_output(); + + /* Let's jump in quickly and start running stuff */ + snapshot_system_init(10000, 1024, 1024, 4000, &model_main); +} diff --git a/mainpage.dox b/mainpage.dox new file mode 100644 index 0000000..a311802 --- /dev/null +++ b/mainpage.dox @@ -0,0 +1,4 @@ +/** + * @mainpage + * @htmlinclude README.html + */ diff --git a/malloc.c b/malloc.c new file mode 100644 index 0000000..7189353 --- /dev/null +++ b/malloc.c @@ -0,0 +1,6252 @@ +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ Send questions, + comments, complaints, performance data, etc to dl@cs.oswego.edu + +* Version 2.8.5 Sun May 22 10:26:02 2011 Doug Lea (dl at gee) + + Note: There may be an updated version of this malloc obtainable at + ftp://gee.cs.oswego.edu/pub/misc/malloc.c + Check before installing! + +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O3), and link it into another program. All of + the compile-time options default to reasonable values for use on + most platforms. You might later want to step through various + compile-time and dynamic tuning options. + + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.5.h + You don't really need this .h file unless you call functions not + defined in your system include files. The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. Note that you may already by default be using a C + library containing a malloc that is based on some version of this + malloc (for example in linux). You might still want to use the one + in this file to customize settings or to avoid overheads associated + with library versions. 
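
As a rough illustration of the quickstart above (file names and flags are only an example), the allocator is compiled once and linked into the program; ordinary malloc/realloc/free calls then resolve to it:

    example.c (illustrative only):

      #include <stdlib.h>   // the optional malloc-2.8.5.h is needed only for
                            // functions your system headers do not declare
      int main(void) {
        void* p = malloc(64);      // served by this allocator once malloc.o is linked in
        if (p == 0) return 1;
        void* q = realloc(p, 128);
        free(q ? q : p);           // free whichever pointer is still valid
        return 0;
      }

    build sketch: cc -O3 -c malloc.c && cc -O3 example.c malloc.o -o example
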
+ +* Vital statistics: + + Supported pointer/size_t representation: 4 or 8 bytes + size_t MUST be an unsigned type of the same width as + pointers. (If you are using an ancient system that declares + size_t as a signed type, or need it to be a different width + than pointers, you can use a previous release of this malloc + (e.g. 2.7.2) supporting these.) + + Alignment: 8 bytes (default) + This suffices for nearly all current machines and C compilers. + However, you can define MALLOC_ALIGNMENT to be wider than this + if necessary (up to 128bytes), at the expense of using more space. + + Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) + 8 or 16 bytes (if 8byte sizes) + Each malloced chunk has a hidden word of overhead holding size + and status information, and additional cross-check word + if FOOTERS is defined. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) + 8-byte ptrs: 32 bytes (including overhead) + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. + The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is about + 32 bytes plus the remainder from a system page (the minimal + mmap unit); typically 4096 or 8192 bytes. + + Security: static-safe; optionally more or less + The "security" of malloc refers to the ability of malicious + code to accentuate the effects of errors (for example, freeing + space that is not currently malloc'ed or overwriting past the + ends of chunks) in code that calls malloc. This malloc + guarantees not to modify any memory locations below the base of + heap, i.e., static variables, even in the presence of usage + errors. The routines additionally detect most improper frees + and reallocs. All this holds as long as the static bookkeeping + for malloc itself is not corrupted by some other means. This + is only one aspect of security -- these checks do not, and + cannot, detect all possible programming errors. + + If FOOTERS is defined nonzero, then each allocated chunk + carries an additional check word to verify that it was malloced + from its space. These check words are the same within each + execution of a program using malloc, but differ across + executions, so externally crafted fake chunks cannot be + freed. This improves security by rejecting frees/reallocs that + could corrupt heap memory, in addition to the checks preventing + writes to statics that are always on. This may further improve + security at the expense of time and space overhead. (Note that + FOOTERS may also be worth using with MSPACES.) + + By default detected errors cause the program to abort (calling + "abort()"). You can override this to instead proceed past + errors by defining PROCEED_ON_ERROR. In this case, a bad free + has no effect, and a malloc that encounters a bad address + caused by user overwrites will ignore the bad address by + dropping pointers and indices to all known memory. This may + be appropriate for programs that should continue if at all + possible in the face of programming errors, although they may + run out of memory because dropped memory is never reclaimed. + + If you don't like either of these options, you can define + CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything + else. 
And if if you are sure that your program using malloc has + no errors or vulnerabilities, you can define INSECURE to 1, + which might (or might not) provide a small performance improvement. + + It is also possible to limit the maximum total allocatable + space, using malloc_set_footprint_limit. This is not + designed as a security feature in itself (calls to set limits + are not screened or privileged), but may be useful as one + aspect of a secure implementation. + + Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero + When USE_LOCKS is defined, each public call to malloc, free, + etc is surrounded with a lock. By default, this uses a plain + pthread mutex, win32 critical section, or a spin-lock if if + available for the platform and not disabled by setting + USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined, + recursive versions are used instead (which are not required for + base functionality but may be needed in layered extensions). + Using a global lock is not especially fast, and can be a major + bottleneck. It is designed only to provide minimal protection + in concurrent environments, and to provide a basis for + extensions. If you are using malloc in a concurrent program, + consider instead using nedmalloc + (http://www.nedprod.com/programs/portable/nedmalloc/) or + ptmalloc (See http://www.malloc.de), which are derived from + versions of this malloc. + + System requirements: Any combination of MORECORE and/or MMAP/MUNMAP + This malloc can use unix sbrk or any emulation (invoked using + the CALL_MORECORE macro) and/or mmap/munmap or any emulation + (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system + memory. On most unix systems, it tends to work best if both + MORECORE and MMAP are enabled. On Win32, it uses emulations + based on VirtualAlloc. It also uses common C library functions + like memset. + + Compliance: I believe it is compliant with the Single Unix Specification + (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Overview of algorithms + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and + tunable. Consistent balance across these factors results in a good + general-purpose allocator for malloc-intensive programs. + + In most ways, this malloc is a best-fit allocator. Generally, it + chooses the best-fitting existing chunk for a request, with ties + broken in approximately least-recently-used order. (This strategy + normally maintains low fragmentation.) However, for requests less + than 256bytes, it deviates from best-fit when there is not an + exactly fitting available chunk by preferring to use space adjacent + to that used for the previous small request, as well as by breaking + ties in approximately most-recently-used order. (These enhance + locality of series of small allocations.) And for very large requests + (>= 256Kb by default), it relies on system memory mapping + facilities, if supported. (This helps avoid carrying around and + possibly fragmenting memory used only for large chunks.) 
+ + All operations (except malloc_stats and mallinfo) have execution + times that are bounded by a constant factor of the number of bits in + a size_t, not counting any clearing in calloc or copying in realloc, + or actions surrounding MORECORE and MMAP that have times + proportional to the number of non-contiguous regions returned by + system allocation routines, which is often just 1. In real-time + applications, you can optionally suppress segment traversals using + NO_SEGMENT_TRAVERSAL, which assures bounded execution even when + system allocators return non-contiguous spaces, at the typical + expense of carrying around more memory and increased fragmentation. + + The implementation is not very modular and seriously overuses + macros. Perhaps someday all C compilers will do as good a job + inlining modular code as can now be done by brute-force expansion, + but now, enough of them seem not to. + + Some compilers issue a lot of warnings about code that is + dead/unreachable only on some platforms, and also about intentional + uses of negation on unsigned types. All known cases of each can be + ignored. + + For a longer but out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + +* MSPACES + If MSPACES is defined, then in addition to malloc, free, etc., + this file also defines mspace_malloc, mspace_free, etc. These + are versions of malloc routines that take an "mspace" argument + obtained using create_mspace, to control all internal bookkeeping. + If ONLY_MSPACES is defined, only these versions are compiled. + So if you would like to use this allocator for only some allocations, + and your system malloc for others, you can compile with + ONLY_MSPACES and then do something like... + static mspace mymspace = create_mspace(0,0); // for example + #define mymalloc(bytes) mspace_malloc(mymspace, bytes) + + (Note: If you only need one instance of an mspace, you can instead + use "USE_DL_PREFIX" to relabel the global malloc.) + + You can similarly create thread-local allocators by storing + mspaces as thread-locals. For example: + static __thread mspace tlms = 0; + void* tlmalloc(size_t bytes) { + if (tlms == 0) tlms = create_mspace(0, 0); + return mspace_malloc(tlms, bytes); + } + void tlfree(void* mem) { mspace_free(tlms, mem); } + + Unless FOOTERS is defined, each mspace is completely independent. + You cannot allocate from one and free to another (although + conformance is only weakly checked, so usage errors are not always + caught). If FOOTERS is defined, then each chunk carries around a tag + indicating its originating mspace, and frees are directed to their + originating spaces. Normally, this requires use of locks. + + ------------------------- Compile-time options --------------------------- + +Be careful in setting #define values for numerical constants of type +size_t. On some systems, literal values are not automatically extended +to size_t precision unless they are explicitly casted. You can also +use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. + +WIN32 default: defined if _WIN32 defined + Defining WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. Beware that there seem to be some + cases where this malloc might not be a pure drop-in replacement for + Win32 malloc: Random-looking failures from Win32 GDI API's (eg; + SetDIBits()) may be due to bugs in some video driver implementations + when pixel buffers are malloc()ed, and the region spans more than + one VirtualAlloc()ed region. 
Because dlmalloc uses a small (64Kb) + default granularity, pixel buffers may straddle virtual allocation + regions more often than when using the Microsoft allocator. You can + avoid this by using VirtualAlloc() and VirtualFree() for all pixel + buffers rather than using malloc(). If this is not possible, + recompile this malloc with a larger DEFAULT_GRANULARITY. Note: + in cases where MSC and gcc (cygwin) are known to differ on WIN32, + conditions use _MSC_VER to distinguish them. + +DLMALLOC_EXPORT default: extern + Defines how public APIs are declared. If you want to export via a + Windows DLL, you might define this as + #define DLMALLOC_EXPORT extern __declspace(dllexport) + If you want a POSIX ELF shared object, you might use + #define DLMALLOC_EXPORT extern __attribute__((visibility("default"))) + +MALLOC_ALIGNMENT default: (size_t)8 + Controls the minimum alignment for malloc'ed chunks. It must be a + power of two and at least 8, even on machines for which smaller + alignments would suffice. It may be defined as larger than this + though. Note however that code and data structures are optimized for + the case of 8-byte alignment. + +MSPACES default: 0 (false) + If true, compile in support for independent allocation spaces. + This is only supported if HAVE_MMAP is true. + +ONLY_MSPACES default: 0 (false) + If true, only compile in mspace versions, not regular versions. + +USE_LOCKS default: 0 (false) + Causes each call to each public routine to be surrounded with + pthread or WIN32 mutex lock/unlock. (If set true, this can be + overridden on a per-mspace basis for mspace versions.) If set to a + non-zero value other than 1, locks are used, but their + implementation is left out, so lock functions must be supplied manually, + as described below. + +USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available + If true, uses custom spin locks for locking. This is currently + supported only gcc >= 4.1, older gccs on x86 platforms, and recent + MS compilers. Otherwise, posix locks or win32 critical sections are + used. + +USE_RECURSIVE_LOCKS default: not defined + If defined nonzero, uses recursive (aka reentrant) locks, otherwise + uses plain mutexes. This is not required for malloc proper, but may + be needed for layered allocators such as nedmalloc. + +FOOTERS default: 0 + If true, provide extra checking and dispatching by placing + information in the footers of allocated chunks. This adds + space and time overhead. + +INSECURE default: 0 + If true, omit checks for usage errors and heap space overwrites. + +USE_DL_PREFIX default: NOT defined + Causes compiler to prefix all public routines with the string 'dl'. + This can be useful when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. + +MALLOC_INSPECT_ALL default: NOT defined + If defined, compiles malloc_inspect_all and mspace_inspect_all, that + perform traversal of all heap space. Unless access to these + functions is otherwise restricted, you probably do not want to + include them in secure implementations. + +ABORT default: defined as abort() + Defines how to abort on failed checks. On most systems, a failed + check cannot die with an "assert" or even print an informative + message, because the underlying print routines in turn call malloc, + which will fail again. Generally, the best policy is to simply call + abort(). 
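  If you still want a one-line breadcrumb first, a minimal override (a
  hypothetical sketch; note that it avoids printf-style routines, which call
  malloc, and still ends by calling abort()) might look like:

    static void die_noisily(void) {            // hypothetical handler
      write(2, "malloc check failed\n", 20);   // write() does not call malloc
      abort();
    }
    #define ABORT  die_noisily()
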
It's not very useful to do more than this because many + errors due to overwriting will show up as address faults (null, odd + addresses etc) rather than malloc-triggered checks, so will also + abort. Also, most compilers know that abort() does not return, so + can better optimize code conditionally calling it. + +PROCEED_ON_ERROR default: defined as 0 (false) + Controls whether detected bad addresses cause them to bypassed + rather than aborting. If set, detected bad arguments to free and + realloc are ignored. And all bookkeeping information is zeroed out + upon a detected overwrite of freed heap space, thus losing the + ability to ever return it from malloc again, but enabling the + application to proceed. If PROCEED_ON_ERROR is defined, the + static variable malloc_corruption_error_count is compiled in + and can be examined to see if errors have occurred. This option + generates slower code than the default abort policy. + +DEBUG default: NOT defined + The DEBUG setting is mainly intended for people trying to modify + this code or diagnose problems when porting to new platforms. + However, it may also be able to better isolate user errors than just + using runtime checks. The assertions in the check routines spell + out in more detail the assumptions and invariants underlying the + algorithms. The checking is fairly extensive, and will slow down + execution noticeably. Calling malloc_stats or mallinfo with DEBUG + set will attempt to check every non-mmapped allocated and free chunk + in the course of computing the summaries. + +ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) + Debugging assertion failures can be nearly impossible if your + version of the assert macro causes malloc to be called, which will + lead to a cascade of further failures, blowing the runtime stack. + ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(), + which will usually make debugging easier. + +MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 + The action to take before "return 0" when malloc fails to be able to + return memory because there is none available. + +HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES + True if this system supports sbrk or an emulation of it. + +MORECORE default: sbrk + The name of the sbrk-style system routine to call to obtain more + memory. See below for guidance on writing custom MORECORE + functions. The type of the argument to sbrk/MORECORE varies across + systems. It cannot be size_t, because it supports negative + arguments, so it is normally the signed type of the same width as + size_t (sometimes declared as "intptr_t"). It doesn't much matter + though. Internally, we only call it with arguments less than half + the max value of a size_t, which should work across all reasonable + possibilities, although sometimes generating compiler warnings. + +MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE + If true, take advantage of fact that consecutive calls to MORECORE + with positive arguments always return contiguous increasing + addresses. This is true of unix sbrk. It does not hurt too much to + set it true anyway, since malloc copes with non-contiguities. + Setting it false when definitely non-contiguous saves time + and possibly wasted space it would take to discover this though. + +MORECORE_CANNOT_TRIM default: NOT defined + True if MORECORE cannot release space back to the system when given + negative arguments. 
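  For instance (a hypothetical sketch, not part of this file), a MORECORE
  built over a fixed static arena can hand memory out but has no way to give
  it back, so it would be compiled with MORECORE_CANNOT_TRIM defined:

    static char my_arena[1 << 20];        // 1Mb backing store
    static size_t my_arena_used = 0;
    void* my_morecore(ptrdiff_t increment) {
      void* p;
      if (increment < 0)
        return (void*)-1;                 // sbrk-style failure: cannot shrink
      if (my_arena_used + (size_t)increment > sizeof(my_arena))
        return (void*)-1;                 // arena exhausted
      p = my_arena + my_arena_used;
      my_arena_used += (size_t)increment;
      return p;
    }
    // built with something like:
    //   -DMORECORE=my_morecore -DMORECORE_CANNOT_TRIM -DHAVE_MMAP=0
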
This is generally necessary only if you are + using a hand-crafted MORECORE function that cannot handle negative + arguments. + +NO_SEGMENT_TRAVERSAL default: 0 + If non-zero, suppresses traversals of memory segments + returned by either MORECORE or CALL_MMAP. This disables + merging of segments that are contiguous, and selectively + releasing them to the OS if unused, but bounds execution times. + +HAVE_MMAP default: 1 (true) + True if this system supports mmap or an emulation of it. If so, and + HAVE_MORECORE is not true, MMAP is used for all system + allocation. If set and HAVE_MORECORE is true as well, MMAP is + primarily used to directly allocate very large blocks. It is also + used as a backup strategy in cases where MORECORE fails to provide + space from system. Note: A single call to MUNMAP is assumed to be + able to unmap memory that may have be allocated using multiple calls + to MMAP, so long as they are adjacent. + +HAVE_MREMAP default: 1 on linux, else 0 + If true realloc() uses mremap() to re-allocate large blocks and + extend or shrink allocation spaces. + +MMAP_CLEARS default: 1 except on WINCE. + True if mmap clears memory so calloc doesn't need to. This is true + for standard unix mmap using /dev/zero and on WIN32 except for WINCE. + +USE_BUILTIN_FFS default: 0 (i.e., not used) + Causes malloc to use the builtin ffs() function to compute indices. + Some compilers may recognize and intrinsify ffs to be faster than the + supplied C version. Also, the case of x86 using gcc is special-cased + to an asm instruction, so is already as fast as it can be, and so + this setting has no effect. Similarly for Win32 under recent MS compilers. + (On most x86s, the asm version is only slightly faster than the C version.) + +malloc_getpagesize default: derive from system includes, or 4096. + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. This may be (and + usually is) a function rather than a constant. This is ignored + if WIN32, where page size is determined using getSystemInfo during + initialization. + +USE_DEV_RANDOM default: 0 (i.e., not used) + Causes malloc to use /dev/random to initialize secure magic seed for + stamping footers. Otherwise, the current time is used. + +NO_MALLINFO default: 0 + If defined, don't compile "mallinfo". This can be a simple way + of dealing with mismatches between system declarations and + those in this file. + +MALLINFO_FIELD_TYPE default: size_t + The type of the fields in the mallinfo struct. This was originally + defined as "int" in SVID etc, but is more usefully defined as + size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set + +NO_MALLOC_STATS default: 0 + If defined, don't compile "malloc_stats". This avoids calls to + fprintf and bringing in stdio dependencies you might not want. + +REALLOC_ZERO_BYTES_FREES default: not defined + This should be set if a call to realloc with zero bytes should + be the same as a call to free. Some people think it should. Otherwise, + since this malloc returns a unique pointer for malloc(0), so does + realloc(p, 0). + +LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H +LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H +LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32 + Define these if your system does not have these header files. + You might need to manually insert some of the declarations they provide. 
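  Putting several of the options above together, a stripped-down embedded
  build (an illustrative sketch; adjust to your target) might be configured
  with a handful of defines, supplied either as -D flags or in a small header
  included ahead of this file:

    #define HAVE_MMAP 0              // no mmap on this target
    #define HAVE_MORECORE 1
    #define MORECORE my_morecore     // e.g. the arena routine sketched above
    #define MORECORE_CANNOT_TRIM
    #define LACKS_SYS_MMAN_H
    #define LACKS_SCHED_H
    #define NO_MALLOC_STATS 1        // avoid the stdio dependency
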
+ +DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, + system_info.dwAllocationGranularity in WIN32, + otherwise 64K. + Also settable using mallopt(M_GRANULARITY, x) + The unit for allocating and deallocating memory from the system. On + most systems with contiguous MORECORE, there is no reason to + make this more than a page. However, systems with MMAP tend to + either require or encourage larger granularities. You can increase + this value to prevent system allocation functions to be called so + often, especially if they are slow. The value must be at least one + page and must be a power of two. Setting to 0 causes initialization + to either page size or win32 region size. (Note: In previous + versions of malloc, the equivalent of this option was called + "TOP_PAD") + +DEFAULT_TRIM_THRESHOLD default: 2MB + Also settable using mallopt(M_TRIM_THRESHOLD, x) + The maximum amount of unused top-most memory to keep before + releasing via malloc_trim in free(). Automatic trimming is mainly + useful in long-lived programs using contiguous MORECORE. Because + trimming via sbrk can be slow on some systems, and can sometimes be + wasteful (in cases where programs immediately afterward allocate + more large chunks) the value should be high enough so that your + overall system performance would improve by releasing this much + memory. As a rough guide, you might set to a value close to the + average size of a process (program) running on your system. + Releasing this much memory would allow such a process to run in + memory. Generally, it is worth tuning trim thresholds when a + program undergoes phases where several large chunks are allocated + and released in ways that can reuse each other's storage, perhaps + mixed with phases where there are no such chunks at all. The trim + value must be greater than page size to have any useful effect. To + disable trimming completely, you can set to MAX_SIZE_T. Note that the trick + some people use of mallocing a huge space and then freeing it at + program startup, in an attempt to reserve system memory, doesn't + have the intended effect under automatic trimming, since that memory + will immediately be returned to the system. + +DEFAULT_MMAP_THRESHOLD default: 256K + Also settable using mallopt(M_MMAP_THRESHOLD, x) + The request size threshold for using MMAP to directly service a + request. Requests of at least this size that cannot be allocated + using already-existing space will be serviced via mmap. (If enough + normal freed space already exists it is used instead.) Using mmap + segregates relatively large chunks of memory so that they can be + individually obtained and released from the host system. A request + serviced through mmap is never reused by any other request (at least + not directly; the system may just so happen to remap successive + requests to the same locations). Segregating space in this way has + the benefits that: Mmapped space can always be individually released + back to the system, which helps keep the system level memory demands + of a long-lived program low. Also, mapped memory doesn't become + `locked' between other chunks, as can happen with normally allocated + chunks, which means that even trimming via malloc_trim would not + release them. However, it has the disadvantage that the space + cannot be reclaimed, consolidated, and then used to service later + requests, as happens with normal chunks. 
The advantages of mmap + nearly always outweigh disadvantages for "large" chunks, but the + value of "large" may vary across systems. The default is an + empirically derived value that works well in most systems. You can + disable mmap by setting to MAX_SIZE_T. + +MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP + The number of consolidated frees between checks to release + unused segments when freeing. When using non-contiguous segments, + especially with multiple mspaces, checking only for topmost space + doesn't always suffice to trigger trimming. To compensate for this, + free() will, with a period of MAX_RELEASE_CHECK_RATE (or the + current number of segments, if greater) try to release unused + segments to the OS when freeing chunks that result in + consolidation. The best value for this parameter is a compromise + between slowing down frees with relatively costly checks that + rarely trigger versus holding on to unused memory. To effectively + disable, set to MAX_SIZE_T. This may lead to a very slight speed + improvement at the expense of carrying around more memory. +*/ + +/* Version identifier to allow people to support multiple versions */ +#ifndef DLMALLOC_VERSION +#define DLMALLOC_VERSION 20805 +#endif /* DLMALLOC_VERSION */ + +#ifndef DLMALLOC_EXPORT +#define DLMALLOC_EXPORT extern +#endif + +#ifndef WIN32 +#ifdef _WIN32 +#define WIN32 1 +#endif /* _WIN32 */ +#ifdef _WIN32_WCE +#define LACKS_FCNTL_H +#define WIN32 1 +#endif /* _WIN32_WCE */ +#endif /* WIN32 */ +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#define HAVE_MMAP 1 +#define HAVE_MORECORE 0 +#define LACKS_UNISTD_H +#define LACKS_SYS_PARAM_H +#define LACKS_SYS_MMAN_H +#define LACKS_STRING_H +#define LACKS_STRINGS_H +#define LACKS_SYS_TYPES_H +#define LACKS_ERRNO_H +#define LACKS_SCHED_H +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef MMAP_CLEARS +#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ +#define MMAP_CLEARS 0 +#else +#define MMAP_CLEARS 1 +#endif /* _WIN32_WCE */ +#endif /*MMAP_CLEARS */ +#endif /* WIN32 */ + +#if defined(DARWIN) || defined(_DARWIN) +/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ +#ifndef HAVE_MORECORE +#define HAVE_MORECORE 0 +#define HAVE_MMAP 1 +/* OSX allocators provide 16 byte alignment */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)16U) +#endif +#endif /* HAVE_MORECORE */ +#endif /* DARWIN */ + +#ifndef LACKS_SYS_TYPES_H +#include /* For size_t */ +#endif /* LACKS_SYS_TYPES_H */ + +/* The maximum possible size_t value has all bits set */ +#define MAX_SIZE_T (~(size_t)0) + +#ifndef USE_LOCKS /* ensure true if spin or recursive locks set */ +#define USE_LOCKS ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \ + (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0)) +#endif /* USE_LOCKS */ + +#if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */ +#if ((defined(__GNUC__) && \ + ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \ + defined(__i386__) || defined(__x86_64__))) || \ + (defined(_MSC_VER) && _MSC_VER>=1310)) +#ifndef USE_SPIN_LOCKS +#define USE_SPIN_LOCKS 1 +#endif /* USE_SPIN_LOCKS */ +#elif USE_SPIN_LOCKS +#error "USE_SPIN_LOCKS defined without implementation" +#endif /* ... locks available... 
*/ +#elif !defined(USE_SPIN_LOCKS) +#define USE_SPIN_LOCKS 0 +#endif /* USE_LOCKS */ + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 +#endif /* ONLY_MSPACES */ +#ifndef MSPACES +#if ONLY_MSPACES +#define MSPACES 1 +#else /* ONLY_MSPACES */ +#define MSPACES 0 +#endif /* ONLY_MSPACES */ +#endif /* MSPACES */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)8U) +#endif /* MALLOC_ALIGNMENT */ +#ifndef FOOTERS +#define FOOTERS 0 +#endif /* FOOTERS */ +#ifndef ABORT +#define ABORT abort() +#endif /* ABORT */ +#ifndef ABORT_ON_ASSERT_FAILURE +#define ABORT_ON_ASSERT_FAILURE 1 +#endif /* ABORT_ON_ASSERT_FAILURE */ +#ifndef PROCEED_ON_ERROR +#define PROCEED_ON_ERROR 0 +#endif /* PROCEED_ON_ERROR */ + +#ifndef INSECURE +#define INSECURE 0 +#endif /* INSECURE */ +#ifndef MALLOC_INSPECT_ALL +#define MALLOC_INSPECT_ALL 0 +#endif /* MALLOC_INSPECT_ALL */ +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif /* HAVE_MMAP */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 1 +#endif /* MMAP_CLEARS */ +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#define _GNU_SOURCE /* Turns on mremap() definition */ +#else /* linux */ +#define HAVE_MREMAP 0 +#endif /* linux */ +#endif /* HAVE_MREMAP */ +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION errno = ENOMEM; +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef HAVE_MORECORE +#if ONLY_MSPACES +#define HAVE_MORECORE 0 +#else /* ONLY_MSPACES */ +#define HAVE_MORECORE 1 +#endif /* ONLY_MSPACES */ +#endif /* HAVE_MORECORE */ +#if !HAVE_MORECORE +#define MORECORE_CONTIGUOUS 0 +#else /* !HAVE_MORECORE */ +#define MORECORE_DEFAULT sbrk +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif /* MORECORE_CONTIGUOUS */ +#endif /* HAVE_MORECORE */ +#ifndef DEFAULT_GRANULARITY +#if (MORECORE_CONTIGUOUS || defined(WIN32)) +#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ +#else /* MORECORE_CONTIGUOUS */ +#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) +#endif /* MORECORE_CONTIGUOUS */ +#endif /* DEFAULT_GRANULARITY */ +#ifndef DEFAULT_TRIM_THRESHOLD +#ifndef MORECORE_CANNOT_TRIM +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#else /* MORECORE_CANNOT_TRIM */ +#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T +#endif /* MORECORE_CANNOT_TRIM */ +#endif /* DEFAULT_TRIM_THRESHOLD */ +#ifndef DEFAULT_MMAP_THRESHOLD +#if HAVE_MMAP +#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +#else /* HAVE_MMAP */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* DEFAULT_MMAP_THRESHOLD */ +#ifndef MAX_RELEASE_CHECK_RATE +#if HAVE_MMAP +#define MAX_RELEASE_CHECK_RATE 4095 +#else +#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* MAX_RELEASE_CHECK_RATE */ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 0 +#endif /* USE_BUILTIN_FFS */ +#ifndef USE_DEV_RANDOM +#define USE_DEV_RANDOM 0 +#endif /* USE_DEV_RANDOM */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ +#ifndef NO_MALLOC_STATS +#define NO_MALLOC_STATS 0 +#endif /* NO_MALLOC_STATS */ +#ifndef NO_SEGMENT_TRAVERSAL +#define NO_SEGMENT_TRAVERSAL 0 +#endif /* NO_SEGMENT_TRAVERSAL */ + +/* + mallopt tuning options. SVID/XPG defines four standard parameter + numbers for mallopt, normally defined in malloc.h. None of these + are used in this malloc, so setting them has no effect. But this + malloc does support the following options. 
+*/ + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + +/* ------------------------ Mallinfo declarations ------------------------ */ + +#if !NO_MALLINFO +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any system that has a + /usr/include/malloc.h defining struct mallinfo. The main + declaration needed is the mallinfo struct that is returned (by-copy) + by mallinfo(). The malloinfo struct contains a bunch of fields that + are not even meaningful in this version of malloc. These fields are + are instead filled by mallinfo() with other numbers that might be of + interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else a compliant version is + declared below. These must be precisely the same for mallinfo() to + work. The original SVID version of this struct, defined on most + systems with mallinfo, declares all fields as ints. But some others + define as unsigned long. If your system defines the fields using a + type of different width than listed here, you MUST #include your + system version and #define HAVE_USR_INCLUDE_MALLOC_H. +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#else /* HAVE_USR_INCLUDE_MALLOC_H */ +#ifndef STRUCT_MALLINFO_DECLARED +/* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is defined */ +#define _STRUCT_MALLINFO +#define STRUCT_MALLINFO_DECLARED 1 +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; +#endif /* STRUCT_MALLINFO_DECLARED */ +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* NO_MALLINFO */ + +/* + Try to persuade compilers to inline. The most critical functions for + inlining are defined as macros, so these aren't used for them. 
+*/ + +#ifndef FORCEINLINE + #if defined(__GNUC__) +#define FORCEINLINE __inline __attribute__ ((always_inline)) + #elif defined(_MSC_VER) + #define FORCEINLINE __forceinline + #endif +#endif +#ifndef NOINLINE + #if defined(__GNUC__) + #define NOINLINE __attribute__ ((noinline)) + #elif defined(_MSC_VER) + #define NOINLINE __declspec(noinline) + #else + #define NOINLINE + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#ifndef FORCEINLINE + #define FORCEINLINE inline +#endif +#endif /* __cplusplus */ +#ifndef FORCEINLINE + #define FORCEINLINE +#endif + +#if !ONLY_MSPACES + +/* ------------------- Declarations of public routines ------------------- */ + +#ifndef USE_DL_PREFIX +#define dlcalloc calloc +#define dlfree free +#define dlmalloc malloc +#define dlmemalign memalign +#define dlposix_memalign posix_memalign +#define dlrealloc realloc +#define dlrealloc_in_place realloc_in_place +#define dlvalloc valloc +#define dlpvalloc pvalloc +#define dlmallinfo mallinfo +#define dlmallopt mallopt +#define dlmalloc_trim malloc_trim +#define dlmalloc_stats malloc_stats +#define dlmalloc_usable_size malloc_usable_size +#define dlmalloc_footprint malloc_footprint +#define dlmalloc_max_footprint malloc_max_footprint +#define dlmalloc_footprint_limit malloc_footprint_limit +#define dlmalloc_set_footprint_limit malloc_set_footprint_limit +#define dlmalloc_inspect_all malloc_inspect_all +#define dlindependent_calloc independent_calloc +#define dlindependent_comalloc independent_comalloc +#define dlbulk_free bulk_free +#endif /* USE_DL_PREFIX */ + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. +*/ +DLMALLOC_EXPORT void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +DLMALLOC_EXPORT void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +DLMALLOC_EXPORT void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. 
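  Because a failed realloc leaves p allocated, the usual growth pattern
  (illustrative only; handle_out_of_memory stands in for your own error
  path) keeps the old pointer until the new one is known to be good:

    void* bigger = realloc(buf, newsize);
    if (bigger == 0)
      handle_out_of_memory();   // buf is still valid and still owned here
    else
      buf = bigger;
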
+ + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ +DLMALLOC_EXPORT void* dlrealloc(void*, size_t); + +/* + realloc_in_place(void* p, size_t n) + Resizes the space allocated for p to size n, only if this can be + done without moving p (i.e., only if there is adjacent space + available if n is greater than p's current allocated size, or n is + less than or equal to p's size). This may be used instead of plain + realloc if an alternative allocation strategy is needed upon failure + to expand space; for example, reallocation of a buffer that must be + memory-aligned or cleared. You can use realloc_in_place to trigger + these alternatives only when needed. + + Returns p if successful; otherwise null. +*/ +DLMALLOC_EXPORT void* dlrealloc_in_place(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +DLMALLOC_EXPORT void* dlmemalign(size_t, size_t); + +/* + int posix_memalign(void** pp, size_t alignment, size_t n); + Allocates a chunk of n bytes, aligned in accord with the alignment + argument. Differs from memalign only in that it (1) assigns the + allocated memory to *pp rather than returning it, (2) fails and + returns EINVAL if the alignment is not a power of two (3) fails and + returns ENOMEM if memory cannot be allocated. +*/ +DLMALLOC_EXPORT int dlposix_memalign(void**, size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +DLMALLOC_EXPORT void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. To workaround the fact that mallopt is specified to use int, + not size_t parameters, the value -1 is specially treated as the + maximum unsigned size_t value. + + SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are use in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +DLMALLOC_EXPORT int dlmallopt(int, int); + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. 
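  For example (a sketch; the soft limit and logging routine are
  hypothetical), a long-running service could sample it cheaply on each
  request:

    size_t fp = malloc_footprint();
    if (fp > my_footprint_soft_limit)
      log_warning("heap footprint now %zu bytes", fp);
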
+*/ +DLMALLOC_EXPORT size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system. The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date. +*/ +DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void); + +/* + malloc_footprint_limit(); + Returns the number of bytes that the heap is allowed to obtain from + the system, returning the last value returned by + malloc_set_footprint_limit, or the maximum size_t value if + never set. The returned value reflects a permission. There is no + guarantee that this number of bytes can actually be obtained from + the system. +*/ +DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(); + +/* + malloc_set_footprint_limit(); + Sets the maximum number of bytes to obtain from the system, causing + failure returns from malloc and related functions upon attempts to + exceed this value. The argument value may be subject to page + rounding to an enforceable limit; this actual value is returned. + Using an argument of the maximum possible size_t effectively + disables checks. If the argument is less than or equal to the + current malloc_footprint, then all future allocations that require + additional system memory will fail. However, invocation cannot + retroactively deallocate existing used memory. +*/ +DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes); + +#if MALLOC_INSPECT_ALL +/* + malloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg); + Traverses the heap and calls the given handler for each managed + region, skipping all bytes that are (or may be) used for bookkeeping + purposes. Traversal does not include include chunks that have been + directly memory mapped. Each reported region begins at the start + address, and continues up to but not including the end address. The + first used_bytes of the region contain allocated data. If + used_bytes is zero, the region is unallocated. The handler is + invoked with the given callback argument. If locks are defined, they + are held during the entire traversal. It is a bad idea to invoke + other malloc functions from within the handler. + + For example, to count the number of in-use chunks with size greater + than 1000, you could write: + static int count = 0; + void count_chunks(void* start, void* end, size_t used, void* arg) { + if (used >= 1000) ++count; + } + then: + malloc_inspect_all(count_chunks, NULL); + + malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. +*/ +DLMALLOC_EXPORT void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), + void* arg); + +#endif /* MALLOC_INSPECT_ALL */ + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. 
+ fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +DLMALLOC_EXPORT struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +DLMALLOC_EXPORT void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. 
+ + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_comallac differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +DLMALLOC_EXPORT void** dlindependent_comalloc(size_t, size_t*, void**); + +/* + bulk_free(void* array[], size_t n_elements) + Frees and clears (sets to null) each non-null pointer in the given + array. This is likely to be faster than freeing them one-by-one. + If footers are used, pointers that have been allocated in different + mspaces are not freed or cleared, and the count of all such pointers + is returned. For large arrays of pointers with poor locality, it + may be worthwhile to sort this array before calling bulk_free. +*/ +DLMALLOC_EXPORT size_t dlbulk_free(void**, size_t n_elements); + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +DLMALLOC_EXPORT void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. 
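  For example (a sketch; drop_caches stands in for application code that has
  just freed a lot of memory), a program might shrink its footprint while
  keeping a little slack for upcoming allocations:

    drop_caches();                 // application code that frees large blocks
    malloc_trim(64 * 1024);        // then hand back all but ~64Kb of the tail
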
+*/ +DLMALLOC_EXPORT int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. +*/ +DLMALLOC_EXPORT void dlmalloc_stats(void); + +#endif /* ONLY_MSPACES */ + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). +*/ +DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. +*/ +DLMALLOC_EXPORT size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. 
However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable); + + +/* + mspace_malloc behaves as malloc, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_malloc(mspace msp, size_t bytes); + +/* + mspace_free behaves as free, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. +*/ +DLMALLOC_EXPORT void mspace_free(mspace msp, void* mem); + +/* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. +*/ +DLMALLOC_EXPORT void* mspace_realloc(mspace msp, void* mem, size_t newsize); + +/* + mspace_calloc behaves as calloc, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + +/* + mspace_memalign behaves as memalign, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); + +/* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. +*/ +DLMALLOC_EXPORT void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); + +/* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. +*/ +DLMALLOC_EXPORT void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); + +/* + mspace_footprint() returns the number of bytes obtained from the + system for this space. +*/ +DLMALLOC_EXPORT size_t mspace_footprint(mspace msp); + +/* + mspace_max_footprint() returns the peak number of bytes obtained from the + system for this space. +*/ +DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp); + + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + malloc_usable_size(void* p) behaves the same as malloc_usable_size; +*/ +DLMALLOC_EXPORT size_t mspace_usable_size(void* mem); + +/* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. +*/ +DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp); + +/* + mspace_trim behaves as malloc_trim, but + operates within the given space. +*/ +DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad); + +/* + An alias for mallopt. +*/ +DLMALLOC_EXPORT int mspace_mallopt(int, int); + +#endif /* MSPACES */ + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif /* __cplusplus */ + +/* + ======================================================================== + To make a fully customizable malloc.h header file, cut everything + above this line, put into file malloc.h, edit to suit, and #include it + on the next line, as well as in programs that use this malloc. 
+ ======================================================================== +*/ + +/* #include "malloc.h" */ + +/*------------------------------ internal #includes ---------------------- */ + +#ifdef _MSC_VER +#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif /* _MSC_VER */ +#if !NO_MALLOC_STATS +#include /* for printing in malloc_stats */ +#endif /* NO_MALLOC_STATS */ +#ifndef LACKS_ERRNO_H +#include /* for MALLOC_FAILURE_ACTION */ +#endif /* LACKS_ERRNO_H */ +#ifdef DEBUG +#if ABORT_ON_ASSERT_FAILURE +#undef assert +#define assert(x) if(!(x)) ABORT +#else /* ABORT_ON_ASSERT_FAILURE */ +#include +#endif /* ABORT_ON_ASSERT_FAILURE */ +#else /* DEBUG */ +#ifndef assert +#define assert(x) +#endif +#define DEBUG 0 +#endif /* DEBUG */ +#if !defined(WIN32) && !defined(LACKS_TIME_H) +#include /* for magic initialization */ +#endif /* WIN32 */ +#ifndef LACKS_STDLIB_H +#include /* for abort() */ +#endif /* LACKS_STDLIB_H */ +#ifndef LACKS_STRING_H +#include /* for memset etc */ +#endif /* LACKS_STRING_H */ +#if USE_BUILTIN_FFS +#ifndef LACKS_STRINGS_H +#include /* for ffs */ +#endif /* LACKS_STRINGS_H */ +#endif /* USE_BUILTIN_FFS */ +#if HAVE_MMAP +#ifndef LACKS_SYS_MMAN_H +/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ +#if (defined(linux) && !defined(__USE_GNU)) +#define __USE_GNU 1 +#include /* for mmap */ +#undef __USE_GNU +#else +#include /* for mmap */ +#endif /* linux */ +#endif /* LACKS_SYS_MMAN_H */ +#ifndef LACKS_FCNTL_H +#include +#endif /* LACKS_FCNTL_H */ +#endif /* HAVE_MMAP */ +#ifndef LACKS_UNISTD_H +#include /* for sbrk, sysconf */ +#else /* LACKS_UNISTD_H */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +extern void* sbrk(ptrdiff_t); +#endif /* FreeBSD etc */ +#endif /* LACKS_UNISTD_H */ + +/* Declarations for locking */ +#if USE_LOCKS +#ifndef WIN32 +#if defined (__SVR4) && defined (__sun) /* solaris */ +#include +#elif !defined(LACKS_SCHED_H) +#include +#endif /* solaris or LACKS_SCHED_H */ +#if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || !USE_SPIN_LOCKS +#include +#endif /* USE_RECURSIVE_LOCKS ... 
*/ +#elif defined(_MSC_VER) +#ifndef _M_AMD64 +/* These are already defined on AMD64 builds */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); +LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _M_AMD64 */ +#pragma intrinsic (_InterlockedCompareExchange) +#pragma intrinsic (_InterlockedExchange) +#define interlockedcompareexchange _InterlockedCompareExchange +#define interlockedexchange _InterlockedExchange +#elif defined(WIN32) && defined(__GNUC__) +#define interlockedcompareexchange(a, b, c) __sync_val_compare_and_swap(a, c, b) +#define interlockedexchange __sync_lock_test_and_set +#endif /* Win32 */ +#endif /* USE_LOCKS */ + +/* Declarations for bit scanning on win32 */ +#if defined(_MSC_VER) && _MSC_VER>=1300 +#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#define BitScanForward _BitScanForward +#define BitScanReverse _BitScanReverse +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) +#endif /* BitScanForward */ +#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ + +#ifndef WIN32 +#ifndef malloc_getpagesize +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif +# ifdef _SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) + extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize ((size_t)4096U) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif +#endif + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define SIZE_T_FOUR ((size_t)4) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) +#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) + +/* The bit mask value corresponding to MALLOC_ALIGNMENT */ +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* True if address a has acceptable alignment */ +#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 
0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* -------------------------- MMAP preliminaries ------------------------- */ + +/* + If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and + checks to fail so compiler optimizer can delete code rather than + using so many "#if"s. +*/ + + +/* MORECORE and MMAP must return MFAIL on failure */ +#define MFAIL ((void*)(MAX_SIZE_T)) +#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ + +#if HAVE_MMAP + +#ifndef WIN32 +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANON */ +#ifdef MAP_ANONYMOUS +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) +#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#else /* MAP_ANONYMOUS */ +/* + Nearly all versions of mmap support MAP_ANONYMOUS, so the following + is unlikely to be needed, but is supplied just in case. +*/ +#define MMAP_FLAGS (MAP_PRIVATE) +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ +#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) +#endif /* MAP_ANONYMOUS */ + +#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) + +#else /* WIN32 */ + +/* Win32 MMAP via VirtualAlloc */ +static FORCEINLINE void* win32mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +static FORCEINLINE void* win32direct_mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); + return (ptr != 0)? 
ptr: MFAIL; +} + +/* This function supports releasing coalesed segments */ +static FORCEINLINE int win32munmap(void* ptr, size_t size) { + MEMORY_BASIC_INFORMATION minfo; + char* cptr = (char*)ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#define MMAP_DEFAULT(s) win32mmap(s) +#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) +#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) +#endif /* WIN32 */ +#endif /* HAVE_MMAP */ + +#if HAVE_MREMAP +#ifndef WIN32 +#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#endif /* WIN32 */ +#endif /* HAVE_MREMAP */ + +/** + * Define CALL_MORECORE + */ +#if HAVE_MORECORE + #ifdef MORECORE + #define CALL_MORECORE(S) MORECORE(S) + #else /* MORECORE */ + #define CALL_MORECORE(S) MORECORE_DEFAULT(S) + #endif /* MORECORE */ +#else /* HAVE_MORECORE */ + #define CALL_MORECORE(S) MFAIL +#endif /* HAVE_MORECORE */ + +/** + * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP + */ +#if HAVE_MMAP + #define USE_MMAP_BIT (SIZE_T_ONE) + + #ifdef MMAP + #define CALL_MMAP(s) MMAP(s) + #else /* MMAP */ + #define CALL_MMAP(s) MMAP_DEFAULT(s) + #endif /* MMAP */ + #ifdef MUNMAP + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) + #else /* MUNMAP */ + #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) + #endif /* MUNMAP */ + #ifdef DIRECT_MMAP + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #else /* DIRECT_MMAP */ + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) + #endif /* DIRECT_MMAP */ +#else /* HAVE_MMAP */ + #define USE_MMAP_BIT (SIZE_T_ZERO) + + #define MMAP(s) MFAIL + #define MUNMAP(a, s) (-1) + #define DIRECT_MMAP(s) MFAIL + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #define CALL_MMAP(s) MMAP(s) + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) +#endif /* HAVE_MMAP */ + +/** + * Define CALL_MREMAP + */ +#if HAVE_MMAP && HAVE_MREMAP + #ifdef MREMAP + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) + #else /* MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) + #endif /* MREMAP */ +#else /* HAVE_MMAP && HAVE_MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif /* HAVE_MMAP && HAVE_MREMAP */ + +/* mstate bit set if continguous morecore disabled or failed */ +#define USE_NONCONTIGUOUS_BIT (4U) + +/* segment bit set in create_mspace_with_base */ +#define EXTERN_BIT (8U) + + +/* --------------------------- Lock preliminaries ------------------------ */ + +/* + When locks are defined, there is one global lock, plus + one per-mspace lock. + + The global lock_ensures that mparams.magic and other unique + mparams values are initialized only once. It also protects + sequences of calls to MORECORE. In many cases sys_alloc requires + two calls, that should not be interleaved with calls by other + threads. This does not protect against direct calls to MORECORE + by other threads not using this lock, so there is still code to + cope the best we can on interference. + + Per-mspace locks surround calls to malloc, free, etc. + By default, locks are simple non-reentrant mutexes. + + Because lock-protected regions generally have bounded times, it is + OK to use the supplied simple spinlocks. 
Spinlocks are likely to + improve performance for lightly contended applications, but worsen + performance under heavy contention. + + If USE_LOCKS is > 1, the definitions of lock routines here are + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK + and TRY_LOCK. You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };. + +*/ + +#if !USE_LOCKS +#define USE_LOCK_BIT (0U) +#define INITIAL_LOCK(l) (0) +#define DESTROY_LOCK(l) (0) +#define ACQUIRE_MALLOC_GLOBAL_LOCK() +#define RELEASE_MALLOC_GLOBAL_LOCK() + +#else +#if USE_LOCKS > 1 +/* ----------------------- User-defined locks ------------------------ */ +/* Define your own lock implementation here */ +/* #define INITIAL_LOCK(lk) ... */ +/* #define DESTROY_LOCK(lk) ... */ +/* #define ACQUIRE_LOCK(lk) ... */ +/* #define RELEASE_LOCK(lk) ... */ +/* #define TRY_LOCK(lk) ... */ +/* static MLOCK_T malloc_global_mutex = ... */ + +#elif USE_SPIN_LOCKS + +/* First, define CAS_LOCK and CLEAR_LOCK on ints */ +/* Note CAS_LOCK defined to return 0 on success */ + +#if defined(__GNUC__)&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +#define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1) +#define CLEAR_LOCK(sl) __sync_lock_release(sl) + +#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) +/* Custom spin locks for older gcc on x86 */ +static FORCEINLINE int x86_cas_lock(int *sl) { + int ret; + int val = 1; + int cmp = 0; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(sl)), "0"(cmp) + : "memory", "cc"); + return ret; +} + +static FORCEINLINE void x86_clear_lock(int* sl) { + assert(*sl != 0); + int prev = 0; + int ret; + __asm__ __volatile__ ("lock; xchgl %0, %1" + : "=r" (ret) + : "m" (*(sl)), "0"(prev) + : "memory"); +} + +#define CAS_LOCK(sl) x86_cas_lock(sl) +#define CLEAR_LOCK(sl) x86_clear_lock(sl) + +#else /* Win32 MSC */ +#define CAS_LOCK(sl) interlockedexchange(sl, 1) +#define CLEAR_LOCK(sl) interlockedexchange (sl, 0) + +#endif /* ... gcc spins locks ... */ + +/* How to yield for a spin lock */ +#define SPINS_PER_YIELD 63 +#if defined(_MSC_VER) +#define SLEEP_EX_DURATION 50 /* delay for yield/sleep */ +#define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE) +#elif defined (__SVR4) && defined (__sun) /* solaris */ +#define SPIN_LOCK_YIELD thr_yield(); +#elif !defined(LACKS_SCHED_H) +#define SPIN_LOCK_YIELD sched_yield(); +#else +#define SPIN_LOCK_YIELD +#endif /* ... yield ... */ + +#if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0 +/* Plain spin locks use single word (embedded in malloc_states) */ +static int spin_acquire_lock(int *sl) { + int spins = 0; + while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) { + if ((++spins & SPINS_PER_YIELD) == 0) { + SPIN_LOCK_YIELD; + } + } + return 0; +} + +#define MLOCK_T int +#define TRY_LOCK(sl) !CAS_LOCK(sl) +#define RELEASE_LOCK(sl) CLEAR_LOCK(sl) +#define ACQUIRE_LOCK(sl) (CAS_LOCK(sl)? spin_acquire_lock(sl) : 0) +#define INITIAL_LOCK(sl) (*sl = 0) +#define DESTROY_LOCK(sl) (0) +static MLOCK_T malloc_global_mutex = 0; + +#else /* USE_RECURSIVE_LOCKS */ +/* types for lock owners */ +#ifdef WIN32 +#define THREAD_ID_T DWORD +#define CURRENT_THREAD GetCurrentThreadId() +#define EQ_OWNER(X,Y) ((X) == (Y)) +#else +/* + Note: the following assume that pthread_t is a type that can be + initialized to (casted) zero. If this is not the case, you will need to + somehow redefine these or not use spin locks. 
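For illustration, here is a minimal stand-alone sketch of the plain spin-lock protocol defined above, assuming the GCC __sync builtins path; demo_lock, demo_counter and demo_increment are hypothetical names used only in this sketch, not identifiers from this file.

#include <sched.h>                     /* sched_yield(), as in SPIN_LOCK_YIELD */

static int demo_lock = 0;              /* same shape as the plain MLOCK_T (an int) */
static long demo_counter = 0;          /* shared state the lock protects */

static void demo_increment(void) {
  int spins = 0;
  /* CAS_LOCK-style acquire: read first, then __sync_lock_test_and_set;
     a returned 0 means this thread took the lock. */
  while (*(volatile int *)&demo_lock != 0 ||
         __sync_lock_test_and_set(&demo_lock, 1) != 0) {
    if ((++spins & 63) == 0)           /* yield every SPINS_PER_YIELD tries */
      sched_yield();
  }
  ++demo_counter;                      /* critical section */
  __sync_lock_release(&demo_lock);     /* CLEAR_LOCK-style release */
}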
+*/ +#define THREAD_ID_T pthread_t +#define CURRENT_THREAD pthread_self() +#define EQ_OWNER(X,Y) pthread_equal(X, Y) +#endif + +struct malloc_recursive_lock { + int sl; + unsigned int c; + THREAD_ID_T threadid; +}; + +#define MLOCK_T struct malloc_recursive_lock +static MLOCK_T malloc_global_mutex = { 0, 0, (THREAD_ID_T)0}; + +static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) { + assert(lk->sl != 0); + if (--lk->c == 0) { + CLEAR_LOCK(&lk->sl); + } +} + +static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) { + THREAD_ID_T mythreadid = CURRENT_THREAD; + int spins = 0; + for (;;) { + if (*((volatile int *)(&lk->sl)) == 0) { + if (!CAS_LOCK(&lk->sl)) { + lk->threadid = mythreadid; + lk->c = 1; + return 0; + } + } + else if (EQ_OWNER(lk->threadid, mythreadid)) { + ++lk->c; + return 0; + } + if ((++spins & SPINS_PER_YIELD) == 0) { + SPIN_LOCK_YIELD; + } + } +} + +static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) { + THREAD_ID_T mythreadid = CURRENT_THREAD; + if (*((volatile int *)(&lk->sl)) == 0) { + if (!CAS_LOCK(&lk->sl)) { + lk->threadid = mythreadid; + lk->c = 1; + return 1; + } + } + else if (EQ_OWNER(lk->threadid, mythreadid)) { + ++lk->c; + return 1; + } + return 0; +} + +#define RELEASE_LOCK(lk) recursive_release_lock(lk) +#define TRY_LOCK(lk) recursive_try_lock(lk) +#define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk) +#define INITIAL_LOCK(lk) ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0) +#define DESTROY_LOCK(lk) (0) +#endif /* USE_RECURSIVE_LOCKS */ + +#elif defined(WIN32) /* Win32 critical sections */ +#define MLOCK_T CRITICAL_SECTION +#define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0) +#define RELEASE_LOCK(lk) LeaveCriticalSection(lk) +#define TRY_LOCK(lk) TryEnterCriticalSection(lk) +#define INITIAL_LOCK(lk) (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000|4000)) +#define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0) +#define NEED_GLOBAL_LOCK_INIT + +static MLOCK_T malloc_global_mutex; +static volatile long malloc_global_mutex_status; + +/* Use spin loop to initialize global lock */ +static void init_malloc_global_mutex() { + for (;;) { + long stat = malloc_global_mutex_status; + if (stat > 0) + return; + /* transition to < 0 while initializing, then to > 0) */ + if (stat == 0 && + interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) { + InitializeCriticalSection(&malloc_global_mutex); + interlockedexchange(&malloc_global_mutex_status,1); + return; + } + SleepEx(0, FALSE); + } +} + +#else /* pthreads-based locks */ +#define MLOCK_T pthread_mutex_t +#define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk) +#define RELEASE_LOCK(lk) pthread_mutex_unlock(lk) +#define TRY_LOCK(lk) (!pthread_mutex_trylock(lk)) +#define INITIAL_LOCK(lk) pthread_init_lock(lk) +#define DESTROY_LOCK(lk) pthread_mutex_destroy(lk) + +#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE) +/* Cope with old-style linux recursive lock initialization by adding */ +/* skipped internal declaration from pthread.h */ +extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, + int __kind)); +#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP +#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) +#endif /* USE_RECURSIVE_LOCKS ... 
*/ + +static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int pthread_init_lock (MLOCK_T *lk) { + pthread_mutexattr_t attr; + if (pthread_mutexattr_init(&attr)) return 1; +#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1; +#endif + if (pthread_mutex_init(lk, &attr)) return 1; + if (pthread_mutexattr_destroy(&attr)) return 1; + return 0; +} + +#endif /* ... lock types ... */ + +/* Common code for all lock types */ +#define USE_LOCK_BIT (2U) + +#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK +#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); +#endif + +#ifndef RELEASE_MALLOC_GLOBAL_LOCK +#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); +#endif + +#endif /* USE_LOCKS */ + +/* ----------------------- Chunk representations ------------------------ */ + +/* + (The following includes lightly edited explanations by Colin Plumb.) + + The malloc_chunk declaration below is misleading (but accurate and + necessary). It declares a "view" into memory allowing access to + necessary fields at known offsets from a given base. + + Chunks of memory are maintained using a `boundary tag' method as + originally described by Knuth. (See the paper by Paul Wilson + ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such + techniques.) Sizes of free chunks are stored both in the front of + each chunk and at the end. This makes consolidating fragmented + chunks into bigger chunks fast. The head fields also hold bits + representing whether chunks are free or in use. + + Here are some pictures to make it clearer. They are "exploded" to + show that the state of a chunk can be thought of as extending from + the high 31 bits of the head field of its header through the + prev_foot and PINUSE_BIT bit of the following chunk header. + + A chunk that's in use looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk (if P = 0) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 1| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +- -+ + | | + +- -+ + | : + +- size - sizeof(size_t) available payload bytes -+ + : | + chunk-> +- -+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| + | Size of next chunk (may or may not be in use) | +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + And if it's free, it looks like this: + + chunk-> +- -+ + | User payload (must be in use, or we would have merged!) 
| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 0| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Prev pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- size - sizeof(struct chunk) unused bytes -+ + : | + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| + | Size of next chunk (must be in use, or we would have merged)| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- User payload -+ + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0| + +-+ + Note that since we always merge adjacent free chunks, the chunks + adjacent to a free chunk must be in use. + + Given a pointer to a chunk (which can be derived trivially from the + payload pointer) we can, in O(1) time, find out whether the adjacent + chunks are free, and if so, unlink them from the lists that they + are on and merge them with the current chunk. + + Chunks always begin on even word boundaries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + The P (PINUSE_BIT) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, preventing + access to non-existent (or non-owned) memory. If pinuse is set for + any given chunk, then you CANNOT determine the size of the + previous chunk, and might even get a memory addressing fault when + trying to do so. + + The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of + the chunk size redundantly records whether the current chunk is + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. + + Each freshly allocated chunk must have both cinuse and pinuse set. + That is, each allocated chunk borders either a previously allocated + and still in-use chunk, or the base of its memory arena. This is + ensured by making all allocations from the `lowest' part of any + found chunk. Further, no free chunk physically borders another one, + so each free chunk is known to be preceded and followed by either + inuse chunks or the ends of memory. + + Note that the `foot' of the current chunk is actually represented + as the prev_foot of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The exceptions to all this are + + 1. The special chunk `top' is the top-most available chunk (i.e., + the one bordering the end of available memory). It is treated + specially. Top is never included in any bin, is used only if + no other chunk is available, and is released back to the + system if it is very large (see M_TRIM_THRESHOLD). 
In effect, + the top chunk is treated as larger (and thus less well + fitting) than any other available chunk. The top chunk + doesn't update its trailing size field since there is no next + contiguous chunk that would have to index off it. However, + space is still allocated for it (TOP_FOOT_SIZE) to enable + separation or merging when space is extended. + + 3. Chunks allocated via mmap, have both cinuse and pinuse bits + cleared in their head fields. Because they are allocated + one-by-one, each must carry its own prev_foot field, which is + also used to hold the offset this chunk has within its mmapped + region, which is needed to preserve alignment. Each mmapped + chunk is trailed by the first two fields of a fake next-chunk + for sake of usage checks. + +*/ + +struct malloc_chunk { + size_t prev_foot; /* Size of previous chunk (if free). */ + size_t head; /* Size and inuse bits. */ + struct malloc_chunk* fd; /* double links -- used only if free. */ + struct malloc_chunk* bk; +}; + +typedef struct malloc_chunk mchunk; +typedef struct malloc_chunk* mchunkptr; +typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ +typedef unsigned int bindex_t; /* Described below */ +typedef unsigned int binmap_t; /* Described below */ +typedef unsigned int flag_t; /* The type of various bit flag sets */ + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#if FOOTERS +#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +#else /* FOOTERS */ +#define CHUNK_OVERHEAD (SIZE_T_SIZE) +#endif /* FOOTERS */ + +/* MMapped chunks need a second word of overhead ... */ +#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +/* ... and additional padding for fake next-chunk at foot */ +#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) + +/* The smallest size we can malloc is an aligned minimal chunk */ +#define MIN_CHUNK_SIZE\ + ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* conversion from malloc headers to user pointers, and back */ +#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) +/* chunk associated with aligned address A */ +#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) + +/* Bounds on request (not chunk) sizes. */ +#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) + +/* pad request bytes into a usable size */ +#define pad_request(req) \ + (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* pad request, checking for minimum (but not maximum) */ +#define request2size(req) \ + (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req)) + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. 
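As a concrete worked illustration of the head-bit encoding described here, the following stand-alone sketch restates the PINUSE/CINUSE/FLAG4 bit values locally rather than using the macros defined next; it is an example, not code from this file.

#include <assert.h>
#include <stddef.h>

int main(void) {
  size_t PINUSE = 1, CINUSE = 2, FLAG4 = 4;      /* bit values restated locally */
  size_t flag_bits = PINUSE | CINUSE | FLAG4;
  size_t head = (size_t)48 | CINUSE | PINUSE;    /* 48-byte in-use chunk, prev in use */
  assert((head & ~flag_bits) == 48);             /* chunksize(): mask off flag bits   */
  assert((head & (PINUSE | CINUSE)) != PINUSE);  /* is_inuse() is true                */
  assert((head & (PINUSE | CINUSE)) != 0);       /* is_mmapped() is false             */
  return 0;
}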
+*/ + +#define PINUSE_BIT (SIZE_T_ONE) +#define CINUSE_BIT (SIZE_T_TWO) +#define FLAG4_BIT (SIZE_T_FOUR) +#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) +#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) + +/* Head value for fenceposts */ +#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) + +/* extraction of fields from head words */ +#define cinuse(p) ((p)->head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define flag4inuse(p) ((p)->head & FLAG4_BIT) +#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) +#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) + +#define chunksize(p) ((p)->head & ~(FLAG_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) +#define set_flag4(p) ((p)->head |= FLAG4_BIT) +#define clear_flag4(p) ((p)->head &= ~FLAG4_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. */ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* Return true if malloced space is not necessarily cleared */ +#if MMAP_CLEARS +#define calloc_must_clear(p) (!is_mmapped(p)) +#else /* MMAP_CLEARS */ +#define calloc_must_clear(p) (1) +#endif /* MMAP_CLEARS */ + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. 
Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. Chunks + of the same size are arranged in a circularly-linked list, with only + the oldest chunk (the next to be used, in our FIFO ordering) + actually in the tree. (Tree members are distinguished by a non-null + parent pointer.) If a chunk with the same size an an existing node + is inserted, it is linked off the existing node using pointers that + work in the same way as fd/bk pointers of small chunks. + + Each tree contains a power of 2 sized range of chunk sizes (the + smallest is 0x100 <= x < 0x180), which is is divided in half at each + tree level, with the chunks in the smaller half of the range (0x100 + <= x < 0x140 for the top nose) in the left subtree and the larger + half (0x140 <= x < 0x180) in the right subtree. This is, of course, + done by inspecting individual bits. + + Using these rules, each node's left subtree contains all smaller + sizes than its right subtree. However, the node at the root of each + subtree has no particular ordering relationship to either. (The + dividing line between the subtree sizes is based on trie relation.) + If we remove the last chunk of a given size from the interior of the + tree, we need to replace it with a leaf node. The tree ordering + rules permit a node to be replaced by any leaf below it. + + The smallest chunk in a tree (a common operation in a best-fit + allocator) can be found by walking a path to the leftmost leaf in + the tree. Unlike a usual binary tree, where we follow left child + pointers until we reach a null, here we follow the right child + pointer any time the left one is null, until we reach a leaf with + both child pointers null. The smallest chunk in the tree will be + somewhere along that path. + + The worst case number of steps to add, find, or remove a node is + bounded by the number of bits differentiating chunks within + bins. Under current bin calculations, this ranges from 6 up to 21 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case + is of course much better. 
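As a worked example of the bin spacing described above (two treebins per power of two starting at 256 bytes), the sketch below uses a hypothetical helper, demo_tree_index, that mirrors the arithmetic of the compute_tree_index macro defined later in this file; it is illustrative only.

#include <assert.h>
#include <stddef.h>

static unsigned demo_tree_index(size_t s) {
  size_t x = s >> 8;                        /* TREEBIN_SHIFT == 8 */
  unsigned k = 0;
  if (x == 0) return 0;
  if (x > 0xFFFF) return 31;                /* NTREEBINS - 1 */
  while ((x >> (k + 1)) != 0) ++k;          /* k = index of highest set bit of x */
  return (k << 1) + (unsigned)((s >> (k + 7)) & 1);
}

int main(void) {
  assert(demo_tree_index(0x100) == 0);      /* 256..383  -> treebin 0 */
  assert(demo_tree_index(0x180) == 1);      /* 384..511  -> treebin 1 */
  assert(demo_tree_index(0x200) == 2);      /* 512..767  -> treebin 2 */
  assert(demo_tree_index(0x300) == 3);      /* 768..1023 -> treebin 3 */
  return 0;
}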
+*/ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk* fd; + struct malloc_tree_chunk* bk; + + struct malloc_tree_chunk* child[2]; + struct malloc_tree_chunk* parent; + bindex_t index; +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk* tchunkptr; +typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +/* + Each malloc space may include non-contiguous segments, held in a + list headed by an embedded malloc_segment record representing the + top-most space. Segments also include flags holding properties of + the space. Large chunks that are directly allocated by mmap are not + included in this list. They are instead independently created and + destroyed without otherwise keeping track of them. + + Segment management mainly comes into play for spaces allocated by + MMAP. Any call to MMAP might or might not return memory that is + adjacent to an existing segment. MORECORE normally contiguously + extends the current space, so this space is almost always adjacent, + which is simpler and faster to deal with. (This is why MORECORE is + used preferentially to MMAP when both are available -- see + sys_alloc.) When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. + + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If USE_MMAP_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. 
+*/ + +struct malloc_segment { + char* base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment* next; /* ptr to next segment */ + flag_t sflags; /* mmap and extern flag */ +}; + +#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) +#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) + +typedef struct malloc_segment msegment; +typedef struct malloc_segment* msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* + A malloc_state holds all of the bookkeeping for a space. + The main fields are: + + Top + The topmost chunk of the currently active segment. Its size is + cached in topsize. The actual size of topmost space is + topsize+TOP_FOOT_SIZE, which includes space reserved for adding + fenceposts and segment records if necessary when getting more + space from the system. The size at which to autotrim top is + cached from mparams in trim_check, except that it is disabled if + an autotrim fails. + + Designated victim (dv) + This is the preferred chunk for servicing small requests that + don't have exact fits. It is normally the chunk split off most + recently to service another small request. Its size is cached in + dvsize. The link fields of this chunk are not maintained since it + is not kept in a bin. + + SmallBins + An array of bin headers for free chunks. These bins hold chunks + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains + chunks of all the same size, spaced 8 bytes apart. To simplify + use in double-linked lists, each bin header acts as a malloc_chunk + pointing to the real first node, if it exists (else pointing to + itself). This avoids special-casing for headers. But to avoid + waste, we allocate only the fd/bk pointers of bins, and then use + repositioning tricks to treat these as the fields of a chunk. + + TreeBins + Treebins are pointers to the roots of trees holding a range of + sizes. There are 2 equally spaced treebins for each power of two + from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything + larger. + + Bin maps + There is one bit map for small bins ("smallmap") and one for + treebins ("treemap). Each bin sets its bit when non-empty, and + clears the bit when empty. Bit operations are then used to avoid + bin-by-bin searching -- nearly all "search" is done without ever + looking at bins that won't be selected. The bit maps + conservatively use 32 bits per map word, even if on 64bit system. + For a good description of some of the bit-based techniques used + here, see Henry S. Warren Jr's book "Hacker's Delight" (and + supplement at http://hackersdelight.org/). Many of these are + intended to reduce the branchiness of paths through malloc etc, as + well as to reduce the number of memory locations read or written. + + Segments + A list of segments headed by an embedded malloc_segment record + representing the initial space. + + Address check support + The least_addr field is the least address ever obtained from + MORECORE or MMAP. Attempted frees and reallocs of any address less + than this are trapped (unless INSECURE is defined). + + Magic tag + A cross-check field that should always hold same value as mparams.magic. + + Max allowed footprint + The maximum allowed bytes to allocate from system (zero means no limit) + + Flags + Bits recording whether to use MMAP, locks, or contiguous MORECORE + + Statistics + Each space keeps track of current and maximum system memory + obtained via MORECORE or MMAP. 
+ + Trim support + Fields holding the amount of unused topmost memory that should trigger + trimming, and a counter to force periodic scanning to release unused + non-topmost segments. + + Locking + If USE_LOCKS is defined, the "mutex" lock is acquired and released + around every public call using this mspace. + + Extension support + A void* pointer and a size_t field that can be used to help implement + extensions to this malloc. +*/ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; + binmap_t treemap; + size_t dvsize; + size_t topsize; + char* least_addr; + mchunkptr dv; + mchunkptr top; + size_t trim_check; + size_t release_checks; + size_t magic; + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + size_t footprint; + size_t max_footprint; + size_t footprint_limit; /* zero means no limit */ + flag_t mflags; +#if USE_LOCKS + MLOCK_T mutex; /* locate lock among fields that rarely change */ +#endif /* USE_LOCKS */ + msegment seg; + void* extp; /* Unused but available for extensions */ + size_t exts; +}; + +typedef struct malloc_state* mstate; + +/* ------------- Global malloc_state and malloc_params ------------------- */ + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. 
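For illustration, a minimal stand-alone sketch of the lazy-initialization idiom described above, in which a nonzero magic doubles as the "already initialized" flag; demo_params, demo_init_params and demo_public_entry are hypothetical names, not the real mparams/init_mparams.

#include <stddef.h>

static struct { size_t magic; size_t page_size; } demo_params;

static int demo_init_params(void) {
  demo_params.page_size = 4096;             /* the real code asks the OS for this */
  demo_params.magic = (size_t)0x5a5a5a58U;  /* any nonzero value works as the flag */
  return 1;
}

/* Mirrors ensure_initialization(): cheap check, initialize at most once. */
#define demo_ensure_initialization() \
  (void)(demo_params.magic != 0 || demo_init_params())

static void demo_public_entry(void) {
  demo_ensure_initialization();
  /* ... demo_params.page_size is now valid ... */
}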
+*/ + +struct malloc_params { + size_t magic; + size_t page_size; + size_t granularity; + size_t mmap_threshold; + size_t trim_threshold; + flag_t default_mflags; +}; + +static struct malloc_params mparams; + +/* Ensure mparams initialized */ +#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) + +#if !ONLY_MSPACES + +/* The global malloc_state used for all non-"mspace" calls */ +static struct malloc_state _gm_; +#define gm (&_gm_) +#define is_global(M) ((M) == &_gm_) + +#endif /* !ONLY_MSPACES */ + +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* Operations on mflags */ + +#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) +#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) +#if USE_LOCKS +#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) +#else +#define disable_lock(M) +#endif + +#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) +#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) +#if HAVE_MMAP +#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) +#else +#define disable_mmap(M) +#endif + +#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) +#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) + +#define set_lock(M,L)\ + ((M)->mflags = (L)?\ + ((M)->mflags | USE_LOCK_BIT) :\ + ((M)->mflags & ~USE_LOCK_BIT)) + +/* page-align a size */ +#define page_align(S)\ + (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (mparams.granularity - SIZE_T_ONE))\ + & ~(mparams.granularity - SIZE_T_ONE)) + + +/* For mmap, use granularity alignment on windows, else page-align */ +#ifdef WIN32 +#define mmap_align(S) granularity_align(S) +#else +#define mmap_align(S) page_align(S) +#endif + +/* For sys_alloc, enough padding to ensure can malloc request on success */ +#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) + +#define is_page_aligned(S)\ + (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) +#define is_granularity_aligned(S)\ + (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) + +/* True if segment S holds address A */ +#define segment_holds(S, A)\ + ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) + +/* Return segment holding given address */ +static msegmentptr segment_holding(mstate m, char* addr) { + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= sp->base && addr < sp->base + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* Return true if segment contains a segment link */ +static int has_segment_link(mstate m, msegmentptr ss) { + msegmentptr sp = &m->seg; + for (;;) { + if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) + return 1; + if ((sp = sp->next) == 0) + return 0; + } +} + +#ifndef MORECORE_CANNOT_TRIM +#define should_trim(M,s) ((s) > (M)->trim_check) +#else /* MORECORE_CANNOT_TRIM */ +#define should_trim(M,s) (0) +#endif /* MORECORE_CANNOT_TRIM */ + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. +*/ +#define TOP_FOOT_SIZE\ + (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + + +/* ------------------------------- Hooks -------------------------------- */ + +/* + PREACTION should be defined to return 0 on success, and nonzero on + failure. 
If you are not using locking, you can redefine these to do + anything you like. +*/ + +#if USE_LOCKS +#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) +#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } +#else /* USE_LOCKS */ + +#ifndef PREACTION +#define PREACTION(M) (0) +#endif /* PREACTION */ + +#ifndef POSTACTION +#define POSTACTION(M) +#endif /* POSTACTION */ + +#endif /* USE_LOCKS */ + +/* + CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. + USAGE_ERROR_ACTION is triggered on detected bad frees and + reallocs. The argument p is an address that might have triggered the + fault. It is ignored by the two predefined actions, but might be + useful in custom actions that try to help diagnose errors. +*/ + +#if PROCEED_ON_ERROR + +/* A count of the number of corruption errors causing resets */ +int malloc_corruption_error_count; + +/* default corruption action */ +static void reset_on_error(mstate m); + +#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) +#define USAGE_ERROR_ACTION(m, p) + +#else /* PROCEED_ON_ERROR */ + +#ifndef CORRUPTION_ERROR_ACTION +#define CORRUPTION_ERROR_ACTION(m) ABORT +#endif /* CORRUPTION_ERROR_ACTION */ + +#ifndef USAGE_ERROR_ACTION +#define USAGE_ERROR_ACTION(m,p) ABORT +#endif /* USAGE_ERROR_ACTION */ + +#endif /* PROCEED_ON_ERROR */ + + +/* -------------------------- Debugging setup ---------------------------- */ + +#if ! DEBUG + +#define check_free_chunk(M,P) +#define check_inuse_chunk(M,P) +#define check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) +#define check_malloc_state(M) +#define check_top_chunk(M,P) + +#else /* DEBUG */ +#define check_free_chunk(M,P) do_check_free_chunk(M,P) +#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) +#define check_top_chunk(M,P) do_check_top_chunk(M,P) +#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) +#define check_malloc_state(M) do_check_malloc_state(M) + +static void do_check_any_chunk(mstate m, mchunkptr p); +static void do_check_top_chunk(mstate m, mchunkptr p); +static void do_check_mmapped_chunk(mstate m, mchunkptr p); +static void do_check_inuse_chunk(mstate m, mchunkptr p); +static void do_check_free_chunk(mstate m, mchunkptr p); +static void do_check_malloced_chunk(mstate m, void* mem, size_t s); +static void do_check_tree(mstate m, tchunkptr t); +static void do_check_treebin(mstate m, bindex_t i); +static void do_check_smallbin(mstate m, bindex_t i); +static void do_check_malloc_state(mstate m); +static int bin_find(mstate m, mchunkptr x); +static size_t traverse_and_check(mstate m); +#endif /* DEBUG */ + +/* ---------------------------- Indexing Bins ---------------------------- */ + +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I. 
Use x86 asm if possible */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_tree_index(S, I)\ +{\ + unsigned int X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = (unsigned) sizeof(X)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz(X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = _bit_scan_reverse (X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + _BitScanReverse((DWORD *) &K, (DWORD) X);\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#else /* GNUC */ +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int Y = (unsigned int)X;\ + unsigned int N = ((Y - 0x100) >> 16) & 8;\ + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ + N += K;\ + N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ + K = 14 - N + ((Y <<= K) >> 15);\ + I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ + }\ +} +#endif /* GNUC */ + +/* Bit representing maximum resolved size in a treebin at i */ +#define bit_for_tree_index(i) \ + (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + ((i == NTREEBINS-1)? 0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* isolate the least set bit of a bitmap */ +#define least_bit(x) ((x) & -(x)) + +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) ((x<<1) | -(x<<1)) + +/* mask with all bits to left of or equal to least bit of x on */ +#define same_or_left_bits(x) ((x) | -(x)) + +/* index corresponding to given bit. 
Use x86 asm if possible */ + +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = __builtin_ctz(X); \ + I = (bindex_t)J;\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = _bit_scan_forward (X); \ + I = (bindex_t)J;\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + _BitScanForward((DWORD *) &J, X);\ + I = (bindex_t)J;\ +} + +#elif USE_BUILTIN_FFS +#define compute_bit2idx(X, I) I = ffs(X)-1 + +#else +#define compute_bit2idx(X, I)\ +{\ + unsigned int Y = X - 1;\ + unsigned int K = Y >> (16-4) & 16;\ + unsigned int N = K; Y >>= K;\ + N += K = Y >> (8-3) & 8; Y >>= K;\ + N += K = Y >> (4-2) & 4; Y >>= K;\ + N += K = Y >> (2-1) & 2; Y >>= K;\ + N += K = Y >> (1-0) & 1; Y >>= K;\ + I = (bindex_t)(N + Y);\ +} +#endif /* GNUC */ + + +/* ----------------------- Runtime Check Support ------------------------- */ + +/* + For security, the main invariant is that malloc/free/etc never + writes to a static address other than malloc_state, unless static + malloc_state itself has been corrupted, which cannot occur via + malloc (because of these checks). In essence this means that we + believe all pointers, sizes, maps etc held in malloc_state, but + check all of those linked or offsetted from other embedded data + structures. These checks are interspersed with main code in a way + that tends to minimize their run-time cost. + + When FOOTERS is defined, in addition to range checking, we also + verify footer fields of inuse chunks, which can be used guarantee + that the mstate controlling malloc/free is intact. This is a + streamlined version of the approach described by William Robertson + et al in "Run-time Detection of Heap-based Overflows" LISA'03 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer + of an inuse chunk holds the xor of its mstate and a random seed, + that is checked upon calls to free() and realloc(). This is + (probabalistically) unguessable from outside the program, but can be + computed by any code successfully malloc'ing any chunk, so does not + itself provide protection against code that has already broken + security through some other means. Unlike Robertson et al, we + always dynamically check addresses of all offset chunks (previous, + next, etc). This turns out to be cheaper than relying on hashes. 
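As a stand-alone sketch of the footer check described above (a worked example only; the values are made up, and the real code uses the mark_inuse_foot/get_mstate_for macros that follow):

#include <assert.h>
#include <stddef.h>

int main(void) {
  size_t magic       = (size_t)0x5a5a5a58U;  /* stand-in for mparams.magic     */
  size_t mstate_addr = (size_t)0x12345670U;  /* stand-in for the owning mstate */
  size_t footer      = mstate_addr ^ magic;  /* what mark_inuse_foot() stores  */
  /* On free()/realloc(), xoring with the same magic recovers the mstate;
     a corrupted footer yields a bogus mstate whose magic check then fails. */
  assert((footer ^ magic) == mstate_addr);
  return 0;
}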
+*/ + +#if !INSECURE +/* Check if address a is at least as high as any from MORECORE or MMAP */ +#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) +/* Check if address of next chunk n is higher than base chunk p */ +#define ok_next(p, n) ((char*)(p) < (char*)(n)) +/* Check if p has inuse status */ +#define ok_inuse(p) is_inuse(p) +/* Check if p has its pinuse bit on */ +#define ok_pinuse(p) pinuse(p) + +#else /* !INSECURE */ +#define ok_address(M, a) (1) +#define ok_next(b, n) (1) +#define ok_inuse(p) (1) +#define ok_pinuse(p) (1) +#endif /* !INSECURE */ + +#if (FOOTERS && !INSECURE) +/* Check if (alleged) mstate m has expected magic field */ +#define ok_magic(M) ((M)->magic == mparams.magic) +#else /* (FOOTERS && !INSECURE) */ +#define ok_magic(M) (1) +#endif /* (FOOTERS && !INSECURE) */ + +/* In gcc, use __builtin_expect to minimize impact of checks */ +#if !INSECURE +#if defined(__GNUC__) && __GNUC__ >= 3 +#define RTCHECK(e) __builtin_expect(e, 1) +#else /* GNUC */ +#define RTCHECK(e) (e) +#endif /* GNUC */ +#else /* !INSECURE */ +#define RTCHECK(e) (1) +#endif /* !INSECURE */ + +/* macros to set up inuse chunks with or without footers */ + +#if !FOOTERS + +#define mark_inuse_foot(M,p,s) + +/* Macros for setting head/foot of non-mmapped chunks */ + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +#else /* FOOTERS */ + +/* Set foot of inuse chunk to be xor of mstate and seed */ +#define mark_inuse_foot(M,p,s)\ + (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) + +#define get_mstate_for(p)\ + ((mstate)(((mchunkptr)((char*)(p) +\ + (chunksize(p))))->prev_foot ^ mparams.magic)) + +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ + mark_inuse_foot(M,p,s)) + +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ + mark_inuse_foot(M,p,s)) + +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + mark_inuse_foot(M, p, s)) + +#endif /* !FOOTERS */ + +/* ---------------------------- setting mparams -------------------------- */ + +/* Initialize mparams */ +static int init_mparams(void) { +#ifdef NEED_GLOBAL_LOCK_INIT + if (malloc_global_mutex_status <= 0) + init_malloc_global_mutex(); +#endif + + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if (mparams.magic == 0) { + size_t magic; + size_t psize; + size_t gsize; + +#ifndef WIN32 + psize = malloc_getpagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); +#else /* WIN32 */ + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + psize = system_info.dwPageSize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); + } +#endif /* WIN32 */ + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. 
+ Alignment, min chunk size, and page size must all be powers of 2. + */ + if ((sizeof(size_t) != sizeof(char*)) || + (MAX_SIZE_T < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (MALLOC_ALIGNMENT < (size_t)8U) || + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || + ((gsize & (gsize-SIZE_T_ONE)) != 0) || + ((psize & (psize-SIZE_T_ONE)) != 0)) + ABORT; + + mparams.granularity = gsize; + mparams.page_size = psize; + mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; + mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; +#else /* MORECORE_CONTIGUOUS */ + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; +#endif /* MORECORE_CONTIGUOUS */ + +#if !ONLY_MSPACES + /* Set up lock for main malloc area */ + gm->mflags = mparams.default_mflags; + (void)INITIAL_LOCK(&gm->mutex); +#endif + + { +#if USE_DEV_RANDOM + int fd; + unsigned char buf[sizeof(size_t)]; + /* Try to use /dev/urandom, else fall back on using time */ + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && + read(fd, buf, sizeof(buf)) == sizeof(buf)) { + magic = *((size_t *) buf); + close(fd); + } + else +#endif /* USE_DEV_RANDOM */ +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); +#elif defined(LACKS_TIME_H) + magic = (size_t)&magic ^ (size_t)0x55555555U; +#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + magic |= (size_t)8U; /* ensure nonzero */ + magic &= ~(size_t)7U; /* improve chances of fault for bad values */ + /* Until memory modes commonly available, use volatile-write */ + (*(volatile size_t *)(&(mparams.magic))) = magic; + } + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; +} + +/* support for mallopt */ +static int change_mparam(int param_number, int value) { + size_t val; + ensure_initialization(); + val = (value == -1)? MAX_SIZE_T : (size_t)value; + switch(param_number) { + case M_TRIM_THRESHOLD: + mparams.trim_threshold = val; + return 1; + case M_GRANULARITY: + if (val >= mparams.page_size && ((val & (val-1)) == 0)) { + mparams.granularity = val; + return 1; + } + else + return 0; + case M_MMAP_THRESHOLD: + mparams.mmap_threshold = val; + return 1; + default: + return 0; + } +} + +#if DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +/* Check properties of any chunk, whether free, inuse, mmapped etc */ +static void do_check_any_chunk(mstate m, mchunkptr p) { + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); +} + +/* Check properties of top chunk */ +static void do_check_top_chunk(mstate m, mchunkptr p) { + msegmentptr sp = segment_holding(m, (char*)p); + size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ + assert(sp != 0); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(sz == m->topsize); + assert(sz > 0); + assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); + assert(pinuse(p)); + assert(!pinuse(chunk_plus_offset(p, sz))); +} + +/* Check properties of (inuse) mmapped chunks */ +static void do_check_mmapped_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); + assert(is_mmapped(p)); + assert(use_mmap(m)); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(!is_small(sz)); + assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); + assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); + assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); +} + +/* Check properties of inuse chunks */ +static void do_check_inuse_chunk(mstate m, mchunkptr p) { + do_check_any_chunk(m, p); + assert(is_inuse(p)); + assert(next_pinuse(p)); + /* If not pinuse and not mmapped, previous chunk has OK offset */ + assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); + if (is_mmapped(p)) + do_check_mmapped_chunk(m, p); +} + +/* Check properties of free chunks */ +static void do_check_free_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + mchunkptr next = chunk_plus_offset(p, sz); + do_check_any_chunk(m, p); + assert(!is_inuse(p)); + assert(!next_pinuse(p)); + assert (!is_mmapped(p)); + if (p != m->dv && p != m->top) { + if (sz >= MIN_CHUNK_SIZE) { + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(is_aligned(chunk2mem(p))); + assert(next->prev_foot == sz); + assert(pinuse(p)); + assert (next == m->top || is_inuse(next)); + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_T_SIZE */ + assert(sz == SIZE_T_SIZE); + } +} + +/* Check properties of malloced chunks at the point they are malloced */ +static void do_check_malloced_chunk(mstate m, void* mem, size_t s) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t sz = p->head & ~INUSE_BITS; + do_check_inuse_chunk(m, p); + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ + assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); + } +} + +/* Check a tree and its subtrees. 
*/ +static void do_check_tree(mstate m, tchunkptr t) { + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->index; + size_t tsize = chunksize(t); + bindex_t idx; + compute_tree_index(tsize, idx); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); + + do { /* traverse through chain of same-sized nodes */ + do_check_any_chunk(m, ((mchunkptr)u)); + assert(u->index == tindex); + assert(chunksize(u) == tsize); + assert(!is_inuse(u)); + assert(!next_pinuse(u)); + assert(u->fd->bk == u); + assert(u->bk->fd == u); + if (u->parent == 0) { + assert(u->child[0] == 0); + assert(u->child[1] == 0); + } + else { + assert(head == 0); /* only one node on chain has parent */ + head = u; + assert(u->parent != u); + assert (u->parent->child[0] == u || + u->parent->child[1] == u || + *((tbinptr*)(u->parent)) == u); + if (u->child[0] != 0) { + assert(u->child[0]->parent == u); + assert(u->child[0] != u); + do_check_tree(m, u->child[0]); + } + if (u->child[1] != 0) { + assert(u->child[1]->parent == u); + assert(u->child[1] != u); + do_check_tree(m, u->child[1]); + } + if (u->child[0] != 0 && u->child[1] != 0) { + assert(chunksize(u->child[0]) < chunksize(u->child[1])); + } + } + u = u->fd; + } while (u != t); + assert(head != 0); +} + +/* Check all the chunks in a treebin. */ +static void do_check_treebin(mstate m, bindex_t i) { + tbinptr* tb = treebin_at(m, i); + tchunkptr t = *tb; + int empty = (m->treemap & (1U << i)) == 0; + if (t == 0) + assert(empty); + if (!empty) + do_check_tree(m, t); +} + +/* Check all the chunks in a smallbin. */ +static void do_check_smallbin(mstate m, bindex_t i) { + sbinptr b = smallbin_at(m, i); + mchunkptr p = b->bk; + unsigned int empty = (m->smallmap & (1U << i)) == 0; + if (p == b) + assert(empty); + if (!empty) { + for (; p != b; p = p->bk) { + size_t size = chunksize(p); + mchunkptr q; + /* each chunk claims to be free */ + do_check_free_chunk(m, p); + /* chunk belongs in bin */ + assert(small_index(size) == i); + assert(p->bk == b || chunksize(p->bk) == chunksize(p)); + /* chunk is followed by an inuse chunk */ + q = next_chunk(p); + if (q->head != FENCEPOST_HEAD) + do_check_inuse_chunk(m, q); + } + } +} + +/* Find x in a bin. Used in other check functions. 
*/ +static int bin_find(mstate m, mchunkptr x) { + size_t size = chunksize(x); + if (is_small(size)) { + bindex_t sidx = small_index(size); + sbinptr b = smallbin_at(m, sidx); + if (smallmap_is_marked(m, sidx)) { + mchunkptr p = b; + do { + if (p == x) + return 1; + } while ((p = p->fd) != b); + } + } + else { + bindex_t tidx; + compute_tree_index(size, tidx); + if (treemap_is_marked(m, tidx)) { + tchunkptr t = *treebin_at(m, tidx); + size_t sizebits = size << leftshift_for_tree_index(tidx); + while (t != 0 && chunksize(t) != size) { + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + sizebits <<= 1; + } + if (t != 0) { + tchunkptr u = t; + do { + if (u == (tchunkptr)x) + return 1; + } while ((u = u->fd) != t); + } + } + } + return 0; +} + +/* Traverse each chunk and check it; return total */ +static size_t traverse_and_check(mstate m) { + size_t sum = 0; + if (is_initialized(m)) { + msegmentptr s = &m->seg; + sum += m->topsize + TOP_FOOT_SIZE; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + mchunkptr lastq = 0; + assert(pinuse(q)); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + sum += chunksize(q); + if (is_inuse(q)) { + assert(!bin_find(m, q)); + do_check_inuse_chunk(m, q); + } + else { + assert(q == m->dv || bin_find(m, q)); + assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ + do_check_free_chunk(m, q); + } + lastq = q; + q = next_chunk(q); + } + s = s->next; + } + } + return sum; +} + + +/* Check all properties of malloc_state. */ +static void do_check_malloc_state(mstate m) { + bindex_t i; + size_t total; + /* check bins */ + for (i = 0; i < NSMALLBINS; ++i) + do_check_smallbin(m, i); + for (i = 0; i < NTREEBINS; ++i) + do_check_treebin(m, i); + + if (m->dvsize != 0) { /* check dv chunk */ + do_check_any_chunk(m, m->dv); + assert(m->dvsize == chunksize(m->dv)); + assert(m->dvsize >= MIN_CHUNK_SIZE); + assert(bin_find(m, m->dv) == 0); + } + + if (m->top != 0) { /* check top chunk */ + do_check_top_chunk(m, m->top); + /*assert(m->topsize == chunksize(m->top)); redundant */ + assert(m->topsize > 0); + assert(bin_find(m, m->top) == 0); + } + + total = traverse_and_check(m); + assert(total <= m->footprint); + assert(m->footprint <= m->max_footprint); +} +#endif /* DEBUG */ + +/* ----------------------------- statistics ------------------------------ */ + +#if !NO_MALLINFO +static struct mallinfo internal_mallinfo(mstate m) { + struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + ensure_initialization(); + if (!PREACTION(m)) { + check_malloc_state(m); + if (is_initialized(m)) { + size_t nfree = SIZE_T_ONE; /* top always free */ + size_t mfree = m->topsize + TOP_FOOT_SIZE; + size_t sum = mfree; + msegmentptr s = &m->seg; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + size_t sz = chunksize(q); + sum += sz; + if (!is_inuse(q)) { + mfree += sz; + ++nfree; + } + q = next_chunk(q); + } + s = s->next; + } + + nm.arena = sum; + nm.ordblks = nfree; + nm.hblkhd = m->footprint - sum; + nm.usmblks = m->max_footprint; + nm.uordblks = m->footprint - mfree; + nm.fordblks = mfree; + nm.keepcost = m->topsize; + } + + POSTACTION(m); + } + return nm; +} +#endif /* !NO_MALLINFO */ + +#if !NO_MALLOC_STATS +static void internal_malloc_stats(mstate m) { + ensure_initialization(); + if (!PREACTION(m)) { + size_t maxfp = 0; + size_t fp = 0; + size_t used = 0; + check_malloc_state(m); + if (is_initialized(m)) { + msegmentptr s = &m->seg; + 
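+ /* Snapshot the footprint counters; free-chunk sizes are subtracted below to compute in-use bytes. */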
maxfp = m->max_footprint; + fp = m->footprint; + used = fp - (m->topsize + TOP_FOOT_SIZE); + + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + if (!is_inuse(q)) + used -= chunksize(q); + q = next_chunk(q); + } + s = s->next; + } + } + POSTACTION(m); /* drop lock */ + fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); + fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); + fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); + } +} +#endif /* NO_MALLOC_STATS */ + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* + Various forms of linking and unlinking are defined as macros. Even + the ones for trees, which are very long but have very short typical + paths. This is ugly but reduces reliance on inlining support of + compilers. +*/ + +/* Link a free chunk into a smallbin */ +#define insert_small_chunk(M, P, S) {\ + bindex_t I = small_index(S);\ + mchunkptr B = smallbin_at(M, I);\ + mchunkptr F = B;\ + assert(S >= MIN_CHUNK_SIZE);\ + if (!smallmap_is_marked(M, I))\ + mark_smallmap(M, I);\ + else if (RTCHECK(ok_address(M, B->fd)))\ + F = B->fd;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + B->fd = P;\ + F->bk = P;\ + P->fd = F;\ + P->bk = B;\ +} + +/* Unlink a chunk from a smallbin */ +#define unlink_small_chunk(M, P, S) {\ + mchunkptr F = P->fd;\ + mchunkptr B = P->bk;\ + bindex_t I = small_index(S);\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (RTCHECK(F == smallbin_at(M,I) || (ok_address(M, F) && F->bk == P))) { \ + if (B == F) {\ + clear_smallmap(M, I);\ + }\ + else if (RTCHECK(B == smallbin_at(M,I) ||\ + (ok_address(M, B) && B->fd == P))) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Unlink the first chunk from a smallbin */ +#define unlink_first_small_chunk(M, B, P, I) {\ + mchunkptr F = P->fd;\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (B == F) {\ + clear_smallmap(M, I);\ + }\ + else if (RTCHECK(ok_address(M, F) && F->bk == P)) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Replace dv node, binning the old one */ +/* Used only when dvsize known to be small */ +#define replace_dv(M, P, S) {\ + size_t DVS = M->dvsize;\ + assert(is_small(DVS));\ + if (DVS != 0) {\ + mchunkptr DV = M->dv;\ + insert_small_chunk(M, DV, DVS);\ + }\ + M->dvsize = S;\ + M->dv = P;\ +} + +/* ------------------------- Operations on trees ------------------------- */ + +/* Insert chunk into tree */ +#define insert_large_chunk(M, X, S) {\ + tbinptr* H;\ + bindex_t I;\ + compute_tree_index(S, I);\ + H = treebin_at(M, I);\ + X->index = I;\ + X->child[0] = X->child[1] = 0;\ + if (!treemap_is_marked(M, I)) {\ + mark_treemap(M, I);\ + *H = X;\ + X->parent = (tchunkptr)H;\ + X->fd = X->bk = X;\ + }\ + else {\ + tchunkptr T = *H;\ + size_t K = S << leftshift_for_tree_index(I);\ + for (;;) {\ + if (chunksize(T) != S) {\ + tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ + K <<= 1;\ + if (*C != 0)\ + T = *C;\ + else if (RTCHECK(ok_address(M, C))) {\ + *C = X;\ + X->parent = T;\ + X->fd = X->bk = X;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + else {\ + tchunkptr F = T->fd;\ + if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ + T->fd = F->bk = X;\ + X->fd = F;\ + X->bk 
= T;\ + X->parent = 0;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + }\ + }\ +} + +/* + Unlink steps: + + 1. If x is a chained node, unlink it from its same-sized fd/bk links + and choose its bk node as its replacement. + 2. If x was the last node of its size, but not a leaf node, it must + be replaced with a leaf node (not merely one with an open left or + right), to make sure that lefts and rights of descendents + correspond properly to bit masks. We use the rightmost descendent + of x. We could use any other leaf, but this is easy to locate and + tends to counteract removal of leftmosts elsewhere, and so keeps + paths shorter than minimally guaranteed. This doesn't loop much + because on average a node in a tree is near the bottom. + 3. If x is the base of a chain (i.e., has parent links) relink + x's parent and children to x's replacement (or null if none). +*/ + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) {\ + F->bk = R;\ + R->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + tchunkptr* RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr* CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + if (RTCHECK(ok_address(M, RP)))\ + *RP = 0;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + }\ + if (XP != 0) {\ + tbinptr* H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + }\ + else if (RTCHECK(ok_address(M, XP))) {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + if (R != 0) {\ + if (RTCHECK(ok_address(M, R))) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + if (RTCHECK(ok_address(M, C0))) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + if ((C1 = X->child[1]) != 0) {\ + if (RTCHECK(ok_address(M, C1))) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ +} + +/* Relays to large vs small bin operations */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) insert_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) unlink_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + + +/* Relays to internal calls to malloc/free from realloc, memalign etc */ + +#if ONLY_MSPACES +#define internal_malloc(m, b) mspace_malloc(m, b) +#define internal_free(m, mem) mspace_free(m,mem); +#else /* ONLY_MSPACES */ +#if MSPACES +#define internal_malloc(m, b)\ + ((m == gm)? dlmalloc(b) : mspace_malloc(m, b)) +#define internal_free(m, mem)\ + if (m == gm) dlfree(mem); else mspace_free(m,mem); +#else /* MSPACES */ +#define internal_malloc(m, b) dlmalloc(b) +#define internal_free(m, mem) dlfree(mem) +#endif /* MSPACES */ +#endif /* ONLY_MSPACES */ + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. 
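(For a chunk at address p, the region base is (char*)p - p->prev_foot.)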
This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). +*/ + +/* Malloc using mmap */ +static void* mmap_alloc(mstate m, size_t nb) { + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (m->footprint_limit != 0) { + size_t fp = m->footprint + mmsize; + if (fp <= m->footprint || fp > m->footprint_limit) + return 0; + } + if (mmsize > nb) { /* Check for wrap around 0 */ + char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset; + p->head = psize; + mark_inuse_foot(m, p, psize); + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + + if (m->least_addr == 0 || mm < m->least_addr) + m->least_addr = mm; + if ((m->footprint += mmsize) > m->max_footprint) + m->max_footprint = m->footprint; + assert(is_aligned(chunk2mem(p))); + check_mmapped_chunk(m, p); + return chunk2mem(p); + } + } + return 0; +} + +/* Realloc using mmap */ +static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) { + size_t oldsize = chunksize(oldp); + (void)flags; /* placate people compiling -Wunused */ + if (is_small(nb)) /* Can't shrink mmap regions below small size */ + return 0; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (mparams.granularity << 1)) + return oldp; + else { + size_t offset = oldp->prev_foot; + size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + char* cp = (char*)CALL_MREMAP((char*)oldp - offset, + oldmmsize, newmmsize, flags); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - MMAP_FOOT_PAD; + newp->head = psize; + mark_inuse_foot(m, newp, psize); + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + + if (cp < m->least_addr) + m->least_addr = cp; + if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) + m->max_footprint = m->footprint; + check_mmapped_chunk(m, newp); + return newp; + } + } + return 0; +} + + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size */ +static void init_top(mstate m, mchunkptr p, size_t psize) { + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = mparams.trim_threshold; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) { + /* Establish circular links for smallbins */ + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +#if PROCEED_ON_ERROR + +/* default corruption action */ +static void reset_on_error(mstate m) { + int i; + ++malloc_corruption_error_count; + /* Reinitialize fields to forget about all memory */ + m->smallmap = m->treemap = 0; + m->dvsize = m->topsize = 0; + m->seg.base = 0; + m->seg.size = 0; + 
m->seg.next = 0; + m->top = m->dv = 0; + for (i = 0; i < NTREEBINS; ++i) + *treebin_at(m, i) = 0; + init_bins(m); +} +#endif /* PROCEED_ON_ERROR */ + +/* Allocate chunk and prepend remainder with chunk in successor base. */ +static void* prepend_alloc(mstate m, char* newbase, char* oldbase, + size_t nb) { + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (char*)oldfirst - (char*)p; + mchunkptr q = chunk_plus_offset(p, nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + + assert((char*)oldfirst > (char*)q); + assert(pinuse(oldfirst)); + assert(qsize >= MIN_CHUNK_SIZE); + + /* consolidate remainder with first chunk of old base */ + if (oldfirst == m->top) { + size_t tsize = m->topsize += qsize; + m->top = q; + q->head = tsize | PINUSE_BIT; + check_top_chunk(m, q); + } + else if (oldfirst == m->dv) { + size_t dsize = m->dvsize += qsize; + m->dv = q; + set_size_and_pinuse_of_free_chunk(q, dsize); + } + else { + if (!is_inuse(oldfirst)) { + size_t nsize = chunksize(oldfirst); + unlink_chunk(m, oldfirst, nsize); + oldfirst = chunk_plus_offset(oldfirst, nsize); + qsize += nsize; + } + set_free_with_pinuse(q, qsize, oldfirst); + insert_chunk(m, q, qsize); + check_free_chunk(m, q); + } + + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); +} + +/* Add a segment to hold a new noncontiguous region */ +static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { + /* Determine locations and sizes of segment, fenceposts, old top */ + char* old_top = (char*)m->top; + msegmentptr oldsp = segment_holding(m, old_top); + char* old_end = oldsp->base + oldsp->size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char* asp = rawsp + offset; + char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? 
old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = chunk_plus_offset(sp, ssize); + mchunkptr p = tnext; + int nfences = 0; + + /* reset top to new space */ + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + /* Set up segment record */ + assert(is_aligned(ss)); + set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); + *ss = m->seg; /* Push current record */ + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmapped; + m->seg.next = ss; + + /* Insert trailing fenceposts */ + for (;;) { + mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); + p->head = FENCEPOST_HEAD; + ++nfences; + if ((char*)(&(nextp->head)) < old_end) + p = nextp; + else + break; + } + assert(nfences >= 2); + + /* Insert the rest of old top into a bin as an ordinary free chunk */ + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = csp - old_top; + mchunkptr tn = chunk_plus_offset(q, psize); + set_free_with_pinuse(q, psize, tn); + insert_chunk(m, q, psize); + } + + check_top_chunk(m, m->top); +} + +/* -------------------------- System allocation -------------------------- */ + +/* Get memory from system using MORECORE or MMAP */ +static void* sys_alloc(mstate m, size_t nb) { + char* tbase = CMFAIL; + size_t tsize = 0; + flag_t mmap_flag = 0; + size_t asize; /* allocation size */ + + ensure_initialization(); + + /* Directly map large chunks, but only if already initialized */ + if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { + void* mem = mmap_alloc(m, nb); + if (mem != 0) + return mem; + } + + asize = granularity_align(nb + SYS_ALLOC_PADDING); + if (asize <= nb) + return 0; /* wraparound */ + if (m->footprint_limit != 0) { + size_t fp = m->footprint + asize; + if (fp <= m->footprint || fp > m->footprint_limit) + return 0; + } + + /* + Try getting memory in any of three ways (in most-preferred to + least-preferred order): + 1. A call to MORECORE that can normally contiguously extend memory. + (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or + or main space is mmapped or a previous contiguous call failed) + 2. A call to MMAP new space (disabled if not HAVE_MMAP). + Note that under the default settings, if MORECORE is unable to + fulfill a request, and HAVE_MMAP is true, then mmap is + used as a noncontiguous system allocator. This is a useful backup + strategy for systems with holes in address spaces -- in this case + sbrk cannot contiguously expand the heap, but mmap may be able to + find space. + 3. A call to MORECORE that cannot usually contiguously extend memory. + (disabled if not HAVE_MORECORE) + + In all cases, we need to request enough bytes from system to ensure + we can malloc nb bytes upon success, so pad with enough space for + top_foot, plus alignment-pad to make sure we don't lose bytes if + not on boundary, and round this up to a granularity unit. + */ + + if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { + char* br = CMFAIL; + msegmentptr ss = (m->top == 0)? 
0 : segment_holding(m, (char*)m->top); + ACQUIRE_MALLOC_GLOBAL_LOCK(); + + if (ss == 0) { /* First time through or recovery */ + char* base = (char*)CALL_MORECORE(0); + if (base != CMFAIL) { + size_t fp; + /* Adjust to end on a page boundary */ + if (!is_page_aligned(base)) + asize += (page_align((size_t)base) - (size_t)base); + fp = m->footprint + asize; /* recheck limits */ + if (asize > nb && asize < HALF_MAX_SIZE_T && + (m->footprint_limit == 0 || + (fp > m->footprint && fp <= m->footprint_limit)) && + (br = (char*)(CALL_MORECORE(asize))) == base) { + tbase = base; + tsize = asize; + } + } + } + else { + /* Subtract out existing available top space from MORECORE request. */ + asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); + /* Use mem here only if it did continuously extend old space */ + if (asize < HALF_MAX_SIZE_T && + (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) { + tbase = br; + tsize = asize; + } + } + + if (tbase == CMFAIL) { /* Cope with partial failure */ + if (br != CMFAIL) { /* Try to use/extend the space we did get */ + if (asize < HALF_MAX_SIZE_T && + asize < nb + SYS_ALLOC_PADDING) { + size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize); + if (esize < HALF_MAX_SIZE_T) { + char* end = (char*)CALL_MORECORE(esize); + if (end != CMFAIL) + asize += esize; + else { /* Can't use; try to release */ + (void) CALL_MORECORE(-asize); + br = CMFAIL; + } + } + } + } + if (br != CMFAIL) { /* Use the space we did get */ + tbase = br; + tsize = asize; + } + else + disable_contiguous(m); /* Don't try contiguous path in the future */ + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + } + + if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ + char* mp = (char*)(CALL_MMAP(asize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = asize; + mmap_flag = USE_MMAP_BIT; + } + } + + if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ + if (asize < HALF_MAX_SIZE_T) { + char* br = CMFAIL; + char* end = CMFAIL; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + br = (char*)(CALL_MORECORE(asize)); + end = (char*)(CALL_MORECORE(0)); + RELEASE_MALLOC_GLOBAL_LOCK(); + if (br != CMFAIL && end != CMFAIL && br < end) { + size_t ssize = end - br; + if (ssize > nb + TOP_FOOT_SIZE) { + tbase = br; + tsize = ssize; + } + } + } + } + + if (tbase != CMFAIL) { + + if ((m->footprint += tsize) > m->max_footprint) + m->max_footprint = m->footprint; + + if (!is_initialized(m)) { /* first-time initialization */ + if (m->least_addr == 0 || tbase < m->least_addr) + m->least_addr = tbase; + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmap_flag; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + init_bins(m); +#if !ONLY_MSPACES + if (is_global(m)) + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + else +#endif + { + /* Offset top by embedded malloc_state */ + mchunkptr mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); + } + } + + else { + /* Try to merge with an existing segment */ + msegmentptr sp = &m->seg; + /* Only consider most recent segment if traversal suppressed */ + while (sp != 0 && tbase != sp->base + sp->size) + sp = (NO_SEGMENT_TRAVERSAL) ? 
0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag && + segment_holds(sp, m->top)) { /* append */ + sp->size += tsize; + init_top(m, m->top, m->topsize + tsize); + } + else { + if (tbase < m->least_addr) + m->least_addr = tbase; + sp = &m->seg; + while (sp != 0 && sp->base != tbase + tsize) + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag) { + char* oldbase = sp->base; + sp->base = tbase; + sp->size += tsize; + return prepend_alloc(m, tbase, oldbase, nb); + } + else + add_segment(m, tbase, tsize, mmap_flag); + } + } + + if (nb < m->topsize) { /* Allocate from new or extended top space */ + size_t rsize = m->topsize -= nb; + mchunkptr p = m->top; + mchunkptr r = m->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + check_top_chunk(m, m->top); + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); + } + } + + MALLOC_FAILURE_ACTION; + return 0; +} + +/* ----------------------- system deallocation -------------------------- */ + +/* Unmap and unlink any mmapped segments that don't contain used chunks */ +static size_t release_unused_segments(mstate m) { + size_t released = 0; + int nsegs = 0; + msegmentptr pred = &m->seg; + msegmentptr sp = pred->next; + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + msegmentptr next = sp->next; + ++nsegs; + if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { + mchunkptr p = align_as_chunk(base); + size_t psize = chunksize(p); + /* Can unmap if first chunk holds entire segment and not pinned */ + if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + assert(segment_holds(sp, (char*)sp)); + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + else { + unlink_large_chunk(m, tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + m->footprint -= size; + /* unlink obsoleted record */ + sp = pred; + sp->next = next; + } + else { /* back out if cannot unmap */ + insert_large_chunk(m, tp, psize); + } + } + } + if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ + break; + pred = sp; + sp = next; + } + /* Reset check counter */ + m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)? + nsegs : MAX_RELEASE_CHECK_RATE); + return released; +} + +static int sys_trim(mstate m, size_t pad) { + size_t released = 0; + ensure_initialization(); + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = mparams.granularity; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char*)m->top); + + if (!is_extern_segment(sp)) { + if (is_mmapped_segment(sp)) { + if (HAVE_MMAP && + sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ + size_t newsize = sp->size - extra; + /* Prefer mremap, fall back to munmap */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + } + else if (HAVE_MORECORE) { + if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ + extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + { + /* Make sure end of memory is where we last set it. 
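If a foreign MORECORE/sbrk call has moved the break, skip the release rather than give back memory we do not own.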
*/ + char* old_br = (char*)(CALL_MORECORE(0)); + if (old_br == sp->base + sp->size) { + char* rel_br = (char*)(CALL_MORECORE(-extra)); + char* new_br = (char*)(CALL_MORECORE(0)); + if (rel_br != CMFAIL && new_br < old_br) + released = old_br - new_br; + } + } + RELEASE_MALLOC_GLOBAL_LOCK(); + } + } + + if (released != 0) { + sp->size -= released; + m->footprint -= released; + init_top(m, m->top, m->topsize - released); + check_top_chunk(m, m->top); + } + } + + /* Unmap any unused mmapped segments */ + if (HAVE_MMAP) + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0 && m->topsize > m->trim_check) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)? 1 : 0; +} + +/* Consolidate and bin a chunk. Differs from exported versions + of free mainly in that the chunk need not be marked as inuse. +*/ +static void dispose_chunk(mstate m, mchunkptr p, size_t psize) { + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + mchunkptr prev; + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + m->footprint -= psize; + return; + } + prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(m, prev))) { /* consolidate backward */ + if (p != m->dv) { + unlink_chunk(m, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + m->dvsize = psize; + set_free_with_pinuse(p, psize, next); + return; + } + } + else { + CORRUPTION_ERROR_ACTION(m); + return; + } + } + if (RTCHECK(ok_address(m, next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == m->top) { + size_t tsize = m->topsize += psize; + m->top = p; + p->head = tsize | PINUSE_BIT; + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + return; + } + else if (next == m->dv) { + size_t dsize = m->dvsize += psize; + m->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + return; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(m, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == m->dv) { + m->dvsize = psize; + return; + } + } + } + else { + set_free_with_pinuse(p, psize, next); + } + insert_chunk(m, p, psize); + } + else { + CORRUPTION_ERROR_ACTION(m); + } +} + +/* ---------------------------- malloc --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin */ +static void* tmalloc_large(mstate m, size_t nb) { + tchunkptr v = 0; + size_t rsize = -nb; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) { + bindex_t i; + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + t = *treebin_at(m, i); + } + 
} + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return 0 so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + if (RTCHECK(ok_address(m, v))) { /* split */ + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + } + CORRUPTION_ERROR_ACTION(m); + } + return 0; +} + +/* allocate a small request from the best fitting chunk in a treebin */ +static void* tmalloc_small(mstate m, size_t nb) { + tchunkptr t, v; + size_t rsize; + bindex_t i; + binmap_t leastbit = least_bit(m->treemap); + compute_bit2idx(leastbit, i); + v = t = *treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + if (RTCHECK(ok_address(m, v))) { + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); + } + } + + CORRUPTION_ERROR_ACTION(m); + return 0; +} + +#if !ONLY_MSPACES + +void* dlmalloc(size_t bytes) { + /* + Basic algorithm: + If a small request (< 256 bytes minus per-chunk overhead): + 1. If one exists, use a remainderless chunk in associated smallbin. + (Remainderless means that there are too few excess bytes to + represent as a chunk.) + 2. If it is big enough, use the dv chunk, which is normally the + chunk adjacent to the one used for the most recent small request. + 3. If one exists, split the smallest available chunk in a bin, + saving remainder in dv. + 4. If it is big enough, use the top chunk. + 5. If available, get memory from system and use it + Otherwise, for a large request: + 1. Find the smallest available binned chunk that fits, and use it + if it is better fitting than dv chunk, splitting if necessary. + 2. If better fitting than any binned chunk, use the dv chunk. + 3. If it is big enough, use the top chunk. + 4. If request size >= mmap threshold, try to directly mmap this chunk. + 5. If available, get memory from system and use it + + The ugly goto's here ensure that postaction occurs along all paths. + */ + +#if USE_LOCKS + ensure_initialization(); /* initialize in sys_alloc if not using locks */ +#endif + + if (!PREACTION(gm)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = gm->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
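The low two bits of smallbits cover bins idx and idx+1.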
*/ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(gm, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(gm, b, p, idx); + set_inuse_and_pinuse(gm, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb > gm->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(gm, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(gm, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(gm, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(gm, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + + if (nb <= gm->dvsize) { + size_t rsize = gm->dvsize - nb; + mchunkptr p = gm->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = gm->dv = chunk_plus_offset(p, nb); + gm->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + } + else { /* exhaust dv */ + size_t dvs = gm->dvsize; + gm->dvsize = 0; + gm->dv = 0; + set_inuse_and_pinuse(gm, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb < gm->topsize) { /* Split top */ + size_t rsize = gm->topsize -= nb; + mchunkptr p = gm->top; + mchunkptr r = gm->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + mem = chunk2mem(p); + check_top_chunk(gm, gm->top); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + mem = sys_alloc(gm, nb); + + postaction: + POSTACTION(gm); + return mem; + } + + return 0; +} + +/* ---------------------------- free --------------------------- */ + +void dlfree(void* mem) { + /* + Consolidate freed chunks with preceeding or succeeding bordering + free chunks, if they exist, and then place in a bin. Intermixed + with special cases for top, dv, mmapped chunks, and usage errors. 
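Backward consolidation is attempted first, then forward consolidation into top, dv, or an adjacent free chunk.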
+ */ + + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } +#else /* FOOTERS */ +#define fm gm +#endif /* FOOTERS */ + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +#if !FOOTERS +#undef fm +#endif /* FOOTERS */ +} + +void* dlcalloc(size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = dlmalloc(req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +#endif /* !ONLY_MSPACES */ + +/* ------------ Internal support for realloc, memalign, etc -------------- */ + +/* Try to realloc; only in-place unless can_move true */ +static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb, + int can_move) { + mchunkptr newp = 0; + size_t oldsize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, oldsize); + if (RTCHECK(ok_address(m, p) && ok_inuse(p) && + ok_next(p, next) && ok_pinuse(next))) { + if (is_mmapped(p)) { + newp = mmap_resize(m, p, nb, can_move); + } + else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + if (rsize >= MIN_CHUNK_SIZE) { /* split off remainder */ + mchunkptr r = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, r, rsize); + dispose_chunk(m, r, rsize); + } + newp = p; + } + else if (next == m->top) { /* extend into top */ + if (oldsize + m->topsize > nb) { + size_t newsize = oldsize + 
m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = p; + } + } + else if (next == m->dv) { /* extend into dv */ + size_t dvs = m->dvsize; + if (oldsize + dvs >= nb) { + size_t dsize = oldsize + dvs - nb; + if (dsize >= MIN_CHUNK_SIZE) { + mchunkptr r = chunk_plus_offset(p, nb); + mchunkptr n = chunk_plus_offset(r, dsize); + set_inuse(m, p, nb); + set_size_and_pinuse_of_free_chunk(r, dsize); + clear_pinuse(n); + m->dvsize = dsize; + m->dv = r; + } + else { /* exhaust dv */ + size_t newsize = oldsize + dvs; + set_inuse(m, p, newsize); + m->dvsize = 0; + m->dv = 0; + } + newp = p; + } + } + else if (!cinuse(next)) { /* extend into next free chunk */ + size_t nextsize = chunksize(next); + if (oldsize + nextsize >= nb) { + size_t rsize = oldsize + nextsize - nb; + unlink_chunk(m, next, nextsize); + if (rsize < MIN_CHUNK_SIZE) { + size_t newsize = oldsize + nextsize; + set_inuse(m, p, newsize); + } + else { + mchunkptr r = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, r, rsize); + dispose_chunk(m, r, rsize); + } + newp = p; + } + } + } + else { + USAGE_ERROR_ACTION(m, oldmem); + } + return newp; +} + +static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { + void* mem = 0; + if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ + alignment = MIN_CHUNK_SIZE; + if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ + size_t a = MALLOC_ALIGNMENT << 1; + while (a < alignment) a <<= 1; + alignment = a; + } + if (bytes >= MAX_REQUEST - alignment) { + if (m != 0) { /* Test isn't needed but avoids compiler warning */ + MALLOC_FAILURE_ACTION; + } + } + else { + size_t nb = request2size(bytes); + size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; + mem = internal_malloc(m, req); + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (PREACTION(m)) + return 0; + if ((((size_t)(mem)) & (alignment - 1)) != 0) { /* misaligned */ + /* + Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. + */ + char* br = (char*)mem2chunk((size_t)(((size_t)((char*)mem + alignment - + SIZE_T_ONE)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? 
+ br : br+alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = chunksize(p) - leadsize; + + if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ + newp->prev_foot = p->prev_foot + leadsize; + newp->head = newsize; + } + else { /* Otherwise, give back leader, use the rest */ + set_inuse(m, newp, newsize); + set_inuse(m, p, leadsize); + dispose_chunk(m, p, leadsize); + } + p = newp; + } + + /* Give back spare room at the end */ + if (!is_mmapped(p)) { + size_t size = chunksize(p); + if (size > nb + MIN_CHUNK_SIZE) { + size_t remainder_size = size - nb; + mchunkptr remainder = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, remainder, remainder_size); + dispose_chunk(m, remainder, remainder_size); + } + } + + mem = chunk2mem(p); + assert (chunksize(p) >= nb); + assert(((size_t)mem & (alignment - 1)) == 0); + check_inuse_chunk(m, p); + POSTACTION(m); + } + } + return mem; +} + +/* + Common support for independent_X routines, handling + all of the combinations that can result. + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed +*/ +static void** ialloc(mstate m, + size_t n_elements, + size_t* sizes, + int opts, + void* chunks[]) { + + size_t element_size; /* chunksize of each element, if all same */ + size_t contents_size; /* total size of elements */ + size_t array_size; /* request size of pointer array */ + void* mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + size_t remainder_size; /* remaining bytes while splitting */ + void** marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + flag_t was_enabled; /* to disable mmap */ + size_t size; + size_t i; + + ensure_initialization(); + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) + return chunks; /* nothing to do */ + marray = chunks; + array_size = 0; + } + else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) + return (void**)internal_malloc(m, 0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. 
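The previous mmap setting is restored immediately after this single allocation.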
+ */ + was_enabled = use_mmap(m); + disable_mmap(m); + mem = internal_malloc(m, size - CHUNK_OVERHEAD); + if (was_enabled) + enable_mmap(m); + if (mem == 0) + return 0; + + if (PREACTION(m)) return 0; + p = mem2chunk(mem); + remainder_size = chunksize(p); + + assert(!is_mmapped(p)); + + if (opts & 0x2) { /* optionally clear the elements */ + memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + size_t array_chunk_size; + array_chunk = chunk_plus_offset(p, contents_size); + array_chunk_size = remainder_size - contents_size; + marray = (void**) (chunk2mem(array_chunk)); + set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements-1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_size_and_pinuse_of_inuse_chunk(m, p, size); + p = chunk_plus_offset(p, size); + } + else { /* the final element absorbs any overallocation slop */ + set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); + break; + } + } + +#if DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } + else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(m, mem2chunk(marray)); + } + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(m, mem2chunk(marray[i])); + +#endif /* DEBUG */ + + POSTACTION(m); + return marray; +} + +/* Try to free all pointers in the given array. + Note: this could be made faster, by delaying consolidation, + at the price of disabling some user integrity checks, We + still optimize some consolidations by combining adjacent + chunks before freeing, which will occur often if allocated + with ialloc or the array is sorted. 
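The return value counts pointers that were not freed (e.g., those belonging to a different mspace when footers are enabled).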
+*/ +static size_t internal_bulk_free(mstate m, void* array[], size_t nelem) { + size_t unfreed = 0; + if (!PREACTION(m)) { + void** a; + void** fence = &(array[nelem]); + for (a = array; a != fence; ++a) { + void* mem = *a; + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t psize = chunksize(p); +#if FOOTERS + if (get_mstate_for(p) != m) { + ++unfreed; + continue; + } +#endif + check_inuse_chunk(m, p); + *a = 0; + if (RTCHECK(ok_address(m, p) && ok_inuse(p))) { + void ** b = a + 1; /* try to merge with next chunk */ + mchunkptr next = next_chunk(p); + if (b != fence && *b == chunk2mem(next)) { + size_t newsize = chunksize(next) + psize; + set_inuse(m, p, newsize); + *b = chunk2mem(p); + } + else + dispose_chunk(m, p, psize); + } + else { + CORRUPTION_ERROR_ACTION(m); + break; + } + } + } + if (should_trim(m, m->topsize)) + sys_trim(m, 0); + POSTACTION(m); + } + return unfreed; +} + +/* Traversal */ +#if MALLOC_INSPECT_ALL +static void internal_inspect_all(mstate m, + void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + if (is_initialized(m)) { + mchunkptr top = m->top; + msegmentptr s; + for (s = &m->seg; s != 0; s = s->next) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) { + mchunkptr next = next_chunk(q); + size_t sz = chunksize(q); + size_t used; + void* start; + if (is_inuse(q)) { + used = sz - CHUNK_OVERHEAD; /* must not be mmapped */ + start = chunk2mem(q); + } + else { + used = 0; + if (is_small(sz)) { /* offset by possible bookkeeping */ + start = (void*)((char*)q + sizeof(malloc_chunk)); + } + else { + start = (void*)((char*)q + sizeof(malloc_tree_chunk)); + } + } + if (start < (void*)next) /* skip if all space is bookkeeping */ + handler(start, next, used, arg); + if (q == top) + break; + q = next; + } + } + } +} +#endif /* MALLOC_INSPECT_ALL */ + +/* ------------------ Exported realloc, memalign, etc -------------------- */ + +#if !ONLY_MSPACES + +void* dlrealloc(void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem == 0) { + mem = dlmalloc(bytes); + } + else if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } +#ifdef REALLOC_ZERO_BYTES_FREES + else if (bytes == 0) { + dlfree(oldmem); + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); + POSTACTION(m); + if (newp != 0) { + check_inuse_chunk(m, newp); + mem = chunk2mem(newp); + } + else { + mem = internal_malloc(m, bytes); + if (mem != 0) { + size_t oc = chunksize(oldp) - overhead_for(oldp); + memcpy(mem, oldmem, (oc < bytes)? oc : bytes); + internal_free(m, oldmem); + } + } + } + } + return mem; +} + +void* dlrealloc_in_place(void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem != 0) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! 
FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); + POSTACTION(m); + if (newp == oldp) { + check_inuse_chunk(m, newp); + mem = oldmem; + } + } + } + } + return mem; +} + +void* dlmemalign(size_t alignment, size_t bytes) { + if (alignment <= MALLOC_ALIGNMENT) { + return dlmalloc(bytes); + } + return internal_memalign(gm, alignment, bytes); +} + +int dlposix_memalign(void** pp, size_t alignment, size_t bytes) { + void* mem = 0; + if (alignment == MALLOC_ALIGNMENT) + mem = dlmalloc(bytes); + else { + size_t d = alignment / sizeof(void*); + size_t r = alignment % sizeof(void*); + if (r != 0 || d == 0 || (d & (d-SIZE_T_ONE)) != 0) + return EINVAL; + else if (bytes >= MAX_REQUEST - alignment) { + if (alignment < MIN_CHUNK_SIZE) + alignment = MIN_CHUNK_SIZE; + mem = internal_memalign(gm, alignment, bytes); + } + } + if (mem == 0) + return ENOMEM; + else { + *pp = mem; + return 0; + } +} + +void* dlvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, bytes); +} + +void* dlpvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); +} + +void** dlindependent_calloc(size_t n_elements, size_t elem_size, + void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + return ialloc(gm, n_elements, &sz, 3, chunks); +} + +void** dlindependent_comalloc(size_t n_elements, size_t sizes[], + void* chunks[]) { + return ialloc(gm, n_elements, sizes, 0, chunks); +} + +size_t dlbulk_free(void* array[], size_t nelem) { + return internal_bulk_free(gm, array, nelem); +} + +#if MALLOC_INSPECT_ALL +void dlmalloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + ensure_initialization(); + if (!PREACTION(gm)) { + internal_inspect_all(gm, handler, arg); + POSTACTION(gm); + } +} +#endif /* MALLOC_INSPECT_ALL */ + +int dlmalloc_trim(size_t pad) { + int result = 0; + ensure_initialization(); + if (!PREACTION(gm)) { + result = sys_trim(gm, pad); + POSTACTION(gm); + } + return result; +} + +size_t dlmalloc_footprint(void) { + return gm->footprint; +} + +size_t dlmalloc_max_footprint(void) { + return gm->max_footprint; +} + +size_t dlmalloc_footprint_limit(void) { + size_t maf = gm->footprint_limit; + return maf == 0 ? 
MAX_SIZE_T : maf; +} + +size_t dlmalloc_set_footprint_limit(size_t bytes) { + size_t result; /* invert sense of 0 */ + if (bytes == 0) + result = granularity_align(1); /* Use minimal size */ + if (bytes == MAX_SIZE_T) + result = 0; /* disable */ + else + result = granularity_align(bytes); + return gm->footprint_limit = result; +} + +#if !NO_MALLINFO +struct mallinfo dlmallinfo(void) { + return internal_mallinfo(gm); +} +#endif /* NO_MALLINFO */ + +#if !NO_MALLOC_STATS +void dlmalloc_stats() { + internal_malloc_stats(gm); +} +#endif /* NO_MALLOC_STATS */ + +int dlmallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +size_t dlmalloc_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +#endif /* !ONLY_MSPACES */ + +/* ----------------------------- user mspaces ---------------------------- */ + +#if MSPACES + +static mstate init_user_mstate(char* tbase, size_t tsize) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + (void)INITIAL_LOCK(&m->mutex); + msp->head = (msize|INUSE_BITS); + m->seg.base = m->least_addr = tbase; + m->seg.size = m->footprint = m->max_footprint = tsize; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + m->mflags = mparams.default_mflags; + m->extp = 0; + m->exts = 0; + disable_contiguous(m); + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); + check_top_chunk(m, m->top); + return m; +} + +mspace create_mspace(size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + size_t rs = ((capacity == 0)? mparams.granularity : + (capacity + TOP_FOOT_SIZE + msize)); + size_t tsize = granularity_align(rs); + char* tbase = (char*)(CALL_MMAP(tsize)); + if (tbase != CMFAIL) { + m = init_user_mstate(tbase, tsize); + m->seg.sflags = USE_MMAP_BIT; + set_lock(m, locked); + } + } + return (mspace)m; +} + +mspace create_mspace_with_base(void* base, size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity > msize + TOP_FOOT_SIZE && + capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + m = init_user_mstate((char*)base, capacity); + m->seg.sflags = EXTERN_BIT; + set_lock(m, locked); + } + return (mspace)m; +} + +int mspace_track_large_chunks(mspace msp, int enable) { + int ret = 0; + mstate ms = (mstate)msp; + if (!PREACTION(ms)) { + if (!use_mmap(ms)) + ret = 1; + if (!enable) + enable_mmap(ms); + else + disable_mmap(ms); + POSTACTION(ms); + } + return ret; +} + +size_t destroy_mspace(mspace msp) { + size_t freed = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + msegmentptr sp = &ms->seg; + (void)DESTROY_LOCK(&ms->mutex); /* destroy before unmapped */ + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + flag_t flag = sp->sflags; + sp = sp->next; + if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && + CALL_MUNMAP(base, size) == 0) + freed += size; + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return freed; +} + +/* + mspace versions of routines are near-clones of the global + versions. 
This is not so nice but better than the alternatives. +*/ + +void* mspace_malloc(mspace msp, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (!PREACTION(ms)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(ms, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(ms, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } + else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + check_top_chunk(ms, ms->top); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + mem = sys_alloc(ms, nb); + + postaction: + POSTACTION(ms); + return mem; + } + + return 0; +} + +void mspace_free(mspace msp, void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + (void)msp; /* placate people compiling -Wunused */ +#else /* FOOTERS */ + mstate fm = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +} + +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + mstate ms = 
(mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = internal_malloc(ms, req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem == 0) { + mem = mspace_malloc(msp, bytes); + } + else if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } +#ifdef REALLOC_ZERO_BYTES_FREES + else if (bytes == 0) { + mspace_free(msp, oldmem); + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! FOOTERS + mstate m = (mstate)msp; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); + POSTACTION(m); + if (newp != 0) { + check_inuse_chunk(m, newp); + mem = chunk2mem(newp); + } + else { + mem = mspace_malloc(m, bytes); + if (mem != 0) { + size_t oc = chunksize(oldp) - overhead_for(oldp); + memcpy(mem, oldmem, (oc < bytes)? oc : bytes); + mspace_free(m, oldmem); + } + } + } + } + return mem; +} + +void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem != 0) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! FOOTERS + mstate m = (mstate)msp; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + (void)msp; /* placate people compiling -Wunused */ + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); + POSTACTION(m); + if (newp == oldp) { + check_inuse_chunk(m, newp); + mem = oldmem; + } + } + } + } + return mem; +} + +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (alignment <= MALLOC_ALIGNMENT) + return mspace_malloc(msp, bytes); + return internal_memalign(ms, alignment, bytes); +} + +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, &sz, 3, chunks); +} + +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, sizes, 0, chunks); +} + +size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem) { + return internal_bulk_free((mstate)msp, array, nelem); +} + +#if MALLOC_INSPECT_ALL +void mspace_inspect_all(mspace msp, + void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + internal_inspect_all(ms, handler, arg); + POSTACTION(ms); + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} +#endif /* MALLOC_INSPECT_ALL */ + +int mspace_trim(mspace msp, size_t pad) { + int result = 0; + mstate 
ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + result = sys_trim(ms, pad); + POSTACTION(ms); + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +#if !NO_MALLOC_STATS +void mspace_malloc_stats(mspace msp) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + internal_malloc_stats(ms); + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} +#endif /* NO_MALLOC_STATS */ + +size_t mspace_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_max_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->max_footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_footprint_limit(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + size_t maf = ms->footprint_limit; + result = (maf == 0) ? MAX_SIZE_T : maf; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_set_footprint_limit(mspace msp, size_t bytes) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (bytes == 0) + result = granularity_align(1); /* Use minimal size */ + if (bytes == MAX_SIZE_T) + result = 0; /* disable */ + else + result = granularity_align(bytes); + ms->footprint_limit = result; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +#if !NO_MALLINFO +struct mallinfo mspace_mallinfo(mspace msp) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + } + return internal_mallinfo(ms); +} +#endif /* NO_MALLINFO */ + +size_t mspace_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +int mspace_mallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* MSPACES */ + + +/* -------------------- Alternative MORECORE functions ------------------- */ + +/* + Guidelines for creating a custom version of MORECORE: + + * For best performance, MORECORE should allocate in multiples of pagesize. + * MORECORE may allocate more memory than requested. (Or even less, + but this will usually result in a malloc failure.) + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. + * For best performance, consecutive calls to MORECORE with positive + arguments should return increasing addresses, indicating that + space has been contiguously extended. + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + * MORECORE need not handle negative arguments -- it may instead + just return MFAIL when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM, + + As an example alternative MORECORE, here is a custom allocator + kindly contributed for pre-OSX macOS. It uses virtually but not + necessarily physically contiguous non-paged memory (locked in, + present and won't get swapped out). 
You can use it by uncommenting + this section, adding some #includes, and setting up the appropriate + defines above: + + #define MORECORE osMoreCore + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. + + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024U) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MFAIL; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MFAIL; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + +/* ----------------------------------------------------------------------- +History: + v2.8.5 Sun May 22 10:26:02 2011 Doug Lea (dl at gee) + * Always perform unlink checks unless INSECURE + * Add posix_memalign. + * Improve realloc to expand in more cases; expose realloc_in_place. + Thanks to Peter Buhr for the suggestion. + * Add footprint_limit, inspect_all, bulk_free. Thanks + to Barry Hayes and others for the suggestions. + * Internal refactorings to avoid calls while holding locks + * Use non-reentrant locks by default. Thanks to Roland McGrath + for the suggestion. + * Small fixes to mspace_destroy, reset_on_error. + * Various configuration extensions/changes. Thanks + to all who contributed these. + + V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu) + * Update Creative Commons URL + + V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + * Use zeros instead of prev foot for is_mmapped + * Add mspace_track_large_chunks; thanks to Jean Brouwers + * Fix set_inuse in internal_realloc; thanks to Jean Brouwers + * Fix insufficient sys_alloc padding when using 16byte alignment + * Fix bad error check in mspace_footprint + * Adaptations for ptmalloc; thanks to Wolfram Gloger. + * Reentrant spin locks; thanks to Earl Chew and others + * Win32 improvements; thanks to Niall Douglas and Earl Chew + * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options + * Extension hook in malloc_state + * Various small adjustments to reduce warnings on some compilers + * Various configuration extensions/changes for more platforms. Thanks + to all who contributed these. + + V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) + * Add max_footprint functions + * Ensure all appropriate literals are size_t + * Fix conditional compilation problem for some #define settings + * Avoid concatenating segments with the one provided + in create_mspace_with_base + * Rename some variables to avoid compiler shadowing warnings + * Use explicit lock initialization. + * Better handling of sbrk interference. 
+ * Simplify and fix segment insertion, trimming and mspace_destroy + * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x + * Thanks especially to Dennis Flanagan for help on these. + + V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) + * Fix memalign brace error. + + V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) + * Fix improper #endif nesting in C++ + * Add explicit casts needed for C++ + + V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) + * Use trees for large bins + * Support mspaces + * Use segments to unify sbrk-based and mmap-based system allocation, + removing need for emulation on most platforms without sbrk. + * Default safety checks + * Optional footer checks. Thanks to William Robertson for the idea. + * Internal code refactoring + * Incorporate suggestions and platform-specific changes. + Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, + Aaron Bachmann, Emery Berger, and others. + * Speed up non-fastbin processing enough to remove fastbins. + * Remove useless cfree() to avoid conflicts with other apps. + * Remove internal memcpy, memset. Compilers handle builtins better. + * Remove some options that no one ever used and rename others. + + V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + * Fix malloc_state bitmap array misdeclaration + + V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) + * Allow tuning of FIRST_SORTED_BIN_SIZE + * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. + * Better detection and support for non-contiguousness of MORECORE. + Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger + * Bypass most of malloc if no frees. Thanks To Emery Berger. + * Fix freeing of old top non-contiguous chunk im sysmalloc. + * Raised default trim and map thresholds to 256K. + * Fix mmap-related #defines. Thanks to Lubos Lunak. + * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. + * Branch-free bin calculation + * Default trim and mmap thresholds now 256K. + + V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) + * Introduce independent_comalloc and independent_calloc. + Thanks to Michael Pachos for motivation and help. + * Make optional .h file available + * Allow > 2GB requests on 32bit systems. + * new WIN32 sbrk, mmap, munmap, lock code from . + Thanks also to Andreas Mueller , + and Anonymous. + * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for + helping test this.) + * memalign: check alignment arg + * realloc: don't try to shift chunks backwards, since this + leads to more fragmentation in some programs and doesn't + seem to help in any others. + * Collect all cases in malloc requiring system memory into sysmalloc + * Use mmap as backup to sbrk + * Place all internal state in malloc_state + * Introduce fastbins (although similar to 2.5.1) + * Many minor tunings and cosmetic improvements + * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK + * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS + Thanks to Tony E. Bennett and others. + * Include errno.h to support default failure action. + + V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) + * return null for negative arguments + * Added Several WIN32 cleanups from Martin C. Fong + * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' + (e.g. 
WIN32 platforms) + * Cleanup header file inclusion for WIN32 platforms + * Cleanup code to avoid Microsoft Visual C++ compiler complaints + * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing + memory allocation routines + * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) + * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to + usage of 'assert' in non-WIN32 code + * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to + avoid infinite loop + * Always call 'fREe()' rather than 'free()' + + V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) + * Fixed ordering problem with boundary-stamping + + V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) + * Added pvalloc, as recommended by H.J. Liu + * Added 64bit pointer support mainly from Wolfram Gloger + * Added anonymously donated WIN32 sbrk emulation + * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen + * malloc_extend_top: fix mask error that caused wastage after + foreign sbrks + * Add linux mremap support code from HJ Liu + + V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) + * Integrated most documentation with the code. + * Add support for mmap, with help from + Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Use last_remainder in more cases. + * Pack bins using idea from colin@nyx10.cs.du.edu + * Use ordered bins instead of best-fit threshhold + * Eliminate block-local decls to simplify tracing and debugging. + * Support another case of realloc via move into top + * Fix error occuring when initial sbrk_base not word-aligned. + * Rely on page size for units instead of SBRK_UNIT to + avoid surprises about sbrk alignment conventions. + * Add mallinfo, mallopt. Thanks to Raymond Nijssen + (raymond@es.ele.tue.nl) for the suggestion. + * Add `pad' argument to malloc_trim and top_pad mallopt parameter. + * More precautions for cases where other routines call sbrk, + courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Added macros etc., allowing use in linux libc from + H.J. Lu (hjl@gnu.ai.mit.edu) + * Inverted this history list + + V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) + * Re-tuned and fixed to behave more nicely with V2.6.0 changes. + * Removed all preallocation code since under current scheme + the work required to undo bad preallocations exceeds + the work saved in good cases for most test programs. + * No longer use return list or unconsolidated bins since + no scheme using them consistently outperforms those that don't + given above changes. + * Use best fit for very large chunks to prevent some worst-cases. + * Added some support for debugging + + V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) + * Removed footers when chunks are in use. Thanks to + Paul Wilson (wilson@cs.texas.edu) for the suggestion. + + V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) + * Added malloc_trim, with help from Wolfram Gloger + (wmglo@Dent.MED.Uni-Muenchen.DE). 
+ + V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) + + V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) + * realloc: try to expand in both directions + * malloc: swap order of clean-bin strategy; + * realloc: only conditionally expand backwards + * Try not to scavenge used bins + * Use bin counts as a guide to preallocation + * Occasionally bin return list chunks in first scan + * Add a few optimizations from colin@nyx10.cs.du.edu + + V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) + * faster bin computation & slightly different binning + * merged all consolidations to one part of malloc proper + (eliminating old malloc_find_space & malloc_clean_bin) + * Scan 2 returns chunks (not just 1) + * Propagate failure in realloc if malloc returns 0 + * Add stuff to allow compilation on non-ANSI compilers + from kpv@research.att.com + + V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) + * removed potential for odd address access in prev_chunk + * removed dependency on getpagesize.h + * misc cosmetics and a bit more internal documentation + * anticosmetics: mangled names in macros to evade debugger strangeness + * tested on sparc, hp-700, dec-mips, rs6000 + with gcc & native cc (hp, dec only) allowing + Detlefs & Zorn comparison study (in SIGPLAN Notices.) + + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ + diff --git a/model.cc b/model.cc new file mode 100644 index 0000000..230607a --- /dev/null +++ b/model.cc @@ -0,0 +1,473 @@ +#include +#include +#include +#include + +#include "model.h" +#include "action.h" +#include "nodestack.h" +#include "schedule.h" +#include "snapshot-interface.h" +#include "common.h" +#include "datarace.h" +#include "threads-model.h" +#include "output.h" +#include "traceanalysis.h" +#include "execution.h" +#include "bugmessage.h" + +ModelChecker *model; + +/** @brief Constructor */ +ModelChecker::ModelChecker(struct model_params params) : + /* Initialize default scheduler */ + params(params), + scheduler(new Scheduler()), + node_stack(new NodeStack()), + execution(new ModelExecution(this, &this->params, scheduler, node_stack)), + execution_number(1), + diverge(NULL), + earliest_diverge(NULL), + trace_analyses() +{ +} + +/** @brief Destructor */ +ModelChecker::~ModelChecker() +{ + delete node_stack; + delete scheduler; +} + +/** + * Restores user program to initial state and resets all model-checker data + * structures. + */ +void ModelChecker::reset_to_initial_state() +{ + DEBUG("+++ Resetting to initial state +++\n"); + node_stack->reset_execution(); + + /** + * FIXME: if we utilize partial rollback, we will need to free only + * those pending actions which were NOT pending before the rollback + * point + */ + for (unsigned int i = 0; i < get_num_threads(); i++) + delete get_thread(int_to_id(i))->get_pending(); + + snapshot_backtrack_before(0); +} + +/** @return the number of user threads created during this execution */ +unsigned int ModelChecker::get_num_threads() const +{ + return execution->get_num_threads(); +} + +/** + * Must be called from user-thread context (e.g., through the global + * thread_current() interface) + * + * @return The currently executing Thread. + */ +Thread * ModelChecker::get_current_thread() const +{ + return scheduler->get_current_thread(); +} + +/** + * @brief Choose the next thread to execute. + * + * This function chooses the next thread that should execute. 
It can enforce + * execution replay/backtracking or, if the model-checker has no preference + * regarding the next thread (i.e., when exploring a new execution ordering), + * we defer to the scheduler. + * + * @return The next chosen thread to run, if any exist. Or else if the current + * execution should terminate, return NULL. + */ +Thread * ModelChecker::get_next_thread() +{ + thread_id_t tid; + + /* + * Have we completed exploring the preselected path? Then let the + * scheduler decide + */ + if (diverge == NULL) + return scheduler->select_next_thread(node_stack->get_head()); + + + /* Else, we are trying to replay an execution */ + ModelAction *next = node_stack->get_next()->get_action(); + + if (next == diverge) { + if (earliest_diverge == NULL || *diverge < *earliest_diverge) + earliest_diverge = diverge; + + Node *nextnode = next->get_node(); + Node *prevnode = nextnode->get_parent(); + scheduler->update_sleep_set(prevnode); + + /* Reached divergence point */ + if (nextnode->increment_behaviors()) { + /* Execute the same thread with a new behavior */ + tid = next->get_tid(); + node_stack->pop_restofstack(2); + } else { + ASSERT(prevnode); + /* Make a different thread execute for next step */ + scheduler->add_sleep(get_thread(next->get_tid())); + tid = prevnode->get_next_backtrack(); + /* Make sure the backtracked thread isn't sleeping. */ + node_stack->pop_restofstack(1); + if (diverge == earliest_diverge) { + earliest_diverge = prevnode->get_action(); + } + } + /* Start the round robin scheduler from this thread id */ + scheduler->set_scheduler_thread(tid); + /* The correct sleep set is in the parent node. */ + execute_sleep_set(); + + DEBUG("*** Divergence point ***\n"); + + diverge = NULL; + } else { + tid = next->get_tid(); + } + DEBUG("*** ModelChecker chose next thread = %d ***\n", id_to_int(tid)); + ASSERT(tid != THREAD_ID_T_NONE); + return get_thread(id_to_int(tid)); +} + +/** + * We need to know what the next actions of all threads in the sleep + * set will be. This method computes them and stores the actions at + * the corresponding thread object's pending action. + */ +void ModelChecker::execute_sleep_set() +{ + for (unsigned int i = 0; i < get_num_threads(); i++) { + thread_id_t tid = int_to_id(i); + Thread *thr = get_thread(tid); + if (scheduler->is_sleep_set(thr) && thr->get_pending()) { + thr->get_pending()->set_sleep_flag(); + } + } +} + +/** + * @brief Assert a bug in the executing program. + * + * Use this function to assert any sort of bug in the user program. If the + * current trace is feasible (actually, a prefix of some feasible execution), + * then this execution will be aborted, printing the appropriate message. If + * the current trace is not yet feasible, the error message will be stashed and + * printed if the execution ever becomes feasible. + * + * @param msg Descriptive message for the bug (do not include newline char) + * @return True if bug is immediately-feasible + */ +bool ModelChecker::assert_bug(const char *msg, ...) 
+{ + char str[800]; + + va_list ap; + va_start(ap, msg); + vsnprintf(str, sizeof(str), msg, ap); + va_end(ap); + + return execution->assert_bug(str); +} + +/** + * @brief Assert a bug in the executing program, asserted by a user thread + * @see ModelChecker::assert_bug + * @param msg Descriptive message for the bug (do not include newline char) + */ +void ModelChecker::assert_user_bug(const char *msg) +{ + /* If feasible bug, bail out now */ + if (assert_bug(msg)) + switch_to_master(NULL); +} + +/** @brief Print bug report listing for this execution (if any bugs exist) */ +void ModelChecker::print_bugs() const +{ + SnapVector *bugs = execution->get_bugs(); + + model_print("Bug report: %zu bug%s detected\n", + bugs->size(), + bugs->size() > 1 ? "s" : ""); + for (unsigned int i = 0; i < bugs->size(); i++) + (*bugs)[i]->print(); +} + +/** + * @brief Record end-of-execution stats + * + * Must be run when exiting an execution. Records various stats. + * @see struct execution_stats + */ +void ModelChecker::record_stats() +{ + stats.num_total++; + if (!execution->isfeasibleprefix()) + stats.num_infeasible++; + else if (execution->have_bug_reports()) + stats.num_buggy_executions++; + else if (execution->is_complete_execution()) + stats.num_complete++; + else { + stats.num_redundant++; + + /** + * @todo We can violate this ASSERT() when fairness/sleep sets + * conflict to cause an execution to terminate, e.g. with: + * Scheduler: [0: disabled][1: disabled][2: sleep][3: current, enabled] + */ + //ASSERT(scheduler->all_threads_sleeping()); + } +} + +/** @brief Print execution stats */ +void ModelChecker::print_stats() const +{ + model_print("Number of complete, bug-free executions: %d\n", stats.num_complete); + model_print("Number of redundant executions: %d\n", stats.num_redundant); + model_print("Number of buggy executions: %d\n", stats.num_buggy_executions); + model_print("Number of infeasible executions: %d\n", stats.num_infeasible); + model_print("Total executions: %d\n", stats.num_total); + if (params.verbose) + model_print("Total nodes created: %d\n", node_stack->get_total_nodes()); +} + +/** + * @brief End-of-exeuction print + * @param printbugs Should any existing bugs be printed? + */ +void ModelChecker::print_execution(bool printbugs) const +{ + model_print("Program output from execution %d:\n", + get_execution_number()); + print_program_output(); + + if (params.verbose >= 2) { + model_print("\nEarliest divergence point since last feasible execution:\n"); + if (earliest_diverge) + earliest_diverge->print(); + else + model_print("(Not set)\n"); + + model_print("\n"); + print_stats(); + } + + /* Don't print invalid bugs */ + if (printbugs && execution->have_bug_reports()) { + model_print("\n"); + print_bugs(); + } + + model_print("\n"); + execution->print_summary(); +} + +/** + * Queries the model-checker for more executions to explore and, if one + * exists, resets the model-checker state to execute a new execution. + * + * @return If there are more executions to explore, return true. Otherwise, + * return false. + */ +bool ModelChecker::next_execution() +{ + DBG(); + /* Is this execution a feasible execution that's worth bug-checking? 
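	   (Editorial gloss: yes, when the trace is a feasible prefix and has
	   either run to completion or already produced bug reports; that is
	   exactly the conjunction computed below.)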
*/ + bool complete = execution->isfeasibleprefix() && + (execution->is_complete_execution() || + execution->have_bug_reports()); + + /* End-of-execution bug checks */ + if (complete) { + if (execution->is_deadlocked()) + assert_bug("Deadlock detected"); + + checkDataRaces(); + run_trace_analyses(); + } + + record_stats(); + + /* Output */ + if (params.verbose || (complete && execution->have_bug_reports())) + print_execution(complete); + else + clear_program_output(); + + if (complete) + earliest_diverge = NULL; + + if ((diverge = execution->get_next_backtrack()) == NULL) + return false; + + if (DBG_ENABLED()) { + model_print("Next execution will diverge at:\n"); + diverge->print(); + } + + execution_number++; + + reset_to_initial_state(); + return true; +} + +/** @brief Run trace analyses on complete trace */ +void ModelChecker::run_trace_analyses() { + for (unsigned int i = 0; i < trace_analyses.size(); i++) + trace_analyses[i]->analyze(execution->get_action_trace()); +} + +/** + * @brief Get a Thread reference by its ID + * @param tid The Thread's ID + * @return A Thread reference + */ +Thread * ModelChecker::get_thread(thread_id_t tid) const +{ + return execution->get_thread(tid); +} + +/** + * @brief Get a reference to the Thread in which a ModelAction was executed + * @param act The ModelAction + * @return A Thread reference + */ +Thread * ModelChecker::get_thread(const ModelAction *act) const +{ + return execution->get_thread(act); +} + +/** + * Switch from a model-checker context to a user-thread context. This is the + * complement of ModelChecker::switch_to_master and must be called from the + * model-checker context + * + * @param thread The user-thread to switch to + */ +void ModelChecker::switch_from_master(Thread *thread) +{ + scheduler->set_current_thread(thread); + Thread::swap(&system_context, thread); +} + +/** + * Switch from a user-context to the "master thread" context (a.k.a. system + * context). This switch is made with the intention of exploring a particular + * model-checking action (described by a ModelAction object). Must be called + * from a user-thread context. + * + * @param act The current action that will be explored. May be NULL only if + * trace is exiting via an assertion (see ModelExecution::set_assert and + * ModelExecution::has_asserted). + * @return Return the value returned by the current action + */ +uint64_t ModelChecker::switch_to_master(ModelAction *act) +{ + DBG(); + Thread *old = thread_current(); + scheduler->set_current_thread(NULL); + ASSERT(!old->get_pending()); + old->set_pending(act); + if (Thread::swap(old, &system_context) < 0) { + perror("swap threads"); + exit(EXIT_FAILURE); + } + return old->get_return_value(); +} + +/** Wrapper to run the user's main function, with appropriate arguments */ +void user_main_wrapper(void *) +{ + user_main(model->params.argc, model->params.argv); +} + +bool ModelChecker::should_terminate_execution() +{ + /* Infeasible -> don't take any more steps */ + if (execution->is_infeasible()) + return true; + else if (execution->isfeasibleprefix() && execution->have_bug_reports()) { + execution->set_assert(); + return true; + } + + if (execution->too_many_steps()) + return true; + return false; +} + +/** @brief Run ModelChecker for the user program */ +void ModelChecker::run() +{ + do { + thrd_t user_thread; + Thread *t = new Thread(execution->get_next_id(), &user_thread, &user_main_wrapper, NULL, NULL); + execution->add_thread(t); + + do { + /* + * Stash next pending action(s) for thread(s). 
There + * should only need to stash one thread's action--the + * thread which just took a step--plus the first step + * for any newly-created thread + */ + for (unsigned int i = 0; i < get_num_threads(); i++) { + thread_id_t tid = int_to_id(i); + Thread *thr = get_thread(tid); + if (!thr->is_model_thread() && !thr->is_complete() && !thr->get_pending()) { + switch_from_master(thr); + if (thr->is_waiting_on(thr)) + assert_bug("Deadlock detected (thread %u)", i); + } + } + + /* Don't schedule threads which should be disabled */ + for (unsigned int i = 0; i < get_num_threads(); i++) { + Thread *th = get_thread(int_to_id(i)); + ModelAction *act = th->get_pending(); + if (act && execution->is_enabled(th) && !execution->check_action_enabled(act)) { + scheduler->sleep(th); + } + } + + /* Catch assertions from prior take_step or from + * between-ModelAction bugs (e.g., data races) */ + if (execution->has_asserted()) + break; + + if (!t) + t = get_next_thread(); + if (!t || t->is_model_thread()) + break; + + /* Consume the next action for a Thread */ + ModelAction *curr = t->get_pending(); + t->set_pending(NULL); + t = execution->take_step(curr); + } while (!should_terminate_execution()); + + } while (next_execution()); + + execution->fixup_release_sequences(); + + model_print("******* Model-checking complete: *******\n"); + print_stats(); + + /* Have the trace analyses dump their output. */ + for (unsigned int i = 0; i < trace_analyses.size(); i++) + trace_analyses[i]->finish(); +} diff --git a/model.h b/model.h new file mode 100644 index 0000000..74cb4e1 --- /dev/null +++ b/model.h @@ -0,0 +1,113 @@ +/** @file model.h + * @brief Core model checker. + */ + +#ifndef __MODEL_H__ +#define __MODEL_H__ + +#include +#include + +#include "mymemory.h" +#include "hashtable.h" +#include "config.h" +#include "modeltypes.h" +#include "stl-model.h" +#include "context.h" +#include "params.h" + +/* Forward declaration */ +class Node; +class NodeStack; +class CycleGraph; +class Promise; +class Scheduler; +class Thread; +class ClockVector; +class TraceAnalysis; +class ModelExecution; +class ModelAction; + +typedef SnapList action_list_t; + +/** @brief Model checker execution stats */ +struct execution_stats { + int num_total; /**< @brief Total number of executions */ + int num_infeasible; /**< @brief Number of infeasible executions */ + int num_buggy_executions; /** @brief Number of buggy executions */ + int num_complete; /**< @brief Number of feasible, non-buggy, complete executions */ + int num_redundant; /**< @brief Number of redundant, aborted executions */ +}; + +/** @brief The central structure for model-checking */ +class ModelChecker { +public: + ModelChecker(struct model_params params); + ~ModelChecker(); + + void run(); + + /** @returns the context for the main model-checking system thread */ + ucontext_t * get_system_context() { return &system_context; } + + ModelExecution * get_execution() const { return execution; } + + int get_execution_number() const { return execution_number; } + + Thread * get_thread(thread_id_t tid) const; + Thread * get_thread(const ModelAction *act) const; + + Thread * get_current_thread() const; + + void switch_from_master(Thread *thread); + uint64_t switch_to_master(ModelAction *act); + + bool assert_bug(const char *msg, ...); + void assert_user_bug(const char *msg); + + const model_params params; + void add_trace_analysis(TraceAnalysis *a) { + trace_analyses.push_back(a); + } + + MEMALLOC +private: + /** The scheduler to use: tracks the running/ready Threads */ + Scheduler * 
const scheduler; + NodeStack * const node_stack; + ModelExecution *execution; + + int execution_number; + + unsigned int get_num_threads() const; + + void execute_sleep_set(); + + bool next_execution(); + bool should_terminate_execution(); + + Thread * get_next_thread(); + void reset_to_initial_state(); + + + ModelAction *diverge; + ModelAction *earliest_diverge; + + ucontext_t system_context; + + ModelVector trace_analyses; + + /** @brief The cumulative execution stats */ + struct execution_stats stats; + void record_stats(); + void run_trace_analyses(); + void print_bugs() const; + void print_execution(bool printbugs) const; + void print_stats() const; + + friend void user_main_wrapper(); +}; + +extern ModelChecker *model; + +#endif /* __MODEL_H__ */ diff --git a/mutex.cc b/mutex.cc new file mode 100644 index 0000000..d5ec40f --- /dev/null +++ b/mutex.cc @@ -0,0 +1,34 @@ +#include + +#include "model.h" +#include "execution.h" +#include "threads-model.h" +#include "clockvector.h" +#include "action.h" + +namespace std { + +mutex::mutex() +{ + state.locked = NULL; + thread_id_t tid = thread_current()->get_id(); + state.alloc_tid = tid; + state.alloc_clock = model->get_execution()->get_cv(tid)->getClock(tid); +} + +void mutex::lock() +{ + model->switch_to_master(new ModelAction(ATOMIC_LOCK, std::memory_order_seq_cst, this)); +} + +bool mutex::try_lock() +{ + return model->switch_to_master(new ModelAction(ATOMIC_TRYLOCK, std::memory_order_seq_cst, this)); +} + +void mutex::unlock() +{ + model->switch_to_master(new ModelAction(ATOMIC_UNLOCK, std::memory_order_seq_cst, this)); +} + +} diff --git a/mymemory.cc b/mymemory.cc new file mode 100644 index 0000000..9e05c36 --- /dev/null +++ b/mymemory.cc @@ -0,0 +1,268 @@ +#include +#include +#include +#include +#include +#include + +#include "mymemory.h" +#include "snapshot.h" +#include "common.h" +#include "threads-model.h" +#include "model.h" + +#define REQUESTS_BEFORE_ALLOC 1024 + +size_t allocatedReqs[REQUESTS_BEFORE_ALLOC] = { 0 }; +int nextRequest = 0; +int howManyFreed = 0; +#if !USE_MPROTECT_SNAPSHOT +static mspace sStaticSpace = NULL; +#endif + +/** Non-snapshotting calloc for our use. */ +void *model_calloc(size_t count, size_t size) +{ +#if USE_MPROTECT_SNAPSHOT + static void *(*callocp)(size_t count, size_t size) = NULL; + char *error; + void *ptr; + + /* get address of libc malloc */ + if (!callocp) { + callocp = (void * (*)(size_t, size_t))dlsym(RTLD_NEXT, "calloc"); + if ((error = dlerror()) != NULL) { + fputs(error, stderr); + exit(EXIT_FAILURE); + } + } + ptr = callocp(count, size); + return ptr; +#else + if (!sStaticSpace) + sStaticSpace = create_shared_mspace(); + return mspace_calloc(sStaticSpace, count, size); +#endif +} + +/** Non-snapshotting malloc for our use. 
*/ +void *model_malloc(size_t size) +{ +#if USE_MPROTECT_SNAPSHOT + static void *(*mallocp)(size_t size) = NULL; + char *error; + void *ptr; + + /* get address of libc malloc */ + if (!mallocp) { + mallocp = (void * (*)(size_t))dlsym(RTLD_NEXT, "malloc"); + if ((error = dlerror()) != NULL) { + fputs(error, stderr); + exit(EXIT_FAILURE); + } + } + ptr = mallocp(size); + return ptr; +#else + if (!sStaticSpace) + sStaticSpace = create_shared_mspace(); + return mspace_malloc(sStaticSpace, size); +#endif +} + +/** @brief Snapshotting malloc, for use by model-checker (not user progs) */ +void * snapshot_malloc(size_t size) +{ + void *tmp = mspace_malloc(model_snapshot_space, size); + ASSERT(tmp); + return tmp; +} + +/** @brief Snapshotting calloc, for use by model-checker (not user progs) */ +void * snapshot_calloc(size_t count, size_t size) +{ + void *tmp = mspace_calloc(model_snapshot_space, count, size); + ASSERT(tmp); + return tmp; +} + +/** @brief Snapshotting realloc, for use by model-checker (not user progs) */ +void *snapshot_realloc(void *ptr, size_t size) +{ + void *tmp = mspace_realloc(model_snapshot_space, ptr, size); + ASSERT(tmp); + return tmp; +} + +/** @brief Snapshotting free, for use by model-checker (not user progs) */ +void snapshot_free(void *ptr) +{ + mspace_free(model_snapshot_space, ptr); +} + +/** Non-snapshotting free for our use. */ +void model_free(void *ptr) +{ +#if USE_MPROTECT_SNAPSHOT + static void (*freep)(void *); + char *error; + + /* get address of libc free */ + if (!freep) { + freep = (void (*)(void *))dlsym(RTLD_NEXT, "free"); + if ((error = dlerror()) != NULL) { + fputs(error, stderr); + exit(EXIT_FAILURE); + } + } + freep(ptr); +#else + mspace_free(sStaticSpace, ptr); +#endif +} + +/** Bootstrap allocation. Problem is that the dynamic linker calls require + * calloc to work and calloc requires the dynamic linker to work. */ + +#define BOOTSTRAPBYTES 4096 +char bootstrapmemory[BOOTSTRAPBYTES]; +size_t offset = 0; + +void * HandleEarlyAllocationRequest(size_t sz) +{ + /* Align to 8 byte boundary */ + sz = (sz + 7) & ~7; + + if (sz > (BOOTSTRAPBYTES-offset)) { + model_print("OUT OF BOOTSTRAP MEMORY\n"); + exit(EXIT_FAILURE); + } + + void *pointer = (void *)&bootstrapmemory[offset]; + offset += sz; + return pointer; +} + +/** @brief Global mspace reference for the model-checker's snapshotting heap */ +mspace model_snapshot_space = NULL; + +#if USE_MPROTECT_SNAPSHOT + +/** @brief Global mspace reference for the user's snapshotting heap */ +mspace user_snapshot_space = NULL; + +/** Check whether this is bootstrapped memory that we should not free */ +static bool DontFree(void *ptr) +{ + return (ptr >= (&bootstrapmemory[0]) && ptr < (&bootstrapmemory[BOOTSTRAPBYTES])); +} + +/** + * @brief The allocator function for "user" allocation + * + * Should only be used for allocations which will not disturb the allocation + * patterns of a user thread. + */ +static void * user_malloc(size_t size) +{ + void *tmp = mspace_malloc(user_snapshot_space, size); + ASSERT(tmp); + return tmp; +} + +/** + * @brief Snapshotting malloc implementation for user programs + * + * Do NOT call this function from a model-checker context. Doing so may disrupt + * the allocation patterns of a user thread. 
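 *
 * (Editorial note: requests that arrive before user_snapshot_space has been
 * created, e.g. allocations made by the dynamic linker during startup, fall
 * through to HandleEarlyAllocationRequest and are carved out of the static
 * bootstrapmemory buffer instead.)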
+ */ +void *malloc(size_t size) +{ + if (user_snapshot_space) { + /* Only perform user allocations from user context */ + ASSERT(!model || thread_current()); + return user_malloc(size); + } else + return HandleEarlyAllocationRequest(size); +} + +/** @brief Snapshotting free implementation for user programs */ +void free(void * ptr) +{ + if (!DontFree(ptr)) + mspace_free(user_snapshot_space, ptr); +} + +/** @brief Snapshotting realloc implementation for user programs */ +void *realloc(void *ptr, size_t size) +{ + void *tmp = mspace_realloc(user_snapshot_space, ptr, size); + ASSERT(tmp); + return tmp; +} + +/** @brief Snapshotting calloc implementation for user programs */ +void * calloc(size_t num, size_t size) +{ + if (user_snapshot_space) { + void *tmp = mspace_calloc(user_snapshot_space, num, size); + ASSERT(tmp); + return tmp; + } else { + void *tmp = HandleEarlyAllocationRequest(size * num); + memset(tmp, 0, size * num); + return tmp; + } +} + +/** @brief Snapshotting allocation function for use by the Thread class only */ +void * Thread_malloc(size_t size) +{ + return user_malloc(size); +} + +/** @brief Snapshotting free function for use by the Thread class only */ +void Thread_free(void *ptr) +{ + free(ptr); +} + +/** @brief Snapshotting new operator for user programs */ +void * operator new(size_t size) throw(std::bad_alloc) +{ + return malloc(size); +} + +/** @brief Snapshotting delete operator for user programs */ +void operator delete(void *p) throw() +{ + free(p); +} + +/** @brief Snapshotting new[] operator for user programs */ +void * operator new[](size_t size) throw(std::bad_alloc) +{ + return malloc(size); +} + +/** @brief Snapshotting delete[] operator for user programs */ +void operator delete[](void *p, size_t size) +{ + free(p); +} + +#else /* !USE_MPROTECT_SNAPSHOT */ + +/** @brief Snapshotting allocation function for use by the Thread class only */ +void * Thread_malloc(size_t size) +{ + return malloc(size); +} + +/** @brief Snapshotting free function for use by the Thread class only */ +void Thread_free(void *ptr) +{ + free(ptr); +} + +#endif /* !USE_MPROTECT_SNAPSHOT */ diff --git a/mymemory.h b/mymemory.h new file mode 100644 index 0000000..a62ab83 --- /dev/null +++ b/mymemory.h @@ -0,0 +1,267 @@ +/** @file mymemory.h + * @brief Memory allocation functions. + */ + +#ifndef _MY_MEMORY_H +#define _MY_MEMORY_H +#include +#include + +#include "config.h" + +/** MEMALLOC declares the allocators for a class to allocate + * memory in the non-snapshotting heap. */ +#define MEMALLOC \ + void * operator new(size_t size) { \ + return model_malloc(size); \ + } \ + void operator delete(void *p, size_t size) { \ + model_free(p); \ + } \ + void * operator new[](size_t size) { \ + return model_malloc(size); \ + } \ + void operator delete[](void *p, size_t size) { \ + model_free(p); \ + } \ + void * operator new(size_t size, void *p) { /* placement new */ \ + return p; \ + } + +/** SNAPSHOTALLOC declares the allocators for a class to allocate + * memory in the snapshotting heap. 
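 *
 * (Editorial sketch, not part of this patch: a class opts into a heap by
 * placing one of these macros in its body, which defines class-level
 * operator new/delete; model.h does this by placing MEMALLOC in class
 * ModelChecker. A hypothetical snapshot-allocated class would read:
 *
 *     class TraceNote {
 *     public:
 *         int data;
 *         SNAPSHOTALLOC   // new/delete now route to snapshot_malloc/snapshot_free
 *     };
 *
 * The ModelAlloc/SnapshotAlloc templates later in this header play the same
 * role for STL containers, e.g. std::list<T, SnapshotAlloc<T> >.)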
*/ +#define SNAPSHOTALLOC \ + void * operator new(size_t size) { \ + return snapshot_malloc(size); \ + } \ + void operator delete(void *p, size_t size) { \ + snapshot_free(p); \ + } \ + void * operator new[](size_t size) { \ + return snapshot_malloc(size); \ + } \ + void operator delete[](void *p, size_t size) { \ + snapshot_free(p); \ + } \ + void * operator new(size_t size, void *p) { /* placement new */ \ + return p; \ + } + +void *model_malloc(size_t size); +void *model_calloc(size_t count, size_t size); +void model_free(void *ptr); + +void * snapshot_malloc(size_t size); +void * snapshot_calloc(size_t count, size_t size); +void * snapshot_realloc(void *ptr, size_t size); +void snapshot_free(void *ptr); + +void * Thread_malloc(size_t size); +void Thread_free(void *ptr); + +/** @brief Provides a non-snapshotting allocator for use in STL classes. + * + * The code was adapted from a code example from the book The C++ + * Standard Library - A Tutorial and Reference by Nicolai M. Josuttis, + * Addison-Wesley, 1999 © Copyright Nicolai M. Josuttis 1999 + * Permission to copy, use, modify, sell and distribute this software + * is granted provided this copyright notice appears in all copies. + * This software is provided "as is" without express or implied + * warranty, and with no claim as to its suitability for any purpose. + */ +template +class ModelAlloc { + public: + // type definitions + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef size_t size_type; + typedef size_t difference_type; + + // rebind allocator to type U + template + struct rebind { + typedef ModelAlloc other; + }; + + // return address of values + pointer address(reference value) const { + return &value; + } + const_pointer address(const_reference value) const { + return &value; + } + + /* constructors and destructor + * - nothing to do because the allocator has no state + */ + ModelAlloc() throw() { + } + ModelAlloc(const ModelAlloc&) throw() { + } + template + ModelAlloc(const ModelAlloc&) throw() { + } + ~ModelAlloc() throw() { + } + + // return maximum number of elements that can be allocated + size_type max_size() const throw() { + return std::numeric_limits::max() / sizeof(T); + } + + // allocate but don't initialize num elements of type T + pointer allocate(size_type num, const void * = 0) { + pointer p = (pointer)model_malloc(num * sizeof(T)); + return p; + } + + // initialize elements of allocated storage p with value value + void construct(pointer p, const T& value) { + // initialize memory with placement new + new((void*)p)T(value); + } + + // destroy elements of initialized storage p + void destroy(pointer p) { + // destroy objects by calling their destructor + p->~T(); + } + + // deallocate storage p of deleted elements + void deallocate(pointer p, size_type num) { + model_free((void*)p); + } +}; + +/** Return that all specializations of this allocator are interchangeable. */ +template +bool operator ==(const ModelAlloc&, + const ModelAlloc&) throw() { + return true; +} + +/** Return that all specializations of this allocator are interchangeable. */ +template +bool operator!= (const ModelAlloc&, + const ModelAlloc&) throw() { + return false; +} + +/** @brief Provides a snapshotting allocator for use in STL classes. + * + * The code was adapted from a code example from the book The C++ + * Standard Library - A Tutorial and Reference by Nicolai M. Josuttis, + * Addison-Wesley, 1999 © Copyright Nicolai M. 
Josuttis 1999 + * Permission to copy, use, modify, sell and distribute this software + * is granted provided this copyright notice appears in all copies. + * This software is provided "as is" without express or implied + * warranty, and with no claim as to its suitability for any purpose. + */ +template +class SnapshotAlloc { + public: + // type definitions + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef size_t size_type; + typedef size_t difference_type; + + // rebind allocator to type U + template + struct rebind { + typedef SnapshotAlloc other; + }; + + // return address of values + pointer address(reference value) const { + return &value; + } + const_pointer address(const_reference value) const { + return &value; + } + + /* constructors and destructor + * - nothing to do because the allocator has no state + */ + SnapshotAlloc() throw() { + } + SnapshotAlloc(const SnapshotAlloc&) throw() { + } + template + SnapshotAlloc(const SnapshotAlloc&) throw() { + } + ~SnapshotAlloc() throw() { + } + + // return maximum number of elements that can be allocated + size_type max_size() const throw() { + return std::numeric_limits::max() / sizeof(T); + } + + // allocate but don't initialize num elements of type T + pointer allocate(size_type num, const void * = 0) { + pointer p = (pointer)snapshot_malloc(num * sizeof(T)); + return p; + } + + // initialize elements of allocated storage p with value value + void construct(pointer p, const T& value) { + // initialize memory with placement new + new((void*)p)T(value); + } + + // destroy elements of initialized storage p + void destroy(pointer p) { + // destroy objects by calling their destructor + p->~T(); + } + + // deallocate storage p of deleted elements + void deallocate(pointer p, size_type num) { + snapshot_free((void*)p); + } +}; + +/** Return that all specializations of this allocator are interchangeable. */ +template +bool operator ==(const SnapshotAlloc&, + const SnapshotAlloc&) throw() { + return true; +} + +/** Return that all specializations of this allocator are interchangeable. */ +template +bool operator!= (const SnapshotAlloc&, + const SnapshotAlloc&) throw() { + return false; +} + +#ifdef __cplusplus +extern "C" { +#endif + typedef void * mspace; + extern void * mspace_malloc(mspace msp, size_t bytes); + extern void mspace_free(mspace msp, void* mem); + extern void * mspace_realloc(mspace msp, void* mem, size_t newsize); + extern void * mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + extern mspace create_mspace_with_base(void* base, size_t capacity, int locked); + extern mspace create_mspace(size_t capacity, int locked); + +#if USE_MPROTECT_SNAPSHOT + extern mspace user_snapshot_space; +#endif + + extern mspace model_snapshot_space; + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif + +#endif /* _MY_MEMORY_H */ diff --git a/nodestack.cc b/nodestack.cc new file mode 100644 index 0000000..e5f4687 --- /dev/null +++ b/nodestack.cc @@ -0,0 +1,865 @@ +#define __STDC_FORMAT_MACROS +#include + +#include + +#include "nodestack.h" +#include "action.h" +#include "common.h" +#include "threads-model.h" +#include "modeltypes.h" +#include "execution.h" +#include "params.h" + +/** + * @brief Node constructor + * + * Constructs a single Node for use in a NodeStack. 
Each Node is associated + * with exactly one ModelAction (exception: the first Node should be created + * as an empty stub, to represent the first thread "choice") and up to one + * parent. + * + * @param params The model-checker parameters + * @param act The ModelAction to associate with this Node. May be NULL. + * @param par The parent Node in the NodeStack. May be NULL if there is no + * parent. + * @param nthreads The number of threads which exist at this point in the + * execution trace. + */ +Node::Node(const struct model_params *params, ModelAction *act, Node *par, + int nthreads, Node *prevfairness) : + read_from_status(READ_FROM_PAST), + action(act), + params(params), + uninit_action(NULL), + parent(par), + num_threads(nthreads), + explored_children(num_threads), + backtrack(num_threads), + fairness(num_threads), + numBacktracks(0), + enabled_array(NULL), + read_from_past(), + read_from_past_idx(0), + read_from_promises(), + read_from_promise_idx(-1), + future_values(), + future_index(-1), + resolve_promise(), + resolve_promise_idx(-1), + relseq_break_writes(), + relseq_break_index(0), + misc_index(0), + misc_max(0), + yield_data(NULL) +{ + ASSERT(act); + act->set_node(this); + int currtid = id_to_int(act->get_tid()); + int prevtid = prevfairness ? id_to_int(prevfairness->action->get_tid()) : 0; + + if (get_params()->fairwindow != 0) { + for (int i = 0; i < num_threads; i++) { + ASSERT(i < ((int)fairness.size())); + struct fairness_info *fi = &fairness[i]; + struct fairness_info *prevfi = (parent && i < parent->get_num_threads()) ? &parent->fairness[i] : NULL; + if (prevfi) { + *fi = *prevfi; + } + if (parent && parent->is_enabled(int_to_id(i))) { + fi->enabled_count++; + } + if (i == currtid) { + fi->turns++; + fi->priority = false; + } + /* Do window processing */ + if (prevfairness != NULL) { + if (prevfairness->parent->is_enabled(int_to_id(i))) + fi->enabled_count--; + if (i == prevtid) { + fi->turns--; + } + /* Need full window to start evaluating + * conditions + * If we meet the enabled count and have no + * turns, give us priority */ + if ((fi->enabled_count >= get_params()->enabledcount) && + (fi->turns == 0)) + fi->priority = true; + } + } + } +} + +int Node::get_yield_data(int tid1, int tid2) const { + if (tid1get_tid()); + + for(int u = 0; u < num_threads; u++) { + for(int v = 0; v < num_threads; v++) { + int yield_state=parent->get_yield_data(u, v); + bool next_enabled=scheduler->is_enabled(int_to_id(v)); + bool curr_enabled=parent->is_enabled(int_to_id(v)); + if (!next_enabled) { + //Compute intersection of ES and E + yield_state&=~YIELD_E; + //Check to see if we disabled the thread + if (u==curr_tid && curr_enabled) + yield_state|=YIELD_D; + } + yield_data[YIELD_INDEX(u, v, num_threads)]=yield_state; + } + yield_data[YIELD_INDEX(u, curr_tid, num_threads)]=(yield_data[YIELD_INDEX(u, curr_tid, num_threads)]&~YIELD_P)|YIELD_S; + } + //handle curr.yield(t) part of computation + if (action->is_yield()) { + for(int v = 0; v < num_threads; v++) { + int yield_state=yield_data[YIELD_INDEX(curr_tid, v, num_threads)]; + if ((yield_state & (YIELD_E | YIELD_D)) && (!(yield_state & YIELD_S))) + yield_state |= YIELD_P; + yield_state &= YIELD_P; + if (scheduler->is_enabled(int_to_id(v))) { + yield_state|=YIELD_E; + } + yield_data[YIELD_INDEX(curr_tid, v, num_threads)]=yield_state; + } + } +} + +/** @brief Node desctructor */ +Node::~Node() +{ + delete action; + if (uninit_action) + delete uninit_action; + if (enabled_array) + model_free(enabled_array); + if (yield_data) + 
model_free(yield_data); +} + +/** Prints debugging info for the ModelAction associated with this Node */ +void Node::print() const +{ + action->print(); + model_print(" thread status: "); + if (enabled_array) { + for (int i = 0; i < num_threads; i++) { + char str[20]; + enabled_type_to_string(enabled_array[i], str); + model_print("[%d: %s]", i, str); + } + model_print("\n"); + } else + model_print("(info not available)\n"); + model_print(" backtrack: %s", backtrack_empty() ? "empty" : "non-empty "); + for (int i = 0; i < (int)backtrack.size(); i++) + if (backtrack[i] == true) + model_print("[%d]", i); + model_print("\n"); + + model_print(" read from past: %s", read_from_past_empty() ? "empty" : "non-empty "); + for (int i = read_from_past_idx + 1; i < (int)read_from_past.size(); i++) + model_print("[%d]", read_from_past[i]->get_seq_number()); + model_print("\n"); + + model_print(" read-from promises: %s", read_from_promise_empty() ? "empty" : "non-empty "); + for (int i = read_from_promise_idx + 1; i < (int)read_from_promises.size(); i++) + model_print("[%d]", read_from_promises[i]->get_seq_number()); + model_print("\n"); + + model_print(" future values: %s", future_value_empty() ? "empty" : "non-empty "); + for (int i = future_index + 1; i < (int)future_values.size(); i++) + model_print("[%#" PRIx64 "]", future_values[i].value); + model_print("\n"); + + model_print(" promises: %s\n", promise_empty() ? "empty" : "non-empty"); + model_print(" misc: %s\n", misc_empty() ? "empty" : "non-empty"); + model_print(" rel seq break: %s\n", relseq_break_empty() ? "empty" : "non-empty"); +} + +/****************************** threads backtracking **************************/ + +/** + * Checks if the Thread associated with this thread ID has been explored from + * this Node already. + * @param tid is the thread ID to check + * @return true if this thread choice has been explored already, false + * otherwise + */ +bool Node::has_been_explored(thread_id_t tid) const +{ + int id = id_to_int(tid); + return explored_children[id]; +} + +/** + * Checks if the backtracking set is empty. + * @return true if the backtracking set is empty + */ +bool Node::backtrack_empty() const +{ + return (numBacktracks == 0); +} + +void Node::explore(thread_id_t tid) +{ + int i = id_to_int(tid); + ASSERT(i < ((int)backtrack.size())); + if (backtrack[i]) { + backtrack[i] = false; + numBacktracks--; + } + explored_children[i] = true; +} + +/** + * Mark the appropriate backtracking information for exploring a thread choice. + * @param act The ModelAction to explore + */ +void Node::explore_child(ModelAction *act, enabled_type_t *is_enabled) +{ + if (!enabled_array) + enabled_array = (enabled_type_t *)model_malloc(sizeof(enabled_type_t) * num_threads); + if (is_enabled != NULL) + memcpy(enabled_array, is_enabled, sizeof(enabled_type_t) * num_threads); + else { + for (int i = 0; i < num_threads; i++) + enabled_array[i] = THREAD_DISABLED; + } + + explore(act->get_tid()); +} + +/** + * Records a backtracking reference for a thread choice within this Node. + * Provides feedback as to whether this thread choice is already set for + * backtracking. 
+ * @return false if the thread was already set to be backtracked, true + * otherwise + */ +bool Node::set_backtrack(thread_id_t id) +{ + int i = id_to_int(id); + ASSERT(i < ((int)backtrack.size())); + if (backtrack[i]) + return false; + backtrack[i] = true; + numBacktracks++; + return true; +} + +thread_id_t Node::get_next_backtrack() +{ + /** @todo Find next backtrack */ + unsigned int i; + for (i = 0; i < backtrack.size(); i++) + if (backtrack[i] == true) + break; + /* Backtrack set was empty? */ + ASSERT(i != backtrack.size()); + + backtrack[i] = false; + numBacktracks--; + return int_to_id(i); +} + +void Node::clear_backtracking() +{ + for (unsigned int i = 0; i < backtrack.size(); i++) + backtrack[i] = false; + for (unsigned int i = 0; i < explored_children.size(); i++) + explored_children[i] = false; + numBacktracks = 0; +} + +/************************** end threads backtracking **************************/ + +/*********************************** promise **********************************/ + +/** + * Sets a promise to explore meeting with the given node. + * @param i is the promise index. + */ +void Node::set_promise(unsigned int i) +{ + if (i >= resolve_promise.size()) + resolve_promise.resize(i + 1, false); + resolve_promise[i] = true; +} + +/** + * Looks up whether a given promise should be satisfied by this node. + * @param i The promise index. + * @return true if the promise should be satisfied by the given ModelAction. + */ +bool Node::get_promise(unsigned int i) const +{ + return (i < resolve_promise.size()) && (int)i == resolve_promise_idx; +} + +/** + * Increments to the next promise to resolve. + * @return true if we have a valid combination. + */ +bool Node::increment_promise() +{ + DBG(); + if (resolve_promise.empty()) + return false; + int prev_idx = resolve_promise_idx; + resolve_promise_idx++; + for ( ; resolve_promise_idx < (int)resolve_promise.size(); resolve_promise_idx++) + if (resolve_promise[resolve_promise_idx]) + return true; + resolve_promise_idx = prev_idx; + return false; +} + +/** + * Returns whether the promise set is empty. + * @return true if we have explored all promise combinations. 
+ */ +bool Node::promise_empty() const +{ + for (int i = resolve_promise_idx + 1; i < (int)resolve_promise.size(); i++) + if (i >= 0 && resolve_promise[i]) + return false; + return true; +} + +/** @brief Clear any promise-resolution information for this Node */ +void Node::clear_promise_resolutions() +{ + resolve_promise.clear(); + resolve_promise_idx = -1; +} + +/******************************* end promise **********************************/ + +void Node::set_misc_max(int i) +{ + misc_max = i; +} + +int Node::get_misc() const +{ + return misc_index; +} + +bool Node::increment_misc() +{ + return (misc_index < misc_max) && ((++misc_index) < misc_max); +} + +bool Node::misc_empty() const +{ + return (misc_index + 1) >= misc_max; +} + +bool Node::is_enabled(Thread *t) const +{ + int thread_id = id_to_int(t->get_id()); + return thread_id < num_threads && (enabled_array[thread_id] != THREAD_DISABLED); +} + +enabled_type_t Node::enabled_status(thread_id_t tid) const +{ + int thread_id = id_to_int(tid); + if (thread_id < num_threads) + return enabled_array[thread_id]; + else + return THREAD_DISABLED; +} + +bool Node::is_enabled(thread_id_t tid) const +{ + int thread_id = id_to_int(tid); + return thread_id < num_threads && (enabled_array[thread_id] != THREAD_DISABLED); +} + +bool Node::has_priority(thread_id_t tid) const +{ + return fairness[id_to_int(tid)].priority; +} + +bool Node::has_priority_over(thread_id_t tid1, thread_id_t tid2) const +{ + return get_yield_data(id_to_int(tid1), id_to_int(tid2)) & YIELD_P; +} + +/*********************************** read from ********************************/ + +/** + * Get the current state of the may-read-from set iteration + * @return The read-from type we should currently be checking (past or future) + */ +read_from_type_t Node::get_read_from_status() +{ + if (read_from_status == READ_FROM_PAST && read_from_past.empty()) + increment_read_from(); + return read_from_status; +} + +/** + * Iterate one step in the may-read-from iteration. This includes a step in + * reading from the either the past or the future. + * @return True if there is a new read-from to explore; false otherwise + */ +bool Node::increment_read_from() +{ + clear_promise_resolutions(); + if (increment_read_from_past()) { + read_from_status = READ_FROM_PAST; + return true; + } else if (increment_read_from_promise()) { + read_from_status = READ_FROM_PROMISE; + return true; + } else if (increment_future_value()) { + read_from_status = READ_FROM_FUTURE; + return true; + } + read_from_status = READ_FROM_NONE; + return false; +} + +/** + * @return True if there are any new read-froms to explore + */ +bool Node::read_from_empty() const +{ + return read_from_past_empty() && + read_from_promise_empty() && + future_value_empty(); +} + +/** + * Get the total size of the may-read-from set, including both past and future + * values + * @return The size of may-read-from + */ +unsigned int Node::read_from_size() const +{ + return read_from_past.size() + + read_from_promises.size() + + future_values.size(); +} + +/******************************* end read from ********************************/ + +/****************************** read from past ********************************/ + +/** @brief Prints info about read_from_past set */ +void Node::print_read_from_past() +{ + for (unsigned int i = 0; i < read_from_past.size(); i++) + read_from_past[i]->print(); +} + +/** + * Add an action to the read_from_past set. 
+ * @param act is the action to add + */ +void Node::add_read_from_past(const ModelAction *act) +{ + read_from_past.push_back(act); +} + +/** + * Gets the next 'read_from_past' action from this Node. Only valid for a node + * where this->action is a 'read'. + * @return The first element in read_from_past + */ +const ModelAction * Node::get_read_from_past() const +{ + if (read_from_past_idx < read_from_past.size()) + return read_from_past[read_from_past_idx]; + else + return NULL; +} + +const ModelAction * Node::get_read_from_past(int i) const +{ + return read_from_past[i]; +} + +int Node::get_read_from_past_size() const +{ + return read_from_past.size(); +} + +/** + * Checks whether the readsfrom set for this node is empty. + * @return true if the readsfrom set is empty. + */ +bool Node::read_from_past_empty() const +{ + return ((read_from_past_idx + 1) >= read_from_past.size()); +} + +/** + * Increments the index into the readsfrom set to explore the next item. + * @return Returns false if we have explored all items. + */ +bool Node::increment_read_from_past() +{ + DBG(); + if (read_from_past_idx < read_from_past.size()) { + read_from_past_idx++; + return read_from_past_idx < read_from_past.size(); + } + return false; +} + +/************************** end read from past ********************************/ + +/***************************** read_from_promises *****************************/ + +/** + * Add an action to the read_from_promises set. + * @param reader The read which generated the Promise; we use the ModelAction + * instead of the Promise because the Promise does not last across executions + */ +void Node::add_read_from_promise(const ModelAction *reader) +{ + read_from_promises.push_back(reader); +} + +/** + * Gets the next 'read-from-promise' from this Node. Only valid for a node + * where this->action is a 'read'. + * @return The current element in read_from_promises + */ +Promise * Node::get_read_from_promise() const +{ + ASSERT(read_from_promise_idx >= 0 && read_from_promise_idx < ((int)read_from_promises.size())); + return read_from_promises[read_from_promise_idx]->get_reads_from_promise(); +} + +/** + * Gets a particular 'read-from-promise' form this Node. Only vlaid for a node + * where this->action is a 'read'. + * @param i The index of the Promise to get + * @return The Promise at index i, if the Promise is still available; NULL + * otherwise + */ +Promise * Node::get_read_from_promise(int i) const +{ + return read_from_promises[i]->get_reads_from_promise(); +} + +/** @return The size of the read-from-promise set */ +int Node::get_read_from_promise_size() const +{ + return read_from_promises.size(); +} + +/** + * Checks whether the read_from_promises set for this node is empty. + * @return true if the read_from_promises set is empty. + */ +bool Node::read_from_promise_empty() const +{ + return ((read_from_promise_idx + 1) >= ((int)read_from_promises.size())); +} + +/** + * Increments the index into the read_from_promises set to explore the next item. + * @return Returns false if we have explored all promises. 
+ */ +bool Node::increment_read_from_promise() +{ + DBG(); + if (read_from_promise_idx < ((int)read_from_promises.size())) { + read_from_promise_idx++; + return (read_from_promise_idx < ((int)read_from_promises.size())); + } + return false; +} + +/************************* end read_from_promises *****************************/ + +/****************************** future values *********************************/ + +/** + * Adds a value from a weakly ordered future write to backtrack to. This + * operation may "fail" if the future value has already been run (within some + * sloppiness window of this expiration), or if the futurevalues set has + * reached its maximum. + * @see model_params.maxfuturevalues + * + * @param value is the value to backtrack to. + * @return True if the future value was successully added; false otherwise + */ +bool Node::add_future_value(struct future_value fv) +{ + uint64_t value = fv.value; + modelclock_t expiration = fv.expiration; + thread_id_t tid = fv.tid; + int idx = -1; /* Highest index where value is found */ + for (unsigned int i = 0; i < future_values.size(); i++) { + if (future_values[i].value == value && future_values[i].tid == tid) { + if (expiration <= future_values[i].expiration) + return false; + idx = i; + } + } + if (idx > future_index) { + /* Future value hasn't been explored; update expiration */ + future_values[idx].expiration = expiration; + return true; + } else if (idx >= 0 && expiration <= future_values[idx].expiration + get_params()->expireslop) { + /* Future value has been explored and is within the "sloppy" window */ + return false; + } + + /* Limit the size of the future-values set */ + if (get_params()->maxfuturevalues > 0 && + (int)future_values.size() >= get_params()->maxfuturevalues) + return false; + + future_values.push_back(fv); + return true; +} + +/** + * Gets the next 'future_value' from this Node. Only valid for a node where + * this->action is a 'read'. + * @return The first element in future_values + */ +struct future_value Node::get_future_value() const +{ + ASSERT(future_index >= 0 && future_index < ((int)future_values.size())); + return future_values[future_index]; +} + +/** + * Checks whether the future_values set for this node is empty. + * @return true if the future_values set is empty. + */ +bool Node::future_value_empty() const +{ + return ((future_index + 1) >= ((int)future_values.size())); +} + +/** + * Increments the index into the future_values set to explore the next item. + * @return Returns false if we have explored all values. + */ +bool Node::increment_future_value() +{ + DBG(); + if (future_index < ((int)future_values.size())) { + future_index++; + return (future_index < ((int)future_values.size())); + } + return false; +} + +/************************** end future values *********************************/ + +/*********************** breaking release sequences ***************************/ + +/** + * Add a write ModelAction to the set of writes that may break the release + * sequence. This is used during replay exploration of pending release + * sequences. This Node must correspond to a release sequence fixup action. + * + * @param write The write that may break the release sequence. NULL means we + * allow the release sequence to synchronize. + */ +void Node::add_relseq_break(const ModelAction *write) +{ + relseq_break_writes.push_back(write); +} + +/** + * Get the write that may break the current pending release sequence, + * according to the replay / divergence pattern. 
+ * + * @return A write that may break the release sequence. If NULL, that means + * the release sequence should not be broken. + */ +const ModelAction * Node::get_relseq_break() const +{ + if (relseq_break_index < (int)relseq_break_writes.size()) + return relseq_break_writes[relseq_break_index]; + else + return NULL; +} + +/** + * Increments the index into the relseq_break_writes set to explore the next + * item. + * @return Returns false if we have explored all values. + */ +bool Node::increment_relseq_break() +{ + DBG(); + if (relseq_break_index < ((int)relseq_break_writes.size())) { + relseq_break_index++; + return (relseq_break_index < ((int)relseq_break_writes.size())); + } + return false; +} + +/** + * @return True if all writes that may break the release sequence have been + * explored + */ +bool Node::relseq_break_empty() const +{ + return ((relseq_break_index + 1) >= ((int)relseq_break_writes.size())); +} + +/******************* end breaking release sequences ***************************/ + +/** + * Increments some behavior's index, if a new behavior is available + * @return True if there is a new behavior available; otherwise false + */ +bool Node::increment_behaviors() +{ + /* satisfy a different misc_index values */ + if (increment_misc()) + return true; + /* satisfy a different set of promises */ + if (increment_promise()) + return true; + /* read from a different value */ + if (increment_read_from()) + return true; + /* resolve a release sequence differently */ + if (increment_relseq_break()) + return true; + return false; +} + +NodeStack::NodeStack() : + node_list(), + head_idx(-1), + total_nodes(0) +{ + total_nodes++; +} + +NodeStack::~NodeStack() +{ + for (unsigned int i = 0; i < node_list.size(); i++) + delete node_list[i]; +} + +/** + * @brief Register the model-checker object with this NodeStack + * @param exec The execution structure for the ModelChecker + */ +void NodeStack::register_engine(const ModelExecution *exec) +{ + this->execution = exec; +} + +const struct model_params * NodeStack::get_params() const +{ + return execution->get_params(); +} + +void NodeStack::print() const +{ + model_print("............................................\n"); + model_print("NodeStack printing node_list:\n"); + for (unsigned int it = 0; it < node_list.size(); it++) { + if ((int)it == this->head_idx) + model_print("vvv following action is the current iterator vvv\n"); + node_list[it]->print(); + } + model_print("............................................\n"); +} + +/** Note: The is_enabled set contains what actions were enabled when + * act was chosen. */ +ModelAction * NodeStack::explore_action(ModelAction *act, enabled_type_t *is_enabled) +{ + DBG(); + + if ((head_idx + 1) < (int)node_list.size()) { + head_idx++; + return node_list[head_idx]->get_action(); + } + + /* Record action */ + Node *head = get_head(); + Node *prevfairness = NULL; + if (head) { + head->explore_child(act, is_enabled); + if (get_params()->fairwindow != 0 && head_idx > (int)get_params()->fairwindow) + prevfairness = node_list[head_idx - get_params()->fairwindow]; + } + + int next_threads = execution->get_num_threads(); + if (act->get_type() == THREAD_CREATE) + next_threads++; + node_list.push_back(new Node(get_params(), act, head, next_threads, prevfairness)); + total_nodes++; + head_idx++; + return NULL; +} + +/** + * Empties the stack of all trailing nodes after a given position and calls the + * destructor for each. 
This function is provided an offset which determines + * how many nodes (relative to the current replay state) to save before popping + * the stack. + * @param numAhead gives the number of Nodes (including this Node) to skip over + * before removing nodes. + */ +void NodeStack::pop_restofstack(int numAhead) +{ + /* Diverging from previous execution; clear out remainder of list */ + unsigned int it = head_idx + numAhead; + for (unsigned int i = it; i < node_list.size(); i++) + delete node_list[i]; + node_list.resize(it); + node_list.back()->clear_backtracking(); +} + +Node * NodeStack::get_head() const +{ + if (node_list.empty() || head_idx < 0) + return NULL; + return node_list[head_idx]; +} + +Node * NodeStack::get_next() const +{ + if (node_list.empty()) { + DEBUG("Empty\n"); + return NULL; + } + unsigned int it = head_idx + 1; + if (it == node_list.size()) { + DEBUG("At end\n"); + return NULL; + } + return node_list[it]; +} + +void NodeStack::reset_execution() +{ + head_idx = -1; +} diff --git a/nodestack.h b/nodestack.h new file mode 100644 index 0000000..f26100b --- /dev/null +++ b/nodestack.h @@ -0,0 +1,225 @@ +/** @file nodestack.h + * @brief Stack of operations for use in backtracking. +*/ + +#ifndef __NODESTACK_H__ +#define __NODESTACK_H__ + +#include +#include + +#include "mymemory.h" +#include "schedule.h" +#include "promise.h" +#include "stl-model.h" + +class ModelAction; +class Thread; + +struct fairness_info { + unsigned int enabled_count; + unsigned int turns; + bool priority; +}; + +/** + * @brief Types of read-from relations + * + * Our "may-read-from" set is composed of multiple types of reads, and we have + * to iterate through all of them in the backtracking search. This enumeration + * helps to identify which type of read-from we are currently observing. + */ +typedef enum { + READ_FROM_PAST, /**< @brief Read from a prior, existing store */ + READ_FROM_PROMISE, /**< @brief Read from an existing promised future value */ + READ_FROM_FUTURE, /**< @brief Read from a newly-asserted future value */ + READ_FROM_NONE, /**< @brief A NULL state, which should not be reached */ +} read_from_type_t; + +#define YIELD_E 1 +#define YIELD_D 2 +#define YIELD_S 4 +#define YIELD_P 8 +#define YIELD_INDEX(tid1, tid2, num_threads) (tid1*num_threads+tid2) + + +/** + * @brief A single node in a NodeStack + * + * Represents a single node in the NodeStack. Each Node is associated with up + * to one action and up to one parent node. A node holds information + * regarding the last action performed (the "associated action"), the thread + * choices that have been explored (explored_children) and should be explored + * (backtrack), and the actions that the last action may read from. 
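+ *
+ * As a small illustration of the yield bookkeeping (a sketch only, using the
+ * YIELD_* flags and the YIELD_INDEX macro defined above together with the
+ * private yield_data/num_threads members declared below):
+ *
+ *   // per-pair yield state for the (tid1, tid2) thread pair
+ *   int state = yield_data[YIELD_INDEX(tid1, tid2, num_threads)];
+ *   bool prioritized = (state & YIELD_P) != 0; // what has_priority_over() reports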
+ */
+class Node {
+public:
+ Node(const struct model_params *params, ModelAction *act, Node *par,
+ int nthreads, Node *prevfairness);
+ ~Node();
+ /* return true = thread choice has already been explored */
+ bool has_been_explored(thread_id_t tid) const;
+ /* return true = backtrack set is empty */
+ bool backtrack_empty() const;
+
+ void clear_backtracking();
+ void explore_child(ModelAction *act, enabled_type_t *is_enabled);
+ /* return false = thread was already in backtrack */
+ bool set_backtrack(thread_id_t id);
+ thread_id_t get_next_backtrack();
+ bool is_enabled(Thread *t) const;
+ bool is_enabled(thread_id_t tid) const;
+ enabled_type_t enabled_status(thread_id_t tid) const;
+
+ ModelAction * get_action() const { return action; }
+ void set_uninit_action(ModelAction *act) { uninit_action = act; }
+ ModelAction * get_uninit_action() const { return uninit_action; }
+
+ bool has_priority(thread_id_t tid) const;
+ void update_yield(Scheduler *);
+ bool has_priority_over(thread_id_t tid, thread_id_t tid2) const;
+ int get_num_threads() const { return num_threads; }
+ /** @return the parent Node to this Node; that is, the action that
+ * occurred previously in the stack. */
+ Node * get_parent() const { return parent; }
+
+ read_from_type_t get_read_from_status();
+ bool increment_read_from();
+ bool read_from_empty() const;
+ unsigned int read_from_size() const;
+
+ void print_read_from_past();
+ void add_read_from_past(const ModelAction *act);
+ const ModelAction * get_read_from_past() const;
+ const ModelAction * get_read_from_past(int i) const;
+ int get_read_from_past_size() const;
+
+ void add_read_from_promise(const ModelAction *reader);
+ Promise * get_read_from_promise() const;
+ Promise * get_read_from_promise(int i) const;
+ int get_read_from_promise_size() const;
+
+ bool add_future_value(struct future_value fv);
+ struct future_value get_future_value() const;
+
+ void set_promise(unsigned int i);
+ bool get_promise(unsigned int i) const;
+ bool increment_promise();
+ bool promise_empty() const;
+ void clear_promise_resolutions();
+
+ enabled_type_t *get_enabled_array() {return enabled_array;}
+
+ void set_misc_max(int i);
+ int get_misc() const;
+ bool increment_misc();
+ bool misc_empty() const;
+
+ void add_relseq_break(const ModelAction *write);
+ const ModelAction * get_relseq_break() const;
+ bool increment_relseq_break();
+ bool relseq_break_empty() const;
+
+ bool increment_behaviors();
+
+ void print() const;
+
+ MEMALLOC
+private:
+ void explore(thread_id_t tid);
+ int get_yield_data(int tid1, int tid2) const;
+ bool read_from_past_empty() const;
+ bool increment_read_from_past();
+ bool read_from_promise_empty() const;
+ bool increment_read_from_promise();
+ bool future_value_empty() const;
+ bool increment_future_value();
+ read_from_type_t read_from_status;
+ const struct model_params * get_params() const { return params; }
+
+ ModelAction * const action;
+
+ const struct model_params * const params;
+
+ /** @brief ATOMIC_UNINIT action which was created at this Node */
+ ModelAction *uninit_action;
+
+ Node * const parent;
+ const int num_threads;
+ ModelVector<bool> explored_children;
+ ModelVector<bool> backtrack;
+ ModelVector<struct fairness_info> fairness;
+ int numBacktracks;
+ enabled_type_t *enabled_array;
+
+ /**
+ * The set of past ModelActions that the action at this Node may
+ * read from. Only meaningful if this Node represents a 'read' action.
+ */
+ ModelVector<const ModelAction *> read_from_past;
+ unsigned int read_from_past_idx;
+
+ ModelVector<const ModelAction *> read_from_promises;
+ int read_from_promise_idx;
+
+ ModelVector<struct future_value> future_values;
+ int future_index;
+
+ ModelVector<bool> resolve_promise;
+ int resolve_promise_idx;
+
+ ModelVector<const ModelAction *> relseq_break_writes;
+ int relseq_break_index;
+
+ int misc_index;
+ int misc_max;
+ int * yield_data;
+};
+
+typedef ModelVector<Node *> node_list_t;
+
+/**
+ * @brief A stack of nodes
+ *
+ * Holds a Node linked-list that can be used for holding backtracking,
+ * may-read-from, and replay information. It is used primarily as a
+ * stack-like structure, in that backtracking points and replay nodes are
+ * only removed from the top (most recent).
+ */
+class NodeStack {
+public:
+ NodeStack();
+ ~NodeStack();
+
+ void register_engine(const ModelExecution *exec);
+
+ ModelAction * explore_action(ModelAction *act, enabled_type_t * is_enabled);
+ Node * get_head() const;
+ Node * get_next() const;
+ void reset_execution();
+ void pop_restofstack(int numAhead);
+ int get_total_nodes() { return total_nodes; }
+
+ void print() const;
+
+ MEMALLOC
+private:
+ node_list_t node_list;
+
+ const struct model_params * get_params() const;
+
+ /** @brief The model-checker execution object */
+ const ModelExecution *execution;
+
+ /**
+ * @brief the index position of the current head Node
+ *
+ * This index is relative to node_list. The index should point to the
+ * current head Node. It is negative when the list is empty.
+ */
+ int head_idx;
+
+ int total_nodes;
+};
+
+#endif /* __NODESTACK_H__ */
diff --git a/output.h b/output.h
new file mode 100644
index 0000000..e390bb6
--- /dev/null
+++ b/output.h
@@ -0,0 +1,20 @@
+/** @file output.h
+ * @brief Functions for redirecting program output
+ */
+
+#ifndef __OUTPUT_H__
+#define __OUTPUT_H__
+
+#include "config.h"
+
+#ifdef CONFIG_DEBUG
+static inline void redirect_output() { }
+static inline void clear_program_output() { }
+static inline void print_program_output() { }
+#else
+void redirect_output();
+void clear_program_output();
+void print_program_output();
+#endif /* ! CONFIG_DEBUG */
+
+#endif /* __OUTPUT_H__ */
diff --git a/params.h b/params.h
new file mode 100644
index 0000000..ac5dd96
--- /dev/null
+++ b/params.h
@@ -0,0 +1,37 @@
+#ifndef __PARAMS_H__
+#define __PARAMS_H__
+
+/**
+ * Model checker parameter structure. Holds run-time configuration options for
+ * the model checker.
+ */ +struct model_params { + int maxreads; + int maxfuturedelay; + bool yieldon; + bool yieldblock; + unsigned int fairwindow; + unsigned int enabledcount; + unsigned int bound; + unsigned int uninitvalue; + + /** @brief Maximum number of future values that can be sent to the same + * read */ + int maxfuturevalues; + + /** @brief Only generate a new future value/expiration pair if the + * expiration time exceeds the existing one by more than the slop + * value */ + unsigned int expireslop; + + /** @brief Verbosity (0 = quiet; 1 = noisy; 2 = noisier) */ + int verbose; + + /** @brief Command-line argument count to pass to user program */ + int argc; + + /** @brief Command-line arguments to pass to user program */ + char **argv; +}; + +#endif /* __PARAMS_H__ */ diff --git a/plugins.cc b/plugins.cc new file mode 100644 index 0000000..b1d3cfb --- /dev/null +++ b/plugins.cc @@ -0,0 +1,19 @@ +#include "plugins.h" +#include "scanalysis.h" + +ModelVector * registered_analysis; +ModelVector * installed_analysis; + +void register_plugins() { + registered_analysis=new ModelVector(); + installed_analysis=new ModelVector(); + registered_analysis->push_back(new SCAnalysis()); +} + +ModelVector * getRegisteredTraceAnalysis() { + return registered_analysis; +} + +ModelVector * getInstalledTraceAnalysis() { + return installed_analysis; +} diff --git a/plugins.h b/plugins.h new file mode 100644 index 0000000..ce0f529 --- /dev/null +++ b/plugins.h @@ -0,0 +1,10 @@ +#ifndef PLUGINS_H +#define PLUGINS_H +#include "traceanalysis.h" +#include "stl-model.h" + +void register_plugins(); +ModelVector * getRegisteredTraceAnalysis(); +ModelVector * getInstalledTraceAnalysis(); + +#endif diff --git a/promise.cc b/promise.cc new file mode 100644 index 0000000..3a38384 --- /dev/null +++ b/promise.cc @@ -0,0 +1,189 @@ +#define __STDC_FORMAT_MACROS +#include + +#include "promise.h" +#include "execution.h" +#include "schedule.h" +#include "action.h" +#include "threads-model.h" + +/** + * @brief Promise constructor + * @param execution The execution which is creating this Promise + * @param read The read which reads from a promised future value + * @param fv The future value that is promised + */ +Promise::Promise(const ModelExecution *execution, ModelAction *read, struct future_value fv) : + execution(execution), + num_available_threads(0), + fv(fv), + readers(1, read), + write(NULL) +{ + add_thread(fv.tid); + eliminate_thread(read->get_tid()); +} + +/** + * Add a reader that reads from this Promise. Must be added in an order + * consistent with execution order. + * + * @param reader The ModelAction that reads from this promise. Must be a read. + * @return True if this new reader has invalidated the promise; false otherwise + */ +bool Promise::add_reader(ModelAction *reader) +{ + readers.push_back(reader); + return eliminate_thread(reader->get_tid()); +} + +/** + * Access a reader that read from this Promise. Readers must be inserted in + * order by execution order, so they can be returned in this order. + * + * @param i The index of the reader to return + * @return The i'th reader of this Promise + */ +ModelAction * Promise::get_reader(unsigned int i) const +{ + return i < readers.size() ? readers[i] : NULL; +} + +/** + * Eliminate a thread which no longer can satisfy this promise. Once all + * enabled threads have been eliminated, this promise is unresolvable. 
+ * + * @param tid The thread ID of the thread to eliminate + * @return True, if this elimination has invalidated the promise; false + * otherwise + */ +bool Promise::eliminate_thread(thread_id_t tid) +{ + unsigned int id = id_to_int(tid); + if (!thread_is_available(tid)) + return false; + + available_thread[id] = false; + num_available_threads--; + return has_failed(); +} + +/** + * Add a thread which may resolve this promise + * + * @param tid The thread ID + */ +void Promise::add_thread(thread_id_t tid) +{ + unsigned int id = id_to_int(tid); + if (id >= available_thread.size()) + available_thread.resize(id + 1, false); + if (!available_thread[id]) { + available_thread[id] = true; + num_available_threads++; + } +} + +/** + * Check if a thread is available for resolving this promise. That is, the + * thread must have been previously marked for resolving this promise, and it + * cannot have been eliminated due to synchronization, etc. + * + * @param tid Thread ID of the thread to check + * @return True if the thread is available; false otherwise + */ +bool Promise::thread_is_available(thread_id_t tid) const +{ + unsigned int id = id_to_int(tid); + if (id >= available_thread.size()) + return false; + return available_thread[id]; +} + +/** + * @brief Get an upper bound on the number of available threads + * + * Gets an upper bound on the number of threads in the available threads set, + * useful for iterating over "thread_is_available()". + * + * @return The upper bound + */ +unsigned int Promise::max_available_thread_idx() const +{ + return available_thread.size(); +} + +/** @brief Print debug info about the Promise */ +void Promise::print() const +{ + model_print("Promised value %#" PRIx64 ", first read from thread %d, available threads to resolve: ", + fv.value, id_to_int(get_reader(0)->get_tid())); + bool failed = true; + for (unsigned int i = 0; i < available_thread.size(); i++) + if (available_thread[i]) { + model_print("[%d]", i); + failed = false; + } + if (failed) + model_print("(none)"); + model_print("\n"); +} + +/** + * Check if this promise has failed. A promise can fail when all threads which + * could possibly satisfy the promise have been eliminated. 
+ * + * @return True, if this promise has failed; false otherwise + */ +bool Promise::has_failed() const +{ + return num_available_threads == 0; +} + +/** + * @brief Check if an action's thread and location are compatible for resolving + * this promise + * @param act The action to check against + * @return True if we are compatible; false otherwise + */ +bool Promise::is_compatible(const ModelAction *act) const +{ + return thread_is_available(act->get_tid()) && get_reader(0)->same_var(act); +} + +/** + * @brief Check if an action's thread and location are compatible for resolving + * this promise, and that the promise is thread-exclusive + * @param act The action to check against + * @return True if we are compatible and exclusive; false otherwise + */ +bool Promise::is_compatible_exclusive(const ModelAction *act) const +{ + return get_num_available_threads() == 1 && is_compatible(act); +} + +/** + * @brief Check if a store's value matches this Promise + * @param write The store to check + * @return True if the store's written value matches this Promise + */ +bool Promise::same_value(const ModelAction *write) const +{ + return get_value() == write->get_write_value(); +} + +/** + * @brief Check if a ModelAction's location matches this Promise + * @param act The ModelAction to check + * @return True if the action's location matches this Promise + */ +bool Promise::same_location(const ModelAction *act) const +{ + return get_reader(0)->same_var(act); +} + +/** @brief Get this Promise's index within the execution's promise array */ +int Promise::get_index() const +{ + return execution->get_promise_number(this); +} diff --git a/promise.h b/promise.h new file mode 100644 index 0000000..84d5aa4 --- /dev/null +++ b/promise.h @@ -0,0 +1,74 @@ +/** @file promise.h + * + * @brief Promise class --- tracks future obligations for execution + * related to weakly ordered writes. 
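+ *
+ * Rough lifecycle, as a sketch drawn only from the methods declared below
+ * (variable names are illustrative):
+ *
+ *   Promise *p = new Promise(execution, read, fv); // 'read' is the first reader
+ *   p->add_reader(other_read);   // later readers, added in execution order
+ *   p->eliminate_thread(tid);    // tid can no longer satisfy the promise
+ *   if (p->has_failed())
+ *           ...                  // no available thread is left to write fv.value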
+ */
+
+#ifndef __PROMISE_H__
+#define __PROMISE_H__
+
+#include <inttypes.h>
+
+#include "modeltypes.h"
+#include "mymemory.h"
+#include "stl-model.h"
+
+class ModelAction;
+class ModelExecution;
+
+struct future_value {
+ uint64_t value;
+ modelclock_t expiration;
+ thread_id_t tid;
+};
+
+class Promise {
+ public:
+ Promise(const ModelExecution *execution, ModelAction *read, struct future_value fv);
+ bool add_reader(ModelAction *reader);
+ ModelAction * get_reader(unsigned int i) const;
+ unsigned int get_num_readers() const { return readers.size(); }
+ bool eliminate_thread(thread_id_t tid);
+ void add_thread(thread_id_t tid);
+ bool thread_is_available(thread_id_t tid) const;
+ unsigned int max_available_thread_idx() const;
+ bool has_failed() const;
+ void set_write(const ModelAction *act) { write = act; }
+ const ModelAction * get_write() const { return write; }
+ int get_num_available_threads() const { return num_available_threads; }
+ bool is_compatible(const ModelAction *act) const;
+ bool is_compatible_exclusive(const ModelAction *act) const;
+ bool same_value(const ModelAction *write) const;
+ bool same_location(const ModelAction *act) const;
+
+ modelclock_t get_expiration() const { return fv.expiration; }
+ uint64_t get_value() const { return fv.value; }
+ struct future_value get_fv() const { return fv; }
+
+ int get_index() const;
+
+ void print() const;
+
+ bool equals(const Promise *x) const { return this == x; }
+ bool equals(const ModelAction *x) const { return false; }
+
+ SNAPSHOTALLOC
+ private:
+ /** @brief The execution which created this Promise */
+ const ModelExecution *execution;
+
+ /** @brief Thread ID(s) for thread(s) that potentially can satisfy this
+ * promise */
+ SnapVector<bool> available_thread;
+
+ int num_available_threads;
+
+ const future_value fv;
+
+ /** @brief The action(s) which read the promised future value */
+ SnapVector<ModelAction *> readers;
+
+ const ModelAction *write;
+};
+
+#endif
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..0807b2e
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+#
+# Runs a simple test (default: ./test/userprog.o)
+# Syntax:
+# ./run.sh [test program] [OPTIONS]
+# ./run.sh [OPTIONS]
+# ./run.sh [gdb [test program]]
+#
+# If you include a 'gdb' argument, your program will be launched with gdb.
+# You can also supply a test program argument to run something besides the
+# default program.
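+#
+# Examples (illustrative; they assume the default layout where the compiled
+# tests live under ./test/):
+#
+#   ./run.sh                         # run the default ./test/userprog.o
+#   ./run.sh ./test/userprog.o       # run an explicit test binary
+#   ./run.sh gdb ./test/userprog.o   # run a test binary under gdb
+#
+# Any extra arguments are passed straight through via the final
+# "$PREFIX $BIN $@" line below.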
+# + +# Get the directory in which this script and the binaries are located +BINDIR="${0%/*}" + +BIN=${BINDIR}/test/userprog.o +PREFIX= + +export LD_LIBRARY_PATH=${BINDIR} +# For Mac OSX +export DYLD_LIBRARY_PATH=${BINDIR} + +[ $# -gt 0 ] && [ "$1" = "gdb" ] && PREFIX=gdb && shift +[ $# -gt 0 ] && [ -e "$1" ] && BIN="$1" && shift + +set -xe +$PREFIX $BIN $@ diff --git a/scanalysis.cc b/scanalysis.cc new file mode 100644 index 0000000..1776387 --- /dev/null +++ b/scanalysis.cc @@ -0,0 +1,453 @@ +#include "scanalysis.h" +#include "action.h" +#include "threads-model.h" +#include "clockvector.h" +#include "execution.h" +#include + + +SCAnalysis::SCAnalysis() : + cvmap(), + cyclic(false), + badrfset(), + lastwrmap(), + threadlists(1), + execution(NULL), + print_always(false), + print_buggy(true), + print_nonsc(false), + time(false), + stats((struct sc_statistics *)model_calloc(1, sizeof(struct sc_statistics))) +{ +} + +SCAnalysis::~SCAnalysis() { + delete(stats); +} + +void SCAnalysis::setExecution(ModelExecution * execution) { + this->execution=execution; +} + +const char * SCAnalysis::name() { + const char * name = "SC"; + return name; +} + +void SCAnalysis::finish() { + if (time) + model_print("Elapsed time in usec %llu\n", stats->elapsedtime); + model_print("SC count: %u\n", stats->sccount); + model_print("Non-SC count: %u\n", stats->nonsccount); +} + +bool SCAnalysis::option(char * opt) { + if (strcmp(opt, "verbose")==0) { + print_always=true; + return false; + } else if (strcmp(opt, "buggy")==0) { + return false; + } else if (strcmp(opt, "quiet")==0) { + print_buggy=false; + return false; + } else if (strcmp(opt, "nonsc")==0) { + print_nonsc=true; + return false; + } else if (strcmp(opt, "time")==0) { + time=true; + return false; + } else if (strcmp(opt, "help") != 0) { + model_print("Unrecognized option: %s\n", opt); + } + + model_print("SC Analysis options\n"); + model_print("verbose -- print all feasible executions\n"); + model_print("buggy -- print only buggy executions (default)\n"); + model_print("nonsc -- print non-sc execution\n"); + model_print("quiet -- print nothing\n"); + model_print("time -- time execution of scanalysis\n"); + model_print("\n"); + + return true; +} + +void SCAnalysis::print_list(action_list_t *list) { + model_print("---------------------------------------------------------------------\n"); + if (cyclic) + model_print("Not SC\n"); + unsigned int hash = 0; + + for (action_list_t::iterator it = list->begin(); it != list->end(); it++) { + const ModelAction *act = *it; + if (act->get_seq_number() > 0) { + if (badrfset.contains(act)) + model_print("BRF "); + act->print(); + if (badrfset.contains(act)) { + model_print("Desired Rf: %u \n", badrfset.get(act)->get_seq_number()); + } + } + hash = hash ^ (hash << 3) ^ ((*it)->hash()); + } + model_print("HASH %u\n", hash); + model_print("---------------------------------------------------------------------\n"); +} + +void SCAnalysis::analyze(action_list_t *actions) { + + struct timeval start; + struct timeval finish; + if (time) + gettimeofday(&start, NULL); + action_list_t *list = generateSC(actions); + check_rf(list); + if (print_always || (print_buggy && execution->have_bug_reports())|| (print_nonsc && cyclic)) + print_list(list); + if (time) { + gettimeofday(&finish, NULL); + stats->elapsedtime+=((finish.tv_sec*1000000+finish.tv_usec)-(start.tv_sec*1000000+start.tv_usec)); + } + update_stats(); +} + +void SCAnalysis::update_stats() { + if (cyclic) { + stats->nonsccount++; + } else { + stats->sccount++; + } +} + +void 
SCAnalysis::check_rf(action_list_t *list) { + for (action_list_t::iterator it = list->begin(); it != list->end(); it++) { + const ModelAction *act = *it; + if (act->is_read()) { + if (act->get_reads_from() != lastwrmap.get(act->get_location())) + badrfset.put(act, lastwrmap.get(act->get_location())); + } + if (act->is_write()) + lastwrmap.put(act->get_location(), act); + } +} + +bool SCAnalysis::merge(ClockVector *cv, const ModelAction *act, const ModelAction *act2) { + ClockVector *cv2 = cvmap.get(act2); + if (cv2 == NULL) + return true; + if (cv2->getClock(act->get_tid()) >= act->get_seq_number() && act->get_seq_number() != 0) { + cyclic = true; + //refuse to introduce cycles into clock vectors + return false; + } + + return cv->merge(cv2); +} + +int SCAnalysis::getNextActions(ModelAction ** array) { + int count=0; + + for (int t = 0; t <= maxthreads; t++) { + action_list_t *tlt = &threadlists[t]; + if (tlt->empty()) + continue; + ModelAction *act = tlt->front(); + ClockVector *cv = cvmap.get(act); + + /* Find the earliest in SC ordering */ + for (int i = 0; i <= maxthreads; i++) { + if ( i == t ) + continue; + action_list_t *threadlist = &threadlists[i]; + if (threadlist->empty()) + continue; + ModelAction *first = threadlist->front(); + if (cv->synchronized_since(first)) { + act = NULL; + break; + } + } + if (act != NULL) { + array[count++]=act; + } + } + if (count != 0) + return count; + for (int t = 0; t <= maxthreads; t++) { + action_list_t *tlt = &threadlists[t]; + if (tlt->empty()) + continue; + ModelAction *act = tlt->front(); + ClockVector *cv = act->get_cv(); + + /* Find the earliest in SC ordering */ + for (int i = 0; i <= maxthreads; i++) { + if ( i == t ) + continue; + action_list_t *threadlist = &threadlists[i]; + if (threadlist->empty()) + continue; + ModelAction *first = threadlist->front(); + if (cv->synchronized_since(first)) { + act = NULL; + break; + } + } + if (act != NULL) { + array[count++]=act; + } + } + + ASSERT(count==0 || cyclic); + + return count; +} + +ModelAction * SCAnalysis::pruneArray(ModelAction **array,int count) { + /* No choice */ + if (count == 1) + return array[0]; + + /* Choose first non-write action */ + ModelAction *nonwrite=NULL; + for(int i=0;iis_write()) + if (nonwrite==NULL || nonwrite->get_seq_number() > array[i]->get_seq_number()) + nonwrite = array[i]; + } + if (nonwrite != NULL) + return nonwrite; + + /* Look for non-conflicting action */ + ModelAction *nonconflict=NULL; + for(int a=0;aget_tid()) + continue; + + action_list_t *list = &threadlists[id_to_int(tid)]; + for (action_list_t::iterator rit = list->begin(); rit != list->end(); rit++) { + ModelAction *write = *rit; + if (!write->is_write()) + continue; + ClockVector *writecv = cvmap.get(write); + if (writecv->synchronized_since(act)) + break; + if (write->get_location() == act->get_location()) { + //write is sc after act + act = NULL; + break; + } + } + } + if (act != NULL) { + if (nonconflict == NULL || nonconflict->get_seq_number() > act->get_seq_number()) + nonconflict=act; + } + } + return nonconflict; +} + +action_list_t * SCAnalysis::generateSC(action_list_t *list) { + int numactions=buildVectors(list); + computeCV(list); + + action_list_t *sclist = new action_list_t(); + ModelAction **array = (ModelAction **)model_calloc(1, (maxthreads + 1) * sizeof(ModelAction *)); + int * choices = (int *) model_calloc(1, sizeof(int)*numactions); + int endchoice = 0; + int currchoice = 0; + int lastchoice = -1; + while (true) { + int numActions = getNextActions(array); + if (numActions == 
0) + break; + ModelAction * act=pruneArray(array, numActions); + if (act == NULL) { + if (currchoice < endchoice) { + act = array[choices[currchoice]]; + //check whether there is still another option + if ((choices[currchoice]+1)1) + lastchoice=currchoice; + currchoice++; + } + } + thread_id_t tid = act->get_tid(); + //remove action + threadlists[id_to_int(tid)].pop_front(); + //add ordering constraints from this choice + if (updateConstraints(act)) { + //propagate changes if we have them + bool prevc=cyclic; + computeCV(list); + if (!prevc && cyclic) { + model_print("ROLLBACK in SC\n"); + //check whether we have another choice + if (lastchoice != -1) { + //have to reset everything + choices[lastchoice]++; + endchoice=lastchoice+1; + currchoice=0; + lastchoice=-1; + reset(list); + buildVectors(list); + computeCV(list); + sclist->clear(); + continue; + } + } + } + //add action to end + sclist->push_back(act); + } + model_free(array); + return sclist; +} + +int SCAnalysis::buildVectors(action_list_t *list) { + maxthreads = 0; + int numactions = 0; + for (action_list_t::iterator it = list->begin(); it != list->end(); it++) { + ModelAction *act = *it; + numactions++; + int threadid = id_to_int(act->get_tid()); + if (threadid > maxthreads) { + threadlists.resize(threadid + 1); + maxthreads = threadid; + } + threadlists[threadid].push_back(act); + } + return numactions; +} + +void SCAnalysis::reset(action_list_t *list) { + for (int t = 0; t <= maxthreads; t++) { + action_list_t *tlt = &threadlists[t]; + tlt->clear(); + } + for (action_list_t::iterator it = list->begin(); it != list->end(); it++) { + ModelAction *act = *it; + delete cvmap.get(act); + cvmap.put(act, NULL); + } + + cyclic=false; +} + +bool SCAnalysis::updateConstraints(ModelAction *act) { + bool changed = false; + for (int i = 0; i <= maxthreads; i++) { + thread_id_t tid = int_to_id(i); + if (tid == act->get_tid()) + continue; + + action_list_t *list = &threadlists[id_to_int(tid)]; + for (action_list_t::iterator rit = list->begin(); rit != list->end(); rit++) { + ModelAction *write = *rit; + if (!write->is_write()) + continue; + ClockVector *writecv = cvmap.get(write); + if (writecv->synchronized_since(act)) + break; + if (write->get_location() == act->get_location()) { + //write is sc after act + merge(writecv, write, act); + changed = true; + break; + } + } + } + return changed; +} + +bool SCAnalysis::processRead(ModelAction *read, ClockVector *cv) { + bool changed = false; + + /* Merge in the clock vector from the write */ + const ModelAction *write = read->get_reads_from(); + ClockVector *writecv = cvmap.get(write); + changed |= merge(cv, read, write) && (*read < *write); + + for (int i = 0; i <= maxthreads; i++) { + thread_id_t tid = int_to_id(i); + if (tid == read->get_tid()) + continue; + if (tid == write->get_tid()) + continue; + action_list_t *list = execution->get_actions_on_obj(read->get_location(), tid); + if (list == NULL) + continue; + for (action_list_t::reverse_iterator rit = list->rbegin(); rit != list->rend(); rit++) { + ModelAction *write2 = *rit; + if (!write2->is_write()) + continue; + + ClockVector *write2cv = cvmap.get(write2); + if (write2cv == NULL) + continue; + + /* write -sc-> write2 && + write -rf-> R => + R -sc-> write2 */ + if (write2cv->synchronized_since(write)) { + changed |= merge(write2cv, write2, read); + } + + //looking for earliest write2 in iteration to satisfy this + /* write2 -sc-> R && + write -rf-> R => + write2 -sc-> write */ + if (cv->synchronized_since(write2)) { + changed |= writecv 
== NULL || merge(writecv, write, write2); + break; + } + } + } + return changed; +} + +void SCAnalysis::computeCV(action_list_t *list) { + bool changed = true; + bool firsttime = true; + ModelAction **last_act = (ModelAction **)model_calloc(1, (maxthreads + 1) * sizeof(ModelAction *)); + while (changed) { + changed = changed&firsttime; + firsttime = false; + + for (action_list_t::iterator it = list->begin(); it != list->end(); it++) { + ModelAction *act = *it; + ModelAction *lastact = last_act[id_to_int(act->get_tid())]; + if (act->is_thread_start()) + lastact = execution->get_thread(act)->get_creation(); + last_act[id_to_int(act->get_tid())] = act; + ClockVector *cv = cvmap.get(act); + if (cv == NULL) { + cv = new ClockVector(NULL, act); + cvmap.put(act, cv); + } + if (lastact != NULL) { + merge(cv, act, lastact); + } + if (act->is_thread_join()) { + Thread *joinedthr = act->get_thread_operand(); + ModelAction *finish = execution->get_last_action(joinedthr->get_id()); + changed |= merge(cv, act, finish); + } + if (act->is_read()) { + changed |= processRead(act, cv); + } + } + /* Reset the last action array */ + if (changed) { + bzero(last_act, (maxthreads + 1) * sizeof(ModelAction *)); + } + } + model_free(last_act); +} diff --git a/scanalysis.h b/scanalysis.h new file mode 100644 index 0000000..988c8f7 --- /dev/null +++ b/scanalysis.h @@ -0,0 +1,51 @@ +#ifndef SCANALYSIS_H +#define SCANALYSIS_H +#include "traceanalysis.h" +#include "hashtable.h" + +struct sc_statistics { + unsigned long long elapsedtime; + unsigned int sccount; + unsigned int nonsccount; +}; + +class SCAnalysis : public TraceAnalysis { + public: + SCAnalysis(); + ~SCAnalysis(); + virtual void setExecution(ModelExecution * execution); + virtual void analyze(action_list_t *); + virtual const char * name(); + virtual bool option(char *); + virtual void finish(); + + + SNAPSHOTALLOC + private: + void update_stats(); + void print_list(action_list_t *list); + int buildVectors(action_list_t *); + bool updateConstraints(ModelAction *act); + void computeCV(action_list_t *); + action_list_t * generateSC(action_list_t *); + bool processRead(ModelAction *read, ClockVector *cv); + int getNextActions(ModelAction **array); + bool merge(ClockVector *cv, const ModelAction *act, const ModelAction *act2); + void check_rf(action_list_t *list); + void reset(action_list_t *list); + ModelAction* pruneArray(ModelAction**, int); + + int maxthreads; + HashTable cvmap; + bool cyclic; + HashTable badrfset; + HashTable lastwrmap; + SnapVector threadlists; + ModelExecution *execution; + bool print_always; + bool print_buggy; + bool print_nonsc; + bool time; + struct sc_statistics *stats; +}; +#endif diff --git a/schedule.cc b/schedule.cc new file mode 100644 index 0000000..2ef4c4d --- /dev/null +++ b/schedule.cc @@ -0,0 +1,298 @@ +#include +#include + +#include "threads-model.h" +#include "schedule.h" +#include "common.h" +#include "model.h" +#include "nodestack.h" +#include "execution.h" + +/** + * Format an "enabled_type_t" for printing + * @param e The type to format + * @param str The output character array + */ +void enabled_type_to_string(enabled_type_t e, char *str) +{ + const char *res; + switch (e) { + case THREAD_DISABLED: + res = "disabled"; + break; + case THREAD_ENABLED: + res = "enabled"; + break; + case THREAD_SLEEP_SET: + res = "sleep"; + break; + default: + ASSERT(0); + res = NULL; + break; + } + strcpy(str, res); +} + +/** Constructor */ +Scheduler::Scheduler() : + execution(NULL), + enabled(NULL), + enabled_len(0), + 
curr_thread_index(0), + current(NULL) +{ +} + +/** + * @brief Register the ModelExecution engine + * @param execution The ModelExecution which is controlling execution + */ +void Scheduler::register_engine(ModelExecution *execution) +{ + this->execution = execution; +} + +void Scheduler::set_enabled(Thread *t, enabled_type_t enabled_status) { + int threadid = id_to_int(t->get_id()); + if (threadid >= enabled_len) { + enabled_type_t *new_enabled = (enabled_type_t *)snapshot_malloc(sizeof(enabled_type_t) * (threadid + 1)); + memset(&new_enabled[enabled_len], 0, (threadid + 1 - enabled_len) * sizeof(enabled_type_t)); + if (enabled != NULL) { + memcpy(new_enabled, enabled, enabled_len * sizeof(enabled_type_t)); + snapshot_free(enabled); + } + enabled = new_enabled; + enabled_len = threadid + 1; + } + enabled[threadid] = enabled_status; + if (enabled_status == THREAD_DISABLED) + execution->check_promises_thread_disabled(); +} + +/** + * @brief Check if a Thread is currently enabled + * + * Check if a Thread is currently enabled. "Enabled" includes both + * THREAD_ENABLED and THREAD_SLEEP_SET. + * @param t The Thread to check + * @return True if the Thread is currently enabled + */ +bool Scheduler::is_enabled(const Thread *t) const +{ + return is_enabled(t->get_id()); +} + +/** + * @brief Check if a Thread is currently enabled + * + * Check if a Thread is currently enabled. "Enabled" includes both + * THREAD_ENABLED and THREAD_SLEEP_SET. + * @param tid The ID of the Thread to check + * @return True if the Thread is currently enabled + */ +bool Scheduler::is_enabled(thread_id_t tid) const +{ + int i = id_to_int(tid); + return (i >= enabled_len) ? false : (enabled[i] != THREAD_DISABLED); +} + +/** + * @brief Check if a Thread is currently in the sleep set + * @param t The Thread to check + * @return True if the Thread is currently enabled + */ +bool Scheduler::is_sleep_set(const Thread *t) const +{ + return get_enabled(t) == THREAD_SLEEP_SET; +} + +/** + * @brief Check if execution is stuck with no enabled threads and some sleeping + * thread + * @return True if no threads are enabled an some thread is in the sleep set; + * false otherwise + */ +bool Scheduler::all_threads_sleeping() const +{ + bool sleeping = false; + for (int i = 0; i < enabled_len; i++) + if (enabled[i] == THREAD_ENABLED) + return false; + else if (enabled[i] == THREAD_SLEEP_SET) + sleeping = true; + return sleeping; +} + +enabled_type_t Scheduler::get_enabled(const Thread *t) const +{ + int id = id_to_int(t->get_id()); + ASSERT(id < enabled_len); + return enabled[id]; +} + +void Scheduler::update_sleep_set(Node *n) { + enabled_type_t *enabled_array = n->get_enabled_array(); + for (int i = 0; i < enabled_len; i++) { + if (enabled_array[i] == THREAD_SLEEP_SET) { + enabled[i] = THREAD_SLEEP_SET; + } + } +} + +/** + * Add a Thread to the sleep set. + * @param t The Thread to add + */ +void Scheduler::add_sleep(Thread *t) +{ + DEBUG("thread %d\n", id_to_int(t->get_id())); + set_enabled(t, THREAD_SLEEP_SET); +} + +/** + * Remove a Thread from the sleep set. + * @param t The Thread to remove + */ +void Scheduler::remove_sleep(Thread *t) +{ + DEBUG("thread %d\n", id_to_int(t->get_id())); + set_enabled(t, THREAD_ENABLED); +} + +/** + * Add a Thread to the scheduler's ready list. + * @param t The Thread to add + */ +void Scheduler::add_thread(Thread *t) +{ + DEBUG("thread %d\n", id_to_int(t->get_id())); + ASSERT(!t->is_model_thread()); + set_enabled(t, THREAD_ENABLED); +} + +/** + * Remove a given Thread from the scheduler. 
+ * @param t The Thread to remove + */ +void Scheduler::remove_thread(Thread *t) +{ + if (current == t) + current = NULL; + set_enabled(t, THREAD_DISABLED); +} + +/** + * Prevent a Thread from being scheduled. The sleeping Thread should be + * re-awoken via Scheduler::wake. + * @param thread The Thread that should sleep + */ +void Scheduler::sleep(Thread *t) +{ + set_enabled(t, THREAD_DISABLED); + t->set_state(THREAD_BLOCKED); +} + +/** + * Wake a Thread up that was previously waiting (see Scheduler::wait) + * @param t The Thread to wake up + */ +void Scheduler::wake(Thread *t) +{ + ASSERT(!t->is_model_thread()); + set_enabled(t, THREAD_ENABLED); + t->set_state(THREAD_READY); +} + +/** + * @brief Select a Thread to run via round-robin + * + * @param n The current Node, holding priority information for the next thread + * selection + * + * @return The next Thread to run + */ +Thread * Scheduler::select_next_thread(Node *n) +{ + int old_curr_thread = curr_thread_index; + + bool have_enabled_thread_with_priority = false; + if (model->params.fairwindow != 0) { + for (int i = 0; i < enabled_len; i++) { + thread_id_t tid = int_to_id(i); + if (n->has_priority(tid)) { + DEBUG("Node (tid %d) has priority\n", i); + if (enabled[i] != THREAD_DISABLED) + have_enabled_thread_with_priority = true; + } + } + } + + for (int i = 0; i < enabled_len; i++) { + curr_thread_index = (old_curr_thread + i + 1) % enabled_len; + thread_id_t curr_tid = int_to_id(curr_thread_index); + if (model->params.yieldon) { + bool bad_thread = false; + for (int j = 0; j < enabled_len; j++) { + thread_id_t tother = int_to_id(j); + if ((enabled[j] != THREAD_DISABLED) && n->has_priority_over(curr_tid, tother)) { + bad_thread=true; + break; + } + } + if (bad_thread) + continue; + } + + if (enabled[curr_thread_index] == THREAD_ENABLED && + (!have_enabled_thread_with_priority || n->has_priority(curr_tid))) { + return model->get_thread(curr_tid); + } + } + + /* No thread was enabled */ + return NULL; +} + +void Scheduler::set_scheduler_thread(thread_id_t tid) { + curr_thread_index=id_to_int(tid); +} + +/** + * @brief Set the current "running" Thread + * @param t Thread to run + */ +void Scheduler::set_current_thread(Thread *t) +{ + ASSERT(!t || !t->is_model_thread()); + + current = t; + if (DBG_ENABLED()) + print(); +} + +/** + * @return The currently-running Thread + */ +Thread * Scheduler::get_current_thread() const +{ + ASSERT(!current || !current->is_model_thread()); + return current; +} + +/** + * Print debugging information about the current state of the scheduler. Only + * prints something if debugging is enabled. + */ +void Scheduler::print() const +{ + int curr_id = current ? id_to_int(current->get_id()) : -1; + + model_print("Scheduler: "); + for (int i = 0; i < enabled_len; i++) { + char str[20]; + enabled_type_to_string(enabled[i], str); + model_print("[%i: %s%s]", i, i == curr_id ? "current, " : "", str); + } + model_print("\n"); +} diff --git a/schedule.h b/schedule.h new file mode 100644 index 0000000..9b16a7a --- /dev/null +++ b/schedule.h @@ -0,0 +1,63 @@ +/** @file schedule.h + * @brief Thread scheduler. 
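+ *
+ * A small usage sketch (illustrative only; built from the methods declared
+ * below, with 't' standing for some Thread *):
+ *
+ *   scheduler->add_thread(t);    // t becomes THREAD_ENABLED
+ *   scheduler->add_sleep(t);     // t moves to THREAD_SLEEP_SET
+ *   scheduler->is_enabled(t);    // still true: sleep-set threads count as enabled
+ *   scheduler->remove_thread(t); // t becomes THREAD_DISABLED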
+ */ + +#ifndef __SCHEDULE_H__ +#define __SCHEDULE_H__ + +#include "mymemory.h" +#include "modeltypes.h" + +/* Forward declaration */ +class Thread; +class Node; +class ModelExecution; + +typedef enum enabled_type { + THREAD_DISABLED, + THREAD_ENABLED, + THREAD_SLEEP_SET +} enabled_type_t; + +void enabled_type_to_string(enabled_type_t e, char *str); + +/** @brief The Scheduler class performs the mechanics of Thread execution + * scheduling. */ +class Scheduler { +public: + Scheduler(); + void register_engine(ModelExecution *execution); + + void add_thread(Thread *t); + void remove_thread(Thread *t); + void sleep(Thread *t); + void wake(Thread *t); + Thread * select_next_thread(Node *n); + void set_current_thread(Thread *t); + Thread * get_current_thread() const; + void print() const; + enabled_type_t * get_enabled_array() const { return enabled; }; + void remove_sleep(Thread *t); + void add_sleep(Thread *t); + enabled_type_t get_enabled(const Thread *t) const; + void update_sleep_set(Node *n); + bool is_enabled(const Thread *t) const; + bool is_enabled(thread_id_t tid) const; + bool is_sleep_set(const Thread *t) const; + bool all_threads_sleeping() const; + void set_scheduler_thread(thread_id_t tid); + + SNAPSHOTALLOC +private: + ModelExecution *execution; + /** The list of available Threads that are not currently running */ + enabled_type_t *enabled; + int enabled_len; + int curr_thread_index; + void set_enabled(Thread *t, enabled_type_t enabled_status); + + /** The currently-running Thread */ + Thread *current; +}; + +#endif /* __SCHEDULE_H__ */ diff --git a/snapshot-interface.cc b/snapshot-interface.cc new file mode 100644 index 0000000..fdabcf3 --- /dev/null +++ b/snapshot-interface.cc @@ -0,0 +1,173 @@ +#include +#include +#include +#include + +#include "snapshot-interface.h" +#include "snapshot.h" +#include "common.h" +#include "mymemory.h" +#include "stl-model.h" + +/* MYBINARYNAME only works because our pathname usually includes 'model' (e.g., + * /.../model-checker/test/userprog.o) */ +#define MYBINARYNAME "model" +#define MAPFILE "/proc/self/maps" + +struct snapshot_entry { + snapshot_entry(snapshot_id id, int idx) : snapshotid(id), index(idx) { } + snapshot_id snapshotid; + int index; + MEMALLOC +}; + +class SnapshotStack { + public: + int backTrackBeforeStep(int seq_index); + void snapshotStep(int seq_index); + + MEMALLOC + private: + ModelVector stack; +}; + +static SnapshotStack *snap_stack; + +#ifdef MAC +/** The SnapshotGlobalSegments function computes the memory regions + * that may contain globals and then configures the snapshotting + * library to snapshot them. 
+ */ +static void SnapshotGlobalSegments() +{ + int pid = getpid(); + char buf[9000], execname[100]; + FILE *map; + + sprintf(execname, "vmmap -interleaved %d", pid); + map = popen(execname, "r"); + + if (!map) { + perror("popen"); + exit(EXIT_FAILURE); + } + + /* Wait for correct part */ + while (fgets(buf, sizeof(buf), map)) { + if (strstr(buf, "==== regions for process")) + break; + } + + while (fgets(buf, sizeof(buf), map)) { + char regionname[200] = ""; + char type[23]; + char smstr[23]; + char r, w, x; + char mr, mw, mx; + int size; + void *begin, *end; + + //Skip out at the end of the section + if (buf[0] == '\n') + break; + + sscanf(buf, "%22s %p-%p [%5dK] %c%c%c/%c%c%c SM=%3s %200s\n", type, &begin, &end, &size, &r, &w, &x, &mr, &mw, &mx, smstr, regionname); + + if (w == 'w' && strstr(regionname, MYBINARYNAME)) { + size_t len = ((uintptr_t)end - (uintptr_t)begin) / PAGESIZE; + if (len != 0) + snapshot_add_memory_region(begin, len); + } + } + pclose(map); +} +#else + +static void get_binary_name(char *buf, size_t len) +{ + ssize_t size = readlink("/proc/self/exe", buf, len); + if (size < 0) { + perror("readlink"); + exit(EXIT_FAILURE); + } + + /* Terminate string */ + if ((size_t)size > len) + size = len; + buf[size] = '\0'; +} + +/** The SnapshotGlobalSegments function computes the memory regions + * that may contain globals and then configures the snapshotting + * library to snapshot them. + */ +static void SnapshotGlobalSegments() +{ + char buf[9000]; + char binary_name[800]; + FILE *map; + + map = fopen(MAPFILE, "r"); + if (!map) { + perror("fopen"); + exit(EXIT_FAILURE); + } + get_binary_name(binary_name, sizeof(binary_name)); + while (fgets(buf, sizeof(buf), map)) { + char regionname[200] = ""; + char r, w, x, p; + void *begin, *end; + + sscanf(buf, "%p-%p %c%c%c%c %*x %*x:%*x %*u %200s\n", &begin, &end, &r, &w, &x, &p, regionname); + if (w == 'w' && strstr(regionname, binary_name)) { + size_t len = ((uintptr_t)end - (uintptr_t)begin) / PAGESIZE; + if (len != 0) + snapshot_add_memory_region(begin, len); + DEBUG("%55s: %18p - %18p\t%c%c%c%c\n", regionname, begin, end, r, w, x, p); + } + } + fclose(map); +} +#endif + +/** This method returns to the last snapshot before the inputted + * sequence number. This function must be called from the model + * checking thread and not from a snapshotted stack. + * @param seqindex is the sequence number to rollback before. + * @return is the sequence number we actually rolled back to. + */ +int SnapshotStack::backTrackBeforeStep(int seqindex) +{ + int i; + for (i = (int)stack.size() - 1; i >= 0; i++) + if (stack[i].index <= seqindex) + break; + else + stack.pop_back(); + + ASSERT(i >= 0); + snapshot_roll_back(stack[i].snapshotid); + return stack[i].index; +} + +/** This method takes a snapshot at the given sequence number. 
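+ * It records a (snapshot id, sequence number) pair so that
+ * backTrackBeforeStep() can later map a sequence number back to the
+ * snapshot that must be restored.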
*/ +void SnapshotStack::snapshotStep(int seqindex) +{ + stack.push_back(snapshot_entry(take_snapshot(), seqindex)); +} + +void snapshot_stack_init() +{ + snap_stack = new SnapshotStack(); + SnapshotGlobalSegments(); +} + +void snapshot_record(int seq_index) +{ + snap_stack->snapshotStep(seq_index); +} + +int snapshot_backtrack_before(int seq_index) +{ + return snap_stack->backTrackBeforeStep(seq_index); +} diff --git a/snapshot-interface.h b/snapshot-interface.h new file mode 100644 index 0000000..7f4de21 --- /dev/null +++ b/snapshot-interface.h @@ -0,0 +1,20 @@ +/** + * @file snapshot-interface.h + * @brief C interface layer on top of snapshotting system + */ + +#ifndef __SNAPINTERFACE_H +#define __SNAPINTERFACE_H + +typedef unsigned int snapshot_id; + +typedef void (*VoidFuncPtr)(); +void snapshot_system_init(unsigned int numbackingpages, + unsigned int numsnapshots, unsigned int nummemoryregions, + unsigned int numheappages, VoidFuncPtr entryPoint); + +void snapshot_stack_init(); +void snapshot_record(int seq_index); +int snapshot_backtrack_before(int seq_index); + +#endif diff --git a/snapshot.cc b/snapshot.cc new file mode 100644 index 0000000..66faacd --- /dev/null +++ b/snapshot.cc @@ -0,0 +1,476 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hashtable.h" +#include "snapshot.h" +#include "mymemory.h" +#include "common.h" +#include "context.h" + +/** PageAlignedAdressUpdate return a page aligned address for the + * address being added as a side effect the numBytes are also changed. + */ +static void * PageAlignAddressUpward(void *addr) +{ + return (void *)((((uintptr_t)addr) + PAGESIZE - 1) & ~(PAGESIZE - 1)); +} + +#if USE_MPROTECT_SNAPSHOT + +/* Each SnapShotRecord lists the firstbackingpage that must be written to + * revert to that snapshot */ +struct SnapShotRecord { + unsigned int firstBackingPage; +}; + +/** @brief Backing store page */ +typedef unsigned char snapshot_page_t[PAGESIZE]; + +/* List the base address of the corresponding page in the backing store so we + * know where to copy it to */ +struct BackingPageRecord { + void *basePtrOfPage; +}; + +/* Struct for each memory region */ +struct MemoryRegion { + void *basePtr; // base of memory region + int sizeInPages; // size of memory region in pages +}; + +/** ReturnPageAlignedAddress returns a page aligned address for the + * address being added as a side effect the numBytes are also changed. 
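+ * (In this implementation it simply rounds addr down to the start of its
+ * page; PageAlignAddressUpward above rounds up instead.)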
+ */ +static void * ReturnPageAlignedAddress(void *addr) +{ + return (void *)(((uintptr_t)addr) & ~(PAGESIZE - 1)); +} + +/* Primary struct for snapshotting system */ +struct mprot_snapshotter { + mprot_snapshotter(unsigned int numbackingpages, unsigned int numsnapshots, unsigned int nummemoryregions); + ~mprot_snapshotter(); + + struct MemoryRegion *regionsToSnapShot; //This pointer references an array of memory regions to snapshot + snapshot_page_t *backingStore; //This pointer references an array of snapshotpage's that form the backing store + void *backingStoreBasePtr; //This pointer references an array of snapshotpage's that form the backing store + struct BackingPageRecord *backingRecords; //This pointer references an array of backingpagerecord's (same number of elements as backingstore + struct SnapShotRecord *snapShots; //This pointer references the snapshot array + + unsigned int lastSnapShot; //Stores the next snapshot record we should use + unsigned int lastBackingPage; //Stores the next backingpage we should use + unsigned int lastRegion; //Stores the next memory region to be used + + unsigned int maxRegions; //Stores the max number of memory regions we support + unsigned int maxBackingPages; //Stores the total number of backing pages + unsigned int maxSnapShots; //Stores the total number of snapshots we allow + + MEMALLOC +}; + +static struct mprot_snapshotter *mprot_snap = NULL; + +mprot_snapshotter::mprot_snapshotter(unsigned int backing_pages, unsigned int snapshots, unsigned int regions) : + lastSnapShot(0), + lastBackingPage(0), + lastRegion(0), + maxRegions(regions), + maxBackingPages(backing_pages), + maxSnapShots(snapshots) +{ + regionsToSnapShot = (struct MemoryRegion *)model_malloc(sizeof(struct MemoryRegion) * regions); + backingStoreBasePtr = (void *)model_malloc(sizeof(snapshot_page_t) * (backing_pages + 1)); + //Page align the backingstorepages + backingStore = (snapshot_page_t *)PageAlignAddressUpward(backingStoreBasePtr); + backingRecords = (struct BackingPageRecord *)model_malloc(sizeof(struct BackingPageRecord) * backing_pages); + snapShots = (struct SnapShotRecord *)model_malloc(sizeof(struct SnapShotRecord) * snapshots); +} + +mprot_snapshotter::~mprot_snapshotter() +{ + model_free(regionsToSnapShot); + model_free(backingStoreBasePtr); + model_free(backingRecords); + model_free(snapShots); +} + +/** mprot_handle_pf is the page fault handler for mprotect based snapshotting + * algorithm. + */ +static void mprot_handle_pf(int sig, siginfo_t *si, void *unused) +{ + if (si->si_code == SEGV_MAPERR) { + model_print("Segmentation fault at %p\n", si->si_addr); + model_print("For debugging, place breakpoint at: %s:%d\n", + __FILE__, __LINE__); + // print_trace(); // Trace printing may cause dynamic memory allocation + exit(EXIT_FAILURE); + } + void* addr = ReturnPageAlignedAddress(si->si_addr); + + unsigned int backingpage = mprot_snap->lastBackingPage++; //Could run out of pages... + if (backingpage == mprot_snap->maxBackingPages) { + model_print("Out of backing pages at %p\n", si->si_addr); + exit(EXIT_FAILURE); + } + + //copy page + memcpy(&(mprot_snap->backingStore[backingpage]), addr, sizeof(snapshot_page_t)); + //remember where to copy page back to + mprot_snap->backingRecords[backingpage].basePtrOfPage = addr; + //set protection to read/write + if (mprotect(addr, sizeof(snapshot_page_t), PROT_READ | PROT_WRITE)) { + perror("mprotect"); + // Handle error by quitting? 
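+		// If the protection change fails, the faulting store will simply
+		// fault again on return from the handler, so exiting would be the
+		// safer response.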
+ } +} + +static void mprot_snapshot_init(unsigned int numbackingpages, + unsigned int numsnapshots, unsigned int nummemoryregions, + unsigned int numheappages, VoidFuncPtr entryPoint) +{ + /* Setup a stack for our signal handler.... */ + stack_t ss; + ss.ss_sp = PageAlignAddressUpward(model_malloc(SIGSTACKSIZE + PAGESIZE - 1)); + ss.ss_size = SIGSTACKSIZE; + ss.ss_flags = 0; + sigaltstack(&ss, NULL); + + struct sigaction sa; + sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART | SA_ONSTACK; + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = mprot_handle_pf; +#ifdef MAC + if (sigaction(SIGBUS, &sa, NULL) == -1) { + perror("sigaction(SIGBUS)"); + exit(EXIT_FAILURE); + } +#endif + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction(SIGSEGV)"); + exit(EXIT_FAILURE); + } + + mprot_snap = new mprot_snapshotter(numbackingpages, numsnapshots, nummemoryregions); + + // EVIL HACK: We need to make sure that calls into the mprot_handle_pf method don't cause dynamic links + // The problem is that we end up protecting state in the dynamic linker... + // Solution is to call our signal handler before we start protecting stuff... + + siginfo_t si; + memset(&si, 0, sizeof(si)); + si.si_addr = ss.ss_sp; + mprot_handle_pf(SIGSEGV, &si, NULL); + mprot_snap->lastBackingPage--; //remove the fake page we copied + + void *basemySpace = model_malloc((numheappages + 1) * PAGESIZE); + void *pagealignedbase = PageAlignAddressUpward(basemySpace); + user_snapshot_space = create_mspace_with_base(pagealignedbase, numheappages * PAGESIZE, 1); + snapshot_add_memory_region(pagealignedbase, numheappages); + + void *base_model_snapshot_space = model_malloc((numheappages + 1) * PAGESIZE); + pagealignedbase = PageAlignAddressUpward(base_model_snapshot_space); + model_snapshot_space = create_mspace_with_base(pagealignedbase, numheappages * PAGESIZE, 1); + snapshot_add_memory_region(pagealignedbase, numheappages); + + entryPoint(); +} + +static void mprot_add_to_snapshot(void *addr, unsigned int numPages) +{ + unsigned int memoryregion = mprot_snap->lastRegion++; + if (memoryregion == mprot_snap->maxRegions) { + model_print("Exceeded supported number of memory regions!\n"); + exit(EXIT_FAILURE); + } + + DEBUG("snapshot region %p-%p (%u page%s)\n", + addr, (char *)addr + numPages * PAGESIZE, numPages, + numPages > 1 ? 
"s" : ""); + mprot_snap->regionsToSnapShot[memoryregion].basePtr = addr; + mprot_snap->regionsToSnapShot[memoryregion].sizeInPages = numPages; +} + +static snapshot_id mprot_take_snapshot() +{ + for (unsigned int region = 0; region < mprot_snap->lastRegion; region++) { + if (mprotect(mprot_snap->regionsToSnapShot[region].basePtr, mprot_snap->regionsToSnapShot[region].sizeInPages * sizeof(snapshot_page_t), PROT_READ) == -1) { + perror("mprotect"); + model_print("Failed to mprotect inside of takeSnapShot\n"); + exit(EXIT_FAILURE); + } + } + unsigned int snapshot = mprot_snap->lastSnapShot++; + if (snapshot == mprot_snap->maxSnapShots) { + model_print("Out of snapshots\n"); + exit(EXIT_FAILURE); + } + mprot_snap->snapShots[snapshot].firstBackingPage = mprot_snap->lastBackingPage; + + return snapshot; +} + +static void mprot_roll_back(snapshot_id theID) +{ +#if USE_MPROTECT_SNAPSHOT == 2 + if (mprot_snap->lastSnapShot == (theID + 1)) { + for (unsigned int page = mprot_snap->snapShots[theID].firstBackingPage; page < mprot_snap->lastBackingPage; page++) { + memcpy(mprot_snap->backingRecords[page].basePtrOfPage, &mprot_snap->backingStore[page], sizeof(snapshot_page_t)); + } + return; + } +#endif + + HashTable< void *, bool, uintptr_t, 4, model_malloc, model_calloc, model_free> duplicateMap; + for (unsigned int region = 0; region < mprot_snap->lastRegion; region++) { + if (mprotect(mprot_snap->regionsToSnapShot[region].basePtr, mprot_snap->regionsToSnapShot[region].sizeInPages * sizeof(snapshot_page_t), PROT_READ | PROT_WRITE) == -1) { + perror("mprotect"); + model_print("Failed to mprotect inside of takeSnapShot\n"); + exit(EXIT_FAILURE); + } + } + for (unsigned int page = mprot_snap->snapShots[theID].firstBackingPage; page < mprot_snap->lastBackingPage; page++) { + if (!duplicateMap.contains(mprot_snap->backingRecords[page].basePtrOfPage)) { + duplicateMap.put(mprot_snap->backingRecords[page].basePtrOfPage, true); + memcpy(mprot_snap->backingRecords[page].basePtrOfPage, &mprot_snap->backingStore[page], sizeof(snapshot_page_t)); + } + } + mprot_snap->lastSnapShot = theID; + mprot_snap->lastBackingPage = mprot_snap->snapShots[theID].firstBackingPage; + mprot_take_snapshot(); //Make sure current snapshot is still good...All later ones are cleared +} + +#else /* !USE_MPROTECT_SNAPSHOT */ + +#define SHARED_MEMORY_DEFAULT (100 * ((size_t)1 << 20)) // 100mb for the shared memory +#define STACK_SIZE_DEFAULT (((size_t)1 << 20) * 20) // 20 mb out of the above 100 mb for my stack + +struct fork_snapshotter { + /** @brief Pointer to the shared (non-snapshot) memory heap base + * (NOTE: this has size SHARED_MEMORY_DEFAULT - sizeof(*fork_snap)) */ + void *mSharedMemoryBase; + + /** @brief Pointer to the shared (non-snapshot) stack region */ + void *mStackBase; + + /** @brief Size of the shared stack */ + size_t mStackSize; + + /** + * @brief Stores the ID that we are attempting to roll back to + * + * Used in inter-process communication so that each process can + * determine whether or not to take over execution (w/ matching ID) or + * exit (we're rolling back even further). Dubiously marked 'volatile' + * to prevent compiler optimizations from messing with the + * inter-process behavior. + */ + volatile snapshot_id mIDToRollback; + + /** + * @brief The context for the shared (non-snapshot) stack + * + * This context is passed between the various processes which represent + * various snapshot states. It should be used primarily for the + * "client-side" code, not the main snapshot loop. 
+ */ + ucontext_t shared_ctxt; + + /** @brief Inter-process tracking of the next snapshot ID */ + snapshot_id currSnapShotID; +}; + +static struct fork_snapshotter *fork_snap = NULL; + +/** @statics +* These variables are necessary because the stack is shared region and +* there exists a race between all processes executing the same function. +* To avoid the problem above, we require variables allocated in 'safe' regions. +* The bug was actually observed with the forkID, these variables below are +* used to indicate the various contexts to which to switch to. +* +* @private_ctxt: the context which is internal to the current process. Used +* for running the internal snapshot/rollback loop. +* @exit_ctxt: a special context used just for exiting from a process (so we +* can use swapcontext() instead of setcontext() + hacks) +* @snapshotid: it is a running counter for the various forked processes +* snapshotid. it is incremented and set in a persistently shared record +*/ +static ucontext_t private_ctxt; +static ucontext_t exit_ctxt; +static snapshot_id snapshotid = 0; + +/** + * @brief Create a new context, with a given stack and entry function + * @param ctxt The context structure to fill + * @param stack The stack to run the new context in + * @param stacksize The size of the stack + * @param func The entry point function for the context + */ +static void create_context(ucontext_t *ctxt, void *stack, size_t stacksize, + void (*func)(void)) +{ + getcontext(ctxt); + ctxt->uc_stack.ss_sp = stack; + ctxt->uc_stack.ss_size = stacksize; + makecontext(ctxt, func, 0); +} + +/** @brief An empty function, used for an "empty" context which just exits a + * process */ +static void fork_exit() +{ + /* Intentionally empty */ +} + +static void createSharedMemory() +{ + //step 1. create shared memory. + void *memMapBase = mmap(0, SHARED_MEMORY_DEFAULT + STACK_SIZE_DEFAULT, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + if (memMapBase == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + //Setup snapshot record at top of free region + fork_snap = (struct fork_snapshotter *)memMapBase; + fork_snap->mSharedMemoryBase = (void *)((uintptr_t)memMapBase + sizeof(*fork_snap)); + fork_snap->mStackBase = (void *)((uintptr_t)memMapBase + SHARED_MEMORY_DEFAULT); + fork_snap->mStackSize = STACK_SIZE_DEFAULT; + fork_snap->mIDToRollback = -1; + fork_snap->currSnapShotID = 0; +} + +/** + * Create a new mspace pointer for the non-snapshotting (i.e., inter-process + * shared) memory region. Only for fork-based snapshotting. 
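+ * Allocations from it land in the MAP_SHARED mapping created by
+ * createSharedMemory(), so they remain visible to, and survive, every
+ * fork()ed child.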
+ * + * @return The shared memory mspace + */ +mspace create_shared_mspace() +{ + if (!fork_snap) + createSharedMemory(); + return create_mspace_with_base((void *)(fork_snap->mSharedMemoryBase), SHARED_MEMORY_DEFAULT - sizeof(*fork_snap), 1); +} + +static void fork_snapshot_init(unsigned int numbackingpages, + unsigned int numsnapshots, unsigned int nummemoryregions, + unsigned int numheappages, VoidFuncPtr entryPoint) +{ + if (!fork_snap) + createSharedMemory(); + + void *base_model_snapshot_space = malloc((numheappages + 1) * PAGESIZE); + void *pagealignedbase = PageAlignAddressUpward(base_model_snapshot_space); + model_snapshot_space = create_mspace_with_base(pagealignedbase, numheappages * PAGESIZE, 1); + + /* setup an "exiting" context */ + char stack[128]; + create_context(&exit_ctxt, stack, sizeof(stack), fork_exit); + + /* setup the shared-stack context */ + create_context(&fork_snap->shared_ctxt, fork_snap->mStackBase, + STACK_SIZE_DEFAULT, entryPoint); + /* switch to a new entryPoint context, on a new stack */ + model_swapcontext(&private_ctxt, &fork_snap->shared_ctxt); + + /* switch back here when takesnapshot is called */ + snapshotid = fork_snap->currSnapShotID; + + while (true) { + pid_t forkedID; + fork_snap->currSnapShotID = snapshotid + 1; + forkedID = fork(); + + if (0 == forkedID) { + setcontext(&fork_snap->shared_ctxt); + } else { + DEBUG("parent PID: %d, child PID: %d, snapshot ID: %d\n", + getpid(), forkedID, snapshotid); + + while (waitpid(forkedID, NULL, 0) < 0) { + /* waitpid() may be interrupted */ + if (errno != EINTR) { + perror("waitpid"); + exit(EXIT_FAILURE); + } + } + + if (fork_snap->mIDToRollback != snapshotid) + exit(EXIT_SUCCESS); + } + } +} + +static snapshot_id fork_take_snapshot() +{ + model_swapcontext(&fork_snap->shared_ctxt, &private_ctxt); + DEBUG("TAKESNAPSHOT RETURN\n"); + return snapshotid; +} + +static void fork_roll_back(snapshot_id theID) +{ + DEBUG("Rollback\n"); + fork_snap->mIDToRollback = theID; + model_swapcontext(&fork_snap->shared_ctxt, &exit_ctxt); + fork_snap->mIDToRollback = -1; +} + +#endif /* !USE_MPROTECT_SNAPSHOT */ + +/** + * @brief Initializes the snapshot system + * @param entryPoint the function that should run the program. + */ +void snapshot_system_init(unsigned int numbackingpages, + unsigned int numsnapshots, unsigned int nummemoryregions, + unsigned int numheappages, VoidFuncPtr entryPoint) +{ +#if USE_MPROTECT_SNAPSHOT + mprot_snapshot_init(numbackingpages, numsnapshots, nummemoryregions, numheappages, entryPoint); +#else + fork_snapshot_init(numbackingpages, numsnapshots, nummemoryregions, numheappages, entryPoint); +#endif +} + +/** Assumes that addr is page aligned. */ +void snapshot_add_memory_region(void *addr, unsigned int numPages) +{ +#if USE_MPROTECT_SNAPSHOT + mprot_add_to_snapshot(addr, numPages); +#else + /* not needed for fork-based snapshotting */ +#endif +} + +/** Takes a snapshot of memory. + * @return The snapshot identifier. + */ +snapshot_id take_snapshot() +{ +#if USE_MPROTECT_SNAPSHOT + return mprot_take_snapshot(); +#else + return fork_take_snapshot(); +#endif +} + +/** Rolls the memory state back to the given snapshot identifier. + * @param theID is the snapshot identifier to rollback to. 
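+ *
+ * Under mprotect-based snapshotting this copies the saved backing pages back
+ * into place; under fork-based snapshotting it unwinds the chain of forked
+ * processes until the one that took snapshot theID forks a fresh child to
+ * continue from that point.
+ *
+ * These calls are normally reached through the wrappers in
+ * snapshot-interface.cc:
+ *
+ *   snapshot_record(seq);             // take a snapshot, remember seq
+ *   ...
+ *   snapshot_backtrack_before(seq);   // restore the last snapshot <= seq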
+ */ +void snapshot_roll_back(snapshot_id theID) +{ +#if USE_MPROTECT_SNAPSHOT + mprot_roll_back(theID); +#else + fork_roll_back(theID); +#endif +} diff --git a/snapshot.h b/snapshot.h new file mode 100644 index 0000000..d3fc7b8 --- /dev/null +++ b/snapshot.h @@ -0,0 +1,20 @@ +/** @file snapshot.h + * @brief Snapshotting interface header file. + */ + +#ifndef _SNAPSHOT_H +#define _SNAPSHOT_H + +#include "snapshot-interface.h" +#include "config.h" +#include "mymemory.h" + +void snapshot_add_memory_region(void *ptr, unsigned int numPages); +snapshot_id take_snapshot(); +void snapshot_roll_back(snapshot_id theSnapShot); + +#if !USE_MPROTECT_SNAPSHOT +mspace create_shared_mspace(); +#endif + +#endif diff --git a/stacktrace.h b/stacktrace.h new file mode 100644 index 0000000..a3b0350 --- /dev/null +++ b/stacktrace.h @@ -0,0 +1,94 @@ +// stacktrace.h (c) 2008, Timo Bingmann from http://idlebox.net/ +// published under the WTFPL v2.0 + +#ifndef __STACKTRACE_H__ +#define __STACKTRACE_H__ + +#include +#include +#include +#include + +/** + * @brief Print a demangled stack backtrace of the caller function to file + * descriptor fd. + */ +static inline void print_stacktrace(int fd = STDERR_FILENO, unsigned int max_frames = 63) +{ + dprintf(fd, "stack trace:\n"); + + // storage array for stack trace address data + void* addrlist[max_frames+1]; + + // retrieve current stack addresses + int addrlen = backtrace(addrlist, sizeof(addrlist) / sizeof(void*)); + + if (addrlen == 0) { + dprintf(fd, " \n"); + return; + } + + // resolve addresses into strings containing "filename(function+address)", + // this array must be free()-ed + char** symbollist = backtrace_symbols(addrlist, addrlen); + + // allocate string which will be filled with the demangled function name + size_t funcnamesize = 256; + char* funcname = (char*)malloc(funcnamesize); + + // iterate over the returned symbol lines. skip the first, it is the + // address of this function. + for (int i = 1; i < addrlen; i++) { + char *begin_name = 0, *begin_offset = 0, *end_offset = 0; + + // find parentheses and +address offset surrounding the mangled name: + // ./module(function+0x15c) [0x8048a6d] + for (char *p = symbollist[i]; *p; ++p) { + if (*p == '(') + begin_name = p; + else if (*p == '+') + begin_offset = p; + else if (*p == ')' && begin_offset) { + end_offset = p; + break; + } + } + + if (begin_name && begin_offset && end_offset && begin_name < begin_offset) { + *begin_name++ = '\0'; + *begin_offset++ = '\0'; + *end_offset = '\0'; + + // mangled name is now in [begin_name, begin_offset) and caller + // offset in [begin_offset, end_offset). now apply + // __cxa_demangle(): + + int status; + char* ret = abi::__cxa_demangle(begin_name, + funcname, &funcnamesize, &status); + if (status == 0) { + funcname = ret; // use possibly realloc()-ed string + dprintf(fd, " %s : %s+%s\n", + symbollist[i], funcname, begin_offset); + } else { + // demangling failed. Output function name as a C function with + // no arguments. + dprintf(fd, " %s : %s()+%s\n", + symbollist[i], begin_name, begin_offset); + } + } else { + // couldn't parse the line? print the whole line. 
+ dprintf(fd, " %s\n", symbollist[i]); + } + } + + free(funcname); + free(symbollist); +} + +static inline void print_stacktrace(FILE *out, unsigned int max_frames = 63) +{ + print_stacktrace(fileno(out), max_frames); +} + +#endif // __STACKTRACE_H__ diff --git a/stl-model.h b/stl-model.h new file mode 100644 index 0000000..ae6e8b2 --- /dev/null +++ b/stl-model.h @@ -0,0 +1,76 @@ +#ifndef __STL_MODEL_H__ +#define __STL_MODEL_H__ + +#include +#include +#include "mymemory.h" + +template +class ModelList : public std::list<_Tp, ModelAlloc<_Tp> > +{ + public: + typedef std::list< _Tp, ModelAlloc<_Tp> > list; + + ModelList() : + list() + { } + + ModelList(size_t n, const _Tp& val = _Tp()) : + list(n, val) + { } + + MEMALLOC +}; + +template +class SnapList : public std::list<_Tp, SnapshotAlloc<_Tp> > +{ + public: + typedef std::list<_Tp, SnapshotAlloc<_Tp> > list; + + SnapList() : + list() + { } + + SnapList(size_t n, const _Tp& val = _Tp()) : + list(n, val) + { } + + SNAPSHOTALLOC +}; + +template +class ModelVector : public std::vector<_Tp, ModelAlloc<_Tp> > +{ + public: + typedef std::vector< _Tp, ModelAlloc<_Tp> > vector; + + ModelVector() : + vector() + { } + + ModelVector(size_t n, const _Tp& val = _Tp()) : + vector(n, val) + { } + + MEMALLOC +}; + +template +class SnapVector : public std::vector<_Tp, SnapshotAlloc<_Tp> > +{ + public: + typedef std::vector< _Tp, SnapshotAlloc<_Tp> > vector; + + SnapVector() : + vector() + { } + + SnapVector(size_t n, const _Tp& val = _Tp()) : + vector(n, val) + { } + + SNAPSHOTALLOC +}; + +#endif /* __STL_MODEL_H__ */ diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..9d7acb0 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,26 @@ +BASE := .. + +OBJECTS := $(patsubst %.c, %.o, $(wildcard *.c)) +OBJECTS += $(patsubst %.cc, %.o, $(wildcard *.cc)) + +include $(BASE)/common.mk + +DIR := litmus +include $(DIR)/Makefile + +DEPS := $(join $(addsuffix ., $(dir $(OBJECTS))), $(addsuffix .d, $(notdir $(OBJECTS)))) + +CPPFLAGS += -I$(BASE) -I$(BASE)/include + +all: $(OBJECTS) + +-include $(DEPS) + +%.o: %.c + $(CC) -MMD -MF $(@D)/.$(@F).d -o $@ $< $(CPPFLAGS) -L$(BASE) -l$(LIB_NAME) + +%.o: %.cc + $(CXX) -MMD -MF $(@D)/.$(@F).d -o $@ $< $(CPPFLAGS) -L$(BASE) -l$(LIB_NAME) + +clean:: + rm -f $(OBJECTS) $(DEPS) diff --git a/test/addr-satcycle.cc b/test/addr-satcycle.cc new file mode 100644 index 0000000..0d3ba5a --- /dev/null +++ b/test/addr-satcycle.cc @@ -0,0 +1,67 @@ +/** + * @file addr-satcycle.cc + * @brief Address-based satisfaction cycle test + * + * This program has a peculiar behavior which is technically legal under the + * current C++ memory model but which is a result of a type of satisfaction + * cycle. We use this as justification for part of our modifications to the + * memory model when proving our model-checker's correctness. + */ + +#include +#include +#include + +#include "model-assert.h" + +using namespace std; + +atomic_int x[2], idx, y; + +int r1, r2, r3; /* "local" variables */ + +static void a(void *obj) +{ + r1 = idx.load(memory_order_relaxed); + x[r1].store(0, memory_order_relaxed); + + /* Key point: can we guarantee that &x[0] == &x[r1]? 
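+	 * Only when r1 == 0 does the store above overwrite x[0]; in the
+	 * questionable execution idx reads 1 (a value justified only by this
+	 * thread's later y.store()), so x[0] keeps its initial value and r2
+	 * can read 1.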
*/ + r2 = x[0].load(memory_order_relaxed); + y.store(r2); +} + +static void b(void *obj) +{ + r3 = y.load(memory_order_relaxed); + idx.store(r3, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x[0], 1); + atomic_init(&idx, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + printf("r1 = %d\n", r1); + printf("r2 = %d\n", r2); + printf("r3 = %d\n", r3); + + /* + * This condition should not be hit because it only occurs under a + * satisfaction cycle + */ + bool cycle = (r1 == 1 && r2 == 1 && r3 == 1); + MODEL_ASSERT(!cycle); + + return 0; +} diff --git a/test/condvar.cc b/test/condvar.cc new file mode 100644 index 0000000..ff8feb1 --- /dev/null +++ b/test/condvar.cc @@ -0,0 +1,44 @@ +#include + +#include "threads.h" +#include "librace.h" +#include "stdatomic.h" +#include +#include + +std::mutex * m; +std::condition_variable *v; +int shareddata; + +static void a(void *obj) +{ + + m->lock(); + while(load_32(&shareddata)==0) + v->wait(*m); + m->unlock(); + +} + +static void b(void *obj) +{ + m->lock(); + store_32(&shareddata, (unsigned int) 1); + v->notify_all(); + m->unlock(); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + store_32(&shareddata, (unsigned int) 0); + m=new std::mutex(); + v=new std::condition_variable(); + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + return 0; +} diff --git a/test/csetest.c b/test/csetest.c new file mode 100644 index 0000000..2058f9c --- /dev/null +++ b/test/csetest.c @@ -0,0 +1,43 @@ +#include +#include +#include + +#include "librace.h" + +atomic_int a; +atomic_int b; + +static void r(void *obj) +{ + int r1=atomic_load_explicit(&a, memory_order_relaxed); + int r2=atomic_load_explicit(&a, memory_order_relaxed); + if (r1==r2) + atomic_store_explicit(&b, 2, memory_order_relaxed); + printf("r1=%d\n",r1); + printf("r2=%d\n",r2); +} + +static void s(void *obj) +{ + int r3=atomic_load_explicit(&b, memory_order_relaxed); + atomic_store_explicit(&a, r3, memory_order_relaxed); + printf("r3=%d\n",r3); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&a, 0); + atomic_init(&b, 1); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&r, NULL); + thrd_create(&t2, (thrd_start_t)&s, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/deadlock.cc b/test/deadlock.cc new file mode 100644 index 0000000..4810aa4 --- /dev/null +++ b/test/deadlock.cc @@ -0,0 +1,46 @@ +#include +#include +#include + +#include "librace.h" + +std::mutex *x; +std::mutex *y; +uint32_t shared = 0; + +static void a(void *obj) +{ + x->lock(); + y->lock(); + printf("shared = %u\n", load_32(&shared)); + y->unlock(); + x->unlock(); +} + +static void b(void *obj) +{ + y->lock(); + x->lock(); + store_32(&shared, 16); + printf("write shared = 16\n"); + x->unlock(); + y->unlock(); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + x = new std::mutex(); + y = new std::mutex(); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/double-read-fv.c 
b/test/double-read-fv.c new file mode 100755 index 0000000..120cdc3 --- /dev/null +++ b/test/double-read-fv.c @@ -0,0 +1,50 @@ +/* + * Try to read the same value as a future value twice. + * + * This test should be able to see r1 = r2 = 42. Currently, we never see that + * (as of 2/21/13) because the r2 load won't have a potential future value of + * 42 at the same time as r1, due to our scheduling (the loads for r1 and r2 + * must occur before the write of x = 42). + * + * Note that the atomic_int y is simply used to aid in forcing a particularly + * interesting scheduling. It is superfluous. + */ +#include +#include +#include + +#include "librace.h" + +atomic_int x; +atomic_int y; + +static void a(void *obj) +{ + int r1 = atomic_load_explicit(&x, memory_order_relaxed); + int r2 = atomic_load_explicit(&x, memory_order_relaxed); + printf("r1 = %d, r2 = %d\n", r1, r2); +} + +static void b(void *obj) +{ + atomic_store_explicit(&y, 43, memory_order_relaxed); + atomic_store_explicit(&x, 42, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/double-relseq.c b/test/double-relseq.c new file mode 100644 index 0000000..2ad1987 --- /dev/null +++ b/test/double-relseq.c @@ -0,0 +1,59 @@ +/* + * This test performs some relaxed, release, acquire opeations on a single + * atomic variable. It can give some rough idea of release sequence support but + * probably should be improved to give better information. + * + * This test tries to establish two release sequences, where we should always + * either establish both or establish neither. (Note that this is only true for + * a few executions of interest, where both load-acquire's read from the same + * write.) 
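+ *
+ * In those executions both acquire loads read the relaxed store of 42; they
+ * synchronize with the release store of 1 only if thread c's relaxed store
+ * of 2 does not intervene in modification order and break the release
+ * sequence. That is a property of the execution as a whole, so it holds for
+ * both loads or for neither.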
+ */ + +#include +#include +#include + +#include "librace.h" + +atomic_int x; +int var = 0; + +static void a(void *obj) +{ + store_32(&var, 1); + atomic_store_explicit(&x, 1, memory_order_release); + atomic_store_explicit(&x, 42, memory_order_relaxed); +} + +static void b(void *obj) +{ + int r = atomic_load_explicit(&x, memory_order_acquire); + printf("r = %d\n", r); + printf("load %d\n", load_32(&var)); +} + +static void c(void *obj) +{ + atomic_store_explicit(&x, 2, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3, t4; + + atomic_init(&x, 0); + + printf("Main thread: creating 4 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&b, NULL); + thrd_create(&t4, (thrd_start_t)&c, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + thrd_join(t4); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/fences.c b/test/fences.c new file mode 100644 index 0000000..4d0328f --- /dev/null +++ b/test/fences.c @@ -0,0 +1,42 @@ +#include +#include +#include + +#include "librace.h" + +atomic_int x; +atomic_int y; + +static void a(void *obj) +{ + atomic_store_explicit(&x, 1, memory_order_relaxed); + atomic_store_explicit(&x, 2, memory_order_relaxed); + atomic_thread_fence(memory_order_seq_cst); + printf("Thread A reads: %d\n", atomic_load_explicit(&y, memory_order_relaxed)); +} + +static void b(void *obj) +{ + atomic_store_explicit(&y, 1, memory_order_relaxed); + atomic_store_explicit(&y, 2, memory_order_relaxed); + atomic_thread_fence(memory_order_seq_cst); + printf("Thread B reads: %d\n", atomic_load_explicit(&x, memory_order_relaxed)); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finishing\n"); + + return 0; +} diff --git a/test/fences2.c b/test/fences2.c new file mode 100644 index 0000000..2c80d61 --- /dev/null +++ b/test/fences2.c @@ -0,0 +1,46 @@ +#include +#include +#include + +#include "librace.h" +#include "model-assert.h" + +atomic_int x; +atomic_int y; + +static void a(void *obj) +{ + atomic_store_explicit(&x, 1, memory_order_relaxed); + atomic_thread_fence(memory_order_release); + atomic_store_explicit(&x, 2, memory_order_relaxed); +} + +static void b(void *obj) +{ + int r1, r2; + r1 = atomic_load_explicit(&x, memory_order_relaxed); + atomic_thread_fence(memory_order_acquire); + r2 = atomic_load_explicit(&x, memory_order_relaxed); + + printf("FENCES: r1 = %d, r2 = %d\n", r1, r2); + if (r1 == 2) + MODEL_ASSERT(r2 != 1); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finishing\n"); + + return 0; +} diff --git a/test/insanesync.cc b/test/insanesync.cc new file mode 100644 index 0000000..c0fe7f6 --- /dev/null +++ b/test/insanesync.cc @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +#include "librace.h" +#include "model-assert.h" + +using namespace std; + +atomic_int x, y; +atomic_intptr_t z, z2; + +int r1, r2, r3; /* "local" variables */ + +/** + This example illustrates a self-satisfying cycle involving + synchronization. 
A failed synchronization creates the store that + causes the synchronization to fail. + + The C++11 memory model nominally allows r1=0, r2=1, r3=5. + + This example is insane, we don't support that behavior. +*/ + + +static void a(void *obj) +{ + z.store((intptr_t)&y, memory_order_relaxed); + r1 = y.fetch_add(1, memory_order_release); + z.store((intptr_t)&x, memory_order_relaxed); + r2 = y.fetch_add(1, memory_order_release); +} + + +static void b(void *obj) +{ + r3 = y.fetch_add(1, memory_order_acquire); + intptr_t ptr = z.load(memory_order_relaxed); + z2.store(ptr, memory_order_relaxed); +} + +static void c(void *obj) +{ + atomic_int *ptr2 = (atomic_int *)z2.load(memory_order_relaxed); + (*ptr2).store(5, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3; + + atomic_init(&x, 0); + atomic_init(&y, 0); + atomic_init(&z, (intptr_t) &x); + atomic_init(&z2, (intptr_t) &x); + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + + printf("r1=%d, r2=%d, r3=%d\n", r1, r2, r3); + + return 0; +} diff --git a/test/linuxrwlocks.c b/test/linuxrwlocks.c new file mode 100644 index 0000000..7e317aa --- /dev/null +++ b/test/linuxrwlocks.c @@ -0,0 +1,112 @@ +#include +#include +#include + +#include "librace.h" + +#define RW_LOCK_BIAS 0x00100000 +#define WRITE_LOCK_CMP RW_LOCK_BIAS + +/** Example implementation of linux rw lock along with 2 thread test + * driver... */ + +typedef union { + atomic_int lock; +} rwlock_t; + +static inline int read_can_lock(rwlock_t *lock) +{ + return atomic_load_explicit(&lock->lock, memory_order_relaxed) > 0; +} + +static inline int write_can_lock(rwlock_t *lock) +{ + return atomic_load_explicit(&lock->lock, memory_order_relaxed) == RW_LOCK_BIAS; +} + +static inline void read_lock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + while (priorvalue <= 0) { + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_relaxed); + do { + priorvalue = atomic_load_explicit(&rw->lock, memory_order_relaxed); + } while (priorvalue <= 0); + priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + } +} + +static inline void write_lock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + while (priorvalue != RW_LOCK_BIAS) { + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_relaxed); + do { + priorvalue = atomic_load_explicit(&rw->lock, memory_order_relaxed); + } while (priorvalue != RW_LOCK_BIAS); + priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + } +} + +static inline int read_trylock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + if (priorvalue > 0) + return 1; + + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_relaxed); + return 0; +} + +static inline int write_trylock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + if (priorvalue == RW_LOCK_BIAS) + return 1; + + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_relaxed); + return 0; +} + +static inline void read_unlock(rwlock_t *rw) +{ + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_release); +} + +static inline void write_unlock(rwlock_t *rw) +{ + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_release); +} + +rwlock_t mylock; 
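+
+/* Counter interpretation: the lock field starts at RW_LOCK_BIAS. Readers
+ * subtract 1 and a writer subtracts the whole bias, so
+ *   value == RW_LOCK_BIAS            -> free
+ *   0 < value < RW_LOCK_BIAS         -> held by (RW_LOCK_BIAS - value) readers
+ *   value <= 0                       -> a writer holds it or is backing off
+ * read_can_lock()/write_can_lock() above are exactly these comparisons. */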
+int shareddata; + +static void a(void *obj) +{ + int i; + for(i = 0; i < 2; i++) { + if ((i % 2) == 0) { + read_lock(&mylock); + load_32(&shareddata); + read_unlock(&mylock); + } else { + write_lock(&mylock); + store_32(&shareddata,(unsigned int)i); + write_unlock(&mylock); + } + } +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + atomic_init(&mylock.lock, RW_LOCK_BIAS); + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&a, NULL); + + thrd_join(t1); + thrd_join(t2); + + return 0; +} diff --git a/test/linuxrwlocksyield.c b/test/linuxrwlocksyield.c new file mode 100644 index 0000000..be3550e --- /dev/null +++ b/test/linuxrwlocksyield.c @@ -0,0 +1,112 @@ +#include +#include +#include + +#include "librace.h" + +#define RW_LOCK_BIAS 0x00100000 +#define WRITE_LOCK_CMP RW_LOCK_BIAS + +/** Example implementation of linux rw lock along with 2 thread test + * driver... */ + +typedef union { + atomic_int lock; +} rwlock_t; + +static inline int read_can_lock(rwlock_t *lock) +{ + return atomic_load_explicit(&lock->lock, memory_order_relaxed) > 0; +} + +static inline int write_can_lock(rwlock_t *lock) +{ + return atomic_load_explicit(&lock->lock, memory_order_relaxed) == RW_LOCK_BIAS; +} + +static inline void read_lock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + while (priorvalue <= 0) { + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_relaxed); + while (atomic_load_explicit(&rw->lock, memory_order_relaxed) <= 0) { + thrd_yield(); + } + priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + } +} + +static inline void write_lock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + while (priorvalue != RW_LOCK_BIAS) { + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_relaxed); + while (atomic_load_explicit(&rw->lock, memory_order_relaxed) != RW_LOCK_BIAS) { + thrd_yield(); + } + priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + } +} + +static inline int read_trylock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, 1, memory_order_acquire); + if (priorvalue > 0) + return 1; + + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_relaxed); + return 0; +} + +static inline int write_trylock(rwlock_t *rw) +{ + int priorvalue = atomic_fetch_sub_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_acquire); + if (priorvalue == RW_LOCK_BIAS) + return 1; + + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_relaxed); + return 0; +} + +static inline void read_unlock(rwlock_t *rw) +{ + atomic_fetch_add_explicit(&rw->lock, 1, memory_order_release); +} + +static inline void write_unlock(rwlock_t *rw) +{ + atomic_fetch_add_explicit(&rw->lock, RW_LOCK_BIAS, memory_order_release); +} + +rwlock_t mylock; +int shareddata; + +static void a(void *obj) +{ + int i; + for(i = 0; i < 2; i++) { + if ((i % 2) == 0) { + read_lock(&mylock); + load_32(&shareddata); + read_unlock(&mylock); + } else { + write_lock(&mylock); + store_32(&shareddata,(unsigned int)i); + write_unlock(&mylock); + } + } +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + atomic_init(&mylock.lock, RW_LOCK_BIAS); + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&a, NULL); + + thrd_join(t1); + thrd_join(t2); + + return 0; +} diff --git a/test/litmus/Makefile b/test/litmus/Makefile new file mode 100644 index 0000000..a4a19b7 --- /dev/null +++ 
b/test/litmus/Makefile @@ -0,0 +1,4 @@ +D := $(DIR) + +OBJECTS += $(patsubst %.c, %.o, $(wildcard $(D)/*.c)) +OBJECTS += $(patsubst %.cc, %.o, $(wildcard $(D)/*.cc)) diff --git a/test/litmus/iriw.cc b/test/litmus/iriw.cc new file mode 100644 index 0000000..fa4a034 --- /dev/null +++ b/test/litmus/iriw.cc @@ -0,0 +1,57 @@ +#include +#include +#include + +std::atomic_int x; +std::atomic_int y; + +std::memory_order store_mo = std::memory_order_release; +std::memory_order load_mo = std::memory_order_acquire; + +static void a(void *obj) +{ + x.store(1, store_mo); +} + +static void b(void *obj) +{ + y.store(1, store_mo); +} + +static void c(void *obj) +{ + printf("x1: %d\n", x.load(load_mo)); + printf("y1: %d\n", y.load(load_mo)); +} + +static void d(void *obj) +{ + printf("y2: %d\n", y.load(load_mo)); + printf("x2: %d\n", x.load(load_mo)); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3, t4; + + /* Command-line argument 's' enables seq_cst test */ + if (argc > 1 && *argv[1] == 's') + store_mo = load_mo = std::memory_order_seq_cst; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 4 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + thrd_create(&t4, (thrd_start_t)&d, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + thrd_join(t4); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/litmus/load-buffer.cc b/test/litmus/load-buffer.cc new file mode 100644 index 0000000..9c9923c --- /dev/null +++ b/test/litmus/load-buffer.cc @@ -0,0 +1,36 @@ +#include +#include +#include + +std::atomic_int x; +std::atomic_int y; + +static void a(void *obj) +{ + printf("x: %d\n", x.load(std::memory_order_relaxed)); + y.store(1, std::memory_order_relaxed); +} + +static void b(void *obj) +{ + printf("y: %d\n", y.load(std::memory_order_relaxed)); + x.store(1, std::memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/litmus/message-passing.cc b/test/litmus/message-passing.cc new file mode 100644 index 0000000..6ef41eb --- /dev/null +++ b/test/litmus/message-passing.cc @@ -0,0 +1,44 @@ +#include +#include +#include + +std::atomic_int x; +std::atomic_int y; + +static void a(void *obj) +{ + x.store(1, std::memory_order_relaxed); + y.store(1, std::memory_order_relaxed); +} + +static void b(void *obj) +{ + printf("y1: %d\n", y.load(std::memory_order_relaxed)); + printf("x1: %d\n", x.load(std::memory_order_relaxed)); +} + +static void c(void *obj) +{ + printf("x2: %d\n", x.load(std::memory_order_relaxed)); + printf("y2: %d\n", y.load(std::memory_order_relaxed)); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 3 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/litmus/seq-lock.cc b/test/litmus/seq-lock.cc new file mode 100644 index 0000000..03724e6 --- /dev/null +++ b/test/litmus/seq-lock.cc @@ -0,0 +1,67 @@ +#include 
+#include +#include +#include + +#include "model-assert.h" + +/* + * This 'seqlock' example should never trigger the MODEL_ASSERT() for + * release/acquire; it may trigger the MODEL_ASSERT() for release/consume + */ + +std::atomic_int x; +std::atomic_int y; +std::atomic_int z; + +static int N = 1; + +static void a(void *obj) +{ + for (int i = 0; i < N; i++) { + x.store(2 * i + 1, std::memory_order_release); + y.store(i + 1, std::memory_order_release); + z.store(i + 1, std::memory_order_release); + x.store(2 * i + 2, std::memory_order_release); + } +} + +static void b(void *obj) +{ + int x1, y1, z1, x2; + x1 = x.load(std::memory_order_acquire); + y1 = y.load(std::memory_order_acquire); + z1 = z.load(std::memory_order_acquire); + x2 = x.load(std::memory_order_acquire); + printf("x: %d\n", x1); + printf("y: %d\n", y1); + printf("z: %d\n", z1); + printf("x: %d\n", x2); + + /* If x1 and x2 are the same, even value, then y1 must equal z1 */ + MODEL_ASSERT(x1 != x2 || x1 & 0x1 || y1 == z1); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + if (argc > 1) + N = atoi(argv[1]); + + printf("N: %d\n", N); + + atomic_init(&x, 0); + atomic_init(&y, 0); + atomic_init(&z, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/litmus/store-buffer.cc b/test/litmus/store-buffer.cc new file mode 100644 index 0000000..eb43d44 --- /dev/null +++ b/test/litmus/store-buffer.cc @@ -0,0 +1,36 @@ +#include +#include +#include + +std::atomic_int x; +std::atomic_int y; + +static void a(void *obj) +{ + x.store(1, std::memory_order_relaxed); + printf("y: %d\n", y.load(std::memory_order_relaxed)); +} + +static void b(void *obj) +{ + y.store(1, std::memory_order_relaxed); + printf("x: %d\n", x.load(std::memory_order_relaxed)); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/litmus/wrc.cc b/test/litmus/wrc.cc new file mode 100644 index 0000000..7d295fe --- /dev/null +++ b/test/litmus/wrc.cc @@ -0,0 +1,57 @@ +#include +#include +#include +#include + +static int N = 2; + +/* Can be tested for different behavior with relaxed vs. 
release/acquire/seq-cst */ +#define load_mo std::memory_order_relaxed +#define store_mo std::memory_order_relaxed + +static std::atomic_int *x; + +static void a(void *obj) +{ + int idx = *((int *)obj); + + if (idx > 0) + x[idx - 1].load(load_mo); + + if (idx < N) + x[idx].store(1, store_mo); + else + x[0].load(load_mo); +} + +int user_main(int argc, char **argv) +{ + thrd_t *threads; + int *indexes; + + if (argc > 1) + N = atoi(argv[1]); + if (N < 2) { + printf("Error: must have N >= 2\n"); + return 1; + } + printf("N: %d\n", N); + + threads = (thrd_t *)malloc((N + 1) * sizeof(thrd_t)); + x = (std::atomic_int *)malloc(N * sizeof(std::atomic_int)); + indexes = (int *)malloc((N + 1) * sizeof(int)); + + for (int i = 0; i < N + 1; i++) + indexes[i] = i; + + for (int i = 0; i < N; i++) + atomic_init(&x[i], 0); + + for (int i = 0; i < N + 1; i++) + thrd_create(&threads[i], (thrd_start_t)&a, (void *)&indexes[i]); + + for (int i = 0; i < N + 1; i++) + thrd_join(threads[i]); + + return 0; +} diff --git a/test/mo-satcycle.cc b/test/mo-satcycle.cc new file mode 100644 index 0000000..e502161 --- /dev/null +++ b/test/mo-satcycle.cc @@ -0,0 +1,68 @@ +/** + * @file mo-satcycle.cc + * @brief MO satisfaction cycle test + * + * This program has a peculiar behavior which is technically legal under the + * current C++ memory model but which is a result of a type of satisfaction + * cycle. We use this as justification for part of our modifications to the + * memory model when proving our model-checker's correctness. + */ + +#include +#include +#include + +#include "model-assert.h" + +using namespace std; + +atomic_int x, y; +int r0, r1, r2, r3; /* "local" variables */ + +static void a(void *obj) +{ + y.store(10, memory_order_relaxed); + x.store(1, memory_order_release); +} + +static void b(void *obj) +{ + r0 = x.load(memory_order_relaxed); + r1 = x.load(memory_order_acquire); + y.store(11, memory_order_relaxed); +} + +static void c(void *obj) +{ + r2 = y.load(memory_order_relaxed); + r3 = y.load(memory_order_relaxed); + if (r2 == 11 && r3 == 10) + x.store(0, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 3 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + printf("Main thread is finished\n"); + + /* + * This condition should not be hit because it only occurs under a + * satisfaction cycle + */ + bool cycle = (r0 == 1 && r1 == 0 && r2 == 11 && r3 == 10); + MODEL_ASSERT(!cycle); + + return 0; +} diff --git a/test/mutextest.cc b/test/mutextest.cc new file mode 100644 index 0000000..01226a7 --- /dev/null +++ b/test/mutextest.cc @@ -0,0 +1,38 @@ +#include + +#include "threads.h" +#include "librace.h" +#include "stdatomic.h" +#include +std::mutex * m; +int shareddata; + +static void a(void *obj) +{ + int i; + for(i=0;i<2;i++) { + if ((i%2)==0) { + m->lock(); + store_32(&shareddata,(unsigned int)i); + m->unlock(); + } else { + while(!m->try_lock()) + thrd_yield(); + store_32(&shareddata,(unsigned int)i); + m->unlock(); + } + } +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + m=new std::mutex(); + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&a, NULL); + + thrd_join(t1); + thrd_join(t2); + return 0; +} diff --git a/test/nestedpromise.c b/test/nestedpromise.c new file mode 100644 index 0000000..70de8d6 
--- /dev/null +++ b/test/nestedpromise.c @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +#include "librace.h" +#include "model-assert.h" + +atomic_int x; +atomic_int y; +atomic_int z; +static void a(void *obj) +{ + (void)atomic_load_explicit(&z, memory_order_relaxed); // this is only for schedule control + int t1=atomic_load_explicit(&x, memory_order_relaxed); + atomic_store_explicit(&y, 1, memory_order_relaxed); + printf("t1=%d\n",t1); +} + +static void b(void *obj) +{ + int t2=atomic_load_explicit(&y, memory_order_relaxed); + atomic_store_explicit(&x, t2, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + + atomic_init(&x, 0); + atomic_init(&y, 0); + atomic_init(&z, 0); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + + + return 0; +} diff --git a/test/pending-release.c b/test/pending-release.c new file mode 100644 index 0000000..a68f24d --- /dev/null +++ b/test/pending-release.c @@ -0,0 +1,65 @@ +/* + * This test performs some relaxes, release, acquire opeations on a single + * atomic variable. It is designed for creating a difficult set of pending + * release sequences to resolve at the end of an execution. However, it + * utilizes 6 threads, so it blows up into a lot of executions quickly. + */ + +#include +#include +#include + +#include "librace.h" + +atomic_int x; +int var = 0; + +static void a(void *obj) +{ + store_32(&var, 1); + atomic_store_explicit(&x, *((int *)obj), memory_order_release); + atomic_store_explicit(&x, *((int *)obj) + 1, memory_order_relaxed); +} + +static void b2(void *obj) +{ + int r = atomic_load_explicit(&x, memory_order_acquire); + printf("r = %d\n", r); + store_32(&var, 3); +} + +static void b1(void *obj) +{ + thrd_t t3, t4; + int i = 7; + int r = atomic_load_explicit(&x, memory_order_acquire); + printf("r = %d\n", r); + store_32(&var, 2); + thrd_create(&t3, (thrd_start_t)&a, &i); + thrd_create(&t4, (thrd_start_t)&b2, NULL); + thrd_join(t3); + thrd_join(t4); +} + +static void c(void *obj) +{ + atomic_store_explicit(&x, 22, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t5; + int i = 4; + + atomic_init(&x, 0); + + thrd_create(&t1, (thrd_start_t)&a, &i); + thrd_create(&t2, (thrd_start_t)&b1, NULL); + thrd_create(&t5, (thrd_start_t)&c, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t5); + + return 0; +} diff --git a/test/releaseseq.c b/test/releaseseq.c new file mode 100644 index 0000000..548f0a8 --- /dev/null +++ b/test/releaseseq.c @@ -0,0 +1,52 @@ +/* + * This test performs some relaxes, release, acquire opeations on a single + * atomic variable. It can give some rough idea of release sequence support but + * probably should be improved to give better information. 
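+ *
+ * The execution of interest has thread b's acquire load read the relaxed
+ * store of 42: b still synchronizes with thread a's release store of 1
+ * through the release sequence unless thread c's relaxed store of 2
+ * intervenes, in which case the plain accesses to 'var' are racy.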
+ */ + +#include +#include +#include + +#include "librace.h" + +atomic_int x; +int var = 0; + +static void a(void *obj) +{ + store_32(&var, 1); + atomic_store_explicit(&x, 1, memory_order_release); + atomic_store_explicit(&x, 42, memory_order_relaxed); +} + +static void b(void *obj) +{ + int r = atomic_load_explicit(&x, memory_order_acquire); + printf("r = %d\n", r); + printf("load %d\n", load_32(&var)); +} + +static void c(void *obj) +{ + atomic_store_explicit(&x, 2, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3; + + atomic_init(&x, 0); + + printf("Main thread: creating 3 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/rmw2prog.c b/test/rmw2prog.c new file mode 100644 index 0000000..0d03b02 --- /dev/null +++ b/test/rmw2prog.c @@ -0,0 +1,37 @@ +#include +#include +#include + +#include "librace.h" + +atomic_int x; +atomic_int y; + +static void a(void *obj) +{ + int v1=atomic_fetch_add_explicit(&x, 1, memory_order_relaxed); + int v2=atomic_fetch_add_explicit(&y, 1, memory_order_relaxed); + printf("v1 = %d, v2=%d\n", v1, v2); +} + +static void b(void *obj) +{ + int v3=atomic_fetch_add_explicit(&y, 1, memory_order_relaxed); + int v4=atomic_fetch_add_explicit(&x, 1, memory_order_relaxed); + printf("v3 = %d, v4=%d\n", v3, v4); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, 0); + atomic_init(&y, 0); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + + return 0; +} diff --git a/test/rmwprog.c b/test/rmwprog.c new file mode 100644 index 0000000..ebace1e --- /dev/null +++ b/test/rmwprog.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +#include "librace.h" +#include "model-assert.h" + +atomic_int x; +static int N = 2; + +static void a(void *obj) +{ + int i; + for (i = 0; i < N; i++) + atomic_fetch_add_explicit(&x, 1, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + if (argc > 1) + N = atoi(argv[1]); + + atomic_init(&x, 0); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&a, NULL); + + thrd_join(t1); + thrd_join(t2); + + MODEL_ASSERT(atomic_load(&x) == N * 2); + + return 0; +} diff --git a/test/sctest.c b/test/sctest.c new file mode 100644 index 0000000..2ddb953 --- /dev/null +++ b/test/sctest.c @@ -0,0 +1,59 @@ +#include +#include +#include + +#include "librace.h" + +atomic_int x; +atomic_int y; +atomic_int z; + +static int r1, r2, r3; + +static void a(void *obj) +{ + atomic_store_explicit(&z, 1, memory_order_relaxed); +} + +static void b(void *obj) +{ + atomic_store_explicit(&x, 1, memory_order_relaxed); + atomic_store_explicit(&y, 1, memory_order_relaxed); + r1=atomic_load_explicit(&z, memory_order_relaxed); +} +static void c(void *obj) +{ + atomic_store_explicit(&z, 2, memory_order_relaxed); + atomic_store_explicit(&x, 2, memory_order_relaxed); + r2=atomic_load_explicit(&y, memory_order_relaxed); +} + +static void d(void *obj) +{ + atomic_store_explicit(&z, 3, memory_order_relaxed); + atomic_store_explicit(&y, 2, memory_order_relaxed); + r3=atomic_load_explicit(&x, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2,t3, t4; + + atomic_init(&x, 0); + atomic_init(&y, 0); + atomic_init(&z, 0); + + thrd_create(&t1, 
(thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + thrd_create(&t4, (thrd_start_t)&d, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + thrd_join(t4); + + /* Check and/or print r1, r2, r3? */ + + return 0; +} diff --git a/test/thinair.c b/test/thinair.c new file mode 100644 index 0000000..2f4f580 --- /dev/null +++ b/test/thinair.c @@ -0,0 +1,41 @@ +#include +#include +#include + +#include "librace.h" + +atomic_int x; +atomic_int y; + +static void a(void *obj) +{ + int r1=atomic_load_explicit(&x, memory_order_relaxed); + atomic_store_explicit(&y, r1, memory_order_relaxed); + printf("r1=%d\n",r1); +} + +static void b(void *obj) +{ + int r2=atomic_load_explicit(&y, memory_order_relaxed); + atomic_store_explicit(&x, r2, memory_order_relaxed); + atomic_store_explicit(&x, r2 + 1, memory_order_relaxed); + printf("r2=%d\n",r2); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, -1); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/uninit.cc b/test/uninit.cc new file mode 100644 index 0000000..b3a1026 --- /dev/null +++ b/test/uninit.cc @@ -0,0 +1,54 @@ +/** + * @file uninit.cc + * @brief Uninitialized loads test + * + * This is a test of the "uninitialized loads" code. While we don't explicitly + * initialize y, this example's synchronization pattern should guarantee we + * never see it uninitialized. + */ +#include +#include +#include + +#include "librace.h" + +std::atomic_int x; +std::atomic_int y; + +static void a(void *obj) +{ + int flag = x.load(std::memory_order_acquire); + printf("flag: %d\n", flag); + if (flag == 2) + printf("Load: %d\n", y.load(std::memory_order_relaxed)); +} + +static void b(void *obj) +{ + printf("fetch_add: %d\n", x.fetch_add(1, std::memory_order_relaxed)); +} + +static void c(void *obj) +{ + y.store(3, std::memory_order_relaxed); + x.store(1, std::memory_order_release); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3; + + std::atomic_init(&x, 0); + + printf("Main thread: creating 3 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/userprog.c b/test/userprog.c new file mode 100644 index 0000000..02a83b4 --- /dev/null +++ b/test/userprog.c @@ -0,0 +1,40 @@ +#include +#include +#include + +#include "librace.h" + +atomic_int x; +atomic_int y; + +static void a(void *obj) +{ + int r1=atomic_load_explicit(&y, memory_order_relaxed); + atomic_store_explicit(&x, r1, memory_order_relaxed); + printf("r1=%d\n",r1); +} + +static void b(void *obj) +{ + int r2=atomic_load_explicit(&x, memory_order_relaxed); + atomic_store_explicit(&y, 42, memory_order_relaxed); + printf("r2=%d\n",r2); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2; + + atomic_init(&x, 0); + atomic_init(&y, 0); + + printf("Main thread: creating 2 threads\n"); + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + + thrd_join(t1); + thrd_join(t2); + printf("Main thread is finished\n"); + + return 0; +} diff --git a/test/wrc.c b/test/wrc.c new file mode 100644 index 0000000..befd23f --- /dev/null +++ b/test/wrc.c 
@@ -0,0 +1,89 @@ +#include +#include +#include +#include "librace.h" + atomic_int x1; + atomic_int x2; + atomic_int x3; + atomic_int x4; + atomic_int x5; + atomic_int x6; + atomic_int x7; +static void a(void *obj) +{ + atomic_store_explicit(&x1, 1,memory_order_relaxed); +} + +static void b(void *obj) +{ + (void)atomic_load_explicit(&x1, memory_order_relaxed); + atomic_store_explicit(&x2, 1,memory_order_relaxed); +} + +static void c(void *obj) +{ + (void)atomic_load_explicit(&x2, memory_order_relaxed); + atomic_store_explicit(&x3, 1,memory_order_relaxed); +} + +static void d(void *obj) +{ + (void)atomic_load_explicit(&x3, memory_order_relaxed); + atomic_store_explicit(&x4, 1,memory_order_relaxed); +} + +static void e(void *obj) +{ + (void)atomic_load_explicit(&x4, memory_order_relaxed); + atomic_store_explicit(&x5, 1,memory_order_relaxed); +} + +static void f(void *obj) +{ + (void)atomic_load_explicit(&x5, memory_order_relaxed); + atomic_store_explicit(&x6, 1,memory_order_relaxed); +} + +static void g(void *obj) +{ + (void)atomic_load_explicit(&x6, memory_order_relaxed); + atomic_store_explicit(&x7, 1,memory_order_relaxed); +} +static void h(void *obj) +{ + (void)atomic_load_explicit(&x7, memory_order_relaxed); + (void)atomic_load_explicit(&x1, memory_order_relaxed); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3, t4, t5, t6, t7, t8; + atomic_init(&x1, 0); + atomic_init(&x2, 0); + atomic_init(&x3, 0); + atomic_init(&x4, 0); + atomic_init(&x5, 0); + atomic_init(&x6, 0); + atomic_init(&x7, 0); + + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + thrd_create(&t4, (thrd_start_t)&d, NULL); + thrd_create(&t5, (thrd_start_t)&e, NULL); + thrd_create(&t6, (thrd_start_t)&f, NULL); + thrd_create(&t7, (thrd_start_t)&g, NULL); + thrd_create(&t8, (thrd_start_t)&h, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + thrd_join(t4); + thrd_join(t5); + thrd_join(t6); + thrd_join(t7); + thrd_join(t8); + + return 0; +} diff --git a/test/wrcs.c b/test/wrcs.c new file mode 100644 index 0000000..9728265 --- /dev/null +++ b/test/wrcs.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include "librace.h" + atomic_int x1; + atomic_int x2; + atomic_int x3; + atomic_int x4; + atomic_int x5; + atomic_int x6; + atomic_int x7; +static void a(void *obj) +{ + atomic_store_explicit(&x1, 1,memory_order_seq_cst); +} + +static void b(void *obj) +{ + (void)atomic_load_explicit(&x1, memory_order_seq_cst); + atomic_store_explicit(&x2, 1,memory_order_seq_cst); +} + +static void c(void *obj) +{ + (void)atomic_load_explicit(&x2, memory_order_seq_cst); + atomic_store_explicit(&x3, 1,memory_order_seq_cst); +} + +static void d(void *obj) +{ + (void)atomic_load_explicit(&x3, memory_order_seq_cst); + atomic_store_explicit(&x4, 1,memory_order_seq_cst); +} + +static void e(void *obj) +{ + (void)atomic_load_explicit(&x4, memory_order_seq_cst); + atomic_store_explicit(&x5, 1,memory_order_seq_cst); +} + +static void f(void *obj) +{ + (void)atomic_load_explicit(&x5, memory_order_seq_cst); + atomic_store_explicit(&x6, 1,memory_order_seq_cst); +} + +static void g(void *obj) +{ + (void)atomic_load_explicit(&x6, memory_order_seq_cst); + atomic_store_explicit(&x7, 1,memory_order_seq_cst); +} +static void h(void *obj) +{ + (void)atomic_load_explicit(&x7, memory_order_seq_cst); + (void)atomic_load_explicit(&x1, memory_order_seq_cst); +} + +int user_main(int argc, char **argv) +{ + thrd_t t1, t2, t3, t4, t5, t6, t7, t8; + atomic_init(&x1, 
0); + atomic_init(&x2, 0); + atomic_init(&x3, 0); + atomic_init(&x4, 0); + atomic_init(&x5, 0); + atomic_init(&x6, 0); + atomic_init(&x7, 0); + + + thrd_create(&t1, (thrd_start_t)&a, NULL); + thrd_create(&t2, (thrd_start_t)&b, NULL); + thrd_create(&t3, (thrd_start_t)&c, NULL); + thrd_create(&t4, (thrd_start_t)&d, NULL); + thrd_create(&t5, (thrd_start_t)&e, NULL); + thrd_create(&t6, (thrd_start_t)&f, NULL); + thrd_create(&t7, (thrd_start_t)&g, NULL); + thrd_create(&t8, (thrd_start_t)&h, NULL); + + thrd_join(t1); + thrd_join(t2); + thrd_join(t3); + thrd_join(t4); + thrd_join(t5); + thrd_join(t6); + thrd_join(t7); + thrd_join(t8); + + return 0; +} diff --git a/threads-model.h b/threads-model.h new file mode 100644 index 0000000..733d825 --- /dev/null +++ b/threads-model.h @@ -0,0 +1,179 @@ +/** @file threads-model.h + * @brief Model Checker Thread class. + */ + +#ifndef __THREADS_MODEL_H__ +#define __THREADS_MODEL_H__ + +#include + +#include "mymemory.h" +#include +#include "modeltypes.h" +#include "stl-model.h" +#include "context.h" + +struct thread_params { + thrd_start_t func; + void *arg; +}; + +/** @brief Represents the state of a user Thread */ +typedef enum thread_state { + /** Thread was just created and hasn't run yet */ + THREAD_CREATED, + /** Thread is running */ + THREAD_RUNNING, + /** Thread is not currently running but is ready to run */ + THREAD_READY, + /** + * Thread is waiting on another action (e.g., thread completion, lock + * release, etc.) + */ + THREAD_BLOCKED, + /** Thread has completed its execution */ + THREAD_COMPLETED +} thread_state; + +class ModelAction; + +/** @brief A Thread is created for each user-space thread */ +class Thread { +public: + Thread(thread_id_t tid); + Thread(thread_id_t tid, thrd_t *t, void (*func)(void *), void *a, Thread *parent); + ~Thread(); + void complete(); + + static int swap(ucontext_t *ctxt, Thread *t); + static int swap(Thread *t, ucontext_t *ctxt); + + thread_state get_state() const { return state; } + void set_state(thread_state s); + thread_id_t get_id() const; + thrd_t get_thrd_t() const { return *user_thread; } + Thread * get_parent() const { return parent; } + + void set_creation(ModelAction *act) { creation = act; } + ModelAction * get_creation() const { return creation; } + + /** + * Set a return value for the last action in this thread (e.g., for an + * atomic read). + * @param value The value to return + */ + void set_return_value(uint64_t value) { last_action_val = value; } + + /** + * Retrieve a return value for the last action in this thread. Used, + * for instance, for an atomic read to return the 'read' value. Should + * be called from a user context. 
+ * @return The value 'returned' by the action + */ + uint64_t get_return_value() const { return last_action_val; } + + /** @return True if this thread is finished executing */ + bool is_complete() const { return state == THREAD_COMPLETED; } + + /** @return True if this thread is blocked */ + bool is_blocked() const { return state == THREAD_BLOCKED; } + + /** @return The pending (next) ModelAction for this Thread + * @see Thread::pending */ + ModelAction * get_pending() const { return pending; } + + /** @brief Set the pending (next) ModelAction for this Thread + * @param act The pending ModelAction + * @see Thread::pending */ + void set_pending(ModelAction *act) { pending = act; } + + Thread * waiting_on() const; + bool is_waiting_on(const Thread *t) const; + + bool is_model_thread() const { return model_thread; } + + friend void thread_startup(); + + /** + * Intentionally NOT allocated with MODELALLOC or SNAPSHOTALLOC. + * Threads should be allocated on the user's normal (snapshotting) heap + * to allow their allocation/deallocation to follow the same pattern as + * the rest of the backtracked/replayed program. + */ + void * operator new(size_t size) { + return Thread_malloc(size); + } + void operator delete(void *p, size_t size) { + Thread_free(p); + } + void * operator new[](size_t size) { + return Thread_malloc(size); + } + void operator delete[](void *p, size_t size) { + Thread_free(p); + } +private: + int create_context(); + + /** @brief The parent Thread which created this Thread */ + Thread * const parent; + + /** @brief The THREAD_CREATE ModelAction which created this Thread */ + ModelAction *creation; + + /** + * @brief The next ModelAction to be run by this Thread + * + * This action should be kept updated by the ModelChecker, so that we + * always know what the next ModelAction's memory_order, action type, + * and location are. + */ + ModelAction *pending; + + void (*start_routine)(void *); + void *arg; + ucontext_t context; + void *stack; + thrd_t *user_thread; + thread_id_t id; + thread_state state; + + /** + * The value returned by the last action in this thread + * @see Thread::set_return_value() + * @see Thread::get_return_value() + */ + uint64_t last_action_val; + + /** @brief Is this Thread a special model-checker thread? */ + const bool model_thread; +}; + +Thread * thread_current(); + +static inline thread_id_t thrd_to_id(thrd_t t) +{ + return t.priv->get_id(); +} + +/** + * @brief Map a zero-based integer index to a unique thread ID + * + * This is the inverse of id_to_int + */ +static inline thread_id_t int_to_id(int i) +{ + return i; +} + +/** + * @brief Map a unique thread ID to a zero-based integer index + * + * This is the inverse of int_to_id + */ +static inline int id_to_int(thread_id_t id) +{ + return id; +} + +#endif /* __THREADS_MODEL_H__ */ diff --git a/threads.cc b/threads.cc new file mode 100644 index 0000000..a0bc029 --- /dev/null +++ b/threads.cc @@ -0,0 +1,229 @@ +/** @file threads.cc + * @brief Thread functions. + */ + +#include + +#include +#include +#include "common.h" +#include "threads-model.h" +#include "action.h" + +/* global "model" object */ +#include "model.h" + +/** Allocate a stack for a new thread. */ +static void * stack_allocate(size_t size) +{ + return snapshot_malloc(size); +} + +/** Free a stack for a terminated thread. 
*/ +static void stack_free(void *stack) +{ + snapshot_free(stack); +} + +/** + * @brief Get the current Thread + * + * Must be called from a user context + * + * @return The currently executing thread + */ +Thread * thread_current(void) +{ + ASSERT(model); + return model->get_current_thread(); +} + +/** + * Provides a startup wrapper for each thread, allowing some initial + * model-checking data to be recorded. This method also gets around makecontext + * not being 64-bit clean + */ +void thread_startup() +{ + Thread * curr_thread = thread_current(); + + /* Add dummy "start" action, just to create a first clock vector */ + model->switch_to_master(new ModelAction(THREAD_START, std::memory_order_seq_cst, curr_thread)); + + /* Call the actual thread function */ + curr_thread->start_routine(curr_thread->arg); + + /* Finish thread properly */ + model->switch_to_master(new ModelAction(THREAD_FINISH, std::memory_order_seq_cst, curr_thread)); +} + +/** + * Create a thread context for a new thread so we can use + * setcontext/getcontext/swapcontext to swap it out. + * @return 0 on success; otherwise, non-zero error condition + */ +int Thread::create_context() +{ + int ret; + + ret = getcontext(&context); + if (ret) + return ret; + + /* Initialize new managed context */ + stack = stack_allocate(STACK_SIZE); + context.uc_stack.ss_sp = stack; + context.uc_stack.ss_size = STACK_SIZE; + context.uc_stack.ss_flags = 0; + context.uc_link = model->get_system_context(); + makecontext(&context, thread_startup, 0); + + return 0; +} + +/** + * Swaps the current context to another thread of execution. This form switches + * from a user Thread to a system context. + * @param t Thread representing the currently-running thread. The current + * context is saved here. + * @param ctxt Context to which we will swap. Must hold a valid system context. + * @return Does not return, unless we return to Thread t's context. See + * swapcontext(3) (returns 0 for success, -1 for failure). + */ +int Thread::swap(Thread *t, ucontext_t *ctxt) +{ + t->set_state(THREAD_READY); + return model_swapcontext(&t->context, ctxt); +} + +/** + * Swaps the current context to another thread of execution. This form switches + * from a system context to a user Thread. + * @param ctxt System context variable to which to save the current context. + * @param t Thread to which we will swap. Must hold a valid user context. + * @return Does not return, unless we return to the system context (ctxt). See + * swapcontext(3) (returns 0 for success, -1 for failure). + */ +int Thread::swap(ucontext_t *ctxt, Thread *t) +{ + t->set_state(THREAD_RUNNING); + return model_swapcontext(ctxt, &t->context); +} + + +/** Terminate a thread and free its stack. */ +void Thread::complete() +{ + ASSERT(!is_complete()); + DEBUG("completed thread %d\n", id_to_int(get_id())); + state = THREAD_COMPLETED; + if (stack) + stack_free(stack); +} + +/** + * @brief Construct a new model-checker Thread + * + * A model-checker Thread is used for accounting purposes only. It will never + * have its own stack, and it should never be inserted into the Scheduler. + * + * @param tid The thread ID to assign + */ +Thread::Thread(thread_id_t tid) : + parent(NULL), + creation(NULL), + pending(NULL), + start_routine(NULL), + arg(NULL), + stack(NULL), + user_thread(NULL), + id(tid), + state(THREAD_READY), /* Thread is always ready? */ + last_action_val(0), + model_thread(true) +{ + memset(&context, 0, sizeof(context)); +} + +/** + * Construct a new thread. 
+ * @param t The thread identifier of the newly created thread. + * @param func The function that the thread will call. + * @param a The parameter to pass to this function. + */ +Thread::Thread(thread_id_t tid, thrd_t *t, void (*func)(void *), void *a, Thread *parent) : + parent(parent), + creation(NULL), + pending(NULL), + start_routine(func), + arg(a), + user_thread(t), + id(tid), + state(THREAD_CREATED), + last_action_val(VALUE_NONE), + model_thread(false) +{ + int ret; + + /* Initialize state */ + ret = create_context(); + if (ret) + model_print("Error in create_context\n"); + + user_thread->priv = this; +} + +/** Destructor */ +Thread::~Thread() +{ + if (!is_complete()) + complete(); +} + +/** @return The thread_id_t corresponding to this Thread object. */ +thread_id_t Thread::get_id() const +{ + return id; +} + +/** + * Set a thread's THREAD_* state (@see thread_state) + * @param s The state to enter + */ +void Thread::set_state(thread_state s) +{ + ASSERT(s == THREAD_COMPLETED || state != THREAD_COMPLETED); + state = s; +} + +/** + * Get the Thread that this Thread is immediately waiting on + * @return The thread we are waiting on, if any; otherwise NULL + */ +Thread * Thread::waiting_on() const +{ + if (!pending) + return NULL; + + if (pending->get_type() == THREAD_JOIN) + return pending->get_thread_operand(); + else if (pending->is_lock()) + return (Thread *)pending->get_mutex()->get_state()->locked; + return NULL; +} + +/** + * Check if this Thread is waiting (blocking) on a given Thread, directly or + * indirectly (via a chain of waiting threads) + * + * @param t The Thread on which we may be waiting + * @return True if we are waiting on Thread t; false otherwise + */ +bool Thread::is_waiting_on(const Thread *t) const +{ + Thread *wait; + for (wait = waiting_on(); wait != NULL; wait = wait->waiting_on()) + if (wait == t) + return true; + return false; +} diff --git a/traceanalysis.h b/traceanalysis.h new file mode 100644 index 0000000..df3356a --- /dev/null +++ b/traceanalysis.h @@ -0,0 +1,35 @@ +#ifndef TRACE_ANALYSIS_H +#define TRACE_ANALYSIS_H +#include "model.h" + +class TraceAnalysis { + public: + /** setExecution is called once after installation with a reference to + * the ModelExecution object. */ + + virtual void setExecution(ModelExecution * execution) = 0; + + /** analyze is called once for each feasible trace with the complete + * action_list object. */ + + virtual void analyze(action_list_t *) = 0; + + /** name returns the analysis name string */ + + virtual const char * name() = 0; + + /** Each analysis option is passed into the option method. This + * occurs before installation (i.e., you don't have a + * ModelExecution object yet). A TraceAnalysis object should + * support the option "help" */ + + virtual bool option(char *) = 0; + + /** The finish method is called once at the end. This should be + * used to print out results. 
+ */
+
+	virtual void finish() = 0;
+
+	SNAPSHOTALLOC
+};
+#endif
diff --git a/workqueue.h b/workqueue.h
new file mode 100644
index 0000000..9034788
--- /dev/null
+++ b/workqueue.h
@@ -0,0 +1,107 @@
+/**
+ * @file workqueue.h
+ * @brief Provides structures for queueing ModelChecker actions to be taken
+ */
+
+#ifndef __WORKQUEUE_H__
+#define __WORKQUEUE_H__
+
+#include "mymemory.h"
+#include "stl-model.h"
+
+class ModelAction;
+
+typedef enum {
+	WORK_NONE = 0,           /**< No work to be done */
+	WORK_CHECK_CURR_ACTION,  /**< Check the current action; used for the
+	                              first action of the work loop */
+	WORK_CHECK_RELEASE_SEQ,  /**< Check if any pending release sequences
+	                              are resolved */
+	WORK_CHECK_MO_EDGES,     /**< Check if new mo_graph edges can be added */
+} model_work_t;
+
+/**
+ */
+class WorkQueueEntry {
+ public:
+	/** @brief Type of work queue entry */
+	model_work_t type;
+
+	/**
+	 * @brief Object affected
+	 * @see CheckRelSeqWorkEntry
+	 */
+	void *location;
+
+	/**
+	 * @brief The ModelAction to work on
+	 * @see MOEdgeWorkEntry
+	 */
+	ModelAction *action;
+};
+
+/**
+ * @brief Work: perform initial promise, mo_graph checks on the current action
+ *
+ * This WorkQueueEntry performs the normal, first-pass checks for a ModelAction
+ * that is currently being explored. The current ModelAction (@a action) is the
+ * only relevant parameter to this entry.
+ */
+class CheckCurrWorkEntry : public WorkQueueEntry {
+ public:
+	/**
+	 * @brief Constructor for a "check current action" work entry
+	 * @param curr The current action
+	 */
+	CheckCurrWorkEntry(ModelAction *curr) {
+		type = WORK_CHECK_CURR_ACTION;
+		location = NULL;
+		action = curr;
+	}
+};
+
+/**
+ * @brief Work: check an object location for resolved release sequences
+ *
+ * This WorkQueueEntry checks synchronization and the mo_graph for resolution
+ * of any release sequences. The object @a location is the only relevant
+ * parameter to this entry.
+ */
+class CheckRelSeqWorkEntry : public WorkQueueEntry {
+ public:
+	/**
+	 * @brief Constructor for a "check release sequences" work entry
+	 * @param l The location which must be checked for release sequences
+	 */
+	CheckRelSeqWorkEntry(void *l) {
+		type = WORK_CHECK_RELEASE_SEQ;
+		location = l;
+		action = NULL;
+	}
+};
+
+/**
+ * @brief Work: check a ModelAction for new mo_graph edges
+ *
+ * This WorkQueueEntry checks for new mo_graph edges for a particular
+ * ModelAction (e.g., that was just generated or that updated its
+ * synchronization). The ModelAction @a action is the only relevant parameter
+ * to this entry.
+ */
+class MOEdgeWorkEntry : public WorkQueueEntry {
+ public:
+	/**
+	 * @brief Constructor for a mo_edge work entry
+	 * @param updated The ModelAction which was updated, triggering this work
+	 */
+	MOEdgeWorkEntry(ModelAction *updated) {
+		type = WORK_CHECK_MO_EDGES;
+		location = NULL;
+		action = updated;
+	}
+};
+
+/** @brief typedef for the work queue type */
+typedef ModelList<WorkQueueEntry> work_queue_t;
+
+#endif /* __WORKQUEUE_H__ */
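
Editor's note (not part of this commit): the TraceAnalysis interface added above is an abstract plugin hook, so a minimal sketch of a concrete implementation may help clarify how it is meant to be used. The class name TraceCounter, the include of "common.h" for model_print (as used in threads.cc above), and the assumption that an installed analysis is simply driven through setExecution/analyze/finish are illustrative assumptions, not code from this patch.

/* Sketch only: a trivial analysis that counts how many feasible traces were
 * handed to it. It relies solely on the pure-virtual interface declared in
 * traceanalysis.h; the registration/installation step is outside this diff. */
#include "traceanalysis.h"
#include "common.h"   /* assumed to declare model_print(), as used in threads.cc */

class TraceCounter : public TraceAnalysis {
 public:
	TraceCounter() : execution(NULL), traces(0) { }

	/* Called once after installation with the ModelExecution object */
	void setExecution(ModelExecution *e) { execution = e; }

	/* Called once per feasible trace with the complete action list */
	void analyze(action_list_t *actions) { (void)actions; traces++; }

	const char * name() { return "trace-counter"; }

	/* No options supported in this sketch; a real analysis should at
	 * least handle "help" */
	bool option(char *opt) { (void)opt; return false; }

	/* Called once at the end; print results here */
	void finish() { model_print("Analyzed %d feasible trace(s)\n", traces); }

	SNAPSHOTALLOC
 private:
	ModelExecution *execution;
	int traces;
};

An analysis object like this would be handed to the model checker before execution begins; the checker then invokes setExecution once, analyze per feasible trace, and finish at exit, as described by the interface comments above.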