Add support for vectors of pointers.

[oota-llvm.git] / docs / LangRef.html
diff --git a/docs/LangRef.html b/docs/LangRef.html

index 74b2391e294ac261aeba7bd5d4c7b5ca7a27e1fb..12c140f82372d737e39ba38d3632162e299cc85d 100644 (file)
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -35,7 +35,7 @@
            <li><a href="#linkage_externweak">'<tt>extern_weak</tt>' Linkage</a></li>
            <li><a href="#linkage_linkonce_odr">'<tt>linkonce_odr</tt>' Linkage</a></li>
            <li><a href="#linkage_weak">'<tt>weak_odr</tt>' Linkage</a></li>
-          <li><a href="#linkage_external">'<tt>externally visible</tt>' Linkage</a></li>
+          <li><a href="#linkage_external">'<tt>external</tt>' Linkage</a></li>
            <li><a href="#linkage_dllimport">'<tt>dllimport</tt>' Linkage</a></li>
            <li><a href="#linkage_dllexport">'<tt>dllexport</tt>' Linkage</a></li>
          </ol>
@@ -53,6 +53,8 @@
        <li><a href="#datalayout">Data Layout</a></li>
        <li><a href="#pointeraliasing">Pointer Aliasing Rules</a></li>
        <li><a href="#volatile">Volatile Memory Accesses</a></li>
+      <li><a href="#memmodel">Memory Model for Concurrent Operations</a></li>
+      <li><a href="#ordering">Atomic Memory Ordering Constraints</a></li>
      </ol>
    </li>
    <li><a href="#typesystem">Type System</a>
@@ -74,16 +76,14 @@
              <ol>
                <li><a href="#t_array">Array Type</a></li>
                <li><a href="#t_struct">Structure Type</a></li>
-              <li><a href="#t_pstruct">Packed Structure Type</a></li>
+              <li><a href="#t_opaque">Opaque Structure Types</a></li>
                <li><a href="#t_vector">Vector Type</a></li>
              </ol>
            </li>
            <li><a href="#t_function">Function Type</a></li>
            <li><a href="#t_pointer">Pointer Type</a></li>
-          <li><a href="#t_opaque">Opaque Type</a></li>
          </ol>
        </li>
-      <li><a href="#t_uprefs">Type Up-references</a></li>
      </ol>
    </li>
    <li><a href="#constants">Constants</a>
@@ -100,7 +100,12 @@
    <li><a href="#othervalues">Other Values</a>
      <ol>
        <li><a href="#inlineasm">Inline Assembler Expressions</a></li>
-      <li><a href="#metadata">Metadata Nodes and Metadata Strings</a></li>
+      <li><a href="#metadata">Metadata Nodes and Metadata Strings</a>
+        <ol>
+          <li><a href="#tbaa">'<tt>tbaa</tt>' Metadata</a></li>
+          <li><a href="#fpaccuracy">'<tt>fpaccuracy</tt>' Metadata</a></li>
+        </ol>
+      </li>
      </ol>
    </li>
    <li><a href="#intrinsic_globals">Intrinsic Global Variables</a>
@@ -124,6 +129,7 @@
            <li><a href="#i_indirectbr">'<tt>indirectbr</tt>' Instruction</a></li>
            <li><a href="#i_invoke">'<tt>invoke</tt>' Instruction</a></li>
            <li><a href="#i_unwind">'<tt>unwind</tt>'  Instruction</a></li>
+          <li><a href="#i_resume">'<tt>resume</tt>'  Instruction</a></li>
            <li><a href="#i_unreachable">'<tt>unreachable</tt>' Instruction</a></li>
          </ol>
        </li>
@@ -168,9 +174,12 @@
        </li>
        <li><a href="#memoryops">Memory Access and Addressing Operations</a>
          <ol>
-          <li><a href="#i_alloca">'<tt>alloca</tt>'   Instruction</a></li>
-         <li><a href="#i_load">'<tt>load</tt>'     Instruction</a></li>
-         <li><a href="#i_store">'<tt>store</tt>'    Instruction</a></li>
+          <li><a href="#i_alloca">'<tt>alloca</tt>' Instruction</a></li>
+         <li><a href="#i_load">'<tt>load</tt>' Instruction</a></li>
+         <li><a href="#i_store">'<tt>store</tt>' Instruction</a></li>
+         <li><a href="#i_fence">'<tt>fence</tt>' Instruction</a></li>
+         <li><a href="#i_cmpxchg">'<tt>cmpxchg</tt>' Instruction</a></li>
+         <li><a href="#i_atomicrmw">'<tt>atomicrmw</tt>' Instruction</a></li>
           <li><a href="#i_getelementptr">'<tt>getelementptr</tt>' Instruction</a></li>
          </ol>
        </li>
@@ -198,6 +207,7 @@
            <li><a href="#i_select">'<tt>select</tt>' Instruction</a></li>
            <li><a href="#i_call">'<tt>call</tt>'  Instruction</a></li>
            <li><a href="#i_va_arg">'<tt>va_arg</tt>'  Instruction</a></li>
+          <li><a href="#i_landingpad">'<tt>landingpad</tt>' Instruction</a></li>
          </ol>
        </li>
      </ol>
@@ -241,6 +251,7 @@
            <li><a href="#int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a></li>
            <li><a href="#int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a></li>
            <li><a href="#int_log">'<tt>llvm.log.*</tt>' Intrinsic</a></li>
+          <li><a href="#int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a></li>
          </ol>
        </li>
        <li><a href="#int_manip">Bit Manipulation Intrinsics</a>
@@ -269,34 +280,18 @@
        </li>
        <li><a href="#int_debugger">Debugger intrinsics</a></li>
        <li><a href="#int_eh">Exception Handling intrinsics</a></li>
-      <li><a href="#int_trampoline">Trampoline Intrinsic</a>
+      <li><a href="#int_trampoline">Trampoline Intrinsics</a>
          <ol>
            <li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_atomics">Atomic intrinsics</a>
-        <ol>
-          <li><a href="#int_memory_barrier"><tt>llvm.memory_barrier</tt></a></li>
-          <li><a href="#int_atomic_cmp_swap"><tt>llvm.atomic.cmp.swap</tt></a></li>
-          <li><a href="#int_atomic_swap"><tt>llvm.atomic.swap</tt></a></li>
-          <li><a href="#int_atomic_load_add"><tt>llvm.atomic.load.add</tt></a></li>
-          <li><a href="#int_atomic_load_sub"><tt>llvm.atomic.load.sub</tt></a></li>
-          <li><a href="#int_atomic_load_and"><tt>llvm.atomic.load.and</tt></a></li>
-          <li><a href="#int_atomic_load_nand"><tt>llvm.atomic.load.nand</tt></a></li>
-          <li><a href="#int_atomic_load_or"><tt>llvm.atomic.load.or</tt></a></li>
-          <li><a href="#int_atomic_load_xor"><tt>llvm.atomic.load.xor</tt></a></li>
-          <li><a href="#int_atomic_load_max"><tt>llvm.atomic.load.max</tt></a></li>
-          <li><a href="#int_atomic_load_min"><tt>llvm.atomic.load.min</tt></a></li>
-          <li><a href="#int_atomic_load_umax"><tt>llvm.atomic.load.umax</tt></a></li>
-          <li><a href="#int_atomic_load_umin"><tt>llvm.atomic.load.umin</tt></a></li>
+          <li><a href="#int_at">'<tt>llvm.adjust.trampoline</tt>' Intrinsic</a></li>
          </ol>
        </li>
        <li><a href="#int_memorymarkers">Memory Use Markers</a>
          <ol>
-          <li><a href="#int_lifetime_start"><tt>llvm.lifetime.start</tt></a></li>
-          <li><a href="#int_lifetime_end"><tt>llvm.lifetime.end</tt></a></li>
-          <li><a href="#int_invariant_start"><tt>llvm.invariant.start</tt></a></li>
-          <li><a href="#int_invariant_end"><tt>llvm.invariant.end</tt></a></li>
+          <li><a href="#int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a></li>
+          <li><a href="#int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a></li>
+          <li><a href="#int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a></li>
+          <li><a href="#int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a></li>
          </ol>
        </li>
        <li><a href="#int_general">General intrinsics</a>
@@ -311,6 +306,8 @@
              '<tt>llvm.stackprotector</tt>' Intrinsic</a></li>
           <li><a href="#int_objectsize">
              '<tt>llvm.objectsize</tt>' Intrinsic</a></li>
+         <li><a href="#int_expect">
+            '<tt>llvm.expect</tt>' Intrinsic</a></li>
          </ol>
        </li>
      </ol>
@@ -640,7 +637,7 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
        be merged with equivalent globals.  These linkage types are otherwise the
        same as their non-<tt>odr</tt> versions.</dd>
  
-  <dt><tt><b><a name="linkage_external">externally visible</a></b></tt>:</dt>
+  <dt><tt><b><a name="linkage_external">external</a></b></tt></dt>
    <dd>If none of the above identifiers are used, the global is externally
        visible, meaning that it participates in linkage and can be used to
        resolve external symbol references.</dd>
@@ -673,8 +670,8 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
     declarations), they are accessible outside of the current module.</p>
  
  <p>It is illegal for a function <i>declaration</i> to have any linkage type
-   other than "externally visible", <tt>dllimport</tt>
-   or <tt>extern_weak</tt>.</p>
+   other than <tt>external</tt>, <tt>dllimport</tt>
+  or <tt>extern_weak</tt>.</p>
  
  <p>Aliases can have only <tt>external</tt>, <tt>internal</tt>, <tt>weak</tt>
     or <tt>weak_odr</tt> linkages.</p>
@@ -937,7 +934,7 @@ define i32 @main() {   <i>; i32()* </i>&nbsp;
     alignments must be a power of 2.</p>
  
  <p>If the <tt>unnamed_addr</tt> attribute is given, the address is known to not
-  be significant and two identical functions can be merged</p>.
+   be significant and two identical functions can be merged.</p>
  
  <h5>Syntax:</h5>
  <pre class="doc_code">
@@ -1156,14 +1153,6 @@ define void @f() optsize { ... }
        function into callers whenever possible, ignoring any active inlining size
        threshold for this caller.</dd>
  
-  <dt><tt><b>hotpatch</b></tt></dt>
-  <dd>This attribute indicates that the function should be 'hotpatchable',
-      meaning the function can be patched and/or hooked even while it is
-      loaded into memory. On x86, the function prologue will be preceded
-      by six bytes of padding and will begin with a two-byte instruction.
-      Most of the functions in the Windows system DLLs in Windows XP SP2 or
-      higher were compiled in this fashion.</dd>
-
    <dt><tt><b>nonlazybind</b></tt></dt>
    <dd>This attribute suppresses lazy symbol binding for the function. This
        may make calls to the function faster, at the cost of extra program
@@ -1247,6 +1236,19 @@ define void @f() optsize { ... }
        function that doesn't have an <tt>sspreq</tt> attribute or which has
        an <tt>ssp</tt> attribute, then the resulting function will have
        an <tt>sspreq</tt> attribute.</dd>
+
+  <dt><tt><b><a name="uwtable">uwtable</a></b></tt></dt>
+  <dd>This attribute indicates that the ABI being targeted requires that
+      an unwind table entry be produced for this function even if we can
+      show that no exceptions pass through it. This is normally the case for
+      the ELF x86-64 ABI, but it can be disabled for some compilation
+      units.</dd>
+
+  <dt><tt><b><a name="returns_twice">returns_twice</a></b></tt></dt>
+  <dd>This attribute indicates that this function can return
+  twice. The C <code>setjmp</code> is an example of such a function.
+  The compiler disables some optimizations (like tail calls) in the caller of
+  these functions.</dd>
  </dl>
  
  </div>
@@ -1307,6 +1309,13 @@ target datalayout = "<i>layout specification</i>"
        the bits with the least significance have the lowest address
        location.</dd>
  
+  <dt><tt>S<i>size</i></tt></dt>
+  <dd>Specifies the natural alignment of the stack in bits. Alignment promotion
+      of stack variables is limited to the natural stack alignment to avoid
+      dynamic stack realignment. The stack alignment must be a multiple of
+      8 bits. If omitted, the natural stack alignment defaults to "unspecified",
+      which does not prevent any alignment promotions.</dd>
+
    <dt><tt>p:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
    <dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
        <i>preferred</i> alignments. All sizes are in bits. Specifying
@@ -1387,6 +1396,22 @@ target datalayout = "<i>layout specification</i>"
        implemented in terms of 64 &lt;2 x double&gt;, for example.</li>
  </ol>
  
+<p>The function of the data layout string may not be what you expect.  Notably,
+   this is not a specification from the frontend of what alignment the code
+   generator should use.</p>
+
+<p>Instead, if specified, the target data layout is required to match what the 
+   ultimate <em>code generator</em> expects.  This string is used by the 
+   mid-level optimizers to
+   improve code, and this only works if it matches what the ultimate code 
+   generator uses.  If you would like to generate IR that does not embed this
+   target-specific detail into the IR, then you don't have to specify the 
+   string.  This will disable some optimizations that require precise layout
+   information, but this also prevents those optimizations from introducing
+   target specificity into the IR.</p>
+
+
+
  </div>
  
  <!-- ======================================================================= -->
@@ -1471,6 +1496,185 @@ synchronization behavior.</p>
  
  </div>
  
+<!-- ======================================================================= -->
+<h3>
+  <a name="memmodel">Memory Model for Concurrent Operations</a>
+</h3>
+
+<div>
+
+<p>The LLVM IR does not define any way to start parallel threads of execution
+or to register signal handlers. Nonetheless, there are platform-specific
+ways to create them, and we define LLVM IR's behavior in their presence. This
+model is inspired by the C++0x memory model.</p>
+
+<p>For a more informal introduction to this model, see the
+<a href="Atomics.html">LLVM Atomic Instructions and Concurrency Guide</a>.</p>
+
+<p>We define a <i>happens-before</i> partial order as the least partial order
+that</p>
+<ul>
+  <li>Is a superset of single-thread program order, and</li>
+  <li>When a <i>synchronizes-with</i> <tt>b</tt>, includes an edge from
+      <tt>a</tt> to <tt>b</tt>. <i>Synchronizes-with</i> pairs are introduced
+      by platform-specific techniques, like pthread locks, thread
+      creation, thread joining, etc., and by atomic instructions.
+      (See also <a href="#ordering">Atomic Memory Ordering Constraints</a>).
+      </li>
+</ul>
+
+<p>Note that program order does not introduce <i>happens-before</i> edges
+between a thread and signals executing inside that thread.</p>
+
+<p>Every (defined) read operation (load instructions, memcpy, atomic
+loads/read-modify-writes, etc.) <var>R</var> reads a series of bytes written by
+(defined) write operations (store instructions, atomic
+stores/read-modify-writes, memcpy, etc.). For the purposes of this section,
+initialized globals are considered to have a write of the initializer which is
+atomic and happens before any other read or write of the memory in question.
+For each byte of a read <var>R</var>, <var>R<sub>byte</sub></var> may see
+any write to the same byte, except:</p>
+
+<ul>
+  <li>If <var>write<sub>1</sub></var> happens before
+      <var>write<sub>2</sub></var>, and <var>write<sub>2</sub></var> happens
+      before <var>R<sub>byte</sub></var>, then <var>R<sub>byte</sub></var>
+      does not see <var>write<sub>1</sub></var>.</li>
+  <li>If <var>R<sub>byte</sub></var> happens before
+      <var>write<sub>3</sub></var>, then <var>R<sub>byte</sub></var> does not
+      see <var>write<sub>3</sub></var>.</li>
+</ul>
+
+<p>Given that definition, <var>R<sub>byte</sub></var> is defined as follows:</p>
+<ul>
+  <li>If <var>R</var> is volatile, the result is target-dependent. (Volatile
+      is supposed to give guarantees which can support
+      <code>sig_atomic_t</code> in C/C++, and may be used for accesses to
+      addresses which do not behave like normal memory.  It does not generally
+      provide cross-thread synchronization.)</li>
+  <li>Otherwise, if there is no write to the same byte that happens before
+    <var>R<sub>byte</sub></var>, <var>R<sub>byte</sub></var> returns 
+    <tt>undef</tt> for that byte.</li>
+  <li>Otherwise, if <var>R<sub>byte</sub></var> may see exactly one write,
+      <var>R<sub>byte</sub></var> returns the value written by that
+      write.</li>
+  <li>Otherwise, if <var>R</var> is atomic, and all the writes
+      <var>R<sub>byte</sub></var> may see are atomic, it chooses one of the
+      values written.  See the <a href="#ordering">Atomic Memory Ordering
+      Constraints</a> section for additional constraints on how the choice
+      is made.</li>
+  <li>Otherwise <var>R<sub>byte</sub></var> returns <tt>undef</tt>.</li>
+</ul>
+
+<p><var>R</var> returns the value composed of the series of bytes it read.
+This implies that some bytes within the value may be <tt>undef</tt>
+<b>without</b> the entire value being <tt>undef</tt>. Note that this only
+defines the semantics of the operation; it doesn't mean that targets will
+emit more than one instruction to read the series of bytes.</p>
+
+<p>Note that in cases where none of the atomic instructions are used, this model
+places only one restriction on IR transformations on top of what is required
+for single-threaded execution: introducing a store to a byte which might not
+otherwise be stored is not allowed in general.  (Specifically, in the case
+where another thread might write to and read from an address, introducing a
+store can change a load that may see exactly one write into a load that may
+see multiple writes.)</p>
+
+<!-- FIXME: This model assumes all targets where concurrency is relevant have
+a byte-size store which doesn't affect adjacent bytes.  As far as I can tell,
+none of the backends currently in the tree fall into this category; however,
+there might be targets which care.  If there are, we want a paragraph
+like the following:
+
+Targets may specify that stores narrower than a certain width are not
+available; on such a target, for the purposes of this model, treat any
+non-atomic write with an alignment or width less than the minimum width
+as if it writes to the relevant surrounding bytes.
+-->
+
+</div>
+
+<!-- ======================================================================= -->
+<h3>
+      <a name="ordering">Atomic Memory Ordering Constraints</a>
+</h3>
+
+<div>
+
+<p>Atomic instructions (<a href="#i_cmpxchg"><code>cmpxchg</code></a>,
+<a href="#i_atomicrmw"><code>atomicrmw</code></a>,
+<a href="#i_fence"><code>fence</code></a>,
+<a href="#i_load"><code>atomic load</code></a>, and
+<a href="#i_store"><code>atomic store</code></a>) take an ordering parameter
+that determines which other atomic instructions on the same address they
+<i>synchronize with</i>.  These semantics are borrowed from Java and C++0x,
+but are somewhat more colloquial. If these descriptions aren't precise enough,
+check those specs (see spec references in the
+<a href="Atomics.html#introduction">atomics guide</a>).
+<a href="#i_fence"><code>fence</code></a> instructions
+treat these orderings somewhat differently since they don't take an address.
+See that instruction's documentation for details.</p>
+
+<p>For a simpler introduction to the ordering constraints, see the
+<a href="Atomics.html">LLVM Atomic Instructions and Concurrency Guide</a>.</p>
+
+<dl>
+<dt><code>unordered</code></dt>
+<dd>The set of values that can be read is governed by the happens-before
+partial order. A value cannot be read unless some operation wrote it.
+This is intended to provide a guarantee strong enough to model Java's
+non-volatile shared variables.  This ordering cannot be specified for
+read-modify-write operations; it is not strong enough to make them atomic
+in any interesting way.</dd>
+<dt><code>monotonic</code></dt>
+<dd>In addition to the guarantees of <code>unordered</code>, there is a single
+total order for modifications by <code>monotonic</code> operations on each
+address. All modification orders must be compatible with the happens-before
+order. There is no guarantee that the modification orders can be combined to
+a global total order for the whole program (and this often will not be
+possible). The read in an atomic read-modify-write operation
+(<a href="#i_cmpxchg"><code>cmpxchg</code></a> and
+<a href="#i_atomicrmw"><code>atomicrmw</code></a>)
+reads the value in the modification order immediately before the value it
+writes. If one atomic read happens before another atomic read of the same
+address, the later read must see the same value or a later value in the
+address's modification order. This disallows reordering of
+<code>monotonic</code> (or stronger) operations on the same address. If an
+address is written <code>monotonic</code>ally by one thread, and other threads
+<code>monotonic</code>ally read that address repeatedly, the other threads must
+eventually see the write. This corresponds to the C++0x/C1x
+<code>memory_order_relaxed</code>.</dd>
+<dt><code>acquire</code></dt>
+<dd>In addition to the guarantees of <code>monotonic</code>,
+a <i>synchronizes-with</i> edge may be formed with a <code>release</code>
+operation. This is intended to model C++'s <code>memory_order_acquire</code>.</dd>
+<dt><code>release</code></dt>
+<dd>In addition to the guarantees of <code>monotonic</code>, if this operation
+writes a value which is subsequently read by an <code>acquire</code> operation,
+it <i>synchronizes-with</i> that operation.  (This isn't a complete
+description; see the C++0x definition of a release sequence.) This corresponds
+to the C++0x/C1x <code>memory_order_release</code>.</dd>
+<dt><code>acq_rel</code> (acquire+release)</dt><dd>Acts as both an
+<code>acquire</code> and <code>release</code> operation on its address.
+This corresponds to the C++0x/C1x <code>memory_order_acq_rel</code>.</dd>
+<dt><code>seq_cst</code> (sequentially consistent)</dt>
+<dd>In addition to the guarantees of <code>acq_rel</code>
+(<code>acquire</code> for an operation which only reads, <code>release</code>
+for an operation which only writes), there is a global total order on all
+sequentially-consistent operations on all addresses, which is consistent with
+the <i>happens-before</i> partial order and with the modification orders of
+all the affected addresses. Each sequentially-consistent read sees the last
+preceding write to the same address in this global order. This corresponds
+to the C++0x/C1x <code>memory_order_seq_cst</code> and Java volatile.</dd>
+</dl>
+
+<p id="singlethread">If an atomic operation is marked <code>singlethread</code>,
+it only <i>synchronizes with</i> or participates in modification and seq_cst
+total orderings with other operations running in the same thread (for example,
+in signal handlers).</p>
+
+</div>
+
  </div>
  
  <!-- *********************************************************************** -->
@@ -1534,7 +1738,6 @@ synchronization behavior.</p>
            <a href="#t_function">function</a>,
            <a href="#t_pointer">pointer</a>,
            <a href="#t_struct">structure</a>,
-          <a href="#t_pstruct">packed structure</a>,
            <a href="#t_vector">vector</a>,
            <a href="#t_opaque">opaque</a>.
        </td>
@@ -1702,7 +1905,6 @@ synchronization behavior.</p>
     possible to have a two dimensional array, using an array as the element type
     of another array.</p>
  
-   
  <!-- _______________________________________________________________________ -->
  <h4>
    <a name="t_aggregate">Aggregate Types</a>
@@ -1841,9 +2043,7 @@ synchronization behavior.</p>
  
  <h5>Overview:</h5>
  <p>The structure type is used to represent a collection of data members together
-   in memory.  The packing of the field types is defined to match the ABI of the
-   underlying processor.  The elements of a structure may be any type that has a
-   size.</p>
+  in memory.  The elements of a structure may be any type that has a size.</p>
  
  <p>Structures in memory are accessed using '<tt><a href="#i_load">load</a></tt>'
     and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field
@@ -1851,66 +2051,78 @@ synchronization behavior.</p>
     Structures in registers are accessed using the
     '<tt><a href="#i_extractvalue">extractvalue</a></tt>' and
     '<tt><a href="#i_insertvalue">insertvalue</a></tt>' instructions.</p>
+  
+<p>Structures may optionally be "packed" structures, which indicate that the 
+  alignment of the struct is one byte, and that there is no padding between
+  the elements.  In non-packed structs, padding between field types is inserted
+  as defined by the TargetData string in the module, which is required to match
+  what the underlying code generator expects.</p>
+
+<p>Structures can either be "literal" or "identified".  A literal structure is
+  defined inline with other types (e.g. <tt>{i32, i32}*</tt>) whereas identified
+  types are always defined at the top level with a name.  Literal types are
+  uniqued by their contents and can never be recursive or opaque since there is
+  no way to write one.  Identified types can be recursive, can be opaque, and are
+  never uniqued.
+</p>
+  
  <h5>Syntax:</h5>
  <pre>
-  { &lt;type list&gt; }
+  %T1 = type { &lt;type list&gt; }     <i>; Identified normal struct type</i>
+  %T2 = type &lt;{ &lt;type list&gt; }&gt;   <i>; Identified packed struct type</i>
  </pre>
-
+  
  <h5>Examples:</h5>
  <table class="layout">
    <tr class="layout">
      <td class="left"><tt>{ i32, i32, i32 }</tt></td>
      <td class="left">A triple of three <tt>i32</tt> values</td>
-  </tr><tr class="layout">
+  </tr>
+  <tr class="layout">
      <td class="left"><tt>{&nbsp;float,&nbsp;i32&nbsp;(i32)&nbsp;*&nbsp;}</tt></td>
      <td class="left">A pair, where the first element is a <tt>float</tt> and the
        second element is a <a href="#t_pointer">pointer</a> to a
        <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
        an <tt>i32</tt>.</td>
    </tr>
+  <tr class="layout">
+    <td class="left"><tt>&lt;{ i8, i32 }&gt;</tt></td>
+    <td class="left">A packed struct known to be 5 bytes in size.</td>
+  </tr>
  </table>
  
  </div>
-
+  
  <!-- _______________________________________________________________________ -->
  <h4>
-  <a name="t_pstruct">Packed Structure Type</a>
+  <a name="t_opaque">Opaque Structure Types</a>
  </h4>
  
  <div>
  
  <h5>Overview:</h5>
-<p>The packed structure type is used to represent a collection of data members
-   together in memory.  There is no padding between fields.  Further, the
-   alignment of a packed structure is 1 byte.  The elements of a packed
-   structure may be any type that has a size.</p>
-
-<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt> and
-   '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field with
-   the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
+<p>Opaque structure types are used to represent named structure types that do
+   not have a body specified.  This corresponds (for example) to the C notion of
+   a forward declared structure.</p>
  
  <h5>Syntax:</h5>
  <pre>
-  &lt; { &lt;type list&gt; } &gt;
+  %X = type opaque
+  %52 = type opaque
  </pre>
  
  <h5>Examples:</h5>
  <table class="layout">
    <tr class="layout">
-    <td class="left"><tt>&lt; { i32, i32, i32 } &gt;</tt></td>
-    <td class="left">A triple of three <tt>i32</tt> values</td>
-  </tr><tr class="layout">
-  <td class="left">
-<tt>&lt;&nbsp;{&nbsp;float,&nbsp;i32&nbsp;(i32)*&nbsp;}&nbsp;&gt;</tt></td>
-    <td class="left">A pair, where the first element is a <tt>float</tt> and the
-      second element is a <a href="#t_pointer">pointer</a> to a
-      <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
-      an <tt>i32</tt>.</td>
+    <td class="left"><tt>opaque</tt></td>
+    <td class="left">An opaque type.</td>
    </tr>
  </table>
  
  </div>
  
+
+
  <!-- _______________________________________________________________________ -->
  <h4>
    <a name="t_pointer">Pointer Type</a>
@@ -1977,8 +2189,8 @@ synchronization behavior.</p>
  </pre>
  
  <p>The number of elements is a constant integer value larger than 0; elementtype
-   may be any integer or floating point type.  Vectors of size zero are not
-   allowed, and pointers are not allowed as the element type.</p>
+   may be any integer or floating point type, or a pointer to these types.
+   Vectors of size zero are not allowed. </p>
  
  <h5>Examples:</h5>
  <table class="layout">
@@ -1994,33 +2206,9 @@ synchronization behavior.</p>
      <td class="left"><tt>&lt;2 x i64&gt;</tt></td>
      <td class="left">Vector of 2 64-bit integer values.</td>
    </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_opaque">Opaque Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>Opaque types are used to represent unknown types in the system.  This
-   corresponds (for example) to the C notion of a forward declared structure
-   type.  In LLVM, opaque types can eventually be resolved to any type (not just
-   a structure type).</p>
-
-<h5>Syntax:</h5>
-<pre>
-  opaque
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
    <tr class="layout">
-    <td class="left"><tt>opaque</tt></td>
-    <td class="left">An opaque type.</td>
+    <td class="left"><tt>&lt;4 x i64*&gt;</tt></td>
+    <td class="left">Vector of 4 pointers to 64-bit integer values.</td>
    </tr>
  </table>
  
@@ -2028,54 +2216,6 @@ synchronization behavior.</p>
  
  </div>
  
-<!-- ======================================================================= -->
-<h3>
-  <a name="t_uprefs">Type Up-references</a>
-</h3>
-
-<div>
-
-<h5>Overview:</h5>
-<p>An "up reference" allows you to refer to a lexically enclosing type without
-   requiring it to have a name. For instance, a structure declaration may
-   contain a pointer to any of the types it is lexically a member of.  Example
-   of up references (with their equivalent as named type declarations)
-   include:</p>
-
-<pre>
-   { \2 * }                %x = type { %x* }
-   { \2 }*                 %y = type { %y }*
-   \1*                     %z = type %z*
-</pre>
-
-<p>An up reference is needed by the asmprinter for printing out cyclic types
-   when there is no declared name for a type in the cycle.  Because the
-   asmprinter does not want to print out an infinite type string, it needs a
-   syntax to handle recursive types that have no names (all names are optional
-   in llvm IR).</p>
-
-<h5>Syntax:</h5>
-<pre>
-   \&lt;level&gt;
-</pre>
-
-<p>The level is the count of the lexical type that is being referred to.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>\1*</tt></td>
-    <td class="left">Self-referential pointer.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>{ { \3*, i8 }, i32 }</tt></td>
-    <td class="left">Recursive structure where the upref refers to the out-most
-                     structure.</td>
-  </tr>
-</table>
-
-</div>
-
  </div>
  
  <!-- *********************************************************************** -->
@@ -2433,7 +2573,7 @@ b: unreachable
  </ul>
  
  <p>Whenever a trap value is generated, all values which depend on it evaluate
-   to trap. If they have side effects, the evoke their side effects as if each
+   to trap. If they have side effects, they evoke their side effects as if each
     operand with a trap value were undef. If they have externally-visible side
     effects, the behavior is undefined.</p>
  
@@ -2449,7 +2589,7 @@ entry:
    store i32 %trap, i32* @g           ; Trap value conceptually stored to memory.
    %trap2 = load i32* @g              ; Returns a trap value, not just undef.
  
-  volatile store i32 %trap, i32* @g  ; External observation; undefined behavior.
+  store volatile i32 %trap, i32* @g  ; External observation; undefined behavior.
  
    %narrowaddr = bitcast i32* @g to i16*
    %wideaddr = bitcast i32* @g to i64*
@@ -2460,7 +2600,7 @@ entry:
    br i1 %cmp, label %true, label %end ; Branch to either destination.
  
  true:
-  volatile store i32 0, i32* @g      ; This is control-dependent on %cmp, so
+  store volatile i32 0, i32* @g      ; This is control-dependent on %cmp, so
                                       ; it has undefined behavior.
    br label %end
  
@@ -2470,7 +2610,7 @@ end:
                                       ; control-dependent on %cmp, so this
                                       ; always results in a trap value.
  
-  volatile store i32 0, i32* @g      ; This would depend on the store in %true
+  store volatile i32 0, i32* @g      ; This would depend on the store in %true
                                       ; if %cmp is true, or the store in %entry
                                       ; otherwise, so this is undefined behavior.
  
@@ -2483,7 +2623,7 @@ second_true:
    ret void
  
  second_end:
-  volatile store i32 0, i32* @g      ; This time, the instruction always depends
+  store volatile i32 0, i32* @g      ; This time, the instruction always depends
                                       ; on the store in %end. Also, it is
                                       ; control-equivalent to %end, so this is
                                       ; well-defined (again, ignoring earlier
@@ -2666,7 +2806,7 @@ second_end:
  <div>
  
  <p>LLVM supports inline assembler expressions (as opposed
-   to <a href="#moduleasm"> Module-Level Inline Assembly</a>) through the use of
+   to <a href="#moduleasm">Module-Level Inline Assembly</a>) through the use of
     a special value.  This value represents the inline assembler as a string
     (containing the instructions to emit), a list of operand constraints (stored
     as a string), a flag that indicates whether or not the inline asm
@@ -2708,23 +2848,27 @@ call void asm alignstack "eieio", ""()
  <p>If both keywords appear the '<tt>sideeffect</tt>' keyword must come
     first.</p>
  
+<!--
  <p>TODO: The format of the asm and constraints string still need to be
     documented here.  Constraints on what can be done (e.g. duplication, moving,
     etc need to be documented).  This is probably best done by reference to
     another document that covers inline asm from a holistic perspective.</p>
+  -->
  
+<!-- _______________________________________________________________________ -->
  <h4>
-<a name="inlineasm_md">Inline Asm Metadata</a>
+  <a name="inlineasm_md">Inline Asm Metadata</a>
  </h4>
  
  <div>
  
-<p>The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode
-   attached to it that contains a list of constant integers.  If present, the
-  code generator will use the integer as the location cookie value when report
-   errors through the LLVMContext error reporting mechanisms.  This allows a
-   front-end to correlate backend errors that occur with inline asm back to the
-   source code that produced it.  For example:</p>
+<p>The call instructions that wrap inline asm nodes may have a
+   "<tt>!srcloc</tt>" MDNode attached to them that contains a list of constant
+   integers.  If present, the code generator will use the integer as the
+   location cookie value when reporting errors through the <tt>LLVMContext</tt>
+   error reporting mechanisms.  This allows a front-end to correlate backend
+   errors that occur with inline asm back to the source code that produced it.
+   For example:</p>
  
  <pre class="doc_code">
  call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
@@ -2733,7 +2877,7 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
  </pre>
  
  <p>It is up to the front-end to make sense of the magic numbers it places in the
-   IR.  If the MDNode contains multiple constants, the code generator will use
+   IR. If the MDNode contains multiple constants, the code generator will use
     the one that corresponds to the line of the asm that the error occurs on.</p>
  
  </div>
@@ -2755,20 +2899,33 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
     preceding exclamation point ('<tt>!</tt>').</p>
  
  <p>A metadata string is a string surrounded by double quotes.  It can contain
-   any character by escaping non-printable characters with "\xx" where "xx" is
-   the two digit hex code.  For example: "<tt>!"test\00"</tt>".</p>
+   any character by escaping non-printable characters with "<tt>\xx</tt>" where
+   "<tt>xx</tt>" is the two digit hex code.  For example:
+   "<tt>!"test\00"</tt>".</p>
  
  <p>Metadata nodes are represented with notation similar to structure constants
     (a comma separated list of elements, surrounded by braces and preceded by an
-   exclamation point).  For example: "<tt>!{ metadata !"test\00", i32
-   10}</tt>".  Metadata nodes can have any values as their operand.</p>
+   exclamation point). Metadata nodes can have any values as their operand. For
+   example:</p>
+
+<div class="doc_code">
+<pre>
+!{ metadata !"test\00", i32 10}
+</pre>
+</div>
  
  <p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of 
     metadata nodes, which can be looked up in the module symbol table. For
-   example: "<tt>!foo =  metadata !{!4, !3}</tt>".
+   example:</p>
+
+<div class="doc_code">
+<pre>
+!foo =  metadata !{!4, !3}
+</pre>
+</div>
  
  <p>Metadata can be used as function arguments. Here <tt>llvm.dbg.value</tt> 
-   function is using two metadata arguments.</p>
+   function is using two metadata arguments:</p>
  
  <div class="doc_code">
  <pre>
@@ -2777,7 +2934,8 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
  </div>
  
  <p>Metadata can be attached with an instruction. Here metadata <tt>!21</tt> is
-   attached with <tt>add</tt> instruction using <tt>!dbg</tt> identifier.</p>
+   attached to the <tt>add</tt> instruction using the <tt>!dbg</tt>
+   identifier:</p>
  
  <div class="doc_code">
  <pre>
@@ -2785,6 +2943,87 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
  </pre>
  </div>
  
+<p>More information about specific metadata nodes recognized by the optimizers
+   and code generator is found below.</p>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+  <a name="tbaa">'<tt>tbaa</tt>' Metadata</a>
+</h4>
+
+<div>
+
+<p>In LLVM IR, memory does not have types, so LLVM's own type system is not
+   suitable for doing TBAA. Instead, metadata is added to the IR to describe
+   a type system of a higher level language. This can be used to implement
+   typical C/C++ TBAA, but it can also be used to implement custom alias
+   analysis behavior for other languages.</p>
+
+<p>The current metadata format is very simple. TBAA metadata nodes have up to
+   three fields, e.g.:</p>
+
+<div class="doc_code">
+<pre>
+!0 = metadata !{ metadata !"an example type tree" }
+!1 = metadata !{ metadata !"int", metadata !0 }
+!2 = metadata !{ metadata !"float", metadata !0 }
+!3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+</pre>
+</div>
+
+<p>The first field is an identity field. It can be any value, usually
+   a metadata string, which uniquely identifies the type. The most important
+   name in the tree is the name of the root node. Two trees with
+   different root node names are entirely disjoint, even if they
+   have leaves with common names.</p>
+
+<p>The second field identifies the type's parent node in the tree, or
+   is null or omitted for a root node. A type is considered to alias
+   all of its descendants and all of its ancestors in the tree. Also,
+   a type is considered to alias all types in other trees, so that
+   bitcode produced from multiple front-ends is handled conservatively.</p>
+
+<p>If the third field is present, it's an integer which if equal to 1
+   indicates that the type is "constant" (meaning
+   <tt>pointsToConstantMemory</tt> should return true; see
+   <a href="AliasAnalysis.html#OtherItfs">other useful
+   <tt>AliasAnalysis</tt> methods</a>).</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+  <a name="fpaccuracy">'<tt>fpaccuracy</tt>' Metadata</a>
+</h4>
+ 
+<div>
+
+<p><tt>fpaccuracy</tt> metadata may be attached to any instruction of floating
+   point type.  It expresses the maximum relative error of the result of
+   that instruction, in ULPs. ULP is defined as follows:</p>
+
+<blockquote>
+
+<p>If <tt>x</tt> is a real number that lies between two finite consecutive
+   floating-point numbers <tt>a</tt> and <tt>b</tt>, without being equal to one
+   of them, then <tt>ulp(x) = |b - a|</tt>, otherwise <tt>ulp(x)</tt> is the
+   distance between the two non-equal finite floating-point numbers nearest
+   <tt>x</tt>. Moreover, <tt>ulp(NaN)</tt> is <tt>NaN</tt>.</p>
+
+</blockquote>
+
+<p>The maximum relative error may be any rational number.  The metadata node
+   shall consist of a pair of unsigned integers respectively representing
+   the numerator and denominator.  For example, 2.5 ULP:</p>
+
+<div class="doc_code">
+<pre>
+!0 = metadata !{ i32 5, i32 2 }
+</pre>
+</div>
+
+</div>
+
  </div>
  
  </div>
@@ -2813,26 +3052,29 @@ href="#linkage_appending">appending linkage</a>.  This array contains a list of
  pointers to global variables and functions which may optionally have a pointer
  cast formed of bitcast or getelementptr.  For example, a legal use of it is:</p>
  
+<div class="doc_code">
  <pre>
-  @X = global i8 4
-  @Y = global i32 123
+@X = global i8 4
+@Y = global i32 123
  
-  @llvm.used = appending global [2 x i8*] [
-     i8* @X,
-     i8* bitcast (i32* @Y to i8*)
-  ], section "llvm.metadata"
+@llvm.used = appending global [2 x i8*] [
+   i8* @X,
+   i8* bitcast (i32* @Y to i8*)
+], section "llvm.metadata"
  </pre>
+</div>
  
  <p>If a global variable appears in the <tt>@llvm.used</tt> list, then the
-compiler, assembler, and linker are required to treat the symbol as if there is
-a reference to the global that it cannot see.  For example, if a variable has
-internal linkage and no references other than that from the <tt>@llvm.used</tt>
-list, it cannot be deleted.  This is commonly used to represent references from
-inline asms and other things the compiler cannot "see", and corresponds to
-"attribute((used))" in GNU C.</p>
+   compiler, assembler, and linker are required to treat the symbol as if there
+   is a reference to the global that it cannot see.  For example, if a variable
+   has internal linkage and no references other than that from
+   the <tt>@llvm.used</tt> list, it cannot be deleted.  This is commonly used to
+   represent references from inline asms and other things the compiler cannot
+   "see", and corresponds to "<tt>attribute((used))</tt>" in GNU C.</p>
  
  <p>On some targets, the code generator must emit a directive to the assembler or
-object file to prevent the assembler and linker from molesting the symbol.</p>
+   object file to prevent the assembler and linker from molesting the
+   symbol.</p>
  
  </div>
  
@@ -2846,13 +3088,13 @@ object file to prevent the assembler and linker from molesting the symbol.</p>
  <div>
  
  <p>The <tt>@llvm.compiler.used</tt> directive is the same as the
-<tt>@llvm.used</tt> directive, except that it only prevents the compiler from
-touching the symbol.  On targets that support it, this allows an intelligent
-linker to optimize references to the symbol without being impeded as it would be
-by <tt>@llvm.used</tt>.</p>
+   <tt>@llvm.used</tt> directive, except that it only prevents the compiler from
+   touching the symbol.  On targets that support it, this allows an intelligent
+   linker to optimize references to the symbol without being impeded as it would
+   be by <tt>@llvm.used</tt>.</p>
  
  <p>This is a rare construct that should only be used in rare circumstances, and
-should not be exposed to source languages.</p>
+   should not be exposed to source languages.</p>
  
  </div>
  
@@ -2862,12 +3104,19 @@ should not be exposed to source languages.</p>
  </h3>
  
  <div>
+
+<div class="doc_code">
  <pre>
  %0 = type { i32, void ()* }
  @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
  </pre>
-<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor functions and associated priorities.  The functions referenced by this array will be called in ascending order of priority (i.e. lowest first) when the module is loaded.  The order of functions with the same priority is not defined.
-</p>
+</div>
+
+<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor
+   functions and associated priorities.  The functions referenced by this array
+   will be called in ascending order of priority (i.e. lowest first) when the
+   module is loaded.  The order of functions with the same priority is not
+   defined.</p>
  
  </div>
  
@@ -2877,13 +3126,18 @@ should not be exposed to source languages.</p>
  </h3>
  
  <div>
+
+<div class="doc_code">
  <pre>
  %0 = type { i32, void ()* }
  @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
  </pre>
+</div>
  
-<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions and associated priorities.  The functions referenced by this array will be called in descending order of priority (i.e. highest first) when the module is loaded.  The order of functions with the same priority is not defined.
-</p>
+<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions
+   and associated priorities.  The functions referenced by this array will be
+   called in descending order of priority (i.e. highest first) when the module
+   is unloaded.  The order of functions with the same priority is not defined.</p>
  
  </div>
  
@@ -2916,14 +3170,15 @@ should not be exposed to source languages.</p>
     control flow, not values (the one exception being the
     '<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
  
-<p>There are seven different terminator instructions: the
-   '<a href="#i_ret"><tt>ret</tt></a>' instruction, the
-   '<a href="#i_br"><tt>br</tt></a>' instruction, the
-   '<a href="#i_switch"><tt>switch</tt></a>' instruction, the
-   '<a href="#i_indirectbr">'<tt>indirectbr</tt></a>' Instruction, the
-   '<a href="#i_invoke"><tt>invoke</tt></a>' instruction, the
-   '<a href="#i_unwind"><tt>unwind</tt></a>' instruction, and the
-   '<a href="#i_unreachable"><tt>unreachable</tt></a>' instruction.</p>
+<p>The terminator instructions are: 
+   '<a href="#i_ret"><tt>ret</tt></a>', 
+   '<a href="#i_br"><tt>br</tt></a>',
+   '<a href="#i_switch"><tt>switch</tt></a>', 
+   '<a href="#i_indirectbr"><tt>indirectbr</tt></a>',
+   '<a href="#i_invoke"><tt>invoke</tt></a>', 
+   '<a href="#i_unwind"><tt>unwind</tt></a>',
+   '<a href="#i_resume"><tt>resume</tt></a>', and 
+   '<a href="#i_unreachable"><tt>unreachable</tt></a>'.</p>
  
  <!-- _______________________________________________________________________ -->
  <h4>
@@ -2984,7 +3239,8 @@ should not be exposed to source languages.</p>
  
  <h5>Syntax:</h5>
  <pre>
-  br i1 &lt;cond&gt;, label &lt;iftrue&gt;, label &lt;iffalse&gt;<br>  br label &lt;dest&gt;          <i>; Unconditional branch</i>
+  br i1 &lt;cond&gt;, label &lt;iftrue&gt;, label &lt;iffalse&gt;
+  br label &lt;dest&gt;          <i>; Unconditional branch</i>
  </pre>
  
  <h5>Overview:</h5>
@@ -3144,6 +3400,17 @@ IfUnequal:
     instruction, control is interrupted and continued at the dynamically nearest
     "exception" label.</p>
  
+<p>The '<tt>exception</tt>' label is a
+   <i><a href="ExceptionHandling.html#overview">landing pad</a></i> for the
+   exception. As such, the '<tt>exception</tt>' label is required to have the
+   "<a href="#i_landingpad"><tt>landingpad</tt></a>" instruction, which contains
+   the information about the behavior of the program after unwinding
+   happens, as its first non-PHI instruction. The restrictions on the
+   "<tt>landingpad</tt>" instruction tightly couple it to the
+   "<tt>invoke</tt>" instruction, so that the important information contained
+   within the "<tt>landingpad</tt>" instruction can't be lost through normal
+   code motion.</p>
+
  <h5>Arguments:</h5>
  <p>This instruction requires several arguments:</p>
  
@@ -3240,6 +3507,40 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
  <p>Note that the code generator does not yet completely support unwind, and
  that the invoke/unwind semantics are likely to change in future versions.</p>
  
+</div>
+
+ <!-- _______________________________________________________________________ -->
+ 
+<h4>
+  <a name="i_resume">'<tt>resume</tt>' Instruction</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+  resume &lt;type&gt; &lt;value&gt;
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>resume</tt>' instruction is a terminator instruction that has no
+   successors.</p>
+
+<h5>Arguments:</h5>
+<p>The '<tt>resume</tt>' instruction requires one argument, which must have the
+   same type as the result of any '<tt>landingpad</tt>' instruction in the same
+   function.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>resume</tt>' instruction resumes propagation of an existing
+   (in-flight) exception whose unwinding was interrupted with
+   a <a href="#i_landingpad"><tt>landingpad</tt></a> instruction.</p>
+
+<h5>Example:</h5>
+<pre>
+  resume { i8*, i32 } %exn
+</pre>
+
  </div>
  
  <!-- _______________________________________________________________________ -->
@@ -4309,7 +4610,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
  
  <h5>Syntax:</h5>
  <pre>
-  &lt;result&gt; = insertvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, &lt;idx&gt;{, <idx>}*    <i>; yields &lt;aggregate type&gt;</i>
+  &lt;result&gt; = insertvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, &lt;idx&gt;{, &lt;idx&gt;}*    <i>; yields &lt;aggregate type&gt;</i>
  </pre>
  
  <h5>Overview:</h5>
@@ -4414,8 +4715,8 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
  
  <h5>Syntax:</h5>
  <pre>
-  &lt;result&gt; = load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]
-  &lt;result&gt; = volatile load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]
+  &lt;result&gt; = load [volatile] &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]
+  &lt;result&gt; = load atomic [volatile] &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt;
    !&lt;index&gt; = !{ i32 1 }
  </pre>
  
@@ -4430,6 +4731,19 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
     number or order of execution of this <tt>load</tt> with other <a
     href="#volatile">volatile operations</a>.</p>
  
+<p>If the <code>load</code> is marked as <code>atomic</code>, it takes an extra
+   <a href="#ordering">ordering</a> and optional <code>singlethread</code>
+   argument.  The <code>release</code> and <code>acq_rel</code> orderings are
+   not valid on <code>load</code> instructions.  Atomic loads produce <a
+   href="#memmodel">defined</a> results when they may see multiple atomic
+   stores.  The type of the pointee must be an integer type whose bit width
+   is a power of two greater than or equal to eight and less than or equal
+   to a target-specific size limit. <code>align</code> must be explicitly 
+   specified on atomic loads, and the load has undefined behavior if the
+   alignment is not set to a value which is at least the size in bytes of
+   the pointee. <code>!nontemporal</code> does not have any defined semantics
+   for atomic loads.</p>
+
  <p>The optional constant <tt>align</tt> argument specifies the alignment of the
     operation (that is, the alignment of the memory address). A value of 0 or an
     omitted <tt>align</tt> argument means that the operation has the preferential
@@ -4473,8 +4787,8 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
  
  <h5>Syntax:</h5>
  <pre>
-  store &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]                   <i>; yields {void}</i>
-  volatile store &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]          <i>; yields {void}</i>
+  store [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]                   <i>; yields {void}</i>
+  store atomic [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt;             <i>; yields {void}</i>
  </pre>
  
  <h5>Overview:</h5>
@@ -4490,6 +4804,19 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
     order of execution of this <tt>store</tt> with other <a
     href="#volatile">volatile operations</a>.</p>
  
+<p>If the <code>store</code> is marked as <code>atomic</code>, it takes an extra
+   <a href="#ordering">ordering</a> and optional <code>singlethread</code>
+   argument.  The <code>acquire</code> and <code>acq_rel</code> orderings aren't
+   valid on <code>store</code> instructions.  Atomic loads produce <a
+   href="#memmodel">defined</a> results when they may see multiple atomic
+   stores. The type of the pointee must be an integer type whose bit width
+   is a power of two greater than or equal to eight and less than or equal
+   to a target-specific size limit. <code>align</code> must be explicitly 
+   specified on atomic stores, and the store has undefined behavior if the
+   alignment is not set to a value which is at least the size in bytes of
+   the pointee. <code>!nontemporal</code> does not have any defined semantics
+   for atomic stores.</p>
+
  <p>The optional constant "align" argument specifies the alignment of the
     operation (that is, the alignment of the memory address). A value of 0 or an
     omitted "align" argument means that the operation has the preferential
@@ -4528,68 +4855,279 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
  
  <!-- _______________________________________________________________________ -->
  <h4>
-   <a name="i_getelementptr">'<tt>getelementptr</tt>' Instruction</a>
+<a name="i_fence">'<tt>fence</tt>' Instruction</a>
  </h4>
  
  <div>
  
  <h5>Syntax:</h5>
  <pre>
-  &lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
-  &lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+  fence [singlethread] &lt;ordering&gt;                   <i>; yields {void}</i>
  </pre>
  
  <h5>Overview:</h5>
-<p>The '<tt>getelementptr</tt>' instruction is used to get the address of a
-   subelement of an <a href="#t_aggregate">aggregate</a> data structure.
-   It performs address calculation only and does not access memory.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is always a pointer, and forms the basis of the
-   calculation. The remaining arguments are indices that indicate which of the
-   elements of the aggregate object are indexed. The interpretation of each
-   index is dependent on the type being indexed into. The first index always
-   indexes the pointer value given as the first argument, the second index
-   indexes a value of the type pointed to (not necessarily the value directly
-   pointed to, since the first index can be non-zero), etc. The first type
-   indexed into must be a pointer value, subsequent types can be arrays,
-   vectors, and structs. Note that subsequent types being indexed into
-   can never be pointers, since that would require loading the pointer before
-   continuing calculation.</p>
-
-<p>The type of each index argument depends on the type it is indexing into.
-   When indexing into a (optionally packed) structure, only <tt>i32</tt>
-   integer <b>constants</b> are allowed.  When indexing into an array, pointer
-   or vector, integers of any width are allowed, and they are not required to be
-   constant.</p>
+<p>The '<tt>fence</tt>' instruction is used to introduce happens-before edges
+between operations.</p>
  
-<p>For example, let's consider a C code fragment and how it gets compiled to
-   LLVM:</p>
+<h5>Arguments:</h5> <p>'<code>fence</code>' instructions take an <a
+href="#ordering">ordering</a> argument which defines what
+<i>synchronizes-with</i> edges they add.  They can only be given
+<code>acquire</code>, <code>release</code>, <code>acq_rel</code>, and
+<code>seq_cst</code> orderings.</p>
  
-<pre class="doc_code">
-struct RT {
-  char A;
-  int B[10][20];
-  char C;
-};
-struct ST {
-  int X;
-  double Y;
-  struct RT Z;
-};
+<h5>Semantics:</h5>
+<p>A fence <var>A</var> which has (at least) <code>release</code> ordering
+semantics <i>synchronizes with</i> a fence <var>B</var> with (at least)
+<code>acquire</code> ordering semantics if and only if there exist atomic
+operations <var>X</var> and <var>Y</var>, both operating on some atomic object
+<var>M</var>, such that <var>A</var> is sequenced before <var>X</var>,
+<var>X</var> modifies <var>M</var> (either directly or through some side effect
+of a sequence headed by <var>X</var>), <var>Y</var> is sequenced before
+<var>B</var>, and <var>Y</var> observes <var>M</var>. This provides a
+<i>happens-before</i> dependency between <var>A</var> and <var>B</var>. Rather
+than an explicit <code>fence</code>, one (but not both) of the atomic operations
+<var>X</var> or <var>Y</var> might provide a <code>release</code> or
+<code>acquire</code> (resp.) ordering constraint and still
+<i>synchronize-with</i> the explicit <code>fence</code> and establish the
+<i>happens-before</i> edge.</p>
+
+<p>A <code>fence</code> which has <code>seq_cst</code> ordering, in addition to
+having both <code>acquire</code> and <code>release</code> semantics specified
+above, participates in the global program order of other <code>seq_cst</code>
+operations and/or fences.</p>
+
+<p>The optional "<a href="#singlethread"><code>singlethread</code></a>" argument
+specifies that the fence only synchronizes with other fences in the same
+thread.  (This is useful for interacting with signal handlers.)</p>
  
-int *foo(struct ST *s) {
-  return &amp;s[1].Z.B[5][13];
-}
+<h5>Example:</h5>
+<pre>
+  fence acquire                          <i>; yields {void}</i>
+  fence singlethread seq_cst             <i>; yields {void}</i>
  </pre>
  
-<p>The LLVM code generated by the GCC frontend is:</p>
+</div>
  
-<pre class="doc_code">
-%RT = <a href="#namedtypes">type</a> { i8 , [10 x [20 x i32]], i8  }
-%ST = <a href="#namedtypes">type</a> { i32, double, %RT }
+<!-- _______________________________________________________________________ -->
+<h4>
+<a name="i_cmpxchg">'<tt>cmpxchg</tt>' Instruction</a>
+</h4>
  
-define i32* @foo(%ST* %s) {
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+  cmpxchg [volatile] &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;cmp&gt;, &lt;ty&gt; &lt;new&gt; [singlethread] &lt;ordering&gt;                   <i>; yields {ty}</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>cmpxchg</tt>' instruction is used to atomically modify memory.
+It loads a value in memory and compares it to a given value. If they are
+equal, it stores a new value into the memory.</p>
+
+<h5>Arguments:</h5>
+<p>There are three arguments to the '<code>cmpxchg</code>' instruction: an
+address to operate on, a value to compare to the value currently at that
+address, and a new value to place at that address if the compared values are
+equal.  The type of '<var>&lt;cmp&gt;</var>' must be an integer type whose
+bit width is a power of two greater than or equal to eight and less than
+or equal to a target-specific size limit. '<var>&lt;cmp&gt;</var>' and
+'<var>&lt;new&gt;</var>' must have the same type, and the type of
+'<var>&lt;pointer&gt;</var>' must be a pointer to that type. If the
+<code>cmpxchg</code> is marked as <code>volatile</code>, then the
+optimizer is not allowed to modify the number or order of execution
+of this <code>cmpxchg</code> with other <a href="#volatile">volatile
+operations</a>.</p>
+
+<!-- FIXME: Extend allowed types. -->
+
+<p>The <a href="#ordering"><var>ordering</var></a> argument specifies how this
+<code>cmpxchg</code> synchronizes with other atomic operations.</p>
+
+<p>The optional "<code>singlethread</code>" argument declares that the
+<code>cmpxchg</code> is only atomic with respect to code (usually signal
+handlers) running in the same thread as the <code>cmpxchg</code>.  Otherwise the
+cmpxchg is atomic with respect to all other code in the system.</p>
+
+<p>The pointer passed into cmpxchg must have alignment greater than or equal to
+the size in memory of the operand.</p>
+
+<h5>Semantics:</h5>
+<p>The contents of memory at the location specified by the
+'<tt>&lt;pointer&gt;</tt>' operand are read and compared to
+'<tt>&lt;cmp&gt;</tt>'; if the read value is equal,
+'<tt>&lt;new&gt;</tt>' is written.  The original value at the location
+is returned.</p>
+
+<p>A successful <code>cmpxchg</code> is a read-modify-write instruction for the
+purpose of identifying <a href="#release_sequence">release sequences</a>.  A
+failed <code>cmpxchg</code> is equivalent to an atomic load with an ordering
+parameter determined by dropping any <code>release</code> part of the
+<code>cmpxchg</code>'s ordering.</p>
+
+<!--
+FIXME: Is compare_exchange_weak() necessary?  (Consider after we've done
+optimization work on ARM.)
+
+FIXME: Is a weaker ordering constraint on failure helpful in practice?
+-->
+
+<h5>Example:</h5>
+<pre>
+entry:
+  %orig = <a href="#i_load">load</a> atomic i32* %ptr unordered, align 4              <i>; yields {i32}</i>
+  <a href="#i_br">br</a> label %loop
+
+loop:
+  %cmp = <a href="#i_phi">phi</a> i32 [ %orig, %entry ], [%old, %loop]
+  %squared = <a href="#i_mul">mul</a> i32 %cmp, %cmp
+  %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared                       <i>; yields {i32}</i>
+  %success = <a href="#i_icmp">icmp</a> eq i32 %cmp, %old
+  <a href="#i_br">br</a> i1 %success, label %done, label %loop
+
+done:
+  ...
+</pre>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+<a name="i_atomicrmw">'<tt>atomicrmw</tt>' Instruction</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+  atomicrmw [volatile] &lt;operation&gt; &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;value&gt; [singlethread] &lt;ordering&gt;                   <i>; yields {ty}</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>atomicrmw</tt>' instruction is used to atomically modify memory.</p>
+
+<h5>Arguments:</h5>
+<p>There are three arguments to the '<code>atomicrmw</code>' instruction: an
+operation to apply, an address whose value to modify, and an argument to the
+operation.  The operation must be one of the following keywords:</p>
+<ul>
+  <li>xchg</li>
+  <li>add</li>
+  <li>sub</li>
+  <li>and</li>
+  <li>nand</li>
+  <li>or</li>
+  <li>xor</li>
+  <li>max</li>
+  <li>min</li>
+  <li>umax</li>
+  <li>umin</li>
+</ul>
+
+<p>The type of '<var>&lt;value&gt;</var>' must be an integer type whose
+bit width is a power of two greater than or equal to eight and less than
+or equal to a target-specific size limit.  The type of the
+'<code>&lt;pointer&gt;</code>' operand must be a pointer to that type.
+If the <code>atomicrmw</code> is marked as <code>volatile</code>, then the
+optimizer is not allowed to modify the number or order of execution of this
+<code>atomicrmw</code> with other <a href="#volatile">volatile
+  operations</a>.</p>
+
+<!-- FIXME: Extend allowed types. -->
+
+<h5>Semantics:</h5>
+<p>The contents of memory at the location specified by the
+'<tt>&lt;pointer&gt;</tt>' operand are atomically read, modified, and written
+back.  The original value at the location is returned.  The modification is
+specified by the <var>operation</var> argument:</p>
+
+<ul>
+  <li>xchg: <code>*ptr = val</code></li>
+  <li>add: <code>*ptr = *ptr + val</code></li>
+  <li>sub: <code>*ptr = *ptr - val</code></li>
+  <li>and: <code>*ptr = *ptr &amp; val</code></li>
+  <li>nand: <code>*ptr = ~(*ptr &amp; val)</code></li>
+  <li>or: <code>*ptr = *ptr | val</code></li>
+  <li>xor: <code>*ptr = *ptr ^ val</code></li>
+  <li>max: <code>*ptr = *ptr &gt; val ? *ptr : val</code> (using a signed comparison)</li>
+  <li>min: <code>*ptr = *ptr &lt; val ? *ptr : val</code> (using a signed comparison)</li>
+  <li>umax: <code>*ptr = *ptr &gt; val ? *ptr : val</code> (using an unsigned comparison)</li>
+  <li>umin: <code>*ptr = *ptr &lt; val ? *ptr : val</code> (using an unsigned comparison)</li>
+</ul>
+
+<h5>Example:</h5>
+<pre>
+  %old = atomicrmw add i32* %ptr, i32 1 acquire                        <i>; yields {i32}</i>
+</pre>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+   <a name="i_getelementptr">'<tt>getelementptr</tt>' Instruction</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+  &lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+  &lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+  &lt;result&gt; = getelementptr &lt;ptr vector&gt; &lt;ptrval&gt;, &lt;vector index type&gt; &lt;idx&gt;
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>getelementptr</tt>' instruction is used to get the address of a
+   subelement of an <a href="#t_aggregate">aggregate</a> data structure.
+   It performs address calculation only and does not access memory.</p>
+
+<h5>Arguments:</h5>
+<p>The first argument is always a pointer or a vector of pointers,
+   and forms the basis of the
+   calculation. The remaining arguments are indices that indicate which of the
+   elements of the aggregate object are indexed. The interpretation of each
+   index is dependent on the type being indexed into. The first index always
+   indexes the pointer value given as the first argument, the second index
+   indexes a value of the type pointed to (not necessarily the value directly
+   pointed to, since the first index can be non-zero), etc. The first type
+   indexed into must be a pointer value, subsequent types can be arrays,
+   vectors, and structs. Note that subsequent types being indexed into
+   can never be pointers, since that would require loading the pointer before
+   continuing calculation.</p>
+
+<p>The type of each index argument depends on the type it is indexing into.
+   When indexing into a (optionally packed) structure, only <tt>i32</tt>
+   integer <b>constants</b> are allowed.  When indexing into an array, pointer
+   or vector, integers of any width are allowed, and they are not required to be
+   constant.  These integers are treated as signed values where relevant.</p>
+
+<p>For example, let's consider a C code fragment and how it gets compiled to
+   LLVM:</p>
+
+<pre class="doc_code">
+struct RT {
+  char A;
+  int B[10][20];
+  char C;
+};
+struct ST {
+  int X;
+  double Y;
+  struct RT Z;
+};
+
+int *foo(struct ST *s) {
+  return &amp;s[1].Z.B[5][13];
+}
+</pre>
+
+<p>The LLVM code generated by the GCC frontend is:</p>
+
+<pre class="doc_code">
+%RT = <a href="#namedtypes">type</a> { i8 , [10 x [20 x i32]], i8  }
+%ST = <a href="#namedtypes">type</a> { i32, double, %RT }
+
+define i32* @foo(%ST* %s) {
  entry:
    %reg = getelementptr %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13
    ret i32* %reg
@@ -4627,18 +5165,22 @@ entry:
     base pointer is not an <i>in bounds</i> address of an allocated object,
     or if any of the addresses that would be formed by successive addition of
     the offsets implied by the indices to the base address with infinitely
-   precise arithmetic are not an <i>in bounds</i> address of that allocated
-   object. The <i>in bounds</i> addresses for an allocated object are all
-   the addresses that point into the object, plus the address one byte past
-   the end.</p>
+   precise signed arithmetic are not an <i>in bounds</i> address of that
+   allocated object. The <i>in bounds</i> addresses for an allocated object
+   are all the addresses that point into the object, plus the address one
+   byte past the end.
+   In cases where the base is a vector of pointers, the <tt>inbounds</tt> keyword
+   applies to each of the computations element-wise.</p>
  
  <p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
-   the base address with silently-wrapping two's complement arithmetic, and
-   the result value of the <tt>getelementptr</tt> may be outside the object
-   pointed to by the base pointer. The result value may not necessarily be
-   used to access memory though, even if it happens to point into allocated
-   storage. See the <a href="#pointeraliasing">Pointer Aliasing Rules</a>
-   section for more information.</p>
+   the base address with silently-wrapping two's complement arithmetic. If the
+   offsets have a different width from the pointer, they are sign-extended or
+   truncated to the width of the pointer. The result value of the
+   <tt>getelementptr</tt> may be outside the object pointed to by the base
+   pointer. The result value may not necessarily be used to access memory
+   though, even if it happens to point into allocated storage. See the
+   <a href="#pointeraliasing">Pointer Aliasing Rules</a> section for more
+   information.</p>
  
  <p>The getelementptr instruction is often confusing.  For some more insight into
     how it works, see <a href="GetElementPtr.html">the getelementptr FAQ</a>.</p>
@@ -4655,6 +5197,13 @@ entry:
      %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
  </pre>
  
+<p>In cases where the pointer argument is a vector of pointers, only a
+   single index may be used, and that index must be a vector of integers with
+   the same number of elements as the pointer vector.  For example: </p>
+<pre class="doc_code">
+ %A = getelementptr &lt;4 x i8*&gt; %ptrs, &lt;4 x i64&gt; %offsets
+</pre>
+
  </div>
  
  </div>
@@ -5027,13 +5576,16 @@ entry:
  </pre>
  
  <h5>Overview:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts the pointer <tt>value</tt> to
-   the integer type <tt>ty2</tt>.</p>
+<p>The '<tt>ptrtoint</tt>' instruction converts the pointer or a vector of
+   pointers <tt>value</tt> to
+   the integer (or vector of integers) type <tt>ty2</tt>.</p>
  
  <h5>Arguments:</h5>
  <p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
-   must be a <a href="#t_pointer">pointer</a> value, and a type to cast it to
-   <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type.</p>
+   must be a value of type <a href="#t_pointer">pointer</a> or a vector of
+    pointers, and a type to cast it to
+   <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> or a vector
+   of integers type.</p>
  
  <h5>Semantics:</h5>
  <p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
@@ -5046,8 +5598,9 @@ entry:
  
  <h5>Example:</h5>
  <pre>
-  %X = ptrtoint i32* %X to i8           <i>; yields truncation on 32-bit architecture</i>
-  %Y = ptrtoint i32* %x to i64          <i>; yields zero extension on 32-bit architecture</i>
+  %X = ptrtoint i32* %P to i8                         <i>; yields truncation on 32-bit architecture</i>
+  %Y = ptrtoint i32* %P to i64                        <i>; yields zero extension on 32-bit architecture</i>
+  %Z = ptrtoint &lt;4 x i32*&gt; %P to &lt;4 x i64&gt;<i>; yields vector zero extension for a vector of addresses on 32-bit architecture</i>
  </pre>
  
  </div>
@@ -5086,6 +5639,7 @@ entry:
    %X = inttoptr i32 255 to i32*          <i>; yields zero extension on 64-bit architecture</i>
    %Y = inttoptr i32 255 to i32*          <i>; yields no-op on 32-bit architecture</i>
    %Z = inttoptr i64 0 to i32*            <i>; yields truncation on 32-bit architecture</i>
+  %Z = inttoptr &lt;4 x i32&gt; %G to &lt;4 x i8*&gt;<i>; yields truncation of vector G to four pointers</i>
  </pre>
  
  </div>
@@ -5120,8 +5674,9 @@ entry:
  <p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
     <tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
     this conversion.  The conversion is done as if the <tt>value</tt> had been
-   stored to memory and read back as type <tt>ty2</tt>. Pointer types may only
-   be converted to other pointer types with this instruction. To convert
+   stored to memory and read back as type <tt>ty2</tt>.
+   Pointer (or vector of pointers) types may only be converted to other pointer
+   (or vector of pointers) types with this instruction. To convert
     pointers to other types, use the <a href="#i_inttoptr">inttoptr</a> or
     <a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
  
@@ -5129,7 +5684,8 @@ entry:
  <pre>
    %X = bitcast i8 255 to i8              <i>; yields i8 :-1</i>
    %Y = bitcast i32* %x to sint*          <i>; yields sint*:%x</i>
-  %Z = bitcast &lt;2 x int&gt; %V to i64;      <i>; yields i64: %V</i>
+  %Z = bitcast &lt;2 x int&gt; %V to i64;        <i>; yields i64: %V</i>
+  %Z = bitcast &lt;2 x i32*&gt; %V to &lt;2 x i64*&gt; <i>; yields &lt;2 x i64*&gt;</i>
  </pre>
  
  </div>
@@ -5160,8 +5716,8 @@ entry:
  
  <h5>Overview:</h5>
  <p>The '<tt>icmp</tt>' instruction returns a boolean value or a vector of
-   boolean values based on comparison of its two integer, integer vector, or
-   pointer operands.</p>
+   boolean values based on comparison of its two integer, integer vector,
+   pointer, or pointer vector operands.</p>
  
  <h5>Arguments:</h5>
  <p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
@@ -5616,6 +6172,87 @@ freestanding environments and non-C-based languages.</p>
  
  </div>
  
+<!-- _______________________________________________________________________ -->
+<h4>
+  <a name="i_landingpad">'<tt>landingpad</tt>' Instruction</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+  &lt;resultval&gt; = landingpad &lt;somety&gt; personality &lt;type&gt; &lt;pers_fn&gt; &lt;clause&gt;+
+  &lt;resultval&gt; = landingpad &lt;somety&gt; personality &lt;type&gt; &lt;pers_fn&gt; cleanup &lt;clause&gt;*
+
+  &lt;clause&gt; := catch &lt;type&gt; &lt;value&gt;
+  &lt;clause&gt; := filter &lt;array constant type&gt; &lt;array constant&gt;
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>landingpad</tt>' instruction is used by
+   <a href="ExceptionHandling.html#overview">LLVM's exception handling
+   system</a> to specify that a basic block is a landing pad &mdash; one where
+   the exception lands, and corresponds to the code found in the
+   <i><tt>catch</tt></i> portion of a <i><tt>try/catch</tt></i> sequence. It
+   defines values supplied by the personality function (<tt>pers_fn</tt>) upon
+   re-entry to the function. The <tt>resultval</tt> has the
+   type <tt>somety</tt>.</p>
+
+<h5>Arguments:</h5>
+<p>This instruction takes a <tt>pers_fn</tt> value. This is the personality
+   function associated with the unwinding mechanism. The optional
+   <tt>cleanup</tt> flag indicates that the landing pad block is a cleanup.</p>
+
+<p>A <tt>clause</tt> begins with the clause type &mdash; <tt>catch</tt>
+   or <tt>filter</tt> &mdash; and contains the global variable representing the
+   "type" that may be caught or filtered respectively. Unlike the
+   <tt>catch</tt> clause, the <tt>filter</tt> clause takes an array constant as
+   its argument. Use "<tt>[0 x i8**] undef</tt>" for a filter which cannot
+   throw. The '<tt>landingpad</tt>' instruction must contain <em>at least</em>
+   one <tt>clause</tt> or the <tt>cleanup</tt> flag.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>landingpad</tt>' instruction defines the values which are set by the
+   personality function (<tt>pers_fn</tt>) upon re-entry to the function, and
+   therefore the "result type" of the <tt>landingpad</tt> instruction. As with
+   calling conventions, how the personality function results are represented in
+   LLVM IR is target specific.</p>
+
+<p>The clauses are applied in order from top to bottom. If two
+   <tt>landingpad</tt> instructions are merged together through inlining, the
+   clauses from the calling function are appended to the list of clauses.</p>
+
+<p>The <tt>landingpad</tt> instruction has several restrictions:</p>
+
+<ul>
+  <li>A landing pad block is a basic block which is the unwind destination of an
+      '<tt>invoke</tt>' instruction.</li>
+  <li>A landing pad block must have a '<tt>landingpad</tt>' instruction as its
+      first non-PHI instruction.</li>
+  <li>There can be only one '<tt>landingpad</tt>' instruction within the landing
+      pad block.</li>
+  <li>A basic block that is not a landing pad block may not include a
+      '<tt>landingpad</tt>' instruction.</li>
+  <li>All '<tt>landingpad</tt>' instructions in a function must have the same
+      personality function.</li>
+</ul>
+
+<h5>Example:</h5>
+<pre>
+  ;; A landing pad which can catch an integer.
+  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+           catch i8** @_ZTIi
+  ;; A landing pad that is a cleanup.
+  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+           cleanup
+  ;; A landing pad which can catch an integer and can only throw a double.
+  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+           catch i8** @_ZTIi
+           filter [1 x i8**] [@_ZTId]
+</pre>
+
+</div>
+
  </div>
  
  </div>
@@ -6504,8 +7141,6 @@ LLVM</a>.</p>
  
  </div>
  
-</div>
-
  <!-- _______________________________________________________________________ -->
  <h4>
    <a name="int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a>
@@ -6572,6 +7207,42 @@ LLVM</a>.</p>
  
  </div>
  
+<!-- _______________________________________________________________________ -->
+<h4>
+  <a name="int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.fma</tt> on any
+   floating point or vector of floating point type. Not all targets support all
+   types however.</p>
+
+<pre>
+  declare float     @llvm.fma.f32(float  %a, float  %b, float  %c)
+  declare double    @llvm.fma.f64(double %a, double %b, double %c)
+  declare x86_fp80  @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
+  declare fp128     @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
+  declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.fma.*</tt>' intrinsics perform the fused multiply-add
+   operation.</p>
+
+<h5>Arguments:</h5>
+<p>The arguments and return value are floating point numbers of the same
+   type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>fma</tt> functions
+   would.</p>
+
+</div>
+
+</div>
+
  <!-- ======================================================================= -->
  <h3>
    <a name="int_manip">Bit Manipulation Intrinsics</a>
@@ -6626,7 +7297,8 @@ LLVM</a>.</p>
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
-   width. Not all targets support all bit widths however.</p>
+   width, or on any vector with integer elements. Not all targets support all
+  bit widths or vector types, however.</p>
  
  <pre>
    declare i8 @llvm.ctpop.i8(i8  &lt;src&gt;)
@@ -6634,6 +7306,7 @@ LLVM</a>.</p>
    declare i32 @llvm.ctpop.i32(i32 &lt;src&gt;)
    declare i64 @llvm.ctpop.i64(i64 &lt;src&gt;)
    declare i256 @llvm.ctpop.i256(i256 &lt;src&gt;)
+  declare &lt;2 x i32&gt; @llvm.ctpop.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
  </pre>
  
  <h5>Overview:</h5>
@@ -6642,10 +7315,12 @@ LLVM</a>.</p>
  
  <h5>Arguments:</h5>
  <p>The only argument is the value to be counted.  The argument may be of any
-   integer type.  The return type must match the argument type.</p>
+   integer type, or a vector with integer elements.
+   The return type must match the argument type.</p>
  
  <h5>Semantics:</h5>
-<p>The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable.</p>
+<p>The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable, or within each
+   element of a vector.</p>
  
  </div>
  
@@ -6658,7 +7333,8 @@ LLVM</a>.</p>
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
-   integer bit width. Not all targets support all bit widths however.</p>
+   integer bit width, or any vector whose elements are integers. Not all
+   targets support all bit widths or vector types, however.</p>
  
  <pre>
    declare i8 @llvm.ctlz.i8 (i8  &lt;src&gt;)
@@ -6666,6 +7342,7 @@ LLVM</a>.</p>
    declare i32 @llvm.ctlz.i32(i32 &lt;src&gt;)
    declare i64 @llvm.ctlz.i64(i64 &lt;src&gt;)
    declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;)
+  declare &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
  </pre>
  
  <h5>Overview:</h5>
@@ -6674,11 +7351,13 @@ LLVM</a>.</p>
  
  <h5>Arguments:</h5>
  <p>The only argument is the value to be counted.  The argument may be of any
-   integer type. The return type must match the argument type.</p>
+   integer type, or any vector type with integer element type.
+   The return type must match the argument type.</p>
  
  <h5>Semantics:</h5>
  <p>The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant)
-   zeros in a variable.  If the src == 0 then the result is the size in bits of
+   zeros in a variable, or within each element of the vector if the operation
+   is of vector type.  If the src == 0 then the result is the size in bits of
     the type of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
  
  </div>
@@ -6692,7 +7371,8 @@ LLVM</a>.</p>
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
-   integer bit width. Not all targets support all bit widths however.</p>
+   integer bit width, or any vector of integer elements. Not all targets
+   support all bit widths or vector types, however.</p>
  
  <pre>
    declare i8 @llvm.cttz.i8 (i8  &lt;src&gt;)
@@ -6700,6 +7380,7 @@ LLVM</a>.</p>
    declare i32 @llvm.cttz.i32(i32 &lt;src&gt;)
    declare i64 @llvm.cttz.i64(i64 &lt;src&gt;)
    declare i256 @llvm.cttz.i256(i256 &lt;src&gt;)
+  declare &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
  </pre>
  
  <h5>Overview:</h5>
@@ -6708,11 +7389,13 @@ LLVM</a>.</p>
  
  <h5>Arguments:</h5>
  <p>The only argument is the value to be counted.  The argument may be of any
-   integer type.  The return type must match the argument type.</p>
+   integer type, or a vector with integer element type.  The return type
+   must match the argument type.</p>
  
  <h5>Semantics:</h5>
  <p>The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant)
-   zeros in a variable.  If the src == 0 then the result is the size in bits of
+   zeros in a variable, or within each element of a vector.
+   If the src == 0 then the result is the size in bits of
     the type of src.  For example, <tt>llvm.cttz(2) = 1</tt>.</p>
  
  </div>
@@ -7144,12 +7827,12 @@ LLVM</a>.</p>
  
  <!-- ======================================================================= -->
  <h3>
-  <a name="int_trampoline">Trampoline Intrinsic</a>
+  <a name="int_trampoline">Trampoline Intrinsics</a>
  </h3>
  
  <div>
  
-<p>This intrinsic makes it possible to excise one parameter, marked with
+<p>These intrinsics make it possible to excise one parameter, marked with
     the <a href="#nest"><tt>nest</tt></a> attribute, from a function.
     The result is a callable
     function pointer lacking the nest parameter - the caller does not need to
@@ -7166,7 +7849,8 @@ LLVM</a>.</p>
  <pre class="doc_code">
    %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
    %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
-  %p = call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8* nest , i32, i32)* @f to i8*), i8* %nval)
+  call void @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
+  %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
    %fp = bitcast i8* %p to i32 (i32, i32)*
  </pre>
  
@@ -7184,12 +7868,12 @@ LLVM</a>.</p>
  
  <h5>Syntax:</h5>
  <pre>
-  declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
+  declare void @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
  </pre>
  
  <h5>Overview:</h5>
-<p>This fills the memory pointed to by <tt>tramp</tt> with code and returns a
-   function pointer suitable for executing it.</p>
+<p>This fills the memory pointed to by <tt>tramp</tt> with executable code,
+   turning it into a trampoline.</p>
  
  <h5>Arguments:</h5>
  <p>The <tt>llvm.init.trampoline</tt> intrinsic takes three arguments, all
@@ -7203,514 +7887,50 @@ LLVM</a>.</p>
  
  <h5>Semantics:</h5>
  <p>The block of memory pointed to by <tt>tramp</tt> is filled with target
-   dependent code, turning it into a function.  A pointer to this function is
-   returned, but needs to be bitcast to an <a href="#int_trampoline">appropriate
-   function pointer type</a> before being called.  The new function's signature
-   is the same as that of <tt>func</tt> with any arguments marked with
-   the <tt>nest</tt> attribute removed.  At most one such <tt>nest</tt> argument
-   is allowed, and it must be of pointer type.  Calling the new function is
-   equivalent to calling <tt>func</tt> with the same argument list, but
-   with <tt>nval</tt> used for the missing <tt>nest</tt> argument.  If, after
-   calling <tt>llvm.init.trampoline</tt>, the memory pointed to
-   by <tt>tramp</tt> is modified, then the effect of any later call to the
-   returned function pointer is undefined.</p>
-
+   dependent code, turning it into a function.  Then <tt>tramp</tt> needs to be
+   passed to <a href="#int_at">llvm.adjust.trampoline</a> to get a pointer
+   which can be <a href="#int_trampoline">bitcast (to a new function) and
+   called</a>.  The new function's signature is the same as that of
+   <tt>func</tt> with any arguments marked with the <tt>nest</tt> attribute
+   removed.  At most one such <tt>nest</tt> argument is allowed, and it must be of
+   pointer type.  Calling the new function is equivalent to calling <tt>func</tt>
+   with the same argument list, but with <tt>nval</tt> used for the missing
+   <tt>nest</tt> argument.  If, after calling <tt>llvm.init.trampoline</tt>, the
+   memory pointed to by <tt>tramp</tt> is modified, then the effect of any later call
+   to the function pointer obtained from <tt>llvm.adjust.trampoline</tt> is undefined.</p>
  </div>
  
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_atomics">Atomic Operations and Synchronization Intrinsics</a>
-</h3>
-
-<div>
-
-<p>These intrinsic functions expand the "universal IR" of LLVM to represent
-   hardware constructs for atomic operations and memory synchronization.  This
-   provides an interface to the hardware, not an interface to the programmer. It
-   is aimed at a low enough level to allow any programming models or APIs
-   (Application Programming Interfaces) which need atomic behaviors to map
-   cleanly onto it. It is also modeled primarily on hardware behavior. Just as
-   hardware provides a "universal IR" for source languages, it also provides a
-   starting point for developing a "universal" atomic operation and
-   synchronization IR.</p>
-
-<p>These do <em>not</em> form an API such as high-level threading libraries,
-   software transaction memory systems, atomic primitives, and intrinsic
-   functions as found in BSD, GNU libc, atomic_ops, APR, and other system and
-   application libraries.  The hardware interface provided by LLVM should allow
-   a clean implementation of all of these APIs and parallel programming models.
-   No one model or paradigm should be selected above others unless the hardware
-   itself ubiquitously does so.</p>
-
  <!-- _______________________________________________________________________ -->
  <h4>
-  <a name="int_memory_barrier">'<tt>llvm.memory.barrier</tt>' Intrinsic</a>
-</h4>
-
-<div>
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.memory.barrier(i1 &lt;ll&gt;, i1 &lt;ls&gt;, i1 &lt;sl&gt;, i1 &lt;ss&gt;, i1 &lt;device&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.memory.barrier</tt> intrinsic guarantees ordering between
-   specific pairs of memory access types.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.memory.barrier</tt> intrinsic requires five boolean arguments.
-   The first four arguments enables a specific barrier as listed below.  The
-   fifth argument specifies that the barrier applies to io or device or uncached
-   memory.</p>
-
-<ul>
-  <li><tt>ll</tt>: load-load barrier</li>
-  <li><tt>ls</tt>: load-store barrier</li>
-  <li><tt>sl</tt>: store-load barrier</li>
-  <li><tt>ss</tt>: store-store barrier</li>
-  <li><tt>device</tt>: barrier applies to device and uncached memory also.</li>
-</ul>
-
-<h5>Semantics:</h5>
-<p>This intrinsic causes the system to enforce some ordering constraints upon
-   the loads and stores of the program. This barrier does not
-   indicate <em>when</em> any events will occur, it only enforces
-   an <em>order</em> in which they occur. For any of the specified pairs of load
-   and store operations (f.ex.  load-load, or store-load), all of the first
-   operations preceding the barrier will complete before any of the second
-   operations succeeding the barrier begin. Specifically the semantics for each
-   pairing is as follows:</p>
-
-<ul>
-  <li><tt>ll</tt>: All loads before the barrier must complete before any load
-      after the barrier begins.</li>
-  <li><tt>ls</tt>: All loads before the barrier must complete before any
-      store after the barrier begins.</li>
-  <li><tt>ss</tt>: All stores before the barrier must complete before any
-      store after the barrier begins.</li>
-  <li><tt>sl</tt>: All stores before the barrier must complete before any
-      load after the barrier begins.</li>
-</ul>
-
-<p>These semantics are applied with a logical "and" behavior when more than one
-   is enabled in a single memory barrier intrinsic.</p>
-
-<p>Backends may implement stronger barriers than those requested when they do
-   not support as fine grained a barrier as requested.  Some architectures do
-   not need all types of barriers and on such architectures, these become
-   noops.</p>
-
-<h5>Example:</h5>
-<pre>
-%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
-%ptr      = bitcast i8* %mallocP to i32*
-            store i32 4, %ptr
-
-%result1  = load i32* %ptr      <i>; yields {i32}:result1 = 4</i>
-            call void @llvm.memory.barrier(i1 false, i1 true, i1 false, i1 false, i1 true)
-                                <i>; guarantee the above finishes</i>
-            store i32 8, %ptr   <i>; before this begins</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_atomic_cmp_swap">'<tt>llvm.atomic.cmp.swap.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.cmp.swap</tt> on
-   any integer bit width and for different address spaces. Not all targets
-   support all bit widths however.</p>
-
-<pre>
-  declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;cmp&gt;, i8 &lt;val&gt;)
-  declare i16 @llvm.atomic.cmp.swap.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;cmp&gt;, i16 &lt;val&gt;)
-  declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;cmp&gt;, i32 &lt;val&gt;)
-  declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;cmp&gt;, i64 &lt;val&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This loads a value in memory and compares it to a given value. If they are
-   equal, it stores a new value into the memory.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.atomic.cmp.swap</tt> intrinsic takes three arguments. The result
-   as well as both <tt>cmp</tt> and <tt>val</tt> must be integer values with the
-   same bit width. The <tt>ptr</tt> argument must be a pointer to a value of
-   this integer type. While any bit width integer may be used, targets may only
-   lower representations they support in hardware.</p>
-
-<h5>Semantics:</h5>
-<p>This entire intrinsic must be executed atomically. It first loads the value
-   in memory pointed to by <tt>ptr</tt> and compares it with the
-   value <tt>cmp</tt>. If they are equal, <tt>val</tt> is stored into the
-   memory. The loaded value is yielded in all cases. This provides the
-   equivalent of an atomic compare-and-swap operation within the SSA
-   framework.</p>
-
-<h5>Examples:</h5>
-<pre>
-%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
-%ptr      = bitcast i8* %mallocP to i32*
-            store i32 4, %ptr
-
-%val1     = add i32 4, 4
-%result1  = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %ptr, i32 4, %val1)
-                                          <i>; yields {i32}:result1 = 4</i>
-%stored1  = icmp eq i32 %result1, 4       <i>; yields {i1}:stored1 = true</i>
-%memval1  = load i32* %ptr                <i>; yields {i32}:memval1 = 8</i>
-
-%val2     = add i32 1, 1
-%result2  = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %ptr, i32 5, %val2)
-                                          <i>; yields {i32}:result2 = 8</i>
-%stored2  = icmp eq i32 %result2, 5       <i>; yields {i1}:stored2 = false</i>
-
-%memval2  = load i32* %ptr                <i>; yields {i32}:memval2 = 8</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_atomic_swap">'<tt>llvm.atomic.swap.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-<h5>Syntax:</h5>
-
-<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.swap</tt> on any
-   integer bit width. Not all targets support all bit widths however.</p>
-
-<pre>
-  declare i8 @llvm.atomic.swap.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;val&gt;)
-  declare i16 @llvm.atomic.swap.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;val&gt;)
-  declare i32 @llvm.atomic.swap.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;val&gt;)
-  declare i64 @llvm.atomic.swap.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;val&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This intrinsic loads the value stored in memory at <tt>ptr</tt> and yields
-   the value from memory. It then stores the value in <tt>val</tt> in the memory
-   at <tt>ptr</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.atomic.swap</tt> intrinsic takes two arguments. Both
-  the <tt>val</tt> argument and the result must be integers of the same bit
-  width.  The first argument, <tt>ptr</tt>, must be a pointer to a value of this
-  integer type. The targets may only lower integer representations they
-  support.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic loads the value pointed to by <tt>ptr</tt>, yields it, and
-   stores <tt>val</tt> back into <tt>ptr</tt> atomically. This provides the
-   equivalent of an atomic swap operation within the SSA framework.</p>
-
-<h5>Examples:</h5>
-<pre>
-%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
-%ptr      = bitcast i8* %mallocP to i32*
-            store i32 4, %ptr
-
-%val1     = add i32 4, 4
-%result1  = call i32 @llvm.atomic.swap.i32.p0i32(i32* %ptr, i32 %val1)
-                                        <i>; yields {i32}:result1 = 4</i>
-%stored1  = icmp eq i32 %result1, 4     <i>; yields {i1}:stored1 = true</i>
-%memval1  = load i32* %ptr              <i>; yields {i32}:memval1 = 8</i>
-
-%val2     = add i32 1, 1
-%result2  = call i32 @llvm.atomic.swap.i32.p0i32(i32* %ptr, i32 %val2)
-                                        <i>; yields {i32}:result2 = 8</i>
-
-%stored2  = icmp eq i32 %result2, 8     <i>; yields {i1}:stored2 = true</i>
-%memval2  = load i32* %ptr              <i>; yields {i32}:memval2 = 2</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_atomic_load_add">'<tt>llvm.atomic.load.add.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.add</tt> on
-   any integer bit width. Not all targets support all bit widths however.</p>
-
-<pre>
-  declare i8 @llvm.atomic.load.add.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.add.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.add.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.add.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This intrinsic adds <tt>delta</tt> to the value stored in memory
-   at <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The intrinsic takes two arguments, the first a pointer to an integer value
-   and the second an integer value. The result is also an integer value. These
-   integer types can have any bit width, but they must all have the same bit
-   width. The targets may only lower integer representations they support.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does a series of operations atomically. It first loads the
-   value stored at <tt>ptr</tt>. It then adds <tt>delta</tt>, stores the result
-   to <tt>ptr</tt>. It yields the original value stored at <tt>ptr</tt>.</p>
-
-<h5>Examples:</h5>
-<pre>
-%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
-%ptr      = bitcast i8* %mallocP to i32*
-            store i32 4, %ptr
-%result1  = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 4)
-                                <i>; yields {i32}:result1 = 4</i>
-%result2  = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 2)
-                                <i>; yields {i32}:result2 = 8</i>
-%result3  = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 5)
-                                <i>; yields {i32}:result3 = 10</i>
-%memval1  = load i32* %ptr      <i>; yields {i32}:memval1 = 15</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_atomic_load_sub">'<tt>llvm.atomic.load.sub.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.atomic.load.sub</tt> on
-   any integer bit width and for different address spaces. Not all targets
-   support all bit widths however.</p>
-
-<pre>
-  declare i8 @llvm.atomic.load.sub.i8.p0i32(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.sub.i16.p0i32(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.sub.i64.p0i32(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This intrinsic subtracts <tt>delta</tt> to the value stored in memory at
-   <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The intrinsic takes two arguments, the first a pointer to an integer value
-   and the second an integer value. The result is also an integer value. These
-   integer types can have any bit width, but they must all have the same bit
-   width. The targets may only lower integer representations they support.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does a series of operations atomically. It first loads the
-   value stored at <tt>ptr</tt>. It then subtracts <tt>delta</tt>, stores the
-   result to <tt>ptr</tt>. It yields the original value stored
-   at <tt>ptr</tt>.</p>
-
-<h5>Examples:</h5>
-<pre>
-%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
-%ptr      = bitcast i8* %mallocP to i32*
-            store i32 8, %ptr
-%result1  = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 4)
-                                <i>; yields {i32}:result1 = 8</i>
-%result2  = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 2)
-                                <i>; yields {i32}:result2 = 4</i>
-%result3  = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 5)
-                                <i>; yields {i32}:result3 = 2</i>
-%memval1  = load i32* %ptr      <i>; yields {i32}:memval1 = -3</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_atomic_load_and">
-    '<tt>llvm.atomic.load.and.*</tt>' Intrinsic
-  </a>
-  <br>
-  <a name="int_atomic_load_nand">
-    '<tt>llvm.atomic.load.nand.*</tt>' Intrinsic
-  </a>
-  <br>
-  <a name="int_atomic_load_or">
-    '<tt>llvm.atomic.load.or.*</tt>' Intrinsic
-  </a>
-  <br>
-  <a name="int_atomic_load_xor">
-    '<tt>llvm.atomic.load.xor.*</tt>' Intrinsic
+  <a name="int_at">
+    '<tt>llvm.adjust.trampoline</tt>' Intrinsic
    </a>
  </h4>
  
  <div>
  
  <h5>Syntax:</h5>
-<p>These are overloaded intrinsics. You can
-  use <tt>llvm.atomic.load_and</tt>, <tt>llvm.atomic.load_nand</tt>,
-  <tt>llvm.atomic.load_or</tt>, and <tt>llvm.atomic.load_xor</tt> on any integer
-  bit width and for different address spaces. Not all targets support all bit
-  widths however.</p>
-
-<pre>
-  declare i8 @llvm.atomic.load.and.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.and.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.and.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.and.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<pre>
-  declare i8 @llvm.atomic.load.or.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.or.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.or.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.or.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
  <pre>
-  declare i8 @llvm.atomic.load.nand.i8.p0i32(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.nand.i16.p0i32(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.nand.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.nand.i64.p0i32(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<pre>
-  declare i8 @llvm.atomic.load.xor.i8.p0i32(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.xor.i16.p0i32(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.xor.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.xor.i64.p0i32(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
+  declare i8* @llvm.adjust.trampoline(i8* &lt;tramp&gt;)
  </pre>
  
  <h5>Overview:</h5>
-<p>These intrinsics bitwise the operation (and, nand, or, xor) <tt>delta</tt> to
-   the value stored in memory at <tt>ptr</tt>. It yields the original value
-   at <tt>ptr</tt>.</p>
+<p>This performs any required machine-specific adjustment to the address of a
+   trampoline (passed as <tt>tramp</tt>).</p>
  
  <h5>Arguments:</h5>
-<p>These intrinsics take two arguments, the first a pointer to an integer value
-   and the second an integer value. The result is also an integer value. These
-   integer types can have any bit width, but they must all have the same bit
-   width. The targets may only lower integer representations they support.</p>
+<p><tt>tramp</tt> must point to a block of memory which already has trampoline code
+   filled in by a previous call to
+   <a href="#int_it"><tt>llvm.init.trampoline</tt></a>.</p>
  
  <h5>Semantics:</h5>
-<p>These intrinsics does a series of operations atomically. They first load the
-   value stored at <tt>ptr</tt>. They then do the bitwise
-   operation <tt>delta</tt>, store the result to <tt>ptr</tt>. They yield the
-   original value stored at <tt>ptr</tt>.</p>
-
-<h5>Examples:</h5>
-<pre>
-%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
-%ptr      = bitcast i8* %mallocP to i32*
-            store i32 0x0F0F, %ptr
-%result0  = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %ptr, i32 0xFF)
-                                <i>; yields {i32}:result0 = 0x0F0F</i>
-%result1  = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %ptr, i32 0xFF)
-                                <i>; yields {i32}:result1 = 0xFFFFFFF0</i>
-%result2  = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %ptr, i32 0F)
-                                <i>; yields {i32}:result2 = 0xF0</i>
-%result3  = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %ptr, i32 0F)
-                                <i>; yields {i32}:result3 = FF</i>
-%memval1  = load i32* %ptr      <i>; yields {i32}:memval1 = F0</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_atomic_load_max">
-    '<tt>llvm.atomic.load.max.*</tt>' Intrinsic
-  </a>
-  <br>
-  <a name="int_atomic_load_min">
-    '<tt>llvm.atomic.load.min.*</tt>' Intrinsic
-  </a>
-  <br>
-  <a name="int_atomic_load_umax">
-    '<tt>llvm.atomic.load.umax.*</tt>' Intrinsic
-  </a>
-  <br>
-  <a name="int_atomic_load_umin">
-    '<tt>llvm.atomic.load.umin.*</tt>' Intrinsic
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>These are overloaded intrinsics. You can use <tt>llvm.atomic.load_max</tt>,
-   <tt>llvm.atomic.load_min</tt>, <tt>llvm.atomic.load_umax</tt>, and
-   <tt>llvm.atomic.load_umin</tt> on any integer bit width and for different
-   address spaces. Not all targets support all bit widths however.</p>
-
-<pre>
-  declare i8 @llvm.atomic.load.max.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.max.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.max.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.max.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<pre>
-  declare i8 @llvm.atomic.load.min.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.min.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.min.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.min.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<pre>
-  declare i8 @llvm.atomic.load.umax.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.umax.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.umax.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.umax.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<pre>
-  declare i8 @llvm.atomic.load.umin.i8.p0i8(i8* &lt;ptr&gt;, i8 &lt;delta&gt;)
-  declare i16 @llvm.atomic.load.umin.i16.p0i16(i16* &lt;ptr&gt;, i16 &lt;delta&gt;)
-  declare i32 @llvm.atomic.load.umin.i32.p0i32(i32* &lt;ptr&gt;, i32 &lt;delta&gt;)
-  declare i64 @llvm.atomic.load.umin.i64.p0i64(i64* &lt;ptr&gt;, i64 &lt;delta&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>These intrinsics takes the signed or unsigned minimum or maximum of
-   <tt>delta</tt> and the value stored in memory at <tt>ptr</tt>. It yields the
-   original value at <tt>ptr</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>These intrinsics take two arguments, the first a pointer to an integer value
-   and the second an integer value. The result is also an integer value. These
-   integer types can have any bit width, but they must all have the same bit
-   width. The targets may only lower integer representations they support.</p>
-
-<h5>Semantics:</h5>
-<p>These intrinsics does a series of operations atomically. They first load the
-   value stored at <tt>ptr</tt>. They then do the signed or unsigned min or
-   max <tt>delta</tt> and the value, store the result to <tt>ptr</tt>. They
-   yield the original value stored at <tt>ptr</tt>.</p>
-
-<h5>Examples:</h5>
-<pre>
-%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
-%ptr      = bitcast i8* %mallocP to i32*
-            store i32 7, %ptr
-%result0  = call i32 @llvm.atomic.load.min.i32.p0i32(i32* %ptr, i32 -2)
-                                <i>; yields {i32}:result0 = 7</i>
-%result1  = call i32 @llvm.atomic.load.max.i32.p0i32(i32* %ptr, i32 8)
-                                <i>; yields {i32}:result1 = -2</i>
-%result2  = call i32 @llvm.atomic.load.umin.i32.p0i32(i32* %ptr, i32 10)
-                                <i>; yields {i32}:result2 = 8</i>
-%result3  = call i32 @llvm.atomic.load.umax.i32.p0i32(i32* %ptr, i32 30)
-                                <i>; yields {i32}:result3 = 8</i>
-%memval1  = load i32* %ptr      <i>; yields {i32}:memval1 = 30</i>
-</pre>
+<p>On some architectures the address of the code to be executed needs to be
+   different to the address where the trampoline is actually stored.  This
+   intrinsic returns the executable address corresponding to <tt>tramp</tt>
+   after performing the required machine-specific adjustments.
+   The pointer returned can then be <a href="#int_trampoline">bitcast and
+   executed</a>.
+</p>
  
  </div>
  
@@ -7875,7 +8095,7 @@ LLVM</a>.</p>
  <h5>Semantics:</h5>
  <p>This intrinsic allows annotation of local variables with arbitrary strings.
     This can be useful for special purpose optimizations that want to look for
-   these annotations.  These have no other defined use, they are ignored by code
+   these annotations.  These have no other defined use; they are ignored by code
     generation and optimization.</p>
  
  </div>
@@ -7911,7 +8131,7 @@ LLVM</a>.</p>
  <h5>Semantics:</h5>
  <p>This intrinsic allows annotations to be put on arbitrary expressions with
     arbitrary strings.  This can be useful for special purpose optimizations that
-   want to look for these annotations.  These have no other defined use, they
+   want to look for these annotations.  These have no other defined use; they
     are ignored by code generation and optimization.</p>
  
  </div>
@@ -8009,11 +8229,35 @@ LLVM</a>.</p>
     compile time.</p>
  
  </div>
+<!-- _______________________________________________________________________ -->
+<h4>
+  <a name="int_expect">'<tt>llvm.expect</tt>' Intrinsic</a>
+</h4>
  
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+  declare i32 @llvm.expect.i32(i32 &lt;val&gt;, i32 &lt;expected_val&gt;)
+  declare i64 @llvm.expect.i64(i64 &lt;val&gt;, i64 &lt;expected_val&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>The <tt>llvm.expect</tt> intrinsic provides information about the expected
+   (most probable) value of <tt>val</tt>, which can be used by optimizers.</p>
+
+<h5>Arguments:</h5>
+<p>The <tt>llvm.expect</tt> intrinsic takes two arguments. The first
+   argument is a value. The second argument is the expected value; it must be
+   a constant, as variables are not allowed.</p>
+
+<h5>Semantics:</h5>
+<p>This intrinsic is lowered to <tt>val</tt>.</p>
  </div>
  
  </div>
  
+</div>
  <!-- *********************************************************************** -->
  <hr>
  <address>