Revert r102948.

[oota-llvm.git] / docs / LangRef.html
diff --git a/docs/LangRef.html b/docs/LangRef.html

index 5bf06851e6e4bd44efc620f2bf07167fb81ebffa..9a093577025e1381da9c160d9bc9c87b3e743a88 100644 (file)
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -50,6 +50,7 @@
        <li><a href="#moduleasm">Module-Level Inline Assembly</a></li>
        <li><a href="#datalayout">Data Layout</a></li>
        <li><a href="#pointeraliasing">Pointer Aliasing Rules</a></li>
+      <li><a href="#volatile">Volatile Memory Accesses</a></li>
      </ol>
    </li>
    <li><a href="#typesystem">Type System</a>
@@ -89,6 +90,7 @@
        <li><a href="#complexconstants">Complex Constants</a></li>
        <li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
        <li><a href="#undefvalues">Undefined Values</a></li>
+      <li><a href="#trapvalues">Trap Values</a></li>
        <li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
        <li><a href="#constantexprs">Constant Expressions</a></li>
      </ol>
@@ -255,6 +257,12 @@
            <li><a href="#int_umul_overflow">'<tt>llvm.umul.with.overflow.*</tt>' Intrinsics</a></li>
          </ol>
        </li>
+      <li><a href="#int_fp16">Half Precision Floating Point Intrinsics</a>
+        <ol>
+          <li><a href="#int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a></li>
+          <li><a href="#int_convert_from_fp16">'<tt>llvm.convert.from.fp16</tt>' Intrinsic</a></li>
+        </ol>
+      </li>
        <li><a href="#int_debugger">Debugger intrinsics</a></li>
        <li><a href="#int_eh">Exception Handling intrinsics</a></li>
        <li><a href="#int_trampoline">Trampoline Intrinsic</a>
@@ -691,9 +699,9 @@ define i32 @main() {                                        <i>; i32()* </i>
        target, without having to conform to an externally specified ABI
        (Application Binary Interface).
        <a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
-      when this convention is used.</a>  This calling convention does not
-      support varargs and requires the prototype of all callees to exactly match
-      the prototype of the function definition.</dd>
+      when this or the GHC convention is used.</a>  This calling convention
+      does not support varargs and requires the prototype of all callees to
+      exactly match the prototype of the function definition.</dd>
  
    <dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
    <dd>This calling convention attempts to make code in the caller as efficient
@@ -703,6 +711,26 @@ define i32 @main() {                                        <i>; i32()* </i>
        does not support varargs and requires the prototype of all callees to
        exactly match the prototype of the function definition.</dd>
  
+  <dt><b>"<tt>cc <em>10</em></tt>" - GHC convention</b>:</dt>
+  <dd>This calling convention has been implemented specifically for use by the
+      <a href="http://www.haskell.org/ghc">Glasgow Haskell Compiler (GHC)</a>.
+      It passes everything in registers, going to extremes to achieve this by
+      disabling callee save registers. This calling convention should not be
+      used lightly but only for specific situations such as an alternative to
+      the <em>register pinning</em> performance technique often used when
+      implementing functional programming languages. At the moment only X86
+      supports this convention and it has the following limitations:
+      <ul>
+        <li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
+            floating point types are supported.</li>
+        <li>On <em>X86-64</em> only supports up to 10 bit type parameters and
+            6 floating point parameters.</li>
+      </ul>
+      This calling convention supports
+      <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
+      requires that both the caller and callee use it.
+  </dd>
+
    <dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
    <dd>Any calling convention may be specified by number, allowing
        target-specific calling conventions to be used.  Target specific calling
@@ -823,11 +851,15 @@ define i32 @main() {                                        <i>; i32()* </i>
  <p>LLVM allows an explicit section to be specified for globals.  If the target
     supports it, it will emit globals to the section specified.</p>
  
-<p>An explicit alignment may be specified for a global.  If not present, or if
-   the alignment is set to zero, the alignment of the global is set by the
-   target to whatever it feels convenient.  If an explicit alignment is
-   specified, the global is forced to have at least that much alignment.  All
-   alignments must be a power of 2.</p>
+<p>An explicit alignment may be specified for a global, which must be a power
+   of 2.  If not present, or if the alignment is set to zero, the alignment of
+   the global is set by the target to whatever it feels convenient.  If an
+   explicit alignment is specified, the global is forced to have exactly that
+   alignment.  Targets and optimizers are not allowed to over-align the global
+   if the global has an assigned section.  In this case, the extra alignment
+   could be observable: for example, code could assume that the globals are
+   densely packed in their section and try to iterate over them as an array;
+   alignment padding would break this iteration.</p>
  
  <p>For example, the following defines a global in a numbered address space with
     an initializer, section, and alignment:</p>
@@ -848,7 +880,7 @@ define i32 @main() {                                        <i>; i32()* </i>
  
  <div class="doc_text">
  
-<p>LLVM function definitions consist of the "<tt>define</tt>" keyord, an
+<p>LLVM function definitions consist of the "<tt>define</tt>" keyword, an
     optional <a href="#linkage">linkage type</a>, an optional
     <a href="#visibility">visibility style</a>, an optional
     <a href="#callingconv">calling convention</a>, a return type, an optional
@@ -1271,7 +1303,7 @@ target datalayout = "<i>layout specification</i>"
  </dl>
  
  <p>When constructing the data layout for a given target, LLVM starts with a
-   default set of specifications which are then (possibly) overriden by the
+   default set of specifications which are then (possibly) overridden by the
     specifications in the <tt>datalayout</tt> keyword. The default specifications
     are given in this list:</p>
  
@@ -1367,6 +1399,24 @@ to implement type-based alias analysis.</p>
  
  </div>
  
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="volatile">Volatile Memory Accesses</a>
+</div>
+
+<div class="doc_text">
+
+<p>Certain memory accesses, such as <a href="#i_load"><tt>load</tt></a>s, <a
+href="#i_store"><tt>store</tt></a>s, and <a
+href="#int_memcpy"><tt>llvm.memcpy</tt></a>s may be marked <tt>volatile</tt>.
+The optimizers must not change the number of volatile operations or change their
+order of execution relative to other volatile operations.  The optimizers
+<i>may</i> change the order of volatile operations relative to non-volatile
+operations.  This is not Java's "volatile" and has no cross-thread
+synchronization behavior.</p>
+
+</div>
+
  <!-- *********************************************************************** -->
  <div class="doc_section"> <a name="typesystem">Type System</a> </div>
  <!-- *********************************************************************** -->
@@ -1664,7 +1714,7 @@ Classifications</a> </div>
     which indicates that the function takes a variable number of arguments.
     Variable argument functions can access their arguments with
     the <a href="#int_varargs">variable argument handling intrinsic</a>
-   functions.  '<tt>&lt;returntype&gt;</tt>' is a any type except
+   functions.  '<tt>&lt;returntype&gt;</tt>' is any type except
     <a href="#t_label">label</a>.</p>
  
  <h5>Examples:</h5>
@@ -1674,12 +1724,11 @@ Classifications</a> </div>
      <td class="left">function taking an <tt>i32</tt>, returning an <tt>i32</tt>
      </td>
    </tr><tr class="layout">
-    <td class="left"><tt>float&nbsp;(i16&nbsp;signext,&nbsp;i32&nbsp;*)&nbsp;*
+    <td class="left"><tt>float&nbsp;(i16,&nbsp;i32&nbsp;*)&nbsp;*
      </tt></td>
      <td class="left"><a href="#t_pointer">Pointer</a> to a function that takes
-      an <tt>i16</tt> that should be sign extended and a
-      <a href="#t_pointer">pointer</a> to <tt>i32</tt>, returning
-      <tt>float</tt>.
+      an <tt>i16</tt> and a <a href="#t_pointer">pointer</a> to <tt>i32</tt>,
+      returning <tt>float</tt>.
      </td>
    </tr><tr class="layout">
      <td class="left"><tt>i32 (i8*, ...)</tt></td>
@@ -2277,6 +2326,114 @@ has undefined behavior.</p>
  
  </div>
  
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="trapvalues">Trap Values</a></div>
+<div class="doc_text">
+
+<p>Trap values are similar to <a href="#undefvalues">undef values</a>, however
+   instead of representing an unspecified bit pattern, they represent the
+   fact that an instruction or constant expression which cannot evoke side
+   effects has nevertheless detected a condition which results in undefined
+   behavior.</p>
+
+<p>There is currently no way of representing a trap value in the IR; they
+   only exist when produced by operations such as
+   <a href="#i_add"><tt>add</tt></a> with the <tt>nsw</tt> flag.</p>
+
+<p>Trap value behavior is defined in terms of value <i>dependence</i>:</p>
+
+<p>
+<ul>
+<li>Values other than <a href="#i_phi"><tt>phi</tt></a> nodes depend on
+    their operands.</li>
+
+<li><a href="#i_phi"><tt>Phi</tt></a> nodes depend on the operand corresponding
+    to their dynamic predecessor basic block.</li>
+
+<li>Function arguments depend on the corresponding actual argument values in
+    the dynamic callers of their functions.</li>
+
+<li><a href="#i_call"><tt>Call</tt></a> instructions depend on the
+    <a href="#i_ret"><tt>ret</tt></a> instructions that dynamically transfer
+    control back to them.</li>
+
+<li><a href="#i_invoke"><tt>Invoke</tt></a> instructions depend on the
+    <a href="#i_ret"><tt>ret</tt></a>, <a href="#i_unwind"><tt>unwind</tt></a>,
+    or exception-throwing call instructions that dynamically transfer control
+    back to them.</li>
+
+<li>Non-volatile loads and stores depend on the most recent stores to all of the
+    referenced memory addresses, following the order in the IR
+    (including loads and stores implied by intrinsics such as
+    <a href="#int_memcpy"><tt>@llvm.memcpy</tt></a>.)</li>
+
+<!-- TODO: In the case of multiple threads, this only applies if the store
+     "happens-before" the load or store. -->
+
+<!-- TODO: floating-point exception state -->
+
+<li>An instruction with externally visible side effects depends on the most
+    recent preceding instruction with externally visible side effects, following
+    the order in the IR. (This includes volatile loads and stores.)</li>
+
+<li>An instruction <i>control-depends</i> on a
+    <a href="#terminators">terminator instruction</a>
+    if the terminator instruction has multiple successors and the instruction
+    is always executed when control transfers to one of the successors, and
+    may not be executed when control is transferred to another.</li>
+
+<li>Dependence is transitive.</li>
+
+</ul>
+</p>
+
+<p>Whenever a trap value is generated, all values which depend on it evaluate
+   to trap. If they have side effects, they evoke their side effects as if each
+   operand with a trap value were undef. If they have externally-visible side
+   effects, the behavior is undefined.</p>
+
+<p>Here are some examples:</p>
+
+<div class="doc_code">
+<pre>
+entry:
+  %trap = sub nuw i32 0, 1           ; Results in a trap value.
+  %still_trap = and i32 %trap, 0     ; Whereas (and i32 undef, 0) would return 0.
+  %trap_yet_again = getelementptr i32* @h, i32 %still_trap
+  store i32 0, i32* %trap_yet_again  ; undefined behavior
+
+  store i32 %trap, i32* @g           ; Trap value conceptually stored to memory.
+  %trap2 = load i32* @g              ; Returns a trap value, not just undef.
+
+  volatile store i32 %trap, i32* @g  ; External observation; undefined behavior.
+
+  %narrowaddr = bitcast i32* @g to i16*
+  %wideaddr = bitcast i32* @g to i64*
+  %trap3 = load i16* %narrowaddr     ; Returns a trap value.
+  %trap4 = load i64* %wideaddr       ; Returns a trap value.
+
+  %cmp = icmp slt i32 %trap, 0       ; Returns a trap value.
+  br i1 %cmp, label %true, label %end ; Branch to either destination.
+
+true:
+  volatile store i32 0, i32* @g      ; This is control-dependent on %cmp, so
+                                     ; it has undefined behavior.
+  br label %end
+
+end:
+  %p = phi i32 [ 0, %entry ], [ 1, %true ]
+                                     ; Both edges into this PHI are
+                                     ; control-dependent on %cmp, so this
+                                     ; always results in a trap value.
+
+  volatile store i32 0, i32* @g      ; %end is control-equivalent to %entry
+                                     ; so this is defined (ignoring earlier
+                                     ; undefined behavior in this example).
+</pre>
+</div>
+
+</div>
+
  <!-- ======================================================================= -->
  <div class="doc_subsection"><a name="blockaddress">Addresses of Basic
      Blocks</a></div>
@@ -2491,6 +2648,31 @@ call void asm alignstack "eieio", ""()
     documented here.  Constraints on what can be done (e.g. duplication, moving,
     etc.) need to be documented.  This is probably best done by reference to
     another document that covers inline asm from a holistic perspective.</p>
+</div>
+
+<div class="doc_subsubsection">
+<a name="inlineasm_md">Inline Asm Metadata</a>
+</div>
+
+<div class="doc_text">
+
+<p>The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode
+   attached to them that contains a constant integer.  If present, the code
+   generator will use the integer as the location cookie value when reporting
+   errors through the LLVMContext error reporting mechanisms.  This allows a
+   front-end to correlate backend errors that occur with inline asm back to the
+   source code that produced it.  For example:</p>
+
+<div class="doc_code">
+<pre>
+call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
+...
+!42 = !{ i32 1234567 }
+</pre>
+</div>
+
+<p>It is up to the front-end to make sense of the magic numbers it places in the
+   IR.</p>
  
  </div>
  
@@ -2521,6 +2703,23 @@ call void asm alignstack "eieio", ""()
     metadata nodes, which can be looked up in the module symbol table. For
     example: "<tt>!foo =  metadata !{!4, !3}</tt>".
  
+<p>Metadata can be used as function arguments. Here the <tt>llvm.dbg.value</tt>
+   function takes two metadata arguments.
+
+   <div class="doc_code">
+     <pre>
+       call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
+     </pre>
+   </div></p>
+
+<p>Metadata can be attached to an instruction. Here metadata <tt>!21</tt> is
+   attached to the <tt>add</tt> instruction using the <tt>!dbg</tt> identifier.
+
+  <div class="doc_code">
+    <pre>
+      %indvar.next = add i64 %indvar, 1, !dbg !21
+    </pre>
+  </div></p>
  </div>
  
  
@@ -2595,8 +2794,12 @@ should not be exposed to source languages.</p>
  </div>
  
  <div class="doc_text">
-
-<p>TODO: Describe this.</p>
+<pre>
+%0 = type { i32, void ()* }
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
+</pre>
+<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor functions and associated priorities.  The functions referenced by this array will be called in ascending order of priority (i.e. lowest first) when the module is loaded.  The order of functions with the same priority is not defined.
+</p>
  
  </div>
  
@@ -2606,8 +2809,13 @@ should not be exposed to source languages.</p>
  </div>
  
  <div class="doc_text">
+<pre>
+%0 = type { i32, void ()* }
+@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
+</pre>
  
-<p>TODO: Describe this.</p>
+<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions and associated priorities.  The functions referenced by this array will be called in descending order of priority (i.e. highest first) when the module is unloaded.  The order of functions with the same priority is not defined.
+</p>
  
  </div>
  
@@ -2640,7 +2848,7 @@ Instructions</a> </div>
     control flow, not values (the one exception being the
     '<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
  
-<p>There are six different terminator instructions: the
+<p>There are seven different terminator instructions: the
     '<a href="#i_ret"><tt>ret</tt></a>' instruction, the
     '<a href="#i_br"><tt>br</tt></a>' instruction, the
     '<a href="#i_switch"><tt>switch</tt></a>' instruction, the
@@ -2888,9 +3096,10 @@ IfUnequal:
        function to be invoked. </li>
  
    <li>'<tt>function args</tt>': argument list whose types match the function
-      signature argument types.  If the function signature indicates the
-      function accepts a variable number of arguments, the extra arguments can
-      be specified.</li>
+      signature argument types and parameter attributes. All arguments must be
+      of <a href="#t_firstclass">first class</a> type. If the function
+      signature indicates the function accepts a variable number of arguments,
+      the extra arguments can be specified.</li>
  
    <li>'<tt>normal label</tt>': the label reached when the called function
        executes a '<tt><a href="#i_ret">ret</a></tt>' instruction. </li>
@@ -3036,7 +3245,8 @@ Instruction</a> </div>
  <p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
     and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
     <tt>nsw</tt> keywords are present, the result value of the <tt>add</tt>
-   is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+   is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+   respectively, occurs.</p>
  
  <h5>Example:</h5>
  <pre>
@@ -3116,7 +3326,8 @@ Instruction</a> </div>
  <p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
     and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
     <tt>nsw</tt> keywords are present, the result value of the <tt>sub</tt>
-   is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+   is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+   respectively, occurs.</p>
  
  <h5>Example:</h5>
  <pre>
@@ -3202,7 +3413,8 @@ Instruction</a> </div>
  <p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
     and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
     <tt>nsw</tt> keywords are present, the result value of the <tt>mul</tt>
-   is undefined if unsigned and/or signed overflow, respectively, occurs.</p>
+   is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+   respectively, occurs.</p>
  
  <h5>Example:</h5>
  <pre>
@@ -3307,8 +3519,8 @@ Instruction</a> </div>
     a 32-bit division of -2147483648 by -1.</p>
  
  <p>If the <tt>exact</tt> keyword is present, the result value of the
-   <tt>sdiv</tt> is undefined if the result would be rounded or if overflow
-   would occur.</p>
+   <tt>sdiv</tt> is a <a href="#trapvalues">trap value</a> if the result would
+   be rounded or if overflow would occur.</p>
  
  <h5>Example:</h5>
  <pre>
@@ -4077,9 +4289,9 @@ Instruction</a> </div>
  
  <h5>Syntax:</h5>
  <pre>
-  &lt;result&gt; = load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !<index>]
-  &lt;result&gt; = volatile load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !<index>]
-  !<index> = !{ i32 1 }
+  &lt;result&gt; = load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]
+  &lt;result&gt; = volatile load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]
+  !&lt;index&gt; = !{ i32 1 }
  </pre>
  
  <h5>Overview:</h5>
@@ -4090,25 +4302,24 @@ Instruction</a> </div>
     from which to load.  The pointer must point to
     a <a href="#t_firstclass">first class</a> type.  If the <tt>load</tt> is
     marked as <tt>volatile</tt>, then the optimizer is not allowed to modify the
-   number or order of execution of this <tt>load</tt> with other
-   volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
-   instructions.</p>
+   number or order of execution of this <tt>load</tt> with other <a
+   href="#volatile">volatile operations</a>.</p>
  
-<p>The optional constant "align" argument specifies the alignment of the
+<p>The optional constant <tt>align</tt> argument specifies the alignment of the
     operation (that is, the alignment of the memory address). A value of 0 or an
-   omitted "align" argument means that the operation has the preferential
+   omitted <tt>align</tt> argument means that the operation has the preferential
     alignment for the target. It is the responsibility of the code emitter to
     ensure that the alignment information is correct. Overestimating the
-   alignment results in an undefined behavior. Underestimating the alignment may
+   alignment results in undefined behavior. Underestimating the alignment may
     produce less efficient code. An alignment of 1 is always safe.</p>
  
-<p>The optional !nontemporal metadata must reference a single metatadata
-   name <index> corresponding to a metadata node with one i32 entry of
-   value 1.  The existance of the !nontemporal metatadata on the
-   instruction tells the optimizer and code generator that this load is
-   not expected to be reused in the cache.  The code generator may
-   select special instructions to save cache bandwidth, such as the
-   MOVNT intruction on x86.</p>
+<p>The optional <tt>!nontemporal</tt> metadata must reference a single
+   metadata name &lt;index&gt; corresponding to a metadata node with
+   one <tt>i32</tt> entry of value 1.  The existence of
+   the <tt>!nontemporal</tt> metadata on the instruction tells the optimizer
+   and code generator that this load is not expected to be reused in the cache.
+   The code generator may select special instructions to save cache bandwidth,
+   such as the <tt>MOVNT</tt> instruction on x86.</p>
  
  <h5>Semantics:</h5>
  <p>The location of memory pointed to is loaded.  If the value being loaded is of
@@ -4148,11 +4359,10 @@ Instruction</a> </div>
     and an address at which to store it.  The type of the
     '<tt>&lt;pointer&gt;</tt>' operand must be a pointer to
     the <a href="#t_firstclass">first class</a> type of the
-   '<tt>&lt;value&gt;</tt>' operand. If the <tt>store</tt> is marked
-   as <tt>volatile</tt>, then the optimizer is not allowed to modify the number
-   or order of execution of this <tt>store</tt> with other
-   volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
-   instructions.</p>
+   '<tt>&lt;value&gt;</tt>' operand. If the <tt>store</tt> is marked as
+   <tt>volatile</tt>, then the optimizer is not allowed to modify the number or
+   order of execution of this <tt>store</tt> with other <a
+   href="#volatile">volatile operations</a>.</p>
  
  <p>The optional constant "align" argument specifies the alignment of the
     operation (that is, the alignment of the memory address). A value of 0 or an
@@ -4164,11 +4374,11 @@ Instruction</a> </div>
  
  <p>The optional !nontemporal metadata must reference a single metadata
     name &lt;index&gt; corresponding to a metadata node with one i32 entry of
-   value 1.  The existance of the !nontemporal metatadata on the
+   value 1.  The existence of the !nontemporal metadata on the
     instruction tells the optimizer and code generator that this load is
     not expected to be reused in the cache.  The code generator may
     select special instructions to save cache bandwidth, such as the
-   MOVNT intruction on x86.</p>
+   MOVNT instruction on x86.</p>
  
  
  <h5>Semantics:</h5>
@@ -4291,13 +4501,14 @@ entry:
  </pre>
  
  <p>If the <tt>inbounds</tt> keyword is present, the result value of the
-   <tt>getelementptr</tt> is undefined if the base pointer is not an
-   <i>in bounds</i> address of an allocated object, or if any of the addresses
-   that would be formed by successive addition of the offsets implied by the
-   indices to the base address with infinitely precise arithmetic are not an
-   <i>in bounds</i> address of that allocated object.
-   The <i>in bounds</i> addresses for an allocated object are all the addresses
-   that point into the object, plus the address one byte past the end.</p>
+   <tt>getelementptr</tt> is a <a href="#trapvalues">trap value</a> if the
+   base pointer is not an <i>in bounds</i> address of an allocated object,
+   or if any of the addresses that would be formed by successive addition of
+   the offsets implied by the indices to the base address with infinitely
+   precise arithmetic are not an <i>in bounds</i> address of that allocated
+   object. The <i>in bounds</i> addresses for an allocated object are all
+   the addresses that point into the object, plus the address one byte past
+   the end.</p>
  
  <p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
     the base address with silently-wrapping two's complement arithmetic, and
@@ -4964,7 +5175,7 @@ entry:
        <tt>op1</tt> is equal to <tt>op2</tt>.</li>
  
    <li><tt>ogt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
-      <tt>op1</tt> is greather than <tt>op2</tt>.</li>
+      <tt>op1</tt> is greater than <tt>op2</tt>.</li>
  
    <li><tt>oge</tt>: yields <tt>true</tt> if both operands are not a QNAN and
        <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
@@ -5132,15 +5343,18 @@ Loop:       ; Infinite loop that counts from 0 on up...
        a <a href="#i_ret"><tt>ret</tt></a> instruction.  If the "tail" marker is
        present, the function call is eligible for tail call optimization,
        but <a href="CodeGenerator.html#tailcallopt">might not in fact be
-      optimized into a jump</a>.  As of this writing, the extra requirements for
-      a call to actually be optimized are:
+      optimized into a jump</a>.  The code generator may optimize calls marked
+      "tail" with either 1) automatic <a href="CodeGenerator.html#sibcallopt">
+      sibling call optimization</a> when the caller and callee have
+      matching signatures, or 2) forced tail call optimization when the
+      following extra requirements are met:
        <ul>
          <li>Caller and callee both have the calling
              convention <tt>fastcc</tt>.</li>
          <li>The call is in tail position (ret immediately follows call and ret
              uses value of call or is void).</li>
          <li>Option <tt>-tailcallopt</tt> is enabled,
-            or <code>llvm::PerformTailCallOpt</code> is <code>true</code>.</li>
+            or <code>llvm::GuaranteedTailCallOpt</code> is <code>true</code>.</li>
          <li><a href="CodeGenerator.html#tailcallopt">Platform specific
              constraints are met.</a></li>
        </ul>
@@ -5171,10 +5385,10 @@ Loop:       ; Infinite loop that counts from 0 on up...
        to function value.</li>
  
    <li>'<tt>function args</tt>': argument list whose types match the function
-      signature argument types. All arguments must be of
-      <a href="#t_firstclass">first class</a> type. If the function signature
-      indicates the function accepts a variable number of arguments, the extra
-      arguments can be specified.</li>
+      signature argument types and parameter attributes. All arguments must be
+      of <a href="#t_firstclass">first class</a> type. If the function
+      signature indicates the function accepts a variable number of arguments,
+      the extra arguments can be specified.</li>
  
    <li>The optional <a href="#fnattrs">function attributes</a> list. Only
        '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
@@ -5209,7 +5423,7 @@ Loop:       ; Infinite loop that counts from 0 on up...
  standard C99 library as being the C99 library functions, and may perform
  optimizations or generate code for them under that assumption.  This is
  something we'd like to change in the future to provide better support for
-freestanding environments and non-C-based langauges.</p>
+freestanding environments and non-C-based languages.</p>
  
  </div>
  
@@ -5765,7 +5979,7 @@ LLVM</a>.</p>
  
  <h5>Semantics:</h5>
  <p>This intrinsic does not modify the behavior of the program.  Backends that do
-   not support this intrinisic may ignore it.</p>
+   not support this intrinsic may ignore it.</p>
  
  </div>
  
@@ -5819,17 +6033,14 @@ LLVM</a>.</p>
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use <tt>llvm.memcpy</tt> on any
-   integer bit width. Not all targets support all bit widths however.</p>
+   integer bit width and for different address spaces. Not all targets support
+   all bit widths however.</p>
  
  <pre>
-  declare void @llvm.memcpy.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                               i8 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memcpy.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                                i16 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memcpy.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                                i32 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memcpy.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                                i64 &lt;len&gt;, i32 &lt;align&gt;)
+  declare void @llvm.memcpy.p0i8.p0i8.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                          i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
+  declare void @llvm.memcpy.p0i8.p0i8.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                          i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
  </pre>
  
  <h5>Overview:</h5>
@@ -5837,19 +6048,28 @@ LLVM</a>.</p>
     source location to the destination location.</p>
  
  <p>Note that, unlike the standard libc function, the <tt>llvm.memcpy.*</tt>
-   intrinsics do not return a value, and takes an extra alignment argument.</p>
+   intrinsics do not return a value, take extra alignment/isvolatile arguments
+   and the pointers can be in specified address spaces.</p>
  
  <h5>Arguments:</h5>
+
  <p>The first argument is a pointer to the destination, the second is a pointer
     to the source.  The third argument is an integer argument specifying the
-   number of bytes to copy, and the fourth argument is the alignment of the
-   source and destination locations.</p>
+   number of bytes to copy, the fourth argument is the alignment of the
+   source and destination locations, and the fifth is a boolean indicating a
+   volatile access.</p>
  
-<p>If the call to this intrinisic has an alignment value that is not 0 or 1,
+<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
     then the caller guarantees that both the source and destination pointers are
     aligned to that boundary.</p>
  
+<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
+   <tt>llvm.memcpy</tt> call is a <a href="#volatile">volatile operation</a>.
+   The detailed access behavior is not very cleanly specified and it is unwise
+   to depend on it.</p>
+
  <h5>Semantics:</h5>
+
  <p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
     source location to the destination location, which are not allowed to
     overlap.  It copies "len" bytes of memory over.  If the argument is known to
@@ -5867,17 +6087,14 @@ LLVM</a>.</p>
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use llvm.memmove on any integer bit
-   width. Not all targets support all bit widths however.</p>
+   width and for different address spaces. Not all targets support all bit
+   widths however.</p>
  
  <pre>
-  declare void @llvm.memmove.i8(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                                i8 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memmove.i16(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                                 i16 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memmove.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                                 i32 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memmove.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
-                                 i64 &lt;len&gt;, i32 &lt;align&gt;)
+  declare void @llvm.memmove.p0i8.p0i8.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                           i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
+  declare void @llvm.memmove.p0i8.p0i8.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+                                           i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
  </pre>
  
  <h5>Overview:</h5>
@@ -5887,19 +6104,28 @@ LLVM</a>.</p>
     overlap.</p>
  
  <p>Note that, unlike the standard libc function, the <tt>llvm.memmove.*</tt>
-   intrinsics do not return a value, and takes an extra alignment argument.</p>
+   intrinsics do not return a value, take extra alignment/isvolatile arguments,
+   and the pointers can be in specified address spaces.</p>
  
  <h5>Arguments:</h5>
+
  <p>The first argument is a pointer to the destination, the second is a pointer
     to the source.  The third argument is an integer argument specifying the
-   number of bytes to copy, and the fourth argument is the alignment of the
-   source and destination locations.</p>
+   number of bytes to copy, the fourth argument is the alignment of the
+   source and destination locations, and the fifth is a boolean indicating a
+   volatile access.</p>
  
-<p>If the call to this intrinisic has an alignment value that is not 0 or 1,
+<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
     then the caller guarantees that the source and destination pointers are
     aligned to that boundary.</p>
  
+<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
+   <tt>llvm.memmove</tt> call is a <a href="#volatile">volatile operation</a>.
+   The detailed access behavior is not very cleanly specified and it is unwise
+   to depend on it.</p>
+
  <h5>Semantics:</h5>
+
  <p>The '<tt>llvm.memmove.*</tt>' intrinsics copy a block of memory from the
     source location to the destination location, which may overlap.  It copies
     "len" bytes of memory over.  If the argument is known to be aligned to some
@@ -5917,17 +6143,14 @@ LLVM</a>.</p>
  
  <h5>Syntax:</h5>
  <p>This is an overloaded intrinsic. You can use llvm.memset on any integer bit
-   width. Not all targets support all bit widths however.</p>
+   width and for different address spaces. Not all targets support all bit
+   widths however.</p>
  
  <pre>
-  declare void @llvm.memset.i8(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
-                               i8 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memset.i16(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
-                                i16 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memset.i32(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
-                                i32 &lt;len&gt;, i32 &lt;align&gt;)
-  declare void @llvm.memset.i64(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
-                                i64 &lt;len&gt;, i32 &lt;align&gt;)
+  declare void @llvm.memset.p0i8.i32(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+                                     i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
+  declare void @llvm.memset.p0i8.i64(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+                                     i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
  </pre>
  
  <h5>Overview:</h5>
@@ -5935,7 +6158,8 @@ LLVM</a>.</p>
     particular byte value.</p>
  
  <p>Note that, unlike the standard libc function, the <tt>llvm.memset</tt>
-   intrinsic does not return a value, and takes an extra alignment argument.</p>
+   intrinsic does not return a value, takes extra alignment/isvolatile arguments,
+   and the destination can be in an arbitrary address space.</p>
  
  <h5>Arguments:</h5>
  <p>The first argument is a pointer to the destination to fill, the second is the
@@ -5943,10 +6167,15 @@ LLVM</a>.</p>
     specifying the number of bytes to fill, and the fourth argument is the known
     alignment of destination location.</p>
  
-<p>If the call to this intrinisic has an alignment value that is not 0 or 1,
+<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
     then the caller guarantees that the destination pointer is aligned to that
     boundary.</p>
  
+<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
+   <tt>llvm.memset</tt> call is a <a href="#volatile">volatile operation</a>.
+   The detailed access behavior is not very cleanly specified and it is unwise
+   to depend on it.</p>
+
  <h5>Semantics:</h5>
  <p>The '<tt>llvm.memset.*</tt>' intrinsics fill "len" bytes of memory starting
     at the destination location.  If the argument is known to be aligned to some
@@ -6566,6 +6795,97 @@ LLVM</a>.</p>
  
  </div>
  
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="int_fp16">Half Precision Floating Point Intrinsics</a>
+</div>
+
+<div class="doc_text">
+
+<p>Half precision floating point is a storage-only format. This means that it is
+   a dense encoding (in memory) but does not support computation in the
+   format.</p>
+   
+<p>This means that code must first load the half-precision floating point
+   value as an i16, then convert it to float with <a
+   href="#int_convert_from_fp16"><tt>llvm.convert.from.fp16</tt></a>.
+   Computation can then be performed on the float value (including extending to
+   double etc).  To store the value back to memory, it is first converted to
+   float if needed, then converted to i16 with
+   <a href="#int_convert_to_fp16"><tt>llvm.convert.to.fp16</tt></a>, then
+   stored as an i16 value.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+  declare i16 @llvm.convert.to.fp16(float %a)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.convert.to.fp16</tt>' intrinsic function performs
+   a conversion from single precision floating point format to half precision
+   floating point format.</p>
+
+<h5>Arguments:</h5>
+<p>The intrinsic function takes a single argument: the value to be
+   converted.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>llvm.convert.to.fp16</tt>' intrinsic function performs
+   a conversion from single precision floating point format to half precision
+   floating point format. The return value is an <tt>i16</tt> which
+   contains the converted number.</p>
+
+<h5>Examples:</h5>
+<pre>
+  %res = call i16 @llvm.convert.to.fp16(float %a)
+  store i16 %res, i16* @x, align 2
+</pre>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_convert_from_fp16">'<tt>llvm.convert.from.fp16</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+  declare float @llvm.convert.from.fp16(i16 %a)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.convert.from.fp16</tt>' intrinsic function performs
+   a conversion from half precision floating point format to single precision
+   floating point format.</p>
+
+<h5>Arguments:</h5>
+<p>The intrinsic function takes a single argument: the value to be
+   converted.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>llvm.convert.from.fp16</tt>' intrinsic function performs a
+   conversion from half precision floating point format to single
+   precision floating point format. The input half-float value is represented by
+   an <tt>i16</tt> value.</p>
+
+<h5>Examples:</h5>
+<pre>
+  %a = load i16* @x, align 2
+  %res = call f32 @llvm.convert.from.fp16(i16 %a)
+</pre>
+
+</div>
+
  <!-- ======================================================================= -->
  <div class="doc_subsection">
    <a name="int_debugger">Debugger Intrinsics</a>
@@ -6714,7 +7034,7 @@ LLVM</a>.</p>
  <h5>Arguments:</h5>
  <p>The <tt>llvm.memory.barrier</tt> intrinsic requires five boolean arguments.
     The first four arguments enables a specific barrier as listed below.  The
-   fith argument specifies that the barrier applies to io or device or uncached
+   fifth argument specifies that the barrier applies to io or device or uncached
     memory.</p>
  
  <ul>