From 59ad77e46ed43d552c2dee1ffd8e5bd522022919 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard@arm.com>
Date: Tue, 8 Dec 2015 12:16:10 +0000
Subject: [PATCH] [AArch64] Add ARMv8.2-A FP16 vector instructions

ARMv8.2-A adds 16-bit floating point versions of all existing SIMD
floating-point instructions. This is an optional extension, so all of
these instructions require the FeatureFullFP16 subtarget feature.

Note that VFP without SIMD is not a valid combination for any version of
ARMv8-A, but I have ensured that these instructions all depend on both
FeatureNEON and FeatureFullFP16 for consistency.

The ".2h" vector type specifier is now legal (for the scalar pairwise
reduction instructions), so some unrelated tests have been modified as
different error messages are emitted. This is not a problem as the
invalid operands are still caught.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255010 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64InstrFormats.td     | 576 ++++++++++++------
 lib/Target/AArch64/AArch64InstrInfo.td        | 152 +++--
 lib/Target/AArch64/AArch64RegisterInfo.td     |   2 +-
 .../AArch64/AsmParser/AArch64AsmParser.cpp    |   2 +
 test/MC/AArch64/arm64-advsimd.s               | 153 ++++-
 test/MC/AArch64/armv8.1a-rdma.s               |  18 -
 test/MC/AArch64/fullfp16-diagnostics.s        |  42 ++
 test/MC/AArch64/fullfp16-neon-neg.s           | 382 ++++++++++++
 test/MC/AArch64/neon-2velem.s                 |  18 +-
 test/MC/AArch64/neon-aba-abd.s                |   4 +-
 test/MC/AArch64/neon-across.s                 |  18 +-
 test/MC/AArch64/neon-add-pairwise.s           |   6 +-
 test/MC/AArch64/neon-add-sub-instructions.s   |  10 +-
 test/MC/AArch64/neon-compare-instructions.s   |  62 +-
 test/MC/AArch64/neon-diagnostics.s            | 132 ++--
 test/MC/AArch64/neon-facge-facgt.s            |  18 +-
 test/MC/AArch64/neon-frsqrt-frecp.s           |  10 +-
 test/MC/AArch64/neon-max-min-pairwise.s       |  18 +-
 test/MC/AArch64/neon-max-min.s                |  18 +-
 test/MC/AArch64/neon-mla-mls-instructions.s   |  10 +-
 test/MC/AArch64/neon-scalar-abs.s             |   4 +-
 test/MC/AArch64/neon-scalar-by-elem-mla.s     |   6 +-
 test/MC/AArch64/neon-scalar-by-elem-mul.s     |   6 +-
 test/MC/AArch64/neon-scalar-cvt.s             |  34 +-
 test/MC/AArch64/neon-scalar-fp-compare.s      |  32 +-
 test/MC/AArch64/neon-scalar-mul.s             |   4 +-
 test/MC/AArch64/neon-scalar-recip.s           |  12 +-
 test/MC/AArch64/neon-scalar-reduce-pairwise.s |   7 +-
 test/MC/AArch64/neon-simd-misc.s              |  98 ++-
 test/MC/AArch64/neon-simd-shift.s             |  18 +-
 .../AArch64/fullfp16-neon-neg.txt             | 382 ++++++++++++
 31 files changed, 1917 insertions(+), 337 deletions(-)
 create mode 100644 test/MC/AArch64/fullfp16-diagnostics.s
 create mode 100644 test/MC/AArch64/fullfp16-neon-neg.s
 create mode 100644 test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 5eef82153e3..101b0f7e1d3 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -4315,7 +4315,7 @@ let Predicates = [HasNEON] in {
 //----------------------------------------------------------------------------
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
                         RegisterOperand regtype, string asm, string kind,
                         list<dag> pattern>
   : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
@@ -4329,8 +4329,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
   let Inst{30}    = Q;
   let Inst{29}    = U;
   let Inst{28-24} = 0b01110;
-  let Inst{23-22} = size;
-  let Inst{21}    = 1;
+  let Inst{23-21} = size;
   let Inst{20-16} = Rm;
   let Inst{15-11} = opcode;
   let Inst{10}    = 1;
@@ -4339,7 +4338,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
 }
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
                         RegisterOperand regtype, string asm, string kind,
                         list<dag> pattern>
   : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
@@ -4353,8 +4352,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
   let Inst{30}    = Q;
   let Inst{29}    = U;
   let Inst{28-24} = 0b01110;
-  let Inst{23-22} = size;
-  let Inst{21}    = 1;
+  let Inst{23-21} = size;
   let Inst{20-16} = Rm;
   let Inst{15-11} = opcode;
   let Inst{10}    = 1;
@@ -4365,25 +4363,25 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
 // All operand sizes distinguished in the encoding.
 multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
                                SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
                                       asm, ".8b",
          [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
-  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
                                       asm, ".16b",
          [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
-  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
                                       asm, ".4h",
          [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
-  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
                                       asm, ".8h",
          [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
-  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
                                       asm, ".2s",
          [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
-  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
                                       asm, ".4s",
          [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
-  def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128,
+  def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128,
                                       asm, ".2d",
          [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
 }
@@ -4391,49 +4389,49 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
 // As above, but D sized elements unsupported.
 multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
                                   SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
                                       asm, ".8b",
         [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>;
-  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
                                       asm, ".16b",
         [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>;
-  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
                                       asm, ".4h",
         [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
-  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
                                       asm, ".8h",
         [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
-  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
                                       asm, ".2s",
         [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
-  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
                                       asm, ".4s",
         [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
 }
 
 multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
                                   SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64,
                                       asm, ".8b",
       [(set (v8i8 V64:$dst),
             (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
-  def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128,
                                       asm, ".16b",
       [(set (v16i8 V128:$dst),
             (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
-  def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64,
                                       asm, ".4h",
       [(set (v4i16 V64:$dst),
             (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
-  def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128,
                                       asm, ".8h",
       [(set (v8i16 V128:$dst),
             (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
-  def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64,
+  def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64,
                                       asm, ".2s",
       [(set (v2i32 V64:$dst),
             (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
-  def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128,
+  def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128,
                                       asm, ".4s",
       [(set (v4i32 V128:$dst),
             (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
@@ -4442,54 +4440,80 @@ multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
 // As above, but only B sized elements supported.
 multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
                                 SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
                                       asm, ".8b",
     [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
-  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
                                       asm, ".16b",
     [(set (v16i8 V128:$Rd),
           (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
 }
 
-// As above, but only S and D sized floating point elements supported.
-multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc,
+// As above, but only floating point elements supported.
+multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
                                  string asm, SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+                                      asm, ".4h",
+        [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+  def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+                                      asm, ".8h",
+        [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+  } // Predicates = [HasNEON, HasFullFP16]
+  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
                                       asm, ".2s",
         [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
-  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
                                       asm, ".4s",
         [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
-  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
                                       asm, ".2d",
         [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
 }
 
-multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc,
+multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
                                     string asm,
                                     SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+                                      asm, ".4h",
+        [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+  def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+                                      asm, ".8h",
+        [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+  } // Predicates = [HasNEON, HasFullFP16]
+  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
                                       asm, ".2s",
         [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
-  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
                                       asm, ".4s",
         [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
-  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
                                       asm, ".2d",
         [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
 }
 
-multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
+multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<3> opc,
                                  string asm, SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64,
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64,
+                                      asm, ".4h",
+     [(set (v4f16 V64:$dst),
+           (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+  def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128,
+                                      asm, ".8h",
+     [(set (v8f16 V128:$dst),
+           (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+  } // Predicates = [HasNEON, HasFullFP16]
+  def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64,
                                       asm, ".2s",
      [(set (v2f32 V64:$dst),
            (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
-  def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128,
+  def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128,
                                       asm, ".4s",
      [(set (v4f32 V128:$dst),
            (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
-  def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128,
+  def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128,
                                       asm, ".2d",
      [(set (v2f64 V128:$dst),
            (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
@@ -4498,16 +4522,16 @@ multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
 // As above, but D and B sized elements unsupported.
 multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
                                 SDPatternOperator OpNode> {
-  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
                                       asm, ".4h",
         [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
-  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
                                       asm, ".8h",
         [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
-  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
                                       asm, ".2s",
         [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
-  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
                                       asm, ".4s",
         [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
 }
@@ -4515,10 +4539,10 @@ multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
 // Logical three vector ops share opcode bits, and only use B sized elements.
 multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
                                   SDPatternOperator OpNode = null_frag> {
-  def v8i8  : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64,
+  def v8i8  : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64,
                                      asm, ".8b",
                          [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
-  def v16i8  : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128,
+  def v16i8  : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128,
                                      asm, ".16b",
                          [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
 
@@ -4539,11 +4563,11 @@ multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
 
 multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
                                   string asm, SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64,
+  def v8i8  : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64,
                                      asm, ".8b",
              [(set (v8i8 V64:$dst),
                    (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
-  def v16i8  : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128,
+  def v16i8  : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128,
                                      asm, ".16b",
              [(set (v16i8 V128:$dst),
                    (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
@@ -4583,8 +4607,8 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
-                        RegisterOperand regtype, string asm, string dstkind,
-                        string srckind, list<dag> pattern>
+                            bits<2> size2, RegisterOperand regtype, string asm,
+                            string dstkind, string srckind, list<dag> pattern>
   : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
       "{\t$Rd" # dstkind # ", $Rn" # srckind #
       "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
@@ -4596,7 +4620,9 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
   let Inst{29}    = U;
   let Inst{28-24} = 0b01110;
   let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
+  let Inst{21} = 0b1;
+  let Inst{20-19} = size2;
+  let Inst{18-17} = 0b00;
   let Inst{16-12} = opcode;
   let Inst{11-10} = 0b10;
   let Inst{9-5}   = Rn;
@@ -4605,8 +4631,9 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
-                            RegisterOperand regtype, string asm, string dstkind,
-                            string srckind, list<dag> pattern>
+                                bits<2> size2, RegisterOperand regtype,
+                                string asm, string dstkind, string srckind,
+                                list<dag> pattern>
   : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
       "{\t$Rd" # dstkind # ", $Rn" # srckind #
       "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
@@ -4618,7 +4645,9 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
   let Inst{29}    = U;
   let Inst{28-24} = 0b01110;
   let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
+  let Inst{21} = 0b1;
+  let Inst{20-19} = size2;
+  let Inst{18-17} = 0b00;
   let Inst{16-12} = opcode;
   let Inst{11-10} = 0b10;
   let Inst{9-5}   = Rn;
@@ -4628,22 +4657,22 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
 // Supports B, H, and S element sizes.
 multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
                             SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
                                       asm, ".8b", ".8b",
                           [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
                                       asm, ".16b", ".16b",
                           [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
                                       asm, ".4h", ".4h",
                           [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
-  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
                                       asm, ".8h", ".8h",
                           [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
-  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
                                       asm, ".2s", ".2s",
                           [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
                                       asm, ".4s", ".4s",
                           [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
 }
@@ -4686,49 +4715,49 @@ multiclass SIMDVectorLShiftLongBySizeBHS {
 // Supports all element sizes.
 multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
                              SDPatternOperator OpNode> {
-  def v8i8_v4i16  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+  def v8i8_v4i16  : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
                                       asm, ".4h", ".8b",
                [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+  def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
                                       asm, ".8h", ".16b",
                [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-  def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+  def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
                                       asm, ".2s", ".4h",
                [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
-  def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+  def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
                                       asm, ".4s", ".8h",
                [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
-  def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+  def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
                                       asm, ".1d", ".2s",
                [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+  def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
                                       asm, ".2d", ".4s",
                [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
 }
 
 multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
                                  SDPatternOperator OpNode> {
-  def v8i8_v4i16  : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+  def v8i8_v4i16  : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
                                           asm, ".4h", ".8b",
       [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
                                       (v8i8 V64:$Rn)))]>;
-  def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+  def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
                                           asm, ".8h", ".16b",
       [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
                                       (v16i8 V128:$Rn)))]>;
-  def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+  def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
                                           asm, ".2s", ".4h",
       [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
                                       (v4i16 V64:$Rn)))]>;
-  def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+  def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
                                           asm, ".4s", ".8h",
       [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
                                       (v8i16 V128:$Rn)))]>;
-  def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+  def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
                                           asm, ".1d", ".2s",
       [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
                                       (v2i32 V64:$Rn)))]>;
-  def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+  def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
                                           asm, ".2d", ".4s",
       [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
                                       (v4i32 V128:$Rn)))]>;
@@ -4737,50 +4766,50 @@ multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
 // Supports all element sizes, except 1xD.
 multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
                                   SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
                                     asm, ".8b", ".8b",
     [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
                                     asm, ".16b", ".16b",
     [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
-  def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
                                     asm, ".4h", ".4h",
     [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
-  def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
                                     asm, ".8h", ".8h",
     [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
-  def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+  def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
                                     asm, ".2s", ".2s",
     [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
-  def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+  def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
                                     asm, ".4s", ".4s",
     [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
-  def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128,
+  def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128,
                                     asm, ".2d", ".2d",
     [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
 }
 
 multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
                              SDPatternOperator OpNode = null_frag> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
                                 asm, ".8b", ".8b",
     [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
                                 asm, ".16b", ".16b",
     [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
                                 asm, ".4h", ".4h",
     [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
-  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
                                 asm, ".8h", ".8h",
     [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
-  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
                                 asm, ".2s", ".2s",
     [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
                                 asm, ".4s", ".4s",
     [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-  def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128,
+  def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128,
                                 asm, ".2d", ".2d",
     [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
 }
@@ -4789,10 +4818,10 @@ multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
 // Supports only B element sizes.
 multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
                           SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, size, opc, V64,
+  def v8i8  : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64,
                                 asm, ".8b", ".8b",
                     [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128,
+  def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128,
                                 asm, ".16b", ".16b",
                     [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
 
@@ -4801,16 +4830,16 @@ multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
 // Supports only B and H element sizes.
 multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
                                 SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
                                 asm, ".8b", ".8b",
                     [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
                                 asm, ".16b", ".16b",
                     [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
-  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
                                 asm, ".4h", ".4h",
                     [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
-  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
                                 asm, ".8h", ".8h",
                     [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
 }
@@ -4819,13 +4848,21 @@ multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
 // as an extra opcode bit.
 multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
                            SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+                                asm, ".4h", ".4h",
+                          [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+  def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+                                asm, ".8h", ".8h",
+                          [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+  } // Predicates = [HasNEON, HasFullFP16]
+  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
                                 asm, ".2s", ".2s",
                           [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
-  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
                                 asm, ".4s", ".4s",
                           [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
-  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
                                 asm, ".2d", ".2d",
                           [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
 }
@@ -4833,10 +4870,10 @@ multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
 // Supports only S element size.
 multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
                            SDPatternOperator OpNode> {
-  def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+  def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
                                 asm, ".2s", ".2s",
                           [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+  def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
                                 asm, ".4s", ".4s",
                           [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
 }
@@ -4844,26 +4881,42 @@ multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
 
 multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
                            SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+                                asm, ".4h", ".4h",
+                          [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+  def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+                                asm, ".8h", ".8h",
+                          [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+  } // Predicates = [HasNEON, HasFullFP16]
+  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
                                 asm, ".2s", ".2s",
                           [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
-  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
                                 asm, ".4s", ".4s",
                           [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
-  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
                                 asm, ".2d", ".2d",
                           [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
 }
 
 multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
                            SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+                                asm, ".4h", ".4h",
+                          [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+  def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+                                asm, ".8h", ".8h",
+                          [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+  } // Predicates = [HasNEON, HasFullFP16]
+  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
                                 asm, ".2s", ".2s",
                           [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
                                 asm, ".4s", ".4s",
                           [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
                                 asm, ".2d", ".2d",
                           [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
 }
@@ -4942,10 +4995,10 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
 }
 
-class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
-                           RegisterOperand regtype,
-                           string asm, string kind, string zero,
-                           ValueType dty, ValueType sty, SDNode OpNode>
+class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
+                           bits<5> opcode, RegisterOperand regtype, string asm,
+                           string kind, string zero, ValueType dty,
+                           ValueType sty, SDNode OpNode>
   : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
       "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
       "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
@@ -4958,7 +5011,9 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
   let Inst{29}    = U;
   let Inst{28-24} = 0b01110;
   let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
+  let Inst{21} = 0b1;
+  let Inst{20-19} = size2;
+  let Inst{18-17} = 0b00;
   let Inst{16-12} = opcode;
   let Inst{11-10} = 0b10;
   let Inst{9-5}   = Rn;
@@ -4968,49 +5023,69 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
 // Comparisons support all element sizes, except 1xD.
 multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
                             SDNode OpNode> {
-  def v8i8rz  : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64,
+  def v8i8rz  : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64,
                                      asm, ".8b", "0",
                                      v8i8, v8i8, OpNode>;
-  def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128,
+  def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128,
                                      asm, ".16b", "0",
                                      v16i8, v16i8, OpNode>;
-  def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64,
+  def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64,
                                      asm, ".4h", "0",
                                      v4i16, v4i16, OpNode>;
-  def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128,
+  def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128,
                                      asm, ".8h", "0",
                                      v8i16, v8i16, OpNode>;
-  def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64,
+  def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64,
                                      asm, ".2s", "0",
                                      v2i32, v2i32, OpNode>;
-  def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128,
+  def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128,
                                      asm, ".4s", "0",
                                      v4i32, v4i32, OpNode>;
-  def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128,
+  def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128,
                                      asm, ".2d", "0",
                                      v2i64, v2i64, OpNode>;
 }
 
-// FP Comparisons support only S and D element sizes.
+// FP Comparisons support only S and D element sizes (and H for v8.2a).
 multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
                               string asm, SDNode OpNode> {
 
-  def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64,
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64,
+                                     asm, ".4h", "0.0",
+                                     v4i16, v4f16, OpNode>;
+  def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128,
+                                     asm, ".8h", "0.0",
+                                     v8i16, v8f16, OpNode>;
+  } // Predicates = [HasNEON, HasFullFP16]
+  def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64,
                                      asm, ".2s", "0.0",
                                      v2i32, v2f32, OpNode>;
-  def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128,
+  def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128,
                                      asm, ".4s", "0.0",
                                      v4i32, v4f32, OpNode>;
-  def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128,
+  def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128,
                                      asm, ".2d", "0.0",
                                      v2i64, v2f64, OpNode>;
 
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def : InstAlias<asm # "\t$Vd.4h, $Vn.4h, #0",
+                  (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+  def : InstAlias<asm # "\t$Vd.8h, $Vn.8h, #0",
+                  (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+  }
   def : InstAlias<asm # "\t$Vd.2s, $Vn.2s, #0",
                   (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
   def : InstAlias<asm # "\t$Vd.4s, $Vn.4s, #0",
                   (!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
   def : InstAlias<asm # "\t$Vd.2d, $Vn.2d, #0",
                   (!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def : InstAlias<asm # ".4h\t$Vd, $Vn, #0",
+                  (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+  def : InstAlias<asm # ".8h\t$Vd, $Vn, #0",
+                  (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+  }
   def : InstAlias<asm # ".2s\t$Vd, $Vn, #0",
                   (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
   def : InstAlias<asm # ".4s\t$Vd, $Vn, #0",
@@ -5561,7 +5636,7 @@ multiclass SIMDZipVector<bits<3>opc, string asm,
 //----------------------------------------------------------------------------
 
 let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
                         RegisterClass regtype, string asm,
                         list<dag> pattern>
   : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
@@ -5573,8 +5648,7 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
   let Inst{31-30} = 0b01;
   let Inst{29}    = U;
   let Inst{28-24} = 0b11110;
-  let Inst{23-22} = size;
-  let Inst{21}    = 1;
+  let Inst{23-21} = size;
   let Inst{20-16} = Rm;
   let Inst{15-11} = opcode;
   let Inst{10}    = 1;
@@ -5605,17 +5679,17 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
 
 multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
                             SDPatternOperator OpNode> {
-  def v1i64  : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+  def v1i64  : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
     [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
 }
 
 multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
                                SDPatternOperator OpNode> {
-  def v1i64  : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+  def v1i64  : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
     [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
-  def v1i32  : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, []>;
-  def v1i16  : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
-  def v1i8   : BaseSIMDThreeScalar<U, 0b00, opc, FPR8 , asm, []>;
+  def v1i32  : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
+  def v1i16  : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
+  def v1i8   : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;
 
   def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
             (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
@@ -5625,9 +5699,9 @@ multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
 
 multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
                              SDPatternOperator OpNode> {
-  def v1i32  : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm,
+  def v1i32  : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm,
                              [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
-  def v1i16  : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
+  def v1i16  : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
 }
 
 multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
@@ -5640,26 +5714,34 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
                                      asm, []>;
 }
 
-multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
                              SDPatternOperator OpNode = null_frag> {
   let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-    def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+    def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
       [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
-    def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+    def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
       [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
+    let Predicates = [HasNEON, HasFullFP16] in {
+    def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+      [(set FPR16:$Rd, (OpNode FPR16:$Rn, FPR16:$Rm))]>;
+    } // Predicates = [HasNEON, HasFullFP16]
   }
 
   def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
             (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
 }
 
-multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,
                                 SDPatternOperator OpNode = null_frag> {
   let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-    def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+    def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
       [(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
-    def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+    def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
       [(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
+    let Predicates = [HasNEON, HasFullFP16] in {
+    def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+      []>;
+    } // Predicates = [HasNEON, HasFullFP16]
   }
 
   def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
@@ -5718,7 +5800,7 @@ multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm,
 //----------------------------------------------------------------------------
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
                         RegisterClass regtype, RegisterClass regtype2,
                         string asm, list<dag> pat>
   : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
@@ -5730,7 +5812,9 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
   let Inst{29}    = U;
   let Inst{28-24} = 0b11110;
   let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
+  let Inst{21} = 0b1;
+  let Inst{20-19} = size2;
+  let Inst{18-17} = 0b00;
   let Inst{16-12} = opcode;
   let Inst{11-10} = 0b10;
   let Inst{9-5}   = Rn;
@@ -5759,7 +5843,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
 
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
                         RegisterClass regtype, string asm, string zero>
   : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
       "\t$Rd, $Rn, #" # zero, "", []>,
@@ -5770,7 +5854,9 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
   let Inst{29}    = U;
   let Inst{28-24} = 0b11110;
   let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
+  let Inst{21} = 0b1;
+  let Inst{20-19} = size2;
+  let Inst{18-17} = 0b00;
   let Inst{16-12} = opcode;
   let Inst{11-10} = 0b10;
   let Inst{9-5}   = Rn;
@@ -5792,7 +5878,7 @@ class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
 
 multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
                              SDPatternOperator OpNode> {
-  def v1i64rz  : BaseSIMDCmpTwoScalar<U, 0b11, opc, FPR64, asm, "0">;
+  def v1i64rz  : BaseSIMDCmpTwoScalar<U, 0b11, 0b00, opc, FPR64, asm, "0">;
 
   def : Pat<(v1i64 (OpNode FPR64:$Rn)),
             (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
@@ -5800,13 +5886,20 @@ multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
 
 multiclass SIMDFPCmpTwoScalar<bit U, bit S, bits<5> opc, string asm,
                               SDPatternOperator OpNode> {
-  def v1i64rz  : BaseSIMDCmpTwoScalar<U, {S,1}, opc, FPR64, asm, "0.0">;
-  def v1i32rz  : BaseSIMDCmpTwoScalar<U, {S,0}, opc, FPR32, asm, "0.0">;
+  def v1i64rz  : BaseSIMDCmpTwoScalar<U, {S,1}, 0b00, opc, FPR64, asm, "0.0">;
+  def v1i32rz  : BaseSIMDCmpTwoScalar<U, {S,0}, 0b00, opc, FPR32, asm, "0.0">;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v1i16rz  : BaseSIMDCmpTwoScalar<U, {S,1}, 0b11, opc, FPR16, asm, "0.0">;
+  }
 
   def : InstAlias<asm # "\t$Rd, $Rn, #0",
                   (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>;
   def : InstAlias<asm # "\t$Rd, $Rn, #0",
                   (!cast<Instruction>(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def : InstAlias<asm # "\t$Rd, $Rn, #0",
+                  (!cast<Instruction>(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>;
+  }
 
   def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))),
             (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
@@ -5814,7 +5907,7 @@ multiclass SIMDFPCmpTwoScalar<bit U, bit S, bits<5> opc, string asm,
 
 multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
                           SDPatternOperator OpNode = null_frag> {
-  def v1i64       : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+  def v1i64       : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
     [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>;
 
   def : Pat<(i64 (OpNode (i64 FPR64:$Rn))),
@@ -5822,27 +5915,34 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
 }
 
 multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
-  def v1i64       : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,[]>;
-  def v1i32       : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,[]>;
+  def v1i64       : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
+  def v1i32       : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v1f16       : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
+  }
 }
 
-multiclass SIMDTwoScalarCVTSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
                               SDPatternOperator OpNode> {
-  def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,
+  def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
                                 [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
-  def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,
+  def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
                                 [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
+                                [(set FPR16:$Rd, (OpNode (f16 FPR16:$Rn)))]>;
+  }
 }
 
 multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
                              SDPatternOperator OpNode = null_frag> {
   let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
-    def v1i64  : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+    def v1i64  : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
            [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
-    def v1i32  : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR32, asm,
+    def v1i32  : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR32, asm,
            [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
-    def v1i16  : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR16, asm, []>;
-    def v1i8   : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
+    def v1i16  : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR16, asm, []>;
+    def v1i8   : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR8 , asm, []>;
   }
 
   def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))),
@@ -5869,10 +5969,10 @@ multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm,
                                  SDPatternOperator OpNode = null_frag> {
-  def v1i32  : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR64, asm,
+  def v1i32  : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR64, asm,
         [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
-  def v1i16  : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR32, asm, []>;
-  def v1i8   : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR16, asm, []>;
+  def v1i16  : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR32, asm, []>;
+  def v1i8   : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR16, asm, []>;
 }
 
 //----------------------------------------------------------------------------
@@ -5904,10 +6004,14 @@ multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> {
                                       asm, ".2d">;
 }
 
-multiclass SIMDFPPairwiseScalar<bit U, bit S, bits<5> opc, string asm> {
-  def v2i32p : BaseSIMDPairwiseScalar<U, {S,0}, opc, FPR32Op, V64,
+multiclass SIMDFPPairwiseScalar<bit S, bits<5> opc, string asm> {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64,
+                                      asm, ".2h">;
+  }
+  def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64,
                                       asm, ".2s">;
-  def v2i64p : BaseSIMDPairwiseScalar<U, {S,1}, opc, FPR64Op, V128,
+  def v2i64p : BaseSIMDPairwiseScalar<1, {S,1}, opc, FPR64Op, V128,
                                       asm, ".2d">;
 }
 
@@ -5963,8 +6067,16 @@ multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
                                    asm, ".4s", []>;
 }
 
-multiclass SIMDAcrossLanesS<bits<5> opcode, bit sz1, string asm,
+multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
                             Intrinsic intOp> {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64,
+                                   asm, ".4h",
+        [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>;
+  def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128,
+                                   asm, ".8h",
+        [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>;
+  } // Predicates = [HasNEON, HasFullFP16]
   def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128,
                                    asm, ".4s",
         [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>;
@@ -6451,7 +6563,7 @@ multiclass SIMDScalarCPY<string asm> {
 // AdvSIMD modified immediate instructions
 //----------------------------------------------------------------------------
 
-class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
+class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
                           string asm, string op_string,
                           string cstr, list<dag> pattern>
   : I<oops, iops, asm, op_string, cstr, pattern>,
@@ -6463,16 +6575,17 @@ class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
   let Inst{29}    = op;
   let Inst{28-19} = 0b0111100000;
   let Inst{18-16} = imm8{7-5};
-  let Inst{11-10} = 0b01;
+  let Inst{11} = op2;
+  let Inst{10} = 1;
   let Inst{9-5}   = imm8{4-0};
   let Inst{4-0}   = Rd;
 }
 
-class BaseSIMDModifiedImmVector<bit Q, bit op, RegisterOperand vectype,
+class BaseSIMDModifiedImmVector<bit Q, bit op, bit op2, RegisterOperand vectype,
                                 Operand immtype, dag opt_shift_iop,
                                 string opt_shift, string asm, string kind,
                                 list<dag> pattern>
-  : BaseSIMDModifiedImm<Q, op, (outs vectype:$Rd),
+  : BaseSIMDModifiedImm<Q, op, op2, (outs vectype:$Rd),
                         !con((ins immtype:$imm8), opt_shift_iop), asm,
                         "{\t$Rd" # kind # ", $imm8" # opt_shift #
                         "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
@@ -6484,7 +6597,7 @@ class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
                                 Operand immtype, dag opt_shift_iop,
                                 string opt_shift, string asm, string kind,
                                 list<dag> pattern>
-  : BaseSIMDModifiedImm<Q, op, (outs vectype:$dst),
+  : BaseSIMDModifiedImm<Q, op, 0, (outs vectype:$dst),
                         !con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop),
                         asm, "{\t$Rd" # kind # ", $imm8" # opt_shift #
                              "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
@@ -6495,7 +6608,7 @@ class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
 class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12,
                                      RegisterOperand vectype, string asm,
                                      string kind, list<dag> pattern>
-  : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+  : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
                               (ins logical_vec_shift:$shift),
                               "$shift", asm, kind, pattern> {
   bits<2> shift;
@@ -6520,7 +6633,7 @@ class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12,
 class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12,
                                          RegisterOperand vectype, string asm,
                                          string kind, list<dag> pattern>
-  : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+  : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
                               (ins logical_vec_hw_shift:$shift),
                               "$shift", asm, kind, pattern> {
   bits<2> shift;
@@ -6585,7 +6698,7 @@ multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode,
 class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
                              RegisterOperand vectype, string asm,
                              string kind, list<dag> pattern>
-  : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+  : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
                               (ins move_vec_shift:$shift),
                               "$shift", asm, kind, pattern> {
   bits<1> shift;
@@ -6593,18 +6706,18 @@ class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
   let Inst{12}    = shift;
 }
 
-class SIMDModifiedImmVectorNoShift<bit Q, bit op, bits<4> cmode,
+class SIMDModifiedImmVectorNoShift<bit Q, bit op, bit op2, bits<4> cmode,
                                    RegisterOperand vectype,
                                    Operand imm_type, string asm,
                                    string kind, list<dag> pattern>
-  : BaseSIMDModifiedImmVector<Q, op, vectype, imm_type, (ins), "",
+  : BaseSIMDModifiedImmVector<Q, op, op2, vectype, imm_type, (ins), "",
                               asm, kind, pattern> {
   let Inst{15-12} = cmode;
 }
 
 class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm,
                                    list<dag> pattern>
-  : BaseSIMDModifiedImm<Q, op, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
+  : BaseSIMDModifiedImm<Q, op, 0, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
                         "\t$Rd, $imm8", "", pattern> {
   let Inst{15-12} = cmode;
   let DecoderMethod = "DecodeModImmInstruction";
@@ -6676,6 +6789,34 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
 
 multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
                          SDPatternOperator OpNode> {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc,
+                                      V64, V64,
+                                      V128_lo, VectorIndexH,
+                                      asm, ".4h", ".4h", ".4h", ".h",
+    [(set (v4f16 V64:$Rd),
+        (OpNode (v4f16 V64:$Rn),
+         (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+    bits<3> idx;
+    let Inst{11} = idx{2};
+    let Inst{21} = idx{1};
+    let Inst{20} = idx{0};
+  }
+
+  def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc,
+                                      V128, V128,
+                                      V128_lo, VectorIndexH,
+                                      asm, ".8h", ".8h", ".8h", ".h",
+    [(set (v8f16 V128:$Rd),
+        (OpNode (v8f16 V128:$Rn),
+         (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+    bits<3> idx;
+    let Inst{11} = idx{2};
+    let Inst{21} = idx{1};
+    let Inst{20} = idx{0};
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
+
   def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
                                       V64, V64,
                                       V128, VectorIndexS,
@@ -6712,6 +6853,21 @@ multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
     let Inst{21} = 0;
   }
 
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc,
+                                      FPR16Op, FPR16Op, V128, VectorIndexH,
+                                      asm, ".h", "", "", ".h",
+    [(set (f16 FPR16Op:$Rd),
+          (OpNode (f16 FPR16Op:$Rn),
+                  (f16 (vector_extract (v8f16 V128:$Rm),
+                                       VectorIndexH:$idx))))]> {
+    bits<3> idx;
+    let Inst{11} = idx{2};
+    let Inst{21} = idx{1};
+    let Inst{20} = idx{0};
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
+
   def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
                                       FPR32Op, FPR32Op, V128, VectorIndexS,
                                       asm, ".s", "", "", ".s",
@@ -6790,6 +6946,27 @@ multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
 }
 
 multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64,
+                                          V128_lo, VectorIndexH,
+                                          asm, ".4h", ".4h", ".4h", ".h", []> {
+    bits<3> idx;
+    let Inst{11} = idx{2};
+    let Inst{21} = idx{1};
+    let Inst{20} = idx{0};
+  }
+
+  def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc,
+                                          V128, V128,
+                                          V128_lo, VectorIndexH,
+                                          asm, ".8h", ".8h", ".8h", ".h", []> {
+    bits<3> idx;
+    let Inst{11} = idx{2};
+    let Inst{21} = idx{1};
+    let Inst{20} = idx{0};
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
+
   def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64,
                                           V128, VectorIndexS,
                                           asm, ".2s", ".2s", ".2s", ".s", []> {
@@ -6816,6 +6993,16 @@ multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
     let Inst{21} = 0;
   }
 
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc,
+                                      FPR16Op, FPR16Op, V128, VectorIndexH,
+                                      asm, ".h", "", "", ".h", []> {
+    bits<3> idx;
+    let Inst{11} = idx{2};
+    let Inst{21} = idx{1};
+    let Inst{20} = idx{0};
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
 
   def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
                                       FPR32Op, FPR32Op, V128, VectorIndexS,
@@ -7353,7 +7540,13 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
 }
 
 
-multiclass SIMDScalarRShiftSD<bit U, bits<5> opc, string asm> {
+multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+                              FPR16, FPR16, vecshiftR16, asm, []> {
+    let Inst{19-16} = imm{3-0};
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
   def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
                               FPR32, FPR32, vecshiftR32, asm, []> {
     let Inst{20-16} = imm{4-0};
@@ -7533,6 +7726,23 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
 
 multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
                               Intrinsic OpNode> {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+                                  V64, V64, vecshiftR16,
+                                  asm, ".4h", ".4h",
+      [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+  }
+
+  def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+                                  V128, V128, vecshiftR16,
+                                  asm, ".8h", ".8h",
+      [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
   def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
                                   V64, V64, vecshiftR32,
                                   asm, ".2s", ".2s",
@@ -7558,8 +7768,26 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
   }
 }
 
-multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm,
+multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
                                   Intrinsic OpNode> {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+                                  V64, V64, vecshiftR16,
+                                  asm, ".4h", ".4h",
+      [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+  }
+
+  def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+                                  V128, V128, vecshiftR16,
+                                  asm, ".8h", ".8h",
+      [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
+
   def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
                                   V64, V64, vecshiftR32,
                                   asm, ".2s", ".2s",
@@ -8840,9 +9068,8 @@ let Predicates = [HasNEON, HasV8_1a] in {
 class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
                                     RegisterOperand regtype, string asm, 
                                     string kind, list<dag> pattern>
-  : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind, 
+  : BaseSIMDThreeSameVectorTied<Q, U, {size,0}, opcode, regtype, asm, kind, 
                                 pattern> {
-  let Inst{21}=0;
 }
 multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
                                              SDPatternOperator Accum> {
@@ -9277,6 +9504,7 @@ def : TokenAlias<".8H", ".8h">;
 def : TokenAlias<".4S", ".4s">;
 def : TokenAlias<".2D", ".2d">;
 def : TokenAlias<".1Q", ".1q">;
+def : TokenAlias<".2H", ".2h">;
 def : TokenAlias<".B", ".b">;
 def : TokenAlias<".H", ".h">;
 def : TokenAlias<".S", ".s">;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 881f55ebeef..cfb0c1b578d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2857,29 +2857,29 @@ defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
 defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
 defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
 defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
-defm FABD    : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>;
-defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>;
-defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP   : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>;
-defm FADD    : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
-defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
-defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
-defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
-defm FDIV    : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
-defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
-defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", fmaxnum>;
-defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
-defm FMAX    : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", fmaxnan>;
-defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
-defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", fminnum>;
-defm FMINP   : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
-defm FMIN    : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", fminnan>;
+defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
+defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
+defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
+defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
+defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
+defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
+defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
+defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
+defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
+defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
+defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
+defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
 
 // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
 // instruction expects the addend first, while the fma intrinsic puts it last.
-defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
+defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
             TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
-defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
+defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
             TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
 
 // The following def pats catch the case where the LHS of an FMA is negated.
@@ -2893,11 +2893,11 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
 def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
           (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
 
-defm FMULX    : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>;
-defm FMUL     : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
-defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>;
-defm FSUB     : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
+defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
+defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
+defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
+defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
 defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
                       TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
 defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
@@ -3081,6 +3081,14 @@ def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                 "|cmlt.2d\t$dst, $src1, $src2}",
                 (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
 
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
+                "|fcmle.4h\t$dst, $src1, $src2}",
+                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
+                "|fcmle.8h\t$dst, $src1, $src2}",
+                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
 def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                 "|fcmle.2s\t$dst, $src1, $src2}",
                 (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3091,6 +3099,14 @@ def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                 "|fcmle.2d\t$dst, $src1, $src2}",
                 (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
 
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
+                "|fcmlt.4h\t$dst, $src1, $src2}",
+                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
+                "|fcmlt.8h\t$dst, $src1, $src2}",
+                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
 def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                 "|fcmlt.2s\t$dst, $src1, $src2}",
                 (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3101,6 +3117,14 @@ def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                 "|fcmlt.2d\t$dst, $src1, $src2}",
                 (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
 
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
+                "|facle.4h\t$dst, $src1, $src2}",
+                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
+                "|facle.8h\t$dst, $src1, $src2}",
+                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
 def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                 "|facle.2s\t$dst, $src1, $src2}",
                 (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3111,6 +3135,14 @@ def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                 "|facle.2d\t$dst, $src1, $src2}",
                 (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
 
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
+                "|faclt.4h\t$dst, $src1, $src2}",
+                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
+                "|faclt.8h\t$dst, $src1, $src2}",
+                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
 def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                 "|faclt.2s\t$dst, $src1, $src2}",
                 (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3132,19 +3164,19 @@ defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
 defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
 defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
 defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
-defm FABD     : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>;
+defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
 def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
           (FABD64 FPR64:$Rn, FPR64:$Rm)>;
-defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge",
+defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                      int_aarch64_neon_facge>;
-defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt",
+defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                      int_aarch64_neon_facgt>;
-defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
-defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
-defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
-defm FMULX    : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>;
-defm FRECPS   : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS  : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
+defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
 defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
 defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -3248,14 +3280,14 @@ defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
 defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
 defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                  UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
-defm SCVTF  : SIMDTwoScalarCVTSD<   0, 0, 0b11101, "scvtf", AArch64sitof>;
+defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
 defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
 defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
 defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
 defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
 defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                      int_aarch64_neon_suqadd>;
-defm UCVTF  : SIMDTwoScalarCVTSD<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
+defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
 defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
 defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;
@@ -3620,11 +3652,11 @@ defm CPY : SIMDScalarCPY<"cpy">;
 //----------------------------------------------------------------------------
 
 defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
-defm FADDP   : SIMDFPPairwiseScalar<1, 0, 0b01101, "faddp">;
-defm FMAXNMP : SIMDFPPairwiseScalar<1, 0, 0b01100, "fmaxnmp">;
-defm FMAXP   : SIMDFPPairwiseScalar<1, 0, 0b01111, "fmaxp">;
-defm FMINNMP : SIMDFPPairwiseScalar<1, 1, 0b01100, "fminnmp">;
-defm FMINP   : SIMDFPPairwiseScalar<1, 1, 0b01111, "fminp">;
+defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
+defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
+defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
+defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
+defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
 def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
 def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
@@ -3976,10 +4008,10 @@ defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
 defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
 defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
 defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
-defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
-defm FMAXV   : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
-defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
-defm FMINV   : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
+defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
+defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
+defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
+defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
 
 // Patterns for across-vector intrinsics, that have a node equivalent, that
 // returns a vector (with only the low lane defined) instead of a scalar.
@@ -4226,15 +4258,23 @@ def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
 def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
 
 // AdvSIMD FMOV
-def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8,
+def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                               "fmov", ".2d",
                        [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
-def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64,  fpimm8,
+def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
                                               "fmov", ".2s",
                        [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
-def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8,
+def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                               "fmov", ".4s",
                        [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
+                                              "fmov", ".4h",
+                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
+                                              "fmov", ".8h",
+                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+} // Predicates = [HasNEON, HasFullFP16]
 
 // AdvSIMD MOVI
 
@@ -4262,7 +4302,7 @@ def : Pat<(v8i8  immAllOnesV), (MOVID (i32 255))>;
 // The movi_edit node has the immediate value already encoded, so we use
 // a plain imm0_255 in the pattern
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128,
+def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                                 simdimmtype10,
                                                 "movi", ".2d",
                    [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
@@ -4323,10 +4363,10 @@ def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                             (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
 
 // Per byte: 8b & 16b
-def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64,  imm0_255,
+def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
                                                  "movi", ".8b",
                        [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
-def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255,
+def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                                  "movi", ".16b",
                        [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
 
@@ -4526,10 +4566,10 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
 //----------------------------------------------------------------------------
 // AdvSIMD scalar shift instructions
 //----------------------------------------------------------------------------
-defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
-defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
-defm SCVTF  : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
-defm UCVTF  : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
+defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
+defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
+defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
+defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
 // Codegen patterns for the above. We don't put these directly on the
 // instructions because TableGen's type inference can't handle the truth.
 // Having the same base pattern for fp <--> int totally freaks it out.
@@ -4602,7 +4642,7 @@ defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
 //----------------------------------------------------------------------------
 defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
 defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
-defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf",
+defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                    int_aarch64_neon_vcvtfxs2fp>;
 defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
                                          int_aarch64_neon_rshrn>;
@@ -4637,7 +4677,7 @@ defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
 defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
 defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
                 TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
-defm UCVTF   : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
+defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                         int_aarch64_neon_vcvtfxu2fp>;
 defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                          int_aarch64_neon_uqrshrn>;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index b2efca02337..a8c8b176efa 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64",
 // The lower 16 vector registers.  Some instructions can only take registers
 // in this range.
 def FPR128_lo : RegisterClass<"AArch64",
-                              [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+                              [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
                               128, (trunc FPR128, 16)>;
 
 // Pairs, triples, and quads of 64-bit vector registers.
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f0ad855ed5e..394c8e78581 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1921,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) {
       .Case(".h", true)
       .Case(".s", true)
       .Case(".d", true)
+      // Needed for fp16 scalar pairwise reductions
+      .Case(".2h", true)
       .Default(false);
 }
 
diff --git a/test/MC/AArch64/arm64-advsimd.s b/test/MC/AArch64/arm64-advsimd.s
index c627de708d3..294f0908291 100644
--- a/test/MC/AArch64/arm64-advsimd.s
+++ b/test/MC/AArch64/arm64-advsimd.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto,fullfp16 -output-asm-variant=1 -show-encoding < %s | FileCheck %s
 
 foo:
 
@@ -440,6 +440,106 @@ foo:
 ; CHECK: urshl.8b	v0, v0, v0      ; encoding: [0x00,0x54,0x20,0x2e]
 ; CHECK: ushl.8b	v0, v0, v0              ; encoding: [0x00,0x44,0x20,0x2e]
 
+  fabd.4h v0, v0, v0
+  facge.4h  v0, v0, v0
+  facgt.4h  v0, v0, v0
+  faddp.4h v0, v0, v0
+  fadd.4h v0, v0, v0
+  fcmeq.4h  v0, v0, v0
+  fcmge.4h  v0, v0, v0
+  fcmgt.4h  v0, v0, v0
+  fdiv.4h v0, v0, v0
+  fmaxnmp.4h v0, v0, v0
+  fmaxnm.4h v0, v0, v0
+  fmaxp.4h v0, v0, v0
+  fmax.4h v0, v0, v0
+  fminnmp.4h v0, v0, v0
+  fminnm.4h v0, v0, v0
+  fminp.4h v0, v0, v0
+  fmin.4h v0, v0, v0
+  fmla.4h v0, v0, v0
+  fmls.4h v0, v0, v0
+  fmulx.4h v0, v0, v0
+  fmul.4h v0, v0, v0
+  frecps.4h v0, v0, v0
+  frsqrts.4h v0, v0, v0
+  fsub.4h v0, v0, v0
+
+; CHECK: fabd.4h    v0, v0, v0     ; encoding: [0x00,0x14,0xc0,0x2e]
+; CHECK: facge.4h   v0, v0, v0     ; encoding: [0x00,0x2c,0x40,0x2e]
+; CHECK: facgt.4h   v0, v0, v0     ; encoding: [0x00,0x2c,0xc0,0x2e]
+; CHECK: faddp.4h   v0, v0, v0     ; encoding: [0x00,0x14,0x40,0x2e]
+; CHECK: fadd.4h    v0, v0, v0     ; encoding: [0x00,0x14,0x40,0x0e]
+; CHECK: fcmeq.4h   v0, v0, v0     ; encoding: [0x00,0x24,0x40,0x0e]
+; CHECK: fcmge.4h   v0, v0, v0     ; encoding: [0x00,0x24,0x40,0x2e]
+; CHECK: fcmgt.4h   v0, v0, v0     ; encoding: [0x00,0x24,0xc0,0x2e]
+; CHECK: fdiv.4h    v0, v0, v0     ; encoding: [0x00,0x3c,0x40,0x2e]
+; CHECK: fmaxnmp.4h v0, v0, v0     ; encoding: [0x00,0x04,0x40,0x2e]
+; CHECK: fmaxnm.4h  v0, v0, v0     ; encoding: [0x00,0x04,0x40,0x0e]
+; CHECK: fmaxp.4h   v0, v0, v0     ; encoding: [0x00,0x34,0x40,0x2e]
+; CHECK: fmax.4h    v0, v0, v0     ; encoding: [0x00,0x34,0x40,0x0e]
+; CHECK: fminnmp.4h v0, v0, v0     ; encoding: [0x00,0x04,0xc0,0x2e]
+; CHECK: fminnm.4h  v0, v0, v0     ; encoding: [0x00,0x04,0xc0,0x0e]
+; CHECK: fminp.4h   v0, v0, v0     ; encoding: [0x00,0x34,0xc0,0x2e]
+; CHECK: fmin.4h    v0, v0, v0     ; encoding: [0x00,0x34,0xc0,0x0e]
+; CHECK: fmla.4h    v0, v0, v0     ; encoding: [0x00,0x0c,0x40,0x0e]
+; CHECK: fmls.4h    v0, v0, v0     ; encoding: [0x00,0x0c,0xc0,0x0e]
+; CHECK: fmulx.4h   v0, v0, v0     ; encoding: [0x00,0x1c,0x40,0x0e]
+; CHECK: fmul.4h    v0, v0, v0     ; encoding: [0x00,0x1c,0x40,0x2e]
+; CHECK: frecps.4h  v0, v0, v0     ; encoding: [0x00,0x3c,0x40,0x0e]
+; CHECK: frsqrts.4h v0, v0, v0     ; encoding: [0x00,0x3c,0xc0,0x0e]
+; CHECK: fsub.4h    v0, v0, v0     ; encoding: [0x00,0x14,0xc0,0x0e]
+
+  fabd.8h v0, v0, v0
+  facge.8h  v0, v0, v0
+  facgt.8h  v0, v0, v0
+  faddp.8h v0, v0, v0
+  fadd.8h v0, v0, v0
+  fcmeq.8h  v0, v0, v0
+  fcmge.8h  v0, v0, v0
+  fcmgt.8h  v0, v0, v0
+  fdiv.8h v0, v0, v0
+  fmaxnmp.8h v0, v0, v0
+  fmaxnm.8h v0, v0, v0
+  fmaxp.8h v0, v0, v0
+  fmax.8h v0, v0, v0
+  fminnmp.8h v0, v0, v0
+  fminnm.8h v0, v0, v0
+  fminp.8h v0, v0, v0
+  fmin.8h v0, v0, v0
+  fmla.8h v0, v0, v0
+  fmls.8h v0, v0, v0
+  fmulx.8h v0, v0, v0
+  fmul.8h v0, v0, v0
+  frecps.8h v0, v0, v0
+  frsqrts.8h v0, v0, v0
+  fsub.8h v0, v0, v0
+
+; CHECK: fabd.8h v0, v0, v0              ; encoding: [0x00,0x14,0xc0,0x6e]
+; CHECK: facge.8h        v0, v0, v0      ; encoding: [0x00,0x2c,0x40,0x6e]
+; CHECK: facgt.8h        v0, v0, v0      ; encoding: [0x00,0x2c,0xc0,0x6e]
+; CHECK: faddp.8h        v0, v0, v0      ; encoding: [0x00,0x14,0x40,0x6e]
+; CHECK: fadd.8h v0, v0, v0              ; encoding: [0x00,0x14,0x40,0x4e]
+; CHECK: fcmeq.8h        v0, v0, v0      ; encoding: [0x00,0x24,0x40,0x4e]
+; CHECK: fcmge.8h        v0, v0, v0      ; encoding: [0x00,0x24,0x40,0x6e]
+; CHECK: fcmgt.8h        v0, v0, v0      ; encoding: [0x00,0x24,0xc0,0x6e]
+; CHECK: fdiv.8h v0, v0, v0              ; encoding: [0x00,0x3c,0x40,0x6e]
+; CHECK: fmaxnmp.8h      v0, v0, v0      ; encoding: [0x00,0x04,0x40,0x6e]
+; CHECK: fmaxnm.8h       v0, v0, v0      ; encoding: [0x00,0x04,0x40,0x4e]
+; CHECK: fmaxp.8h        v0, v0, v0      ; encoding: [0x00,0x34,0x40,0x6e]
+; CHECK: fmax.8h v0, v0, v0              ; encoding: [0x00,0x34,0x40,0x4e]
+; CHECK: fminnmp.8h      v0, v0, v0      ; encoding: [0x00,0x04,0xc0,0x6e]
+; CHECK: fminnm.8h       v0, v0, v0      ; encoding: [0x00,0x04,0xc0,0x4e]
+; CHECK: fminp.8h        v0, v0, v0      ; encoding: [0x00,0x34,0xc0,0x6e]
+; CHECK: fmin.8h v0, v0, v0              ; encoding: [0x00,0x34,0xc0,0x4e]
+; CHECK: fmla.8h v0, v0, v0              ; encoding: [0x00,0x0c,0x40,0x4e]
+; CHECK: fmls.8h v0, v0, v0              ; encoding: [0x00,0x0c,0xc0,0x4e]
+; CHECK: fmulx.8h        v0, v0, v0      ; encoding: [0x00,0x1c,0x40,0x4e]
+; CHECK: fmul.8h v0, v0, v0              ; encoding: [0x00,0x1c,0x40,0x6e]
+; CHECK: frecps.8h       v0, v0, v0      ; encoding: [0x00,0x3c,0x40,0x4e]
+; CHECK: frsqrts.8h      v0, v0, v0      ; encoding: [0x00,0x3c,0xc0,0x4e]
+; CHECK: fsub.8h v0, v0, v0              ; encoding: [0x00,0x14,0xc0,0x4e]
+
   bif.8b v0, v0, v0
   bit.8b v0, v0, v0
   bsl.8b v0, v0, v0
@@ -568,6 +668,57 @@ foo:
 ; CHECK: shll2.4s	v1, v2, #16     ; encoding: [0x41,0x38,0x61,0x6e]
 ; CHECK: shll2.2d	v1, v2, #32     ; encoding: [0x41,0x38,0xa1,0x6e]
 
+  fabs.4h     v0, v0
+  fneg.4h     v0, v0
+  frecpe.4h   v0, v0
+  frinta.4h   v0, v0
+  frintx.4h   v0, v0
+  frinti.4h   v0, v0
+  frintm.4h   v0, v0
+  frintn.4h   v0, v0
+  frintp.4h   v0, v0
+  frintz.4h   v0, v0
+  frsqrte.4h  v0, v0
+  fsqrt.4h    v0, v0
+
+; CHECK: fabs.4h v0, v0                  ; encoding: [0x00,0xf8,0xf8,0x0e]
+; CHECK: fneg.4h v0, v0                  ; encoding: [0x00,0xf8,0xf8,0x2e]
+; CHECK: frecpe.4h       v0, v0          ; encoding: [0x00,0xd8,0xf9,0x0e]
+; CHECK: frinta.4h       v0, v0          ; encoding: [0x00,0x88,0x79,0x2e]
+; CHECK: frintx.4h       v0, v0          ; encoding: [0x00,0x98,0x79,0x2e]
+; CHECK: frinti.4h       v0, v0          ; encoding: [0x00,0x98,0xf9,0x2e]
+; CHECK: frintm.4h       v0, v0          ; encoding: [0x00,0x98,0x79,0x0e]
+; CHECK: frintn.4h       v0, v0          ; encoding: [0x00,0x88,0x79,0x0e]
+; CHECK: frintp.4h       v0, v0          ; encoding: [0x00,0x88,0xf9,0x0e]
+; CHECK: frintz.4h       v0, v0          ; encoding: [0x00,0x98,0xf9,0x0e]
+; CHECK: frsqrte.4h      v0, v0          ; encoding: [0x00,0xd8,0xf9,0x2e]
+; CHECK: fsqrt.4h        v0, v0          ; encoding: [0x00,0xf8,0xf9,0x2e]
+
+  fabs.8h     v0, v0
+  fneg.8h     v0, v0
+  frecpe.8h   v0, v0
+  frinta.8h   v0, v0
+  frintx.8h   v0, v0
+  frinti.8h   v0, v0
+  frintm.8h   v0, v0
+  frintn.8h   v0, v0
+  frintp.8h   v0, v0
+  frintz.8h   v0, v0
+  frsqrte.8h  v0, v0
+  fsqrt.8h    v0, v0
+
+; CHECK: fabs.8h v0, v0                  ; encoding: [0x00,0xf8,0xf8,0x4e]
+; CHECK: fneg.8h v0, v0                  ; encoding: [0x00,0xf8,0xf8,0x6e]
+; CHECK: frecpe.8h       v0, v0          ; encoding: [0x00,0xd8,0xf9,0x4e]
+; CHECK: frinta.8h       v0, v0          ; encoding: [0x00,0x88,0x79,0x6e]
+; CHECK: frintx.8h       v0, v0          ; encoding: [0x00,0x98,0x79,0x6e]
+; CHECK: frinti.8h       v0, v0          ; encoding: [0x00,0x98,0xf9,0x6e]
+; CHECK: frintm.8h       v0, v0          ; encoding: [0x00,0x98,0x79,0x4e]
+; CHECK: frintn.8h       v0, v0          ; encoding: [0x00,0x88,0x79,0x4e]
+; CHECK: frintp.8h       v0, v0          ; encoding: [0x00,0x88,0xf9,0x4e]
+; CHECK: frintz.8h       v0, v0          ; encoding: [0x00,0x98,0xf9,0x4e]
+; CHECK: frsqrte.8h      v0, v0          ; encoding: [0x00,0xd8,0xf9,0x6e]
+; CHECK: fsqrt.8h        v0, v0          ; encoding: [0x00,0xf8,0xf9,0x6e]
 
   cmeq.8b   v0, v0, #0
   cmeq.16b  v0, v0, #0
diff --git a/test/MC/AArch64/armv8.1a-rdma.s b/test/MC/AArch64/armv8.1a-rdma.s
index 1de2a0fb15d..36158428d6c 100644
--- a/test/MC/AArch64/armv8.1a-rdma.s
+++ b/test/MC/AArch64/armv8.1a-rdma.s
@@ -26,27 +26,9 @@
   sqrdmlsh v0.8s, v1.8s, v2.8s
   sqrdmlah v0.2s, v1.4h, v2.8h
   sqrdmlsh v0.4s, v1.8h, v2.2s
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR:   sqrdmlah v0.2h, v1.2h, v2.2h
-// CHECK-ERROR:            ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR:   sqrdmlah v0.2h, v1.2h, v2.2h
-// CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR:   sqrdmlah v0.2h, v1.2h, v2.2h
-// CHECK-ERROR:                          ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:   sqrdmlah v0.2h, v1.2h, v2.2h
 // CHECK-ERROR:            ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR:   sqrdmlsh v0.2h, v1.2h, v2.2h
-// CHECK-ERROR:            ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR:   sqrdmlsh v0.2h, v1.2h, v2.2h
-// CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR:   sqrdmlsh v0.2h, v1.2h, v2.2h
-// CHECK-ERROR:                          ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:   sqrdmlsh v0.2h, v1.2h, v2.2h
 // CHECK-ERROR:            ^
diff --git a/test/MC/AArch64/fullfp16-diagnostics.s b/test/MC/AArch64/fullfp16-diagnostics.s
new file mode 100644
index 00000000000..190b6e25a4b
--- /dev/null
+++ b/test/MC/AArch64/fullfp16-diagnostics.s
@@ -0,0 +1,42 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t
+// RUN: FileCheck < %t %s
+
+  fmla v0.4h, v1.4h, v16.h[3]
+  fmla v2.8h, v3.8h, v17.h[6]
+
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmla v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT:                    ^
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmla v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT:                    ^
+
+  fmls v0.4h, v1.4h, v16.h[3]
+  fmls v2.8h, v3.8h, v17.h[6]
+
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmls v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT:                    ^
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmls v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT:                    ^
+
+  fmul v0.4h, v1.4h, v16.h[3]
+  fmul v2.8h, v3.8h, v17.h[6]
+
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmul v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT:                    ^
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmul v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT:                    ^
+
+  fmulx v0.4h, v1.4h, v16.h[3]
+  fmulx v2.8h, v3.8h, v17.h[6]
+
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmulx v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT:                     ^
+// CHECK:      error: invalid operand for instruction
+// CHECK-NEXT: fmulx v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT:                     ^
diff --git a/test/MC/AArch64/fullfp16-neon-neg.s b/test/MC/AArch64/fullfp16-neon-neg.s
new file mode 100644
index 00000000000..0913ecb7e9a
--- /dev/null
+++ b/test/MC/AArch64/fullfp16-neon-neg.s
@@ -0,0 +1,382 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+neon,-fullfp16 -show-encoding < %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -mattr=-neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s
+
+
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fabs.4h     v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fneg.4h     v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecpe.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinta.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintx.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinti.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintm.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintn.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintp.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintz.4h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrte.4h  v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fsqrt.4h    v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fabs.8h     v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fneg.8h     v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecpe.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinta.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintx.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinti.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintm.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintn.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintp.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintz.8h   v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrte.8h  v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fsqrt.8h    v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmla v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmla v3.8h, v8.8h, v2.h[1]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmls v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmls v3.8h, v8.8h, v2.h[1]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmul v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmul v0.8h, v1.8h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmulx v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmulx v0.8h, v1.8h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fabd v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxnmv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminnmv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  faddp v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  faddp v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fadd v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fadd v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fsub v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fsub v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge v3.4h, v8.4h, v12.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge v31.8h, v29.8h, v28.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle v3.4h,  v12.4h, v8.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle v31.8h, v28.8h, v29.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt v0.4h, v16.4h, v31.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt v4.8h, v15.8h, v7.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq v0.4h, v31.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq v4.8h, v7.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq v0.4h, v31.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq v4.8h, v7.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge v3.4h, v8.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge v31.8h, v29.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge v3.4h, v8.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge v31.8h, v29.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt v0.4h, v31.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt v4.8h, v7.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt v0.4h, v31.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt v4.8h, v7.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle v3.4h, v20.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle v1.8h, v8.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle v3.4h, v20.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle v1.8h, v8.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt v16.4h, v2.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt v15.8h, v4.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt v16.4h, v2.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt v15.8h, v4.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facge v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facge v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facle v0.4h, v16.4h, v31.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facle v4.8h, v15.8h, v7.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facgt v3.4h, v8.4h, v12.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facgt v31.8h, v29.8h, v28.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  faclt v3.4h,  v12.4h, v8.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  faclt v31.8h, v28.8h, v29.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrts v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrts v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecps v3.4h, v8.4h, v12.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecps v31.8h, v29.8h, v28.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxp v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxp v31.8h, v15.8h, v16.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminp v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminp v3.8h, v5.8h, v6.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxnmp v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxnmp v31.8h, v15.8h, v16.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminnmp v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminnmp v3.8h, v5.8h, v6.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmax v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmax v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmin v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmin v10.8h, v15.8h, v22.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxnm v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmaxnm v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminnm v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fminnm v10.8h, v15.8h, v22.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmla v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmla v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmls v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmls v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fabd h29, h24, h20
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmla    h0, h1, v1.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmls    h2, h3, v4.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmul    h0, h1, v1.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmulx   h6, h2, v8.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzs h21, h12, #1
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzu h21, h12, #1
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtas h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtau h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtms h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtmu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtns h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtnu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtps h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtpu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzs h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmeq h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmge h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmgt h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmle h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcmlt h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facge h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  facgt h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fmulx h20, h22, h15
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecps h21, h16, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrts h21, h5, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecpe h19, h14
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecpx h18, h10
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrte h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  faddp h18, v3.2h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fabs v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fabs v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fneg v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fneg v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintn v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintn v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinta v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinta v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintp v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintp v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintm v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintm v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintx v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintx v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintz v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frintz v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinti v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frinti v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtns v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtns v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtnu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtnu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtps v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtps v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtpu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtpu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtms v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtms v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtmu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtmu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzs v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzs v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtzu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtas v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtas v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtau v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fcvtau v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecpe v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frecpe v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrte v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  frsqrte v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fsqrt v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+  fsqrt v6.8h, v8.8h
+
+// CHECK-NOT: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s
index 04841d0164f..ed55ad0b136 100644
--- a/test/MC/AArch64/neon-2velem.s
+++ b/test/MC/AArch64/neon-2velem.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -46,6 +46,8 @@
 // CHECK: mls	v0.8h, v1.8h, v2.h[7]   // encoding: [0x20,0x48,0x72,0x6f]
 // CHECK: mls	v0.8h, v1.8h, v14.h[6]  // encoding: [0x20,0x48,0x6e,0x6f]
 
+        fmla v0.4h, v1.4h, v2.h[2]
+        fmla v3.8h, v8.8h, v2.h[1]
         fmla v0.2s, v1.2s, v2.s[2]
         fmla v0.2s, v1.2s, v22.s[2]
         fmla v3.4s, v8.4s, v2.s[1]
@@ -53,6 +55,8 @@
         fmla v0.2d, v1.2d, v2.d[1]
         fmla v0.2d, v1.2d, v22.d[1]
 
+// CHECK: fmla    v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0x10,0x22,0x0f]
+// CHECK: fmla    v3.8h, v8.8h, v2.h[1]   // encoding: [0x03,0x11,0x12,0x4f]
 // CHECK: fmla	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x18,0x82,0x0f]
 // CHECK: fmla	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x18,0x96,0x0f]
 // CHECK: fmla	v3.4s, v8.4s, v2.s[1]   // encoding: [0x03,0x11,0xa2,0x4f]
@@ -60,6 +64,8 @@
 // CHECK: fmla	v0.2d, v1.2d, v2.d[1]   // encoding: [0x20,0x18,0xc2,0x4f]
 // CHECK: fmla	v0.2d, v1.2d, v22.d[1]  // encoding: [0x20,0x18,0xd6,0x4f]
 
+        fmls v0.4h, v1.4h, v2.h[2]
+        fmls v3.8h, v8.8h, v2.h[1]
         fmls v0.2s, v1.2s, v2.s[2]
         fmls v0.2s, v1.2s, v22.s[2]
         fmls v3.4s, v8.4s, v2.s[1]
@@ -67,6 +73,8 @@
         fmls v0.2d, v1.2d, v2.d[1]
         fmls v0.2d, v1.2d, v22.d[1]
 
+// CHECK: fmls    v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0x50,0x22,0x0f]
+// CHECK: fmls    v3.8h, v8.8h, v2.h[1]   // encoding: [0x03,0x51,0x12,0x4f]
 // CHECK: fmls	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x58,0x82,0x0f]
 // CHECK: fmls	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x58,0x96,0x0f]
 // CHECK: fmls	v3.4s, v8.4s, v2.s[1]   // encoding: [0x03,0x51,0xa2,0x4f]
@@ -172,6 +180,8 @@
 // CHECK: mul	v0.4s, v1.4s, v2.s[2]   // encoding: [0x20,0x88,0x82,0x4f]
 // CHECK: mul	v0.4s, v1.4s, v22.s[2]  // encoding: [0x20,0x88,0x96,0x4f]
 
+        fmul v0.4h, v1.4h, v2.h[2]
+        fmul v0.8h, v1.8h, v2.h[2]
         fmul v0.2s, v1.2s, v2.s[2]
         fmul v0.2s, v1.2s, v22.s[2]
         fmul v0.4s, v1.4s, v2.s[2]
@@ -179,6 +189,8 @@
         fmul v0.2d, v1.2d, v2.d[1]
         fmul v0.2d, v1.2d, v22.d[1]
 
+// CHECK: fmul    v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0x90,0x22,0x0f]
+// CHECK: fmul    v0.8h, v1.8h, v2.h[2]   // encoding: [0x20,0x90,0x22,0x4f]
 // CHECK: fmul	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x0f]
 // CHECK: fmul	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x98,0x96,0x0f]
 // CHECK: fmul	v0.4s, v1.4s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x4f]
@@ -186,6 +198,8 @@
 // CHECK: fmul	v0.2d, v1.2d, v2.d[1]   // encoding: [0x20,0x98,0xc2,0x4f]
 // CHECK: fmul	v0.2d, v1.2d, v22.d[1]  // encoding: [0x20,0x98,0xd6,0x4f]
 
+        fmulx v0.4h, v1.4h, v2.h[2]
+        fmulx v0.8h, v1.8h, v2.h[2]
         fmulx v0.2s, v1.2s, v2.s[2]
         fmulx v0.2s, v1.2s, v22.s[2]
         fmulx v0.4s, v1.4s, v2.s[2]
@@ -193,6 +207,8 @@
         fmulx v0.2d, v1.2d, v2.d[1]
         fmulx v0.2d, v1.2d, v22.d[1]
 
+// CHECK: fmulx   v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0x90,0x22,0x2f]
+// CHECK: fmulx   v0.8h, v1.8h, v2.h[2]   // encoding: [0x20,0x90,0x22,0x6f]
 // CHECK: fmulx	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x2f]
 // CHECK: fmulx	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x98,0x96,0x2f]
 // CHECK: fmulx	v0.4s, v1.4s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x6f]
diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s
index 178eb26f64c..b3a90bb1489 100644
--- a/test/MC/AArch64/neon-aba-abd.s
+++ b/test/MC/AArch64/neon-aba-abd.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -68,10 +68,12 @@
 //----------------------------------------------------------------------
 // Vector Absolute Difference (Floating Point)
 //----------------------------------------------------------------------
+         fabd v0.4h, v1.4h, v2.4h
          fabd v0.2s, v1.2s, v2.2s
          fabd v31.4s, v15.4s, v16.4s
          fabd v7.2d, v8.2d, v25.2d
 
+// CHECK: fabd    v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x14,0xc2,0x2e]
 // CHECK: fabd v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xd4,0xa2,0x2e]
 // CHECK: fabd v31.4s, v15.4s, v16.4s // encoding: [0xff,0xd5,0xb0,0x6e]
 // CHECK: fabd v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xd5,0xf9,0x6e]
diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s
index 60b766d8c88..74edc519a47 100644
--- a/test/MC/AArch64/neon-across.s
+++ b/test/MC/AArch64/neon-across.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -90,11 +90,27 @@
 // CHECK: addv	h0, v1.8h               // encoding: [0x20,0xb8,0x71,0x4e]
 // CHECK: addv	s0, v1.4s               // encoding: [0x20,0xb8,0xb1,0x4e]
 
+        fmaxnmv h0, v1.4h
+        fminnmv h0, v1.4h
+        fmaxv h0, v1.4h
+        fminv h0, v1.4h
+        fmaxnmv h0, v1.8h
+        fminnmv h0, v1.8h
+        fmaxv h0, v1.8h
+        fminv h0, v1.8h
         fmaxnmv s0, v1.4s
         fminnmv s0, v1.4s
         fmaxv s0, v1.4s
         fminv s0, v1.4s
 
+// CHECK: fmaxnmv h0, v1.4h               // encoding: [0x20,0xc8,0x30,0x0e]
+// CHECK: fminnmv h0, v1.4h               // encoding: [0x20,0xc8,0xb0,0x0e]
+// CHECK: fmaxv   h0, v1.4h               // encoding: [0x20,0xf8,0x30,0x0e]
+// CHECK: fminv   h0, v1.4h               // encoding: [0x20,0xf8,0xb0,0x0e]
+// CHECK: fmaxnmv h0, v1.8h               // encoding: [0x20,0xc8,0x30,0x4e]
+// CHECK: fminnmv h0, v1.8h               // encoding: [0x20,0xc8,0xb0,0x4e]
+// CHECK: fmaxv   h0, v1.8h               // encoding: [0x20,0xf8,0x30,0x4e]
+// CHECK: fminv   h0, v1.8h               // encoding: [0x20,0xf8,0xb0,0x4e]
 // CHECK: fmaxnmv	s0, v1.4s               // encoding: [0x20,0xc8,0x30,0x6e]
 // CHECK: fminnmv	s0, v1.4s               // encoding: [0x20,0xc8,0xb0,0x6e]
 // CHECK: fmaxv	s0, v1.4s               // encoding: [0x20,0xf8,0x30,0x6e]
diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s
index df9938b07e5..3d77c6e2790 100644
--- a/test/MC/AArch64/neon-add-pairwise.s
+++ b/test/MC/AArch64/neon-add-pairwise.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -25,10 +25,14 @@
 //------------------------------------------------------------------------------
 // Vector Add Pairwise (Floating Point
 //------------------------------------------------------------------------------
+         faddp v0.4h, v1.4h, v2.4h
+         faddp v0.8h, v1.8h, v2.8h
          faddp v0.2s, v1.2s, v2.2s
          faddp v0.4s, v1.4s, v2.4s
          faddp v0.2d, v1.2d, v2.2d
 
+// CHECK: faddp   v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x14,0x42,0x2e]
+// CHECK: faddp   v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x14,0x42,0x6e]
 // CHECK: faddp v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xd4,0x22,0x2e]
 // CHECK: faddp v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0x22,0x6e]
 // CHECK: faddp v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0x62,0x6e]
diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s
index 68f169b3dd9..0d841653702 100644
--- a/test/MC/AArch64/neon-add-sub-instructions.s
+++ b/test/MC/AArch64/neon-add-sub-instructions.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -44,10 +44,14 @@
 //------------------------------------------------------------------------------
 // Vector Floating-Point Add
 //------------------------------------------------------------------------------
+         fadd v0.4h, v1.4h, v2.4h
+         fadd v0.8h, v1.8h, v2.8h
          fadd v0.2s, v1.2s, v2.2s
          fadd v0.4s, v1.4s, v2.4s
          fadd v0.2d, v1.2d, v2.2d
 
+// CHECK: fadd v0.4h, v1.4h, v2.4h       // encoding: [0x20,0x14,0x42,0x0e]
+// CHECK: fadd v0.8h, v1.8h, v2.8h       // encoding: [0x20,0x14,0x42,0x4e]
 // CHECK: fadd v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xd4,0x22,0x0e]
 // CHECK: fadd v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0x22,0x4e]
 // CHECK: fadd v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0x62,0x4e]
@@ -56,10 +60,14 @@
 //------------------------------------------------------------------------------
 // Vector Floating-Point Sub
 //------------------------------------------------------------------------------
+         fsub v0.4h, v1.4h, v2.4h
+         fsub v0.8h, v1.8h, v2.8h
          fsub v0.2s, v1.2s, v2.2s
          fsub v0.4s, v1.4s, v2.4s
          fsub v0.2d, v1.2d, v2.2d
 
+// CHECK: fsub v0.4h, v1.4h, v2.4h       // encoding: [0x20,0x14,0xc2,0x0e]
+// CHECK; fsub v0.8h, v1.8h, v2.8h       // encoding: [0x20,0x14,0xc2,0x4e]
 // CHECK: fsub v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xd4,0xa2,0x0e]
 // CHECK: fsub v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0xa2,0x4e]
 // CHECK: fsub v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0xe2,0x4e]
diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s
index 19cfaf1f4d3..ffa88e50e0c 100644
--- a/test/MC/AArch64/neon-compare-instructions.s
+++ b/test/MC/AArch64/neon-compare-instructions.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -194,10 +194,14 @@
 // Vector Compare Mask Equal (Floating Point)
 //----------------------------------------------------------------------
 
+         fcmeq v0.4h, v31.4h, v16.4h
+         fcmeq v4.8h, v7.8h, v15.8h
          fcmeq v0.2s, v31.2s, v16.2s
          fcmeq v4.4s, v7.4s, v15.4s
          fcmeq v29.2d, v2.2d, v5.2d
 
+// CHECK: fcmeq   v0.4h, v31.4h, v16.4h   // encoding: [0xe0,0x27,0x50,0x0e]
+// CHECK: fcmeq   v4.8h, v7.8h, v15.8h    // encoding: [0xe4,0x24,0x4f,0x4e]
 // CHECK: fcmeq v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0x30,0x0e]
 // CHECK: fcmeq v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xe4,0x2f,0x4e]
 // CHECK: fcmeq v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xe4,0x65,0x4e]
@@ -208,6 +212,10 @@
 // FCMLE is alias for FCMGE with operands reversed.
 //----------------------------------------------------------------------
 
+         fcmge v3.4h, v8.4h, v12.4h
+         fcmge v31.8h, v29.8h, v28.8h
+         fcmle v3.4h,  v12.4h, v8.4h
+         fcmle v31.8h, v28.8h, v29.8h
          fcmge v31.4s, v29.4s, v28.4s
          fcmge v3.2s, v8.2s, v12.2s
          fcmge v17.2d, v15.2d, v13.2d
@@ -215,6 +223,10 @@
          fcmle v3.2s,  v12.2s, v8.2s
          fcmle v17.2d, v13.2d, v15.2d
 
+// CHECK: fcmge   v3.4h, v8.4h, v12.4h    // encoding: [0x03,0x25,0x4c,0x2e]
+// CHECK: fcmge   v31.8h, v29.8h, v28.8h  // encoding: [0xbf,0x27,0x5c,0x6e]
+// CHECK: fcmge   v3.4h, v8.4h, v12.4h    // encoding: [0x03,0x25,0x4c,0x2e]
+// CHECK: fcmge   v31.8h, v29.8h, v28.8h  // encoding: [0xbf,0x27,0x5c,0x6e]
 // CHECK: fcmge v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xe7,0x3c,0x6e]
 // CHECK: fcmge v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xe5,0x2c,0x2e]
 // CHECK: fcmge v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xe5,0x6d,0x6e]
@@ -228,6 +240,10 @@
 // FCMLT is alias for FCMGT with operands reversed.
 //----------------------------------------------------------------------
 
+         fcmgt v0.4h, v31.4h, v16.4h
+         fcmgt v4.8h, v7.8h, v15.8h
+         fcmlt v0.4h, v16.4h, v31.4h
+         fcmlt v4.8h, v15.8h, v7.8h
          fcmgt v0.2s, v31.2s, v16.2s
          fcmgt v4.4s, v7.4s, v15.4s
          fcmgt v29.2d, v2.2d, v5.2d
@@ -235,6 +251,10 @@
          fcmlt v4.4s, v15.4s, v7.4s
          fcmlt v29.2d, v5.2d, v2.2d
 
+// CHECK: fcmgt   v0.4h, v31.4h, v16.4h   // encoding: [0xe0,0x27,0xd0,0x2e]
+// CHECK: fcmgt   v4.8h, v7.8h, v15.8h    // encoding: [0xe4,0x24,0xcf,0x6e]
+// CHECK: fcmgt   v0.4h, v31.4h, v16.4h   // encoding: [0xe0,0x27,0xd0,0x2e]
+// CHECK: fcmgt   v4.8h, v7.8h, v15.8h    // encoding: [0xe4,0x24,0xcf,0x6e]
 // CHECK: fcmgt v0.2s, v31.2s, v16.2s  // encoding: [0xe0,0xe7,0xb0,0x2e]
 // CHECK: fcmgt v4.4s, v7.4s, v15.4s   // encoding: [0xe4,0xe4,0xaf,0x6e]
 // CHECK: fcmgt v29.2d, v2.2d, v5.2d   // encoding: [0x5d,0xe4,0xe5,0x6e]
@@ -343,16 +363,24 @@
 //----------------------------------------------------------------------
 // Vector Compare Mask Equal to Zero (Floating Point)
 //----------------------------------------------------------------------
+         fcmeq v0.4h, v31.4h, #0.0
+         fcmeq v4.8h, v7.8h, #0.0
          fcmeq v0.2s, v31.2s, #0.0
          fcmeq v4.4s, v7.4s, #0.0
          fcmeq v29.2d, v2.2d, #0.0
+         fcmeq v0.4h, v31.4h, #0
+         fcmeq v4.8h, v7.8h, #0
          fcmeq v0.2s, v31.2s, #0
          fcmeq v4.4s, v7.4s, #0
          fcmeq v29.2d, v2.2d, #0
 
+// CHECK: fcmeq   v0.4h, v31.4h, #0.0     // encoding: [0xe0,0xdb,0xf8,0x0e]
+// CHECK: fcmeq   v4.8h, v7.8h, #0.0      // encoding: [0xe4,0xd8,0xf8,0x4e]
 // CHECK: fcmeq v0.2s, v31.2s, #0.0  // encoding: [0xe0,0xdb,0xa0,0x0e]
 // CHECK: fcmeq v4.4s, v7.4s, #0.0   // encoding: [0xe4,0xd8,0xa0,0x4e]
 // CHECK: fcmeq v29.2d, v2.2d, #0.0  // encoding: [0x5d,0xd8,0xe0,0x4e]
+// CHECK: fcmeq   v0.4h, v31.4h, #0.0     // encoding: [0xe0,0xdb,0xf8,0x0e]
+// CHECK: fcmeq   v4.8h, v7.8h, #0.0      // encoding: [0xe4,0xd8,0xf8,0x4e]
 // CHECK: fcmeq v0.2s, v31.2s, #0.0  // encoding: [0xe0,0xdb,0xa0,0x0e]
 // CHECK: fcmeq v4.4s, v7.4s, #0.0   // encoding: [0xe4,0xd8,0xa0,0x4e]
 // CHECK: fcmeq v29.2d, v2.2d, #0.0  // encoding: [0x5d,0xd8,0xe0,0x4e]
@@ -360,16 +388,24 @@
 //----------------------------------------------------------------------
 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
 //----------------------------------------------------------------------
+         fcmge v3.4h, v8.4h, #0.0
+         fcmge v31.8h, v29.8h, #0.0
          fcmge v31.4s, v29.4s, #0.0
          fcmge v3.2s, v8.2s, #0.0
          fcmge v17.2d, v15.2d, #0.0
+         fcmge v3.4h, v8.4h, #0
+         fcmge v31.8h, v29.8h, #0
          fcmge v31.4s, v29.4s, #0
          fcmge v3.2s, v8.2s, #0
          fcmge v17.2d, v15.2d, #0
 
+// CHECK: fcmge   v3.4h, v8.4h, #0.0      // encoding: [0x03,0xc9,0xf8,0x2e]
+// CHECK: fcmge   v31.8h, v29.8h, #0.0    // encoding: [0xbf,0xcb,0xf8,0x6e]
 // CHECK: fcmge v31.4s, v29.4s, #0.0  // encoding: [0xbf,0xcb,0xa0,0x6e]
 // CHECK: fcmge v3.2s, v8.2s, #0.0    // encoding: [0x03,0xc9,0xa0,0x2e]
 // CHECK: fcmge v17.2d, v15.2d, #0.0   // encoding: [0xf1,0xc9,0xe0,0x6e]
+// CHECK: fcmge   v3.4h, v8.4h, #0.0      // encoding: [0x03,0xc9,0xf8,0x2e]
+// CHECK: fcmge   v31.8h, v29.8h, #0.0    // encoding: [0xbf,0xcb,0xf8,0x6e]
 // CHECK: fcmge v31.4s, v29.4s, #0.0  // encoding: [0xbf,0xcb,0xa0,0x6e]
 // CHECK: fcmge v3.2s, v8.2s, #0.0    // encoding: [0x03,0xc9,0xa0,0x2e]
 // CHECK: fcmge v17.2d, v15.2d, #0.0   // encoding: [0xf1,0xc9,0xe0,0x6e]
@@ -377,16 +413,24 @@
 //----------------------------------------------------------------------
 // Vector Compare Mask Greater Than Zero (Floating Point)
 //----------------------------------------------------------------------
+         fcmgt v0.4h, v31.4h, #0.0
+         fcmgt v4.8h, v7.8h, #0.0
          fcmgt v0.2s, v31.2s, #0.0
          fcmgt v4.4s, v7.4s, #0.0
          fcmgt v29.2d, v2.2d, #0.0
+         fcmgt v0.4h, v31.4h, #0
+         fcmgt v4.8h, v7.8h, #0
          fcmgt v0.2s, v31.2s, #0
          fcmgt v4.4s, v7.4s, #0
          fcmgt v29.2d, v2.2d, #0
 
+// CHECK: fcmgt   v0.4h, v31.4h, #0.0     // encoding: [0xe0,0xcb,0xf8,0x0e]
+// CHECK: fcmgt   v4.8h, v7.8h, #0.0      // encoding: [0xe4,0xc8,0xf8,0x4e]
 // CHECK: fcmgt v0.2s, v31.2s, #0.0   // encoding: [0xe0,0xcb,0xa0,0x0e]
 // CHECK: fcmgt v4.4s, v7.4s, #0.0    // encoding: [0xe4,0xc8,0xa0,0x4e]
 // CHECK: fcmgt v29.2d, v2.2d, #0.0   // encoding: [0x5d,0xc8,0xe0,0x4e]
+// CHECK: fcmgt   v0.4h, v31.4h, #0.0     // encoding: [0xe0,0xcb,0xf8,0x0e]
+// CHECK: fcmgt   v4.8h, v7.8h, #0.0      // encoding: [0xe4,0xc8,0xf8,0x4e]
 // CHECK: fcmgt v0.2s, v31.2s, #0.0   // encoding: [0xe0,0xcb,0xa0,0x0e]
 // CHECK: fcmgt v4.4s, v7.4s, #0.0    // encoding: [0xe4,0xc8,0xa0,0x4e]
 // CHECK: fcmgt v29.2d, v2.2d, #0.0   // encoding: [0x5d,0xc8,0xe0,0x4e]
@@ -394,16 +438,24 @@
 //----------------------------------------------------------------------
 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
 //----------------------------------------------------------------------
+         fcmle v3.4h, v20.4h, #0.0
+         fcmle v1.8h, v8.8h, #0.0
          fcmle v1.4s, v8.4s, #0.0
          fcmle v3.2s, v20.2s, #0.0
          fcmle v7.2d, v13.2d, #0.0
+         fcmle v3.4h, v20.4h, #0
+         fcmle v1.8h, v8.8h, #0
          fcmle v1.4s, v8.4s, #0
          fcmle v3.2s, v20.2s, #0
          fcmle v7.2d, v13.2d, #0
 
+// CHECK: fcmle   v3.4h, v20.4h, #0.0     // encoding: [0x83,0xda,0xf8,0x2e]
+// CHECK: fcmle   v1.8h, v8.8h, #0.0      // encoding: [0x01,0xd9,0xf8,0x6e]
 // CHECK: fcmle v1.4s, v8.4s, #0.0   // encoding: [0x01,0xd9,0xa0,0x6e]
 // CHECK: fcmle v3.2s, v20.2s, #0.0  // encoding: [0x83,0xda,0xa0,0x2e]
 // CHECK: fcmle v7.2d, v13.2d, #0.0  // encoding: [0xa7,0xd9,0xe0,0x6e]
+// CHECK: fcmle   v3.4h, v20.4h, #0.0     // encoding: [0x83,0xda,0xf8,0x2e]
+// CHECK: fcmle   v1.8h, v8.8h, #0.0      // encoding: [0x01,0xd9,0xf8,0x6e]
 // CHECK: fcmle v1.4s, v8.4s, #0.0   // encoding: [0x01,0xd9,0xa0,0x6e]
 // CHECK: fcmle v3.2s, v20.2s, #0.0  // encoding: [0x83,0xda,0xa0,0x2e]
 // CHECK: fcmle v7.2d, v13.2d, #0.0  // encoding: [0xa7,0xd9,0xe0,0x6e]
@@ -411,16 +463,24 @@
 //----------------------------------------------------------------------
 // Vector Compare Mask Less Than Zero (Floating Point)
 //----------------------------------------------------------------------
+         fcmlt v16.4h, v2.4h, #0.0
+         fcmlt v15.8h, v4.8h, #0.0
          fcmlt v16.2s, v2.2s, #0.0
          fcmlt v15.4s, v4.4s, #0.0
          fcmlt v5.2d, v29.2d, #0.0
+         fcmlt v16.4h, v2.4h, #0
+         fcmlt v15.8h, v4.8h, #0
          fcmlt v16.2s, v2.2s, #0
          fcmlt v15.4s, v4.4s, #0
          fcmlt v5.2d, v29.2d, #0
 
+// CHECK: fcmlt   v16.4h, v2.4h, #0.0     // encoding: [0x50,0xe8,0xf8,0x0e]
+// CHECK: fcmlt   v15.8h, v4.8h, #0.0     // encoding: [0x8f,0xe8,0xf8,0x4e]
 // CHECK: fcmlt v16.2s, v2.2s, #0.0   // encoding: [0x50,0xe8,0xa0,0x0e]
 // CHECK: fcmlt v15.4s, v4.4s, #0.0   // encoding: [0x8f,0xe8,0xa0,0x4e]
 // CHECK: fcmlt v5.2d, v29.2d, #0.0   // encoding: [0xa5,0xeb,0xe0,0x4e]
+// CHECK: fcmlt   v16.4h, v2.4h, #0.0     // encoding: [0x50,0xe8,0xf8,0x0e]
+// CHECK: fcmlt   v15.8h, v4.8h, #0.0     // encoding: [0x8f,0xe8,0xf8,0x4e]
 // CHECK: fcmlt v16.2s, v2.2s, #0.0   // encoding: [0x50,0xe8,0xa0,0x0e]
 // CHECK: fcmlt v15.4s, v4.4s, #0.0   // encoding: [0x8f,0xe8,0xa0,0x4e]
 // CHECK: fcmlt v5.2d, v29.2d, #0.0   // encoding: [0xa5,0xeb,0xe0,0x4e]
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index 973acb8249a..6ded6e40bfb 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -341,7 +341,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fabd v0.2s, v1.4s, v2.2d
 // CHECK-ERROR:                        ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fabd v0.4h, v1.4h, v2.4h
 // CHECK-ERROR:                 ^
 //----------------------------------------------------------------------
@@ -385,7 +385,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:        frecps v0.4s, v1.2d, v2.4s
 // CHECK-ERROR:                         ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        frecps v0.8h, v1.8h, v2.8h
 // CHECK-ERROR:                  ^
 
@@ -400,7 +400,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:        frsqrts v0.2d, v1.2d, v2.2s
 // CHECK-ERROR:                                 ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        frsqrts v0.4h, v1.4h, v2.4h
 // CHECK-ERROR:                   ^
 
@@ -417,7 +417,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:        facge v0.2d, v1.2s, v2.2d
 // CHECK-ERROR:                        ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        facge v0.4h, v1.4h, v2.4h
 // CHECK-ERROR:                 ^
 // CHECK-ERROR: error: invalid operand for instruction
@@ -435,7 +435,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:        facgt v0.2d, v1.2d, v2.4s
 // CHECK-ERROR:                               ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        facgt v0.8h, v1.8h, v2.8h
 // CHECK-ERROR:                 ^
 // CHECK-ERROR: error: invalid operand for instruction
@@ -1092,7 +1092,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:        fmin v0.4s, v1.4s, v2.2d
 // CHECK-ERROR:                              ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmin v0.8h, v1.8h, v2.8h
 // CHECK-ERROR:                ^
 
@@ -1177,7 +1177,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:        fminp v0.4s, v1.4s, v2.2d
 // CHECK-ERROR:                               ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fminp v0.8h, v1.8h, v2.8h
 // CHECK-ERROR:                 ^
 
@@ -1283,7 +1283,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fmulx v21.2s, v5.2s, v13.2d
 // CHECK-ERROR:                                  ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fmulx v1.4h, v25.4h, v3.4h
 // CHECK-ERROR:                  ^
 
@@ -3023,10 +3023,10 @@
       fmla v0.2d, v1.2d, v2.d[2]
       fmla v0.2d, v1.2d, v22.d[2]
 
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmla v0.4h, v1.4h, v2.h[2]
 // CHECK-ERROR:                ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmla v0.8h, v1.8h, v2.h[2]
 // CHECK-ERROR:                ^
 // CHECK-ERROR: vector lane must be an integer in range
@@ -3057,10 +3057,10 @@
       fmls v0.2d, v1.2d, v2.d[2]
       fmls v0.2d, v1.2d, v22.d[2]
 
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmls v0.4h, v1.4h, v2.h[2]
 // CHECK-ERROR:                ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmls v0.8h, v1.8h, v2.h[2]
 // CHECK-ERROR:                ^
 // CHECK-ERROR: vector lane must be an integer in range
@@ -3428,7 +3428,7 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:        mul v0.2d, v1.2d, v2.d[1]
 // CHECK-ERROR:               ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmul v0.4h, v1.4h, v2.h[4]
 // CHECK-ERROR:                ^
 // CHECK-ERROR: vector lane must be an integer in range
@@ -3458,7 +3458,7 @@
       fmulx v0.2d, v1.2d, v2.d[2]
       fmulx v0.2d, v1.2d, v22.d[2]
 
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmulx v0.4h, v1.4h, v2.h[4]
 // CHECK-ERROR:                 ^
 // CHECK-ERROR: vector lane must be an integer in range
@@ -3837,16 +3837,16 @@
         fmaxv h0, v1.8h
         fminv h0, v1.8h
 
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmaxnmv h0, v1.8h
 // CHECK-ERROR:                ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fminnmv h0, v1.8h
 // CHECK-ERROR:                ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fmaxv h0, v1.8h
 // CHECK-ERROR:              ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:        fminv h0, v1.8h
 // CHECK-ERROR:              ^
 
@@ -5594,13 +5594,13 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fabs v0.16b, v31.16b
 // CHECK-ERROR:                 ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fabs v2.8h, v4.8h
 // CHECK-ERROR:                 ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fabs v1.8b, v9.8b
 // CHECK-ERROR:                 ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fabs v13.4h, v21.4h
 // CHECK-ERROR:                  ^
 
@@ -5616,13 +5616,13 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fneg v0.16b, v31.16b
 // CHECK-ERROR:                 ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fneg v2.8h, v4.8h
 // CHECK-ERROR:                 ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fneg v1.8b, v9.8b
 // CHECK-ERROR:                 ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fneg v13.4h, v21.4h
 // CHECK-ERROR:                  ^
 
@@ -5978,205 +5978,205 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintn v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintn v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintn v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintn v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frinta v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frinta v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frinta v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frinta v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintp v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintp v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintp v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintp v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintm v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintm v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintm v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintm v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintx v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintx v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintx v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintx v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintz v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintz v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frintz v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frintz v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frinti v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frinti v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frinti v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frinti v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtns v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtns v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtns v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtns v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtnu v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtnu v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtnu v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtnu v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtps v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtps v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtps v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtps v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtpu v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtpu v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtpu v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtpu v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtms v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtms v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtms v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtms v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtmu v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtmu v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtmu v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtmu v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtzs v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtzs v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtzs v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtzs v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtzu v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtzu v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtzu v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtzu v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtas v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtas v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtas v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtas v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtau v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtau v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fcvtau v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fcvtau v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
@@ -6212,61 +6212,61 @@
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         scvtf v0.16b, v31.16b
 // CHECK-ERROR:                  ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         scvtf v2.8h, v4.8h
 // CHECK-ERROR:                  ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         scvtf v1.8b, v9.8b
 // CHECK-ERROR:                  ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         scvtf v13.4h, v21.4h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         ucvtf v0.16b, v31.16b
 // CHECK-ERROR:                  ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         ucvtf v2.8h, v4.8h
 // CHECK-ERROR:                  ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         ucvtf v1.8b, v9.8b
 // CHECK-ERROR:                  ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         ucvtf v13.4h, v21.4h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frecpe v0.16b, v31.16b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frecpe v2.8h, v4.8h
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frecpe v1.8b, v9.8b
 // CHECK-ERROR:                   ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frecpe v13.4h, v21.4h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frsqrte v0.16b, v31.16b
 // CHECK-ERROR:                    ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frsqrte v2.8h, v4.8h
 // CHECK-ERROR:                    ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         frsqrte v1.8b, v9.8b
 // CHECK-ERROR:                    ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         frsqrte v13.4h, v21.4h
 // CHECK-ERROR:                     ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fsqrt v0.16b, v31.16b
 // CHECK-ERROR:                  ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fsqrt v2.8h, v4.8h
 // CHECK-ERROR:                  ^
 // CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR:         fsqrt v1.8b, v9.8b
 // CHECK-ERROR:                  ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
 // CHECK-ERROR:         fsqrt v13.4h, v21.4h
 // CHECK-ERROR:                   ^
 
diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s
index 212eda2f209..9c10caa0f7c 100644
--- a/test/MC/AArch64/neon-facge-facgt.s
+++ b/test/MC/AArch64/neon-facge-facgt.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -6,16 +6,24 @@
 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
 // FACLE is alias for FACGE with operands reversed
 //----------------------------------------------------------------------
+         facge v0.4h, v31.4h, v16.4h
+         facge v4.8h, v7.8h, v15.8h
          facge v0.2s, v31.2s, v16.2s
          facge v4.4s, v7.4s, v15.4s
          facge v29.2d, v2.2d, v5.2d
+         facle v0.4h, v16.4h, v31.4h
+         facle v4.8h, v15.8h, v7.8h
          facle v0.2s, v16.2s, v31.2s
          facle v4.4s, v15.4s, v7.4s
          facle v29.2d, v5.2d, v2.2d
 
+// CHECK: facge   v0.4h, v31.4h, v16.4h   // encoding: [0xe0,0x2f,0x50,0x2e]
+// CHECK: facge   v4.8h, v7.8h, v15.8h    // encoding: [0xe4,0x2c,0x4f,0x6e]
 // CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
 // CHECK: facge v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xec,0x2f,0x6e]
 // CHECK: facge v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xec,0x65,0x6e]
+// CHECK: facge   v0.4h, v31.4h, v16.4h   // encoding: [0xe0,0x2f,0x50,0x2e]
+// CHECK: facge   v4.8h, v7.8h, v15.8h    // encoding: [0xe4,0x2c,0x4f,0x6e]
 // CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
 // CHECK: facge v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xec,0x2f,0x6e]
 // CHECK: facge v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xec,0x65,0x6e]
@@ -24,16 +32,24 @@
 // Vector Absolute Compare Mask Less Than (Floating Point)
 // FACLT is alias for FACGT with operands reversed
 //----------------------------------------------------------------------
+         facgt v3.4h, v8.4h, v12.4h
+         facgt v31.8h, v29.8h, v28.8h
          facgt v31.4s, v29.4s, v28.4s
          facgt v3.2s, v8.2s, v12.2s
          facgt v17.2d, v15.2d, v13.2d
+         faclt v3.4h,  v12.4h, v8.4h
+         faclt v31.8h, v28.8h, v29.8h
          faclt v31.4s, v28.4s, v29.4s
          faclt v3.2s,  v12.2s, v8.2s
          faclt v17.2d, v13.2d, v15.2d
 
+// CHECK: facgt   v3.4h, v8.4h, v12.4h    // encoding: [0x03,0x2d,0xcc,0x2e]
+// CHECK: facgt   v31.8h, v29.8h, v28.8h  // encoding: [0xbf,0x2f,0xdc,0x6e]
 // CHECK: facgt v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xef,0xbc,0x6e]
 // CHECK: facgt v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xed,0xac,0x2e]
 // CHECK: facgt v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xed,0xed,0x6e]
+// CHECK: facgt   v3.4h, v8.4h, v12.4h    // encoding: [0x03,0x2d,0xcc,0x2e]
+// CHECK: facgt   v31.8h, v29.8h, v28.8h  // encoding: [0xbf,0x2f,0xdc,0x6e]
 // CHECK: facgt v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xef,0xbc,0x6e]
 // CHECK: facgt v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xed,0xac,0x2e]
 // CHECK: facgt v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xed,0xed,0x6e]
diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s
index 79fe5da5e76..67a1340ecc3 100644
--- a/test/MC/AArch64/neon-frsqrt-frecp.s
+++ b/test/MC/AArch64/neon-frsqrt-frecp.s
@@ -1,14 +1,18 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
 //----------------------------------------------------------------------
 // Vector Reciprocal Square Root Step (Floating Point)
 //----------------------------------------------------------------------
+         frsqrts v0.4h, v31.4h, v16.4h
+         frsqrts v4.8h, v7.8h, v15.8h
          frsqrts v0.2s, v31.2s, v16.2s
          frsqrts v4.4s, v7.4s, v15.4s
          frsqrts v29.2d, v2.2d, v5.2d
 
+// CHECK: frsqrts v0.4h, v31.4h, v16.4h   // encoding: [0xe0,0x3f,0xd0,0x0e]
+// CHECK: frsqrts v4.8h, v7.8h, v15.8h    // encoding: [0xe4,0x3c,0xcf,0x4e]
 // CHECK: frsqrts v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xff,0xb0,0x0e]
 // CHECK: frsqrts v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xfc,0xaf,0x4e]
 // CHECK: frsqrts v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xfc,0xe5,0x4e]
@@ -16,10 +20,14 @@
 //----------------------------------------------------------------------
 // Vector Reciprocal Step (Floating Point)
 //----------------------------------------------------------------------
+         frecps v3.4h, v8.4h, v12.4h
+         frecps v31.8h, v29.8h, v28.8h
          frecps v31.4s, v29.4s, v28.4s
          frecps v3.2s, v8.2s, v12.2s
          frecps v17.2d, v15.2d, v13.2d
 
+// CHECK: frecps  v3.4h, v8.4h, v12.4h    // encoding: [0x03,0x3d,0x4c,0x0e]
+// CHECK: frecps  v31.8h, v29.8h, v28.8h  // encoding: [0xbf,0x3f,0x5c,0x4e]
 // CHECK: frecps v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xff,0x3c,0x4e]
 // CHECK: frecps v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xfd,0x2c,0x0e]
 // CHECK: frecps v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xfd,0x6d,0x4e]
diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s
index 8d2dadb1997..27cf4c8d830 100644
--- a/test/MC/AArch64/neon-max-min-pairwise.s
+++ b/test/MC/AArch64/neon-max-min-pairwise.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -67,10 +67,14 @@
 //----------------------------------------------------------------------
 // Vector Maximum Pairwise (Floating Point)
 //----------------------------------------------------------------------
+         fmaxp v0.4h, v1.4h, v2.4h
+         fmaxp v31.8h, v15.8h, v16.8h
          fmaxp v0.2s, v1.2s, v2.2s
          fmaxp v31.4s, v15.4s, v16.4s
          fmaxp v7.2d, v8.2d, v25.2d
 
+// CHECK: fmaxp   v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x34,0x42,0x2e]
+// CHECK: fmaxp   v31.8h, v15.8h, v16.8h  // encoding: [0xff,0x35,0x50,0x6e]
 // CHECK: fmaxp v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xf4,0x22,0x2e]
 // CHECK: fmaxp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x6e]
 // CHECK: fmaxp v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xf5,0x79,0x6e]
@@ -78,10 +82,14 @@
 //----------------------------------------------------------------------
 // Vector Minimum Pairwise (Floating Point)
 //----------------------------------------------------------------------
+         fminp v10.4h, v15.4h, v22.4h
+         fminp v3.8h, v5.8h, v6.8h
          fminp v10.2s, v15.2s, v22.2s
          fminp v3.4s, v5.4s, v6.4s
          fminp v17.2d, v13.2d, v2.2d
 
+// CHECK: fminp   v10.4h, v15.4h, v22.4h  // encoding: [0xea,0x35,0xd6,0x2e]
+// CHECK: fminp   v3.8h, v5.8h, v6.8h     // encoding: [0xa3,0x34,0xc6,0x6e]
 // CHECK: fminp v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xf5,0xb6,0x2e]
 // CHECK: fminp v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xf4,0xa6,0x6e]
 // CHECK: fminp v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xf5,0xe2,0x6e]
@@ -89,10 +97,14 @@
 //----------------------------------------------------------------------
 // Vector maxNum Pairwise (Floating Point)
 //----------------------------------------------------------------------
+         fmaxnmp v0.4h, v1.4h, v2.4h
+         fmaxnmp v31.8h, v15.8h, v16.8h
          fmaxnmp v0.2s, v1.2s, v2.2s
          fmaxnmp v31.4s, v15.4s, v16.4s
          fmaxnmp v7.2d, v8.2d, v25.2d
 
+// CHECK: fmaxnmp v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x04,0x42,0x2e]
+// CHECK: fmaxnmp v31.8h, v15.8h, v16.8h  // encoding: [0xff,0x05,0x50,0x6e]
 // CHECK: fmaxnmp v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xc4,0x22,0x2e]
 // CHECK: fmaxnmp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x6e]
 // CHECK: fmaxnmp v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xc5,0x79,0x6e]
@@ -100,10 +112,14 @@
 //----------------------------------------------------------------------
 // Vector minNum Pairwise (Floating Point)
 //----------------------------------------------------------------------
+         fminnmp v10.4h, v15.4h, v22.4h
+         fminnmp v3.8h, v5.8h, v6.8h
          fminnmp v10.2s, v15.2s, v22.2s
          fminnmp v3.4s, v5.4s, v6.4s
          fminnmp v17.2d, v13.2d, v2.2d
 
+// CHECK: fminnmp v10.4h, v15.4h, v22.4h  // encoding: [0xea,0x05,0xd6,0x2e]
+// CHECK: fminnmp v3.8h, v5.8h, v6.8h     // encoding: [0xa3,0x04,0xc6,0x6e]
 // CHECK: fminnmp v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xc5,0xb6,0x2e]
 // CHECK: fminnmp v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xc4,0xa6,0x6e]
 // CHECK: fminnmp v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xc5,0xe2,0x6e]
diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s
index 6d1efde5077..c4bd74d9888 100644
--- a/test/MC/AArch64/neon-max-min.s
+++ b/test/MC/AArch64/neon-max-min.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -67,10 +67,14 @@
 //----------------------------------------------------------------------
 // Vector Maximum (Floating Point)
 //----------------------------------------------------------------------
+         fmax v0.4h, v1.4h, v2.4h
+         fmax v0.8h, v1.8h, v2.8h
          fmax v0.2s, v1.2s, v2.2s
          fmax v31.4s, v15.4s, v16.4s
          fmax v7.2d, v8.2d, v25.2d
 
+// CHECK: fmax    v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x34,0x42,0x0e]
+// CHECK: fmax    v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x34,0x42,0x4e]
 // CHECK: fmax v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xf4,0x22,0x0e]
 // CHECK: fmax v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x4e]
 // CHECK: fmax v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xf5,0x79,0x4e]
@@ -78,10 +82,14 @@
 //----------------------------------------------------------------------
 // Vector Minimum (Floating Point)
 //----------------------------------------------------------------------
+         fmin v10.4h, v15.4h, v22.4h
+         fmin v10.8h, v15.8h, v22.8h
          fmin v10.2s, v15.2s, v22.2s
          fmin v3.4s, v5.4s, v6.4s
          fmin v17.2d, v13.2d, v2.2d
 
+// CHECK: fmin    v10.4h, v15.4h, v22.4h  // encoding: [0xea,0x35,0xd6,0x0e]
+// CHECK: fmin    v10.8h, v15.8h, v22.8h  // encoding: [0xea,0x35,0xd6,0x4e]
 // CHECK: fmin v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xf5,0xb6,0x0e]
 // CHECK: fmin v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xf4,0xa6,0x4e]
 // CHECK: fmin v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xf5,0xe2,0x4e]
@@ -89,10 +97,14 @@
 //----------------------------------------------------------------------
 // Vector maxNum (Floating Point)
 //----------------------------------------------------------------------
+         fmaxnm v0.4h, v1.4h, v2.4h
+         fmaxnm v0.8h, v1.8h, v2.8h
          fmaxnm v0.2s, v1.2s, v2.2s
          fmaxnm v31.4s, v15.4s, v16.4s
          fmaxnm v7.2d, v8.2d, v25.2d
 
+// CHECK: fmaxnm  v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x04,0x42,0x0e]
+// CHECK: fmaxnm  v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x04,0x42,0x4e]
 // CHECK: fmaxnm v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xc4,0x22,0x0e]
 // CHECK: fmaxnm v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x4e]
 // CHECK: fmaxnm v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xc5,0x79,0x4e]
@@ -100,10 +112,14 @@
 //----------------------------------------------------------------------
 // Vector minNum (Floating Point)
 //----------------------------------------------------------------------
+         fminnm v10.4h, v15.4h, v22.4h
+         fminnm v10.8h, v15.8h, v22.8h
          fminnm v10.2s, v15.2s, v22.2s
          fminnm v3.4s, v5.4s, v6.4s
          fminnm v17.2d, v13.2d, v2.2d
 
+// CHECK: fminnm  v10.4h, v15.4h, v22.4h  // encoding: [0xea,0x05,0xd6,0x0e]
+// CHECK: fminnm  v10.8h, v15.8h, v22.8h  // encoding: [0xea,0x05,0xd6,0x4e]
 // CHECK: fminnm v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xc5,0xb6,0x0e]
 // CHECK: fminnm v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xc4,0xa6,0x4e]
 // CHECK: fminnm v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xc5,0xe2,0x4e]
diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s
index 3072e6f1200..a510fc8c7b9 100644
--- a/test/MC/AArch64/neon-mla-mls-instructions.s
+++ b/test/MC/AArch64/neon-mla-mls-instructions.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -40,10 +40,14 @@
 //----------------------------------------------------------------------
 // Vector Floating-Point Multiply-accumulate
 //----------------------------------------------------------------------
+         fmla v0.4h, v1.4h, v2.4h
+         fmla v0.8h, v1.8h, v2.8h
          fmla v0.2s, v1.2s, v2.2s
          fmla v0.4s, v1.4s, v2.4s
          fmla v0.2d, v1.2d, v2.2d
 
+// CHECK: fmla    v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x0c,0x42,0x0e]
+// CHECK: fmla    v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x0c,0x42,0x4e]
 // CHECK: fmla v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xcc,0x22,0x0e]
 // CHECK: fmla v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xcc,0x22,0x4e]
 // CHECK: fmla v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xcc,0x62,0x4e]
@@ -51,10 +55,14 @@
 //----------------------------------------------------------------------
 // Vector Floating-Point Multiply-subtract
 //----------------------------------------------------------------------
+         fmls v0.4h, v1.4h, v2.4h
+         fmls v0.8h, v1.8h, v2.8h
          fmls v0.2s, v1.2s, v2.2s
          fmls v0.4s, v1.4s, v2.4s
          fmls v0.2d, v1.2d, v2.2d
 
+// CHECK: fmls    v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x0c,0xc2,0x0e]
+// CHECK: fmls    v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x0c,0xc2,0x4e]
 // CHECK: fmls v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xcc,0xa2,0x0e]
 // CHECK: fmls v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xcc,0xa2,0x4e]
 // CHECK: fmls v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xcc,0xe2,0x4e]
diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s
index d08756c0c10..71130617848 100644
--- a/test/MC/AArch64/neon-scalar-abs.s
+++ b/test/MC/AArch64/neon-scalar-abs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -14,9 +14,11 @@
 // Scalar Floating-point Absolute Difference
 //----------------------------------------------------------------------
 
+    fabd h29, h24, h20
     fabd s29, s24, s20
     fabd d29, d24, d20
 
+// CHECK: fabd    h29, h24, h20           // encoding: [0x1d,0x17,0xd4,0x7e]
 // CHECK: fabd s29, s24, s20  // encoding: [0x1d,0xd7,0xb4,0x7e]
 // CHECK: fabd d29, d24, d20  // encoding: [0x1d,0xd7,0xf4,0x7e]
 
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s
index fec9d12d8b8..394fda673e2 100644
--- a/test/MC/AArch64/neon-scalar-by-elem-mla.s
+++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s
@@ -1,8 +1,9 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 //------------------------------------------------------------------------------
 // Floating Point fused multiply-add (scalar, by element)
 //------------------------------------------------------------------------------
+    fmla    h0, h1, v1.h[5]
     fmla    s0, s1, v1.s[0]
     fmla    s30, s11, v1.s[1]
     fmla    s4, s5, v7.s[2]
@@ -10,6 +11,7 @@
     fmla    d0, d1, v1.d[0]
     fmla    d30, d11, v1.d[1]
 
+// CHECK: fmla    h0, h1, v1.h[5]         // encoding: [0x20,0x18,0x11,0x5f]
 // CHECK: fmla    s0, s1, v1.s[0]         // encoding: [0x20,0x10,0x81,0x5f]
 // CHECK: fmla    s30, s11, v1.s[1]       // encoding: [0x7e,0x11,0xa1,0x5f]
 // CHECK: fmla    s4, s5, v7.s[2]         // encoding: [0xa4,0x18,0x87,0x5f]
@@ -21,6 +23,7 @@
 // Floating Point fused multiply-subtract (scalar, by element)
 //------------------------------------------------------------------------------
 
+    fmls    h2, h3, v4.h[5]
     fmls    s2, s3, v4.s[0]
     fmls    s29, s10, v28.s[1]      
     fmls    s5, s12, v23.s[2]       
@@ -28,6 +31,7 @@
     fmls    d0, d1, v1.d[0]         
     fmls    d30, d11, v1.d[1]       
 
+// CHECK: fmls    h2, h3, v4.h[5]         // encoding: [0x62,0x58,0x14,0x5f]
 // CHECK: fmls    s2, s3, v4.s[0]     // encoding: [0x62,0x50,0x84,0x5f]
 // CHECK: fmls    s29, s10, v28.s[1]  // encoding: [0x5d,0x51,0xbc,0x5f]
 // CHECK: fmls    s5, s12, v23.s[2]   // encoding: [0x85,0x59,0x97,0x5f]
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s
index 8b8a3f57a9c..0d832742a38 100644
--- a/test/MC/AArch64/neon-scalar-by-elem-mul.s
+++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s
@@ -1,8 +1,9 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 //------------------------------------------------------------------------------
 // Floating Point  multiply (scalar, by element)
 //------------------------------------------------------------------------------
+    fmul    h0, h1, v1.h[5]
     fmul    s0, s1, v1.s[0]
     fmul    s30, s11, v1.s[1]
     fmul    s4, s5, v7.s[2]
@@ -10,6 +11,7 @@
     fmul    d0, d1, v1.d[0]
     fmul    d30, d11, v1.d[1]
 
+// CHECK: fmul    h0, h1, v1.h[5]         // encoding: [0x20,0x98,0x11,0x5f]
 // CHECK: fmul    s0, s1, v1.s[0]      // encoding: [0x20,0x90,0x81,0x5f]
 // CHECK: fmul    s30, s11, v1.s[1]    // encoding: [0x7e,0x91,0xa1,0x5f]
 // CHECK: fmul    s4, s5, v7.s[2]      // encoding: [0xa4,0x98,0x87,0x5f]
@@ -21,6 +23,7 @@
 //------------------------------------------------------------------------------
 // Floating Point  multiply extended (scalar, by element)
 //------------------------------------------------------------------------------
+    fmulx   h6, h2, v8.h[5]
     fmulx   s6, s2, v8.s[0]
     fmulx   s7, s3, v13.s[1]
     fmulx   s9, s7, v9.s[2]
@@ -28,6 +31,7 @@
     fmulx   d15, d9, v7.d[0]
     fmulx   d13, d12, v11.d[1]
 
+// CHECK: fmulx   h6, h2, v8.h[5]         // encoding: [0x46,0x98,0x18,0x7f]
 // CHECK: fmulx   s6, s2, v8.s[0]         // encoding: [0x46,0x90,0x88,0x7f]
 // CHECK: fmulx   s7, s3, v13.s[1]        // encoding: [0x67,0x90,0xad,0x7f]
 // CHECK: fmulx   s9, s7, v9.s[2]         // encoding: [0xe9,0x98,0x89,0x7f]
diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s
index 97416daf080..3cbf6bae675 100644
--- a/test/MC/AArch64/neon-scalar-cvt.s
+++ b/test/MC/AArch64/neon-scalar-cvt.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -6,9 +6,11 @@
 // Scalar Signed Integer Convert To Floating-point
 //----------------------------------------------------------------------
 
+    scvtf h23, h14
     scvtf s22, s13
     scvtf d21, d12
 
+// CHECK: scvtf   h23, h14                // encoding: [0xd7,0xd9,0x79,0x5e]
 // CHECK: scvtf s22, s13    // encoding: [0xb6,0xd9,0x21,0x5e]
 // CHECK: scvtf d21, d12    // encoding: [0x95,0xd9,0x61,0x5e]
 
@@ -16,9 +18,11 @@
 // Scalar Unsigned Integer Convert To Floating-point
 //----------------------------------------------------------------------
 
+    ucvtf h20, h12
     ucvtf s22, s13
     ucvtf d21, d14
 
+// CHECK: ucvtf   h20, h12                // encoding: [0x94,0xd9,0x79,0x7e]
 // CHECK: ucvtf s22, s13    // encoding: [0xb6,0xd9,0x21,0x7e]
 // CHECK: ucvtf d21, d14    // encoding: [0xd5,0xd9,0x61,0x7e]
 
@@ -26,9 +30,11 @@
 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
 //----------------------------------------------------------------------
 
+    scvtf h22, h13, #16
     scvtf s22, s13, #32
     scvtf d21, d12, #64
 
+// CHECK: scvtf   h22, h13, #16           // encoding: [0xb6,0xe5,0x10,0x5f]
 // CHECK: scvtf s22, s13, #32  // encoding: [0xb6,0xe5,0x20,0x5f]
 // CHECK: scvtf d21, d12, #64  // encoding: [0x95,0xe5,0x40,0x5f]    
 
@@ -36,9 +42,11 @@
 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
 //----------------------------------------------------------------------
 
+    ucvtf h22, h13, #16
     ucvtf s22, s13, #32
     ucvtf d21, d14, #64
 
+// CHECK: ucvtf   h22, h13, #16           // encoding: [0xb6,0xe5,0x10,0x7f]
 // CHECK: ucvtf s22, s13, #32  // encoding: [0xb6,0xe5,0x20,0x7f]
 // CHECK: ucvtf d21, d14, #64  // encoding: [0xd5,0xe5,0x40,0x7f]
 
@@ -46,9 +54,11 @@
 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
 //----------------------------------------------------------------------
 
+    fcvtzs h21, h12, #1
     fcvtzs s21, s12, #1
     fcvtzs d21, d12, #1
 
+// CHECK: fcvtzs  h21, h12, #1            // encoding: [0x95,0xfd,0x1f,0x5f]
 // CHECK: fcvtzs s21, s12, #1  // encoding: [0x95,0xfd,0x3f,0x5f]
 // CHECK: fcvtzs d21, d12, #1  // encoding: [0x95,0xfd,0x7f,0x5f]
         
@@ -56,9 +66,11 @@
 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
 //----------------------------------------------------------------------
 
+    fcvtzu h21, h12, #1
     fcvtzu s21, s12, #1
     fcvtzu d21, d12, #1
 
+// CHECK: fcvtzu  h21, h12, #1            // encoding: [0x95,0xfd,0x1f,0x7f]
 // CHECK: fcvtzu s21, s12, #1  // encoding: [0x95,0xfd,0x3f,0x7f]
 // CHECK: fcvtzu d21, d12, #1  // encoding: [0x95,0xfd,0x7f,0x7f]
 
@@ -76,9 +88,11 @@
 // With Ties To Away
 //----------------------------------------------------------------------
 
+    fcvtas h12, h13
     fcvtas s12, s13
     fcvtas d21, d14
 
+// CHECK: fcvtas  h12, h13                // encoding: [0xac,0xc9,0x79,0x5e]
 // CHECK: fcvtas s12, s13    // encoding: [0xac,0xc9,0x21,0x5e]
 // CHECK: fcvtas d21, d14    // encoding: [0xd5,0xc9,0x61,0x5e]
 
@@ -87,9 +101,11 @@
 // Nearest With Ties To Away
 //----------------------------------------------------------------------
 
+    fcvtau h12, h13
     fcvtau s12, s13
     fcvtau d21, d14
 
+// CHECK: fcvtau  h12, h13                // encoding: [0xac,0xc9,0x79,0x7e]
 // CHECK: fcvtau s12, s13    // encoding: [0xac,0xc9,0x21,0x7e]
 // CHECK: fcvtau d21, d14    // encoding: [0xd5,0xc9,0x61,0x7e]
 
@@ -98,9 +114,11 @@
 // Minus Infinity
 //----------------------------------------------------------------------
 
+    fcvtms h22, h13
     fcvtms s22, s13
     fcvtms d21, d14
 
+// CHECK: fcvtms  h22, h13                // encoding: [0xb6,0xb9,0x79,0x5e]
 // CHECK: fcvtms s22, s13    // encoding: [0xb6,0xb9,0x21,0x5e]
 // CHECK: fcvtms d21, d14    // encoding: [0xd5,0xb9,0x61,0x5e]
 
@@ -109,9 +127,11 @@
 // Minus Infinity
 //----------------------------------------------------------------------
 
+    fcvtmu h12, h13
     fcvtmu s12, s13
     fcvtmu d21, d14
 
+// CHECK: fcvtmu  h12, h13                // encoding: [0xac,0xb9,0x79,0x7e]
 // CHECK: fcvtmu s12, s13    // encoding: [0xac,0xb9,0x21,0x7e]
 // CHECK: fcvtmu d21, d14    // encoding: [0xd5,0xb9,0x61,0x7e]
 
@@ -120,9 +140,11 @@
 // With Ties To Even
 //----------------------------------------------------------------------
 
+    fcvtns h22, h13
     fcvtns s22, s13
     fcvtns d21, d14
 
+// CHECK: fcvtns  h22, h13                // encoding: [0xb6,0xa9,0x79,0x5e]
 // CHECK: fcvtns s22, s13    // encoding: [0xb6,0xa9,0x21,0x5e]
 // CHECK: fcvtns d21, d14    // encoding: [0xd5,0xa9,0x61,0x5e]
 
@@ -131,9 +153,11 @@
 // Nearest With Ties To Even
 //----------------------------------------------------------------------
 
+    fcvtnu h12, h13
     fcvtnu s12, s13
     fcvtnu d21, d14
 
+// CHECK: fcvtnu  h12, h13                // encoding: [0xac,0xa9,0x79,0x7e]
 // CHECK: fcvtnu s12, s13    // encoding: [0xac,0xa9,0x21,0x7e]
 // CHECK: fcvtnu d21, d14    // encoding: [0xd5,0xa9,0x61,0x7e]
         
@@ -142,9 +166,11 @@
 // Positive Infinity
 //----------------------------------------------------------------------
 
+    fcvtps h22, h13
     fcvtps s22, s13
     fcvtps d21, d14
 
+// CHECK: fcvtps  h22, h13                // encoding: [0xb6,0xa9,0xf9,0x5e]
 // CHECK: fcvtps s22, s13    // encoding: [0xb6,0xa9,0xa1,0x5e]
 // CHECK: fcvtps d21, d14    // encoding: [0xd5,0xa9,0xe1,0x5e]
         
@@ -153,9 +179,11 @@
 // Positive Infinity
 //----------------------------------------------------------------------
 
+    fcvtpu h12, h13
     fcvtpu s12, s13
     fcvtpu d21, d14
 
+// CHECK: fcvtpu  h12, h13                // encoding: [0xac,0xa9,0xf9,0x7e]
 // CHECK: fcvtpu s12, s13    // encoding: [0xac,0xa9,0xa1,0x7e]
 // CHECK: fcvtpu d21, d14    // encoding: [0xd5,0xa9,0xe1,0x7e]
 
@@ -163,9 +191,11 @@
 // Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
 //----------------------------------------------------------------------
 
+    fcvtzs h12, h13
     fcvtzs s12, s13
     fcvtzs d21, d14
 
+// CHECK: fcvtzs  h12, h13                // encoding: [0xac,0xb9,0xf9,0x5e]
 // CHECK: fcvtzs s12, s13    // encoding: [0xac,0xb9,0xa1,0x5e]
 // CHECK: fcvtzs d21, d14    // encoding: [0xd5,0xb9,0xe1,0x5e]
         
@@ -174,8 +204,10 @@
 // Zero
 //----------------------------------------------------------------------
 
+    fcvtzu h12, h13
     fcvtzu s12, s13
     fcvtzu d21, d14
 
+// CHECK: fcvtzu  h12, h13                // encoding: [0xac,0xb9,0xf9,0x7e]
 // CHECK: fcvtzu s12, s13    // encoding: [0xac,0xb9,0xa1,0x7e]
 // CHECK: fcvtzu d21, d14    // encoding: [0xd5,0xb9,0xe1,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s
index b798b341067..0b91d945a71 100644
--- a/test/MC/AArch64/neon-scalar-fp-compare.s
+++ b/test/MC/AArch64/neon-scalar-fp-compare.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -6,9 +6,11 @@
 // Scalar Floating-point Compare Mask Equal
 //----------------------------------------------------------------------
 
+         fcmeq h10, h11, h12
          fcmeq s10, s11, s12
          fcmeq d20, d21, d22
 
+// CHECK: fcmeq   h10, h11, h12           // encoding: [0x6a,0x25,0x4c,0x5e]
 // CHECK: fcmeq s10, s11, s12   // encoding: [0x6a,0xe5,0x2c,0x5e]
 // CHECK: fcmeq d20, d21, d22   // encoding: [0xb4,0xe6,0x76,0x5e]
 
@@ -16,13 +18,17 @@
 // Scalar Floating-point Compare Mask Equal To Zero
 //----------------------------------------------------------------------
 
+         fcmeq h10, h11, #0.0
          fcmeq s10, s11, #0.0
          fcmeq d20, d21, #0.0
+         fcmeq h10, h11, #0
          fcmeq s10, s11, #0
          fcmeq d20, d21, #0x0
 
+// CHECK: fcmeq   h10, h11, #0.0          // encoding: [0x6a,0xd9,0xf8,0x5e]
 // CHECK: fcmeq s10, s11, #0.0   // encoding: [0x6a,0xd9,0xa0,0x5e]
 // CHECK: fcmeq d20, d21, #0.0   // encoding: [0xb4,0xda,0xe0,0x5e]
+// CHECK: fcmeq   h10, h11, #0.0          // encoding: [0x6a,0xd9,0xf8,0x5e]
 // CHECK: fcmeq s10, s11, #0.0   // encoding: [0x6a,0xd9,0xa0,0x5e]
 // CHECK: fcmeq d20, d21, #0.0   // encoding: [0xb4,0xda,0xe0,0x5e]
 
@@ -30,9 +36,11 @@
 // Scalar Floating-point Compare Mask Greater Than Or Equal
 //----------------------------------------------------------------------
 
+         fcmge h10, h11, h12
          fcmge s10, s11, s12
          fcmge d20, d21, d22
 
+// CHECK: fcmge   h10, h11, h12           // encoding: [0x6a,0x25,0x4c,0x7e]
 // CHECK: fcmge s10, s11, s12   // encoding: [0x6a,0xe5,0x2c,0x7e]
 // CHECK: fcmge d20, d21, d22   // encoding: [0xb4,0xe6,0x76,0x7e]
 
@@ -40,13 +48,17 @@
 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
 //----------------------------------------------------------------------
 
+         fcmge h10, h11, #0.0
          fcmge s10, s11, #0.0
          fcmge d20, d21, #0.0
+         fcmge h10, h11, #0
          fcmge s10, s11, #0
          fcmge d20, d21, #0x0
 
+// CHECK: fcmge   h10, h11, #0.0          // encoding: [0x6a,0xc9,0xf8,0x7e]
 // CHECK: fcmge s10, s11, #0.0   // encoding: [0x6a,0xc9,0xa0,0x7e]
 // CHECK: fcmge d20, d21, #0.0   // encoding: [0xb4,0xca,0xe0,0x7e]
+// CHECK: fcmge   h10, h11, #0.0          // encoding: [0x6a,0xc9,0xf8,0x7e]
 // CHECK: fcmge s10, s11, #0.0   // encoding: [0x6a,0xc9,0xa0,0x7e]
 // CHECK: fcmge d20, d21, #0.0   // encoding: [0xb4,0xca,0xe0,0x7e]
 
@@ -54,9 +66,11 @@
 // Scalar Floating-point Compare Mask Greather Than
 //----------------------------------------------------------------------
 
+         fcmgt h10, h11, h12
          fcmgt s10, s11, s12
          fcmgt d20, d21, d22
 
+// CHECK: fcmgt   h10, h11, h12           // encoding: [0x6a,0x25,0xcc,0x7e]
 // CHECK: fcmgt s10, s11, s12   // encoding: [0x6a,0xe5,0xac,0x7e]
 // CHECK: fcmgt d20, d21, d22   // encoding: [0xb4,0xe6,0xf6,0x7e]
 
@@ -64,13 +78,17 @@
 // Scalar Floating-point Compare Mask Greather Than Zero
 //----------------------------------------------------------------------
 
+         fcmgt h10, h11, #0.0
          fcmgt s10, s11, #0.0
          fcmgt d20, d21, #0.0
+         fcmgt h10, h11, #0
          fcmgt s10, s11, #0
          fcmgt d20, d21, #0x0
 
+// CHECK: fcmgt   h10, h11, #0.0          // encoding: [0x6a,0xc9,0xf8,0x5e]
 // CHECK: fcmgt s10, s11, #0.0   // encoding: [0x6a,0xc9,0xa0,0x5e]
 // CHECK: fcmgt d20, d21, #0.0   // encoding: [0xb4,0xca,0xe0,0x5e]
+// CHECK: fcmgt   h10, h11, #0.0          // encoding: [0x6a,0xc9,0xf8,0x5e]
 // CHECK: fcmgt s10, s11, #0.0   // encoding: [0x6a,0xc9,0xa0,0x5e]
 // CHECK: fcmgt d20, d21, #0.0   // encoding: [0xb4,0xca,0xe0,0x5e]
 
@@ -78,13 +96,17 @@
 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
 //----------------------------------------------------------------------
 
+         fcmle h10, h11, #0.0
          fcmle s10, s11, #0.0
          fcmle d20, d21, #0.0
+         fcmle h10, h11, #0
          fcmle s10, s11, #0
          fcmle d20, d21, #0x0
 
+// CHECK: fcmle   h10, h11, #0.0          // encoding: [0x6a,0xd9,0xf8,0x7e]
 // CHECK: fcmle s10, s11, #0.0   // encoding: [0x6a,0xd9,0xa0,0x7e]
 // CHECK: fcmle d20, d21, #0.0   // encoding: [0xb4,0xda,0xe0,0x7e]
+// CHECK: fcmle   h10, h11, #0.0          // encoding: [0x6a,0xd9,0xf8,0x7e]
 // CHECK: fcmle s10, s11, #0.0   // encoding: [0x6a,0xd9,0xa0,0x7e]
 // CHECK: fcmle d20, d21, #0.0   // encoding: [0xb4,0xda,0xe0,0x7e]
 
@@ -92,13 +114,17 @@
 // Scalar Floating-point Compare Mask Less Than
 //----------------------------------------------------------------------
 
+         fcmlt h10, h11, #0.0
          fcmlt s10, s11, #0.0
          fcmlt d20, d21, #0.0
+         fcmlt h10, h11, #0
          fcmlt s10, s11, #0
          fcmlt d20, d21, #0x0
 
+// CHECK: fcmlt   h10, h11, #0.0          // encoding: [0x6a,0xe9,0xf8,0x5e]
 // CHECK: fcmlt s10, s11, #0.0   // encoding: [0x6a,0xe9,0xa0,0x5e]
 // CHECK: fcmlt d20, d21, #0.0   // encoding: [0xb4,0xea,0xe0,0x5e]
+// CHECK: fcmlt   h10, h11, #0.0          // encoding: [0x6a,0xe9,0xf8,0x5e]
 // CHECK: fcmlt s10, s11, #0.0   // encoding: [0x6a,0xe9,0xa0,0x5e]
 // CHECK: fcmlt d20, d21, #0.0   // encoding: [0xb4,0xea,0xe0,0x5e]
 
@@ -106,9 +132,11 @@
 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
 //----------------------------------------------------------------------
 
+         facge h10, h11, h12
          facge s10, s11, s12
          facge d20, d21, d22
 
+// CHECK: facge   h10, h11, h12           // encoding: [0x6a,0x2d,0x4c,0x7e]
 // CHECK: facge s10, s11, s12    // encoding: [0x6a,0xed,0x2c,0x7e]
 // CHECK: facge d20, d21, d22    // encoding: [0xb4,0xee,0x76,0x7e]
 
@@ -116,8 +144,10 @@
 // Scalar Floating-point Absolute Compare Mask Greater Than
 //----------------------------------------------------------------------
 
+         facgt h10, h11, h12
          facgt s10, s11, s12
          facgt d20, d21, d22
 
+// CHECK: facgt   h10, h11, h12           // encoding: [0x6a,0x2d,0xcc,0x7e]
 // CHECK: facgt s10, s11, s12   // encoding: [0x6a,0xed,0xac,0x7e]
 // CHECK: facgt d20, d21, d22   // encoding: [0xb4,0xee,0xf6,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s
index e33bdad91a9..323fad206c4 100644
--- a/test/MC/AArch64/neon-scalar-mul.s
+++ b/test/MC/AArch64/neon-scalar-mul.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -26,9 +26,11 @@
 // Floating-point Multiply Extended
 //----------------------------------------------------------------------
 
+    fmulx h20, h22, h15
     fmulx s20, s22, s15
     fmulx d23, d11, d1
 
+// CHECK: fmulx   h20, h22, h15           // encoding: [0xd4,0x1e,0x4f,0x5e]
 // CHECK: fmulx s20, s22, s15   // encoding: [0xd4,0xde,0x2f,0x5e]
 // CHECK: fmulx d23, d11, d1    // encoding: [0x77,0xdd,0x61,0x5e]
 
diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s
index 7a886f3b4a7..923c3549d6f 100644
--- a/test/MC/AArch64/neon-scalar-recip.s
+++ b/test/MC/AArch64/neon-scalar-recip.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -6,9 +6,11 @@
 // Floating-point Reciprocal Step
 //----------------------------------------------------------------------
 
+    frecps h21, h16, h13
     frecps s21, s16, s13
     frecps d22, d30, d21
 
+// CHECK: frecps  h21, h16, h13           // encoding: [0x15,0x3e,0x4d,0x5e]
 // CHECK: frecps s21, s16, s13   // encoding: [0x15,0xfe,0x2d,0x5e]
 // CHECK: frecps d22, d30, d21   // encoding: [0xd6,0xff,0x75,0x5e]
 
@@ -16,9 +18,11 @@
 // Floating-point Reciprocal Square Root Step
 //----------------------------------------------------------------------
 
+    frsqrts h21, h5, h12
     frsqrts s21, s5, s12
     frsqrts d8, d22, d18
 
+// CHECK: frsqrts h21, h5, h12            // encoding: [0xb5,0x3c,0xcc,0x5e]
 // CHECK: frsqrts s21, s5, s12   // encoding: [0xb5,0xfc,0xac,0x5e]
 // CHECK: frsqrts d8, d22, d18   // encoding: [0xc8,0xfe,0xf2,0x5e]
 
@@ -26,9 +30,11 @@
 // Scalar Floating-point Reciprocal Estimate
 //----------------------------------------------------------------------
 
+    frecpe h19, h14
     frecpe s19, s14
     frecpe d13, d13
 
+// CHECK: frecpe  h19, h14                // encoding: [0xd3,0xd9,0xf9,0x5e]
 // CHECK: frecpe s19, s14    // encoding: [0xd3,0xd9,0xa1,0x5e]
 // CHECK: frecpe d13, d13    // encoding: [0xad,0xd9,0xe1,0x5e]
 
@@ -36,9 +42,11 @@
 // Scalar Floating-point Reciprocal Exponent
 //----------------------------------------------------------------------
 
+    frecpx h18, h10
     frecpx s18, s10
     frecpx d16, d19
 
+// CHECK: frecpx  h18, h10                // encoding: [0x52,0xf9,0xf9,0x5e]
 // CHECK: frecpx s18, s10    // encoding: [0x52,0xf9,0xa1,0x5e]
 // CHECK: frecpx d16, d19    // encoding: [0x70,0xfa,0xe1,0x5e]
 
@@ -46,8 +54,10 @@
 // Scalar Floating-point Reciprocal Square Root Estimate
 //----------------------------------------------------------------------
 
+    frsqrte h22, h13
     frsqrte s22, s13
     frsqrte d21, d12
 
+// CHECK: frsqrte h22, h13                // encoding: [0xb6,0xd9,0xf9,0x7e]
 // CHECK: frsqrte s22, s13    // encoding: [0xb6,0xd9,0xa1,0x7e]
 // CHECK: frsqrte d21, d12    // encoding: [0x95,0xd9,0xe1,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
index 403a940ec2f..dae61d0f0f3 100644
--- a/test/MC/AArch64/neon-scalar-reduce-pairwise.s
+++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 //----------------------------------------------------------------------
 // Scalar Reduce Add Pairwise (Integer)
@@ -10,7 +10,12 @@
 //----------------------------------------------------------------------
 // Scalar Reduce Add Pairwise (Floating Point)
 //----------------------------------------------------------------------
+      faddp h18, v3.2h
+      faddp h18, v3.2H
+      faddp s19, v2.2s
       faddp d20, v1.2d
 
+// CHECK: faddp h18, v3.2h     // encoding: [0x72,0xd8,0x30,0x5e]
+// CHECK: faddp s19, v2.2s     // encoding: [0x53,0xd8,0x30,0x7e]
 // CHECK: faddp d20, v1.2d     // encoding: [0x34,0xd8,0x70,0x7e]
 
diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s
index 6d1aafdd772..32dd48629cd 100644
--- a/test/MC/AArch64/neon-simd-misc.s
+++ b/test/MC/AArch64/neon-simd-misc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -298,10 +298,14 @@
 // Floating-point absolute
 //------------------------------------------------------------------------------
 
+         fabs v4.4h, v0.4h
+         fabs v6.8h, v8.8h
          fabs v6.4s, v8.4s
          fabs v6.2d, v8.2d
          fabs v4.2s, v0.2s
 
+// CHECK: fabs    v4.4h, v0.4h            // encoding: [0x04,0xf8,0xf8,0x0e]
+// CHECK: fabs    v6.8h, v8.8h            // encoding: [0x06,0xf9,0xf8,0x4e]
 // CHECK:	fabs	v6.4s, v8.4s            // encoding: [0x06,0xf9,0xa0,0x4e]
 // CHECK:	fabs	v6.2d, v8.2d            // encoding: [0x06,0xf9,0xe0,0x4e]
 // CHECK:	fabs	v4.2s, v0.2s            // encoding: [0x04,0xf8,0xa0,0x0e]
@@ -310,10 +314,14 @@
 // Floating-point negate
 //------------------------------------------------------------------------------
 
+         fneg v4.4h, v0.4h
+         fneg v6.8h, v8.8h
          fneg v6.4s, v8.4s
          fneg v6.2d, v8.2d
          fneg v4.2s, v0.2s
 
+// CHECK: fneg    v4.4h, v0.4h            // encoding: [0x04,0xf8,0xf8,0x2e]
+// CHECK: fneg    v6.8h, v8.8h            // encoding: [0x06,0xf9,0xf8,0x6e]
 // CHECK:	fneg	v6.4s, v8.4s            // encoding: [0x06,0xf9,0xa0,0x6e]
 // CHECK:	fneg	v6.2d, v8.2d            // encoding: [0x06,0xf9,0xe0,0x6e]
 // CHECK:	fneg	v4.2s, v0.2s            // encoding: [0x04,0xf8,0xa0,0x2e]
@@ -450,58 +458,86 @@
 // Floating-point round to integral
 //------------------------------------------------------------------------------
 
+         frintn v4.4h, v0.4h
+         frintn v6.8h, v8.8h
          frintn v6.4s, v8.4s
          frintn v6.2d, v8.2d
          frintn v4.2s, v0.2s
 
+// CHECK: frintn  v4.4h, v0.4h            // encoding: [0x04,0x88,0x79,0x0e]
+// CHECK: frintn  v6.8h, v8.8h            // encoding: [0x06,0x89,0x79,0x4e]
 // CHECK:	frintn	v6.4s, v8.4s            // encoding: [0x06,0x89,0x21,0x4e]
 // CHECK:	frintn	v6.2d, v8.2d            // encoding: [0x06,0x89,0x61,0x4e]
 // CHECK:	frintn	v4.2s, v0.2s            // encoding: [0x04,0x88,0x21,0x0e]
 
+         frinta v4.4h, v0.4h
+         frinta v6.8h, v8.8h
          frinta v6.4s, v8.4s
          frinta v6.2d, v8.2d
          frinta v4.2s, v0.2s
 
+// CHECK: frinta  v4.4h, v0.4h            // encoding: [0x04,0x88,0x79,0x2e]
+// CHECK: frinta  v6.8h, v8.8h            // encoding: [0x06,0x89,0x79,0x6e]
 // CHECK:	frinta	v6.4s, v8.4s            // encoding: [0x06,0x89,0x21,0x6e]
 // CHECK:	frinta	v6.2d, v8.2d            // encoding: [0x06,0x89,0x61,0x6e]
 // CHECK:	frinta	v4.2s, v0.2s            // encoding: [0x04,0x88,0x21,0x2e]
 
+         frintp v4.4h, v0.4h
+         frintp v6.8h, v8.8h
          frintp v6.4s, v8.4s
          frintp v6.2d, v8.2d
          frintp v4.2s, v0.2s
 
+// CHECK: frintp  v4.4h, v0.4h            // encoding: [0x04,0x88,0xf9,0x0e]
+// CHECK: frintp  v6.8h, v8.8h            // encoding: [0x06,0x89,0xf9,0x4e]
 // CHECK:	frintp	v6.4s, v8.4s            // encoding: [0x06,0x89,0xa1,0x4e]
 // CHECK:	frintp	v6.2d, v8.2d            // encoding: [0x06,0x89,0xe1,0x4e]
 // CHECK:	frintp	v4.2s, v0.2s            // encoding: [0x04,0x88,0xa1,0x0e]
 
+         frintm v4.4h, v0.4h
+         frintm v6.8h, v8.8h
          frintm v6.4s, v8.4s
          frintm v6.2d, v8.2d
          frintm v4.2s, v0.2s
 
+// CHECK: frintm  v4.4h, v0.4h            // encoding: [0x04,0x98,0x79,0x0e]
+// CHECK: frintm  v6.8h, v8.8h            // encoding: [0x06,0x99,0x79,0x4e]
 // CHECK:	frintm	v6.4s, v8.4s            // encoding: [0x06,0x99,0x21,0x4e]
 // CHECK:	frintm	v6.2d, v8.2d            // encoding: [0x06,0x99,0x61,0x4e]
 // CHECK:	frintm	v4.2s, v0.2s            // encoding: [0x04,0x98,0x21,0x0e]
 
+         frintx v4.4h, v0.4h
+         frintx v6.8h, v8.8h
          frintx v6.4s, v8.4s
          frintx v6.2d, v8.2d
          frintx v4.2s, v0.2s
 
+// CHECK: frintx  v4.4h, v0.4h            // encoding: [0x04,0x98,0x79,0x2e]
+// CHECK: frintx  v6.8h, v8.8h            // encoding: [0x06,0x99,0x79,0x6e]
 // CHECK:	frintx	v6.4s, v8.4s            // encoding: [0x06,0x99,0x21,0x6e]
 // CHECK:	frintx	v6.2d, v8.2d            // encoding: [0x06,0x99,0x61,0x6e]
 // CHECK:	frintx	v4.2s, v0.2s            // encoding: [0x04,0x98,0x21,0x2e]
 
+         frintz v4.4h, v0.4h
+         frintz v6.8h, v8.8h
          frintz v6.4s, v8.4s
          frintz v6.2d, v8.2d
          frintz v4.2s, v0.2s
 
+// CHECK: frintz  v4.4h, v0.4h            // encoding: [0x04,0x98,0xf9,0x0e]
+// CHECK: frintz  v6.8h, v8.8h            // encoding: [0x06,0x99,0xf9,0x4e]
 // CHECK:	frintz	v6.4s, v8.4s            // encoding: [0x06,0x99,0xa1,0x4e]
 // CHECK:	frintz	v6.2d, v8.2d            // encoding: [0x06,0x99,0xe1,0x4e]
 // CHECK:	frintz	v4.2s, v0.2s            // encoding: [0x04,0x98,0xa1,0x0e]
 
+         frinti v4.4h, v0.4h
+         frinti v6.8h, v8.8h
          frinti v6.4s, v8.4s
          frinti v6.2d, v8.2d
          frinti v4.2s, v0.2s
 
+// CHECK: frinti  v4.4h, v0.4h            // encoding: [0x04,0x98,0xf9,0x2e]
+// CHECK: frinti  v6.8h, v8.8h            // encoding: [0x06,0x99,0xf9,0x6e]
 // CHECK:	frinti	v6.4s, v8.4s            // encoding: [0x06,0x99,0xa1,0x6e]
 // CHECK:	frinti	v6.2d, v8.2d            // encoding: [0x06,0x99,0xe1,0x6e]
 // CHECK:	frinti	v4.2s, v0.2s            // encoding: [0x04,0x98,0xa1,0x2e]
@@ -510,83 +546,123 @@
 // Floating-point convert to integer
 //------------------------------------------------------------------------------
 
+         fcvtns v4.4h, v0.4h
+         fcvtns v6.8h, v8.8h
          fcvtns v6.4s, v8.4s
          fcvtns v6.2d, v8.2d
          fcvtns v4.2s, v0.2s
 
+// CHECK: fcvtns  v4.4h, v0.4h            // encoding: [0x04,0xa8,0x79,0x0e]
+// CHECK: fcvtns  v6.8h, v8.8h            // encoding: [0x06,0xa9,0x79,0x4e]
 // CHECK:	fcvtns	v6.4s, v8.4s            // encoding: [0x06,0xa9,0x21,0x4e]
 // CHECK:	fcvtns	v6.2d, v8.2d            // encoding: [0x06,0xa9,0x61,0x4e]
 // CHECK:	fcvtns	v4.2s, v0.2s            // encoding: [0x04,0xa8,0x21,0x0e]
 
+         fcvtnu v4.4h, v0.4h
+         fcvtnu v6.8h, v8.8h
          fcvtnu v6.4s, v8.4s
          fcvtnu v6.2d, v8.2d
          fcvtnu v4.2s, v0.2s
 
+// CHECK: fcvtnu  v4.4h, v0.4h            // encoding: [0x04,0xa8,0x79,0x2e]
+// CHECK: fcvtnu  v6.8h, v8.8h            // encoding: [0x06,0xa9,0x79,0x6e]
 // CHECK:	fcvtnu	v6.4s, v8.4s            // encoding: [0x06,0xa9,0x21,0x6e]
 // CHECK:	fcvtnu	v6.2d, v8.2d            // encoding: [0x06,0xa9,0x61,0x6e]
 // CHECK:	fcvtnu	v4.2s, v0.2s            // encoding: [0x04,0xa8,0x21,0x2e]
 
+         fcvtps v4.4h, v0.4h
+         fcvtps v6.8h, v8.8h
          fcvtps v6.4s, v8.4s
          fcvtps v6.2d, v8.2d
          fcvtps v4.2s, v0.2s
 
+// CHECK: fcvtps  v4.4h, v0.4h            // encoding: [0x04,0xa8,0xf9,0x0e]
+// CHECK: fcvtps  v6.8h, v8.8h            // encoding: [0x06,0xa9,0xf9,0x4e]
 // CHECK:	fcvtps	v6.4s, v8.4s            // encoding: [0x06,0xa9,0xa1,0x4e]
 // CHECK:	fcvtps	v6.2d, v8.2d            // encoding: [0x06,0xa9,0xe1,0x4e]
 // CHECK:	fcvtps	v4.2s, v0.2s            // encoding: [0x04,0xa8,0xa1,0x0e]
 
+         fcvtpu v4.4h, v0.4h
+         fcvtpu v6.8h, v8.8h
          fcvtpu v6.4s, v8.4s
          fcvtpu v6.2d, v8.2d
          fcvtpu v4.2s, v0.2s
 
+// CHECK: fcvtpu  v4.4h, v0.4h            // encoding: [0x04,0xa8,0xf9,0x2e]
+// CHECK: fcvtpu  v6.8h, v8.8h            // encoding: [0x06,0xa9,0xf9,0x6e]
 // CHECK:	fcvtpu	v6.4s, v8.4s            // encoding: [0x06,0xa9,0xa1,0x6e]
 // CHECK:	fcvtpu	v6.2d, v8.2d            // encoding: [0x06,0xa9,0xe1,0x6e]
 // CHECK:	fcvtpu	v4.2s, v0.2s            // encoding: [0x04,0xa8,0xa1,0x2e]
 
+         fcvtms v4.4h, v0.4h
+         fcvtms v6.8h, v8.8h
          fcvtms v6.4s, v8.4s
          fcvtms v6.2d, v8.2d
          fcvtms v4.2s, v0.2s
 
+// CHECK: fcvtms  v4.4h, v0.4h            // encoding: [0x04,0xb8,0x79,0x0e]
+// CHECK: fcvtms  v6.8h, v8.8h            // encoding: [0x06,0xb9,0x79,0x4e]
 // CHECK:	fcvtms	v6.4s, v8.4s            // encoding: [0x06,0xb9,0x21,0x4e]
 // CHECK:	fcvtms	v6.2d, v8.2d            // encoding: [0x06,0xb9,0x61,0x4e]
 // CHECK:	fcvtms	v4.2s, v0.2s            // encoding: [0x04,0xb8,0x21,0x0e]
 
+         fcvtmu v4.4h, v0.4h
+         fcvtmu v6.8h, v8.8h
          fcvtmu v6.4s, v8.4s
          fcvtmu v6.2d, v8.2d
          fcvtmu v4.2s, v0.2s
 
+// CHECK: fcvtmu  v4.4h, v0.4h            // encoding: [0x04,0xb8,0x79,0x2e]
+// CHECK: fcvtmu  v6.8h, v8.8h            // encoding: [0x06,0xb9,0x79,0x6e]
 // CHECK:	fcvtmu	v6.4s, v8.4s            // encoding: [0x06,0xb9,0x21,0x6e]
 // CHECK:	fcvtmu	v6.2d, v8.2d            // encoding: [0x06,0xb9,0x61,0x6e]
 // CHECK:	fcvtmu	v4.2s, v0.2s            // encoding: [0x04,0xb8,0x21,0x2e]
 
+         fcvtzs v4.4h, v0.4h
+         fcvtzs v6.8h, v8.8h
          fcvtzs v6.4s, v8.4s
          fcvtzs v6.2d, v8.2d
          fcvtzs v4.2s, v0.2s
 
+// CHECK: fcvtzs  v4.4h, v0.4h            // encoding: [0x04,0xb8,0xf9,0x0e]
+// CHECK: fcvtzs  v6.8h, v8.8h            // encoding: [0x06,0xb9,0xf9,0x4e]
 // CHECK:	fcvtzs	v6.4s, v8.4s            // encoding: [0x06,0xb9,0xa1,0x4e]
 // CHECK:	fcvtzs	v6.2d, v8.2d            // encoding: [0x06,0xb9,0xe1,0x4e]
 // CHECK:	fcvtzs	v4.2s, v0.2s            // encoding: [0x04,0xb8,0xa1,0x0e]
 
 
+         fcvtzu v4.4h, v0.4h
+         fcvtzu v6.8h, v8.8h
          fcvtzu v6.4s, v8.4s
          fcvtzu v6.2d, v8.2d
          fcvtzu v4.2s, v0.2s
 
+// CHECK: fcvtzu  v4.4h, v0.4h            // encoding: [0x04,0xb8,0xf9,0x2e]
+// CHECK: fcvtzu  v6.8h, v8.8h            // encoding: [0x06,0xb9,0xf9,0x6e]
 // CHECK:	fcvtzu	v6.4s, v8.4s            // encoding: [0x06,0xb9,0xa1,0x6e]
 // CHECK:	fcvtzu	v6.2d, v8.2d            // encoding: [0x06,0xb9,0xe1,0x6e]
 // CHECK:	fcvtzu	v4.2s, v0.2s            // encoding: [0x04,0xb8,0xa1,0x2e]
 
+         fcvtas v4.4h, v0.4h
+         fcvtas v6.8h, v8.8h
          fcvtas v6.4s, v8.4s
          fcvtas v6.2d, v8.2d
          fcvtas v4.2s, v0.2s
 
+// CHECK: fcvtas  v4.4h, v0.4h            // encoding: [0x04,0xc8,0x79,0x0e]
+// CHECK: fcvtas  v6.8h, v8.8h            // encoding: [0x06,0xc9,0x79,0x4e]
 // CHECK:	fcvtas	v6.4s, v8.4s            // encoding: [0x06,0xc9,0x21,0x4e]
 // CHECK:	fcvtas	v6.2d, v8.2d            // encoding: [0x06,0xc9,0x61,0x4e]
 // CHECK:	fcvtas	v4.2s, v0.2s            // encoding: [0x04,0xc8,0x21,0x0e]
 
+         fcvtau v4.4h, v0.4h
+         fcvtau v6.8h, v8.8h
          fcvtau v6.4s, v8.4s
          fcvtau v6.2d, v8.2d
          fcvtau v4.2s, v0.2s
 
+// CHECK: fcvtau  v4.4h, v0.4h            // encoding: [0x04,0xc8,0x79,0x2e]
+// CHECK: fcvtau  v6.8h, v8.8h            // encoding: [0x06,0xc9,0x79,0x6e]
 // CHECK:	fcvtau	v6.4s, v8.4s            // encoding: [0x06,0xc9,0x21,0x6e]
 // CHECK:	fcvtau	v6.2d, v8.2d            // encoding: [0x06,0xc9,0x61,0x6e]
 // CHECK:	fcvtau	v4.2s, v0.2s            // encoding: [0x04,0xc8,0x21,0x2e]
@@ -603,42 +679,62 @@
 // CHECK:	ursqrte	v6.4s, v8.4s            // encoding: [0x06,0xc9,0xa1,0x6e]
 // CHECK:	ursqrte	v4.2s, v0.2s            // encoding: [0x04,0xc8,0xa1,0x2e]
 
+         scvtf v4.4h, v0.4h
+         scvtf v6.8h, v8.8h
          scvtf v6.4s, v8.4s
          scvtf v6.2d, v8.2d
          scvtf v4.2s, v0.2s
 
+// CHECK: scvtf   v4.4h, v0.4h            // encoding: [0x04,0xd8,0x79,0x0e]
+// CHECK: scvtf   v6.8h, v8.8h            // encoding: [0x06,0xd9,0x79,0x4e]
 // CHECK:	scvtf	v6.4s, v8.4s            // encoding: [0x06,0xd9,0x21,0x4e]
 // CHECK:	scvtf	v6.2d, v8.2d            // encoding: [0x06,0xd9,0x61,0x4e]
 // CHECK:	scvtf	v4.2s, v0.2s            // encoding: [0x04,0xd8,0x21,0x0e]
 
+         ucvtf v4.4h, v0.4h
+         ucvtf v6.8h, v8.8h
          ucvtf v6.4s, v8.4s
          ucvtf v6.2d, v8.2d
          ucvtf v4.2s, v0.2s
 
+// CHECK: ucvtf   v4.4h, v0.4h            // encoding: [0x04,0xd8,0x79,0x2e]
+// CHECK: ucvtf   v6.8h, v8.8h            // encoding: [0x06,0xd9,0x79,0x6e]
 // CHECK:	ucvtf	v6.4s, v8.4s            // encoding: [0x06,0xd9,0x21,0x6e]
 // CHECK:	ucvtf	v6.2d, v8.2d            // encoding: [0x06,0xd9,0x61,0x6e]
 // CHECK:	ucvtf	v4.2s, v0.2s            // encoding: [0x04,0xd8,0x21,0x2e]
 
+         frecpe v4.4h, v0.4h
+         frecpe v6.8h, v8.8h
          frecpe v6.4s, v8.4s
          frecpe v6.2d, v8.2d
          frecpe v4.2s, v0.2s
 
+// CHECK: frecpe  v4.4h, v0.4h            // encoding: [0x04,0xd8,0xf9,0x0e]
+// CHECK: frecpe  v6.8h, v8.8h            // encoding: [0x06,0xd9,0xf9,0x4e]
 // CHECK:	frecpe	v6.4s, v8.4s            // encoding: [0x06,0xd9,0xa1,0x4e]
 // CHECK:	frecpe	v6.2d, v8.2d            // encoding: [0x06,0xd9,0xe1,0x4e]
 // CHECK:	frecpe	v4.2s, v0.2s            // encoding: [0x04,0xd8,0xa1,0x0e]
 
+         frsqrte v4.4h, v0.4h
+         frsqrte v6.8h, v8.8h
          frsqrte v6.4s, v8.4s
          frsqrte v6.2d, v8.2d
          frsqrte v4.2s, v0.2s
 
+// CHECK: frsqrte v4.4h, v0.4h            // encoding: [0x04,0xd8,0xf9,0x2e]
+// CHECK: frsqrte v6.8h, v8.8h            // encoding: [0x06,0xd9,0xf9,0x6e]
 // CHECK:	frsqrte	v6.4s, v8.4s            // encoding: [0x06,0xd9,0xa1,0x6e]
 // CHECK:	frsqrte	v6.2d, v8.2d            // encoding: [0x06,0xd9,0xe1,0x6e]
 // CHECK:	frsqrte	v4.2s, v0.2s            // encoding: [0x04,0xd8,0xa1,0x2e]
 
+         fsqrt v4.4h, v0.4h
+         fsqrt v6.8h, v8.8h
          fsqrt v6.4s, v8.4s
          fsqrt v6.2d, v8.2d
          fsqrt v4.2s, v0.2s
 
+// CHECK: fsqrt   v4.4h, v0.4h            // encoding: [0x04,0xf8,0xf9,0x2e]
+// CHECK: fsqrt   v6.8h, v8.8h            // encoding: [0x06,0xf9,0xf9,0x6e]
 // CHECK:	fsqrt	v6.4s, v8.4s            // encoding: [0x06,0xf9,0xa1,0x6e]
 // CHECK:	fsqrt	v6.2d, v8.2d            // encoding: [0x06,0xf9,0xe1,0x6e]
 // CHECK:	fsqrt	v4.2s, v0.2s            // encoding: [0x04,0xf8,0xa1,0x2e]
diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s
index a16432324ef..4638c535a6a 100644
--- a/test/MC/AArch64/neon-simd-shift.s
+++ b/test/MC/AArch64/neon-simd-shift.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
 
 // Check that the assembler can handle the documented syntax for AArch64
 
@@ -400,16 +400,24 @@
 //------------------------------------------------------------------------------
 // Fixed-point convert to floating-point
 //------------------------------------------------------------------------------
+         scvtf v0.4h, v1.4h, #3
+         scvtf v0.8h, v1.8h, #3
          scvtf v0.2s, v1.2s, #3
          scvtf v0.4s, v1.4s, #3
          scvtf v0.2d, v1.2d, #3
+         ucvtf v0.4h, v1.4h, #3
+         ucvtf v0.8h, v1.8h, #3
          ucvtf v0.2s, v1.2s, #3
          ucvtf v0.4s, v1.4s, #3
          ucvtf v0.2d, v1.2d, #3
 
+// CHECK: scvtf v0.4h, v1.4h, #3        // encoding: [0x20,0xe4,0x1d,0x0f]
+// CHECK: scvtf v0.8h, v1.8h, #3        // encoding: [0x20,0xe4,0x1d,0x4f]
 // CHECK:	scvtf	v0.2s, v1.2s, #3        // encoding: [0x20,0xe4,0x3d,0x0f]
 // CHECK:	scvtf	v0.4s, v1.4s, #3        // encoding: [0x20,0xe4,0x3d,0x4f]
 // CHECK:	scvtf	v0.2d, v1.2d, #3        // encoding: [0x20,0xe4,0x7d,0x4f]
+// CHECK:	ucvtf v0.4h, v1.4h, #3        // encoding: [0x20,0xe4,0x1d,0x2f]
+// CHECK:	ucvtf v0.8h, v1.8h, #3        // encoding: [0x20,0xe4,0x1d,0x6f]
 // CHECK:	ucvtf	v0.2s, v1.2s, #3        // encoding: [0x20,0xe4,0x3d,0x2f]
 // CHECK:	ucvtf	v0.4s, v1.4s, #3        // encoding: [0x20,0xe4,0x3d,0x6f]
 // CHECK:	ucvtf	v0.2d, v1.2d, #3        // encoding: [0x20,0xe4,0x7d,0x6f]
@@ -417,17 +425,25 @@
 //------------------------------------------------------------------------------
 // Floating-point convert to fixed-point
 //------------------------------------------------------------------------------
+         fcvtzs v0.4h, v1.4h, #3
+         fcvtzs v0.8h, v1.8h, #3
          fcvtzs v0.2s, v1.2s, #3
          fcvtzs v0.4s, v1.4s, #3
          fcvtzs v0.2d, v1.2d, #3
+         fcvtzu v0.4h, v1.4h, #3
+         fcvtzu v0.8h, v1.8h, #3
          fcvtzu v0.2s, v1.2s, #3
          fcvtzu v0.4s, v1.4s, #3
          fcvtzu v0.2d, v1.2d, #3
 
 
+// CHECK:	fcvtzs  v0.4h, v1.4h, #3        // encoding: [0x20,0xfc,0x1d,0x0f]
+// CHECK:	fcvtzs  v0.8h, v1.8h, #3        // encoding: [0x20,0xfc,0x1d,0x4f]
 // CHECK:	fcvtzs	v0.2s, v1.2s, #3        // encoding: [0x20,0xfc,0x3d,0x0f]
 // CHECK:	fcvtzs	v0.4s, v1.4s, #3        // encoding: [0x20,0xfc,0x3d,0x4f]
 // CHECK:	fcvtzs	v0.2d, v1.2d, #3        // encoding: [0x20,0xfc,0x7d,0x4f]
+// CHECK:	fcvtzu  v0.4h, v1.4h, #3        // encoding: [0x20,0xfc,0x1d,0x2f]
+// CHECK:	fcvtzu  v0.8h, v1.8h, #3        // encoding: [0x20,0xfc,0x1d,0x6f]
 // CHECK:	fcvtzu	v0.2s, v1.2s, #3        // encoding: [0x20,0xfc,0x3d,0x2f]
 // CHECK:	fcvtzu	v0.4s, v1.4s, #3        // encoding: [0x20,0xfc,0x3d,0x6f]
 // CHECK:	fcvtzu	v0.2d, v1.2d, #3        // encoding: [0x20,0xfc,0x7d,0x6f]
diff --git a/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt b/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt
new file mode 100644
index 00000000000..8b7e1c87800
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt
@@ -0,0 +1,382 @@
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=+neon,-fullfp16 < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=-neon,-fullfp16 < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=-neon,+fullfp16 < %s 2>&1 | FileCheck %s
+
+[0x00,0xf8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x10,0x22,0x0f]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x11,0x12,0x4f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x50,0x22,0x0f]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x51,0x12,0x4f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x0f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x4f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x2f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x6f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0xc2,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xc8,0x30,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xc8,0xb0,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xf8,0x30,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xf8,0xb0,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0xc2,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0xc2,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x27,0x50,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x24,0x4f,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x25,0x4c,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x27,0x5c,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x25,0x4c,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x27,0x5c,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x27,0xd0,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x24,0xcf,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x27,0xd0,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x24,0xcf,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xdb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xd8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xdb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xd8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0xc9,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0xcb,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0xc9,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0xcb,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xcb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xc8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xcb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xc8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x83,0xda,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x01,0xd9,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x83,0xda,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x01,0xd9,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x50,0xe8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x8f,0xe8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x50,0xe8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x8f,0xe8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x2f,0x50,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x2c,0x4f,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x2f,0x50,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x2c,0x4f,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x2d,0xcc,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x2f,0xdc,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x2d,0xcc,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x2f,0xdc,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x3f,0xd0,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x3c,0xcf,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x3d,0x4c,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x3f,0x5c,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x34,0x42,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xff,0x35,0x50,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x35,0xd6,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xa3,0x34,0xc6,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x04,0x42,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xff,0x05,0x50,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x05,0xd6,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xa3,0x04,0xc6,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x34,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x34,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x35,0xd6,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x35,0xd6,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x04,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x04,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x05,0xd6,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x05,0xd6,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0xc2,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0xc2,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x1d,0x17,0xd4,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x18,0x11,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x62,0x58,0x14,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x98,0x11,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x46,0x98,0x18,0x7f]
+# CHECK: warning: invalid instruction encoding
+[0x95,0xfd,0x1f,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x95,0xfd,0x1f,0x7f]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xc9,0x79,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xc9,0x79,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xb9,0x79,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xb9,0x79,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xa9,0x79,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xa9,0x79,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xa9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xa9,0xf9,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xb9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xb9,0xf9,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x25,0x4c,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x25,0x4c,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x25,0xcc,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xe9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xe9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x2d,0x4c,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x2d,0xcc,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xd4,0x1e,0x4f,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x15,0x3e,0x4d,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xb5,0x3c,0xcc,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xd3,0xd9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x52,0xf9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xd9,0xf9,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x72,0xd8,0x30,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xf8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xf9,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xf8,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xf9,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x88,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x89,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x88,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x89,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x88,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x89,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xc8,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xc9,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xc8,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xc9,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xd8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xd9,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xd8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xd9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xf8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xf9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+
+# CHECK-NOT: warning: invalid instruction encoding
-- 
2.34.1