[ARMv8] Implement the NEON instructions VRINT{N, X, A, Z, M, P}.
authorJoey Gouly <joey.gouly@arm.com>
Fri, 19 Jul 2013 16:34:16 +0000 (16:34 +0000)
committerJoey Gouly <joey.gouly@arm.com>
Fri, 19 Jul 2013 16:34:16 +0000 (16:34 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186688 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsARM.td
lib/Target/ARM/ARMInstrNEON.td
lib/Target/ARM/AsmParser/ARMAsmParser.cpp
test/MC/ARM/neon-v8.s
test/MC/ARM/thumb-neon-v8.s
test/MC/Disassembler/ARM/neon-v8.txt
test/MC/Disassembler/ARM/thumb-neon-v8.txt

index 7849e940990b0f03d9eb641a8e1ec652557b0c3e..3c5d5ff5c9c1b9052827216031edd933d85f9268 100644 (file)
@@ -364,6 +364,14 @@ def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
 def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
 def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
 
+// Vector Rounding
+// One intrinsic per VRINT suffix (n, x, a, z, m, p), each declared with the
+// Neon_1Arg_Intrinsic class.
+def int_arm_neon_vrintn : Neon_1Arg_Intrinsic;
+def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
+def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
+def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
+def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
+def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
+
 // De-interleaving vector loads from N-element structures.
 // Source operands are the address and alignment.
 def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
index 083670041556c8df8578be9e2dd2ce669e872b79..3e2ab0698dda50f2007da5f4c6127a22e1893528 100644 (file)
@@ -5738,6 +5738,34 @@ def  VTBX4Pseudo
                 IIC_VTBX4, "$orig = $dst", []>;
 } // DecoderMethod = "DecodeTBLInstruction"
 
+// VRINT      : Vector Rounding
+//
+// VRINT_FPI defines the D-register and Q-register forms of one "vrint<op>"
+// instruction, plus assembler aliases for the ".f32.f32" spelling.
+//   op    - suffix appended to "vrint" to form the mnemonic.
+//   op9_7 - 3-bit value written into Inst{9-7} of both forms.
+//   Int   - pattern operator (intrinsic) the instruction is selected from.
+multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
+  // Both forms share the same post-encoder hook and decoder namespace.
+  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+    // 64-bit (D register) form operating on v2f32.
+    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
+                      !strconcat("vrint", op), "f32",
+                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
+      let Inst{9-7} = op9_7;
+    }
+    // 128-bit (Q register) form operating on v4f32.
+    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
+                      !strconcat("vrint", op), "f32",
+                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
+      let Inst{9-7} = op9_7;
+    }
+  }
+
+  // Accept "vrint<op>.f32.f32" as an alternative spelling of the D and Q
+  // instructions defined above.
+  def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
+                  (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+  def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
+                  (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+}
+
+// Instantiate VRINT_FPI once per mnemonic suffix. The second argument is the
+// value placed in Inst{9-7}; the third is the intrinsic used for selection.
+// The values 0b100 and 0b110 are not instantiated here.
+defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
+defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
+defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
+defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
+
 //===----------------------------------------------------------------------===//
 // NEON instructions for single-precision FP math
 //===----------------------------------------------------------------------===//
index f114b7a26dc6d0e0f451e6751b68d488a60e6c63..7061ede4e2c8d39838d0dcf7486752940d44e49c 100644 (file)
@@ -263,6 +263,8 @@ class ARMAsmParser : public MCTargetAsmParser {
                           const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
   bool shouldOmitCCOutOperand(StringRef Mnemonic,
                               SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+  /// Returns true when the predicate operand that was parsed for \p Mnemonic
+  /// should be dropped from \p Operands by the caller.
+  bool shouldOmitPredicateOperand(StringRef Mnemonic,
+                              SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 
 public:
   enum ARMMatchResultTy {
@@ -5157,6 +5159,25 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
   return false;
 }
 
+bool ARMAsmParser::shouldOmitPredicateOperand(
+    StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+  // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
+  unsigned RegIdx = 3;
+  if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
+      static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") {
+    if (static_cast<ARMOperand *>(Operands[3])->isToken() &&
+        static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32")
+      RegIdx = 4;
+
+    if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() &&
+        (ARMMCRegisterClasses[ARM::DPRRegClassID]
+             .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) ||
+         ARMMCRegisterClasses[ARM::QPRRegClassID]
+             .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg())))
+      return true;
+  }
+}
+
 static bool isDataTypeToken(StringRef Tok) {
   return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
     Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -5359,6 +5380,15 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     delete Op;
   }
 
+  // Some instructions have the same mnemonic, but don't always
+  // have a predicate. Distinguish them here and delete the
+  // predicate if needed.
+  if (shouldOmitPredicateOperand(Mnemonic, Operands)) {
+    ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+    Operands.erase(Operands.begin() + 1);
+    delete Op;
+  }
+
   // ARM mode 'blx' need special handling, as the register operand version
   // is predicable, but the label operand version is not. So, we can't rely
   // on the Mnemonic based checking to correctly figure out when to put
index 06a22f7c4fde5e573cf277df668877cbbeb2e169..429c8e3c08732ead9e08b1fae9176ae0e73b5ed3 100644 (file)
@@ -44,3 +44,40 @@ vcvtp.s32.f32        q4, q15
 @ CHECK: vcvtp.s32.f32 q4, q15 @ encoding: [0x6e,0x82,0xbb,0xf3]
 vcvtp.u32.f32  q9, q8
 @ CHECK: vcvtp.u32.f32 q9, q8 @ encoding: [0xe0,0x22,0xfb,0xf3]
+
+vrintn.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0x00,0x34,0xba,0xf3]
+vrintn.f32 q1, q4
+@ CHECK: vrintn.f32 q1, q4 @ encoding: [0x48,0x24,0xba,0xf3]
+vrintx.f32 d5, d12
+@ CHECK: vrintx.f32 d5, d12 @ encoding: [0x8c,0x54,0xba,0xf3]
+vrintx.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xc6,0x04,0xba,0xf3]
+vrinta.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0x00,0x35,0xba,0xf3]
+vrinta.f32 q8, q2
+@ CHECK: vrinta.f32 q8, q2 @ encoding: [0x44,0x05,0xfa,0xf3]
+vrintz.f32 d12, d18
+@ CHECK: vrintz.f32 d12, d18 @ encoding: [0xa2,0xc5,0xba,0xf3]
+vrintz.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xc8,0x25,0xfa,0xf3]
+vrintm.f32 d3, d0
+@ CHECK: vrintm.f32 d3, d0 @ encoding: [0x80,0x36,0xba,0xf3]
+vrintm.f32 q1, q4
+@ CHECK: vrintm.f32 q1, q4 @ encoding: [0xc8,0x26,0xba,0xf3]
+vrintp.f32 d3, d0
+@ CHECK: vrintp.f32 d3, d0 @ encoding: [0x80,0x37,0xba,0xf3]
+vrintp.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xc8,0x27,0xba,0xf3]
+
+@ test the aliases of vrint
+vrintn.f32.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0x00,0x34,0xba,0xf3]
+vrintx.f32.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xc6,0x04,0xba,0xf3]
+vrinta.f32.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0x00,0x35,0xba,0xf3]
+vrintz.f32.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xc8,0x25,0xfa,0xf3]
+vrintp.f32.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xc8,0x27,0xba,0xf3]
index df40238757cfa9356d3cdb7a6342614ab94fc65d..5b327810f85acd5613de26716a62fce737aef8eb 100644 (file)
@@ -44,3 +44,40 @@ vcvtp.s32.f32        q4, q15
 @ CHECK: vcvtp.s32.f32 q4, q15 @ encoding: [0xbb,0xff,0x6e,0x82]
 vcvtp.u32.f32  q9, q8
 @ CHECK: vcvtp.u32.f32 q9, q8 @ encoding: [0xfb,0xff,0xe0,0x22]
+
+vrintn.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x34]
+vrintn.f32 q1, q4
+@ CHECK: vrintn.f32 q1, q4 @ encoding: [0xba,0xff,0x48,0x24]
+vrintx.f32 d5, d12
+@ CHECK: vrintx.f32 d5, d12 @ encoding: [0xba,0xff,0x8c,0x54]
+vrintx.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xba,0xff,0xc6,0x04]
+vrinta.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x35]
+vrinta.f32 q8, q2
+@ CHECK: vrinta.f32 q8, q2 @ encoding: [0xfa,0xff,0x44,0x05]
+vrintz.f32 d12, d18
+@ CHECK: vrintz.f32 d12, d18 @ encoding: [0xba,0xff,0xa2,0xc5]
+vrintz.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xfa,0xff,0xc8,0x25]
+vrintm.f32 d3, d0
+@ CHECK: vrintm.f32 d3, d0 @ encoding: [0xba,0xff,0x80,0x36]
+vrintm.f32 q1, q4
+@ CHECK: vrintm.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x26]
+vrintp.f32 d3, d0
+@ CHECK: vrintp.f32 d3, d0 @ encoding: [0xba,0xff,0x80,0x37]
+vrintp.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x27]
+
+@ test the aliases of vrint
+vrintn.f32.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x34]
+vrintx.f32.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xba,0xff,0xc6,0x04]
+vrinta.f32.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x35]
+vrintz.f32.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xfa,0xff,0xc8,0x25]
+vrintp.f32.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x27]
index 4e9bf3f7def01fd0d72c3628c1bc7fcf74d5ac1a..8c6e6898b9418f16db88c3bb2b1e8f6b447ee04c 100644 (file)
 # CHECK: vcvtp.s32.f32 q4, q15
 0xe0 0x22 0xfb 0xf3
 # CHECK: vcvtp.u32.f32 q9, q8
+
+0x00 0x34 0xba 0xf3
+# CHECK: vrintn.f32 d3, d0
+0x48 0x24 0xba 0xf3
+# CHECK: vrintn.f32 q1, q4
+0x8c 0x54 0xba 0xf3
+# CHECK: vrintx.f32 d5, d12
+0xc6 0x04 0xba 0xf3
+# CHECK: vrintx.f32 q0, q3
+0x00 0x35 0xba 0xf3
+# CHECK: vrinta.f32 d3, d0
+0x44 0x05 0xfa 0xf3
+# CHECK: vrinta.f32 q8, q2
+0xa2 0xc5 0xba 0xf3
+# CHECK: vrintz.f32 d12, d18
+0xc8 0x25 0xfa 0xf3
+# CHECK: vrintz.f32 q9, q4
+0x80 0x36 0xba 0xf3
+# CHECK: vrintm.f32 d3, d0
+0xc8 0x26 0xba 0xf3
+# CHECK: vrintm.f32 q1, q4
+0x80 0x37 0xba 0xf3
+# CHECK: vrintp.f32 d3, d0
+0xc8 0x27 0xba 0xf3
+# CHECK: vrintp.f32 q1, q4
index f025b8b2ab885a1d012c1a4c8401716ced1f519f..27c09ea0f822de235bdb43ef2704adfac6d8405e 100644 (file)
 # CHECK: vcvtp.s32.f32 q4, q15
 0xfb 0xff 0xe0 0x22
 # CHECK: vcvtp.u32.f32 q9, q8
+
+0xba 0xff 0x00 0x34
+# CHECK: vrintn.f32 d3, d0
+0xba 0xff 0x48 0x24
+# CHECK: vrintn.f32 q1, q4
+0xba 0xff 0x8c 0x54
+# CHECK: vrintx.f32 d5, d12
+0xba 0xff 0xc6 0x04
+# CHECK: vrintx.f32 q0, q3
+0xba 0xff 0x00 0x35
+# CHECK: vrinta.f32 d3, d0
+0xfa 0xff 0x44 0x05
+# CHECK: vrinta.f32 q8, q2
+0xba 0xff 0xa2 0xc5
+# CHECK: vrintz.f32 d12, d18
+0xfa 0xff 0xc8 0x25
+# CHECK: vrintz.f32 q9, q4
+0xba 0xff 0x80 0x36
+# CHECK: vrintm.f32 d3, d0
+0xba 0xff 0xc8 0x26
+# CHECK: vrintm.f32 q1, q4
+0xba 0xff 0x80 0x37
+# CHECK: vrintp.f32 d3, d0
+0xba 0xff 0xc8 0x27
+# CHECK: vrintp.f32 q1, q4