From 498ec20703c89d0c2890b0967791f0f5f2b59a2f Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 27 Oct 2010 22:49:00 +0000 Subject: [PATCH] Provide correct encodings for NEON vcvt, which has its own special immediate encoding for specifying fractional bits for fixed point conversions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117501 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCodeEmitter.cpp | 2 + lib/Target/ARM/ARMInstrInfo.td | 4 + lib/Target/ARM/ARMInstrNEON.td | 12 +-- lib/Target/ARM/ARMMCCodeEmitter.cpp | 4 + test/MC/ARM/neon-convert-encoding.ll | 122 +++++++++++++++++++++++++++ utils/TableGen/EDEmitter.cpp | 1 + 6 files changed, 139 insertions(+), 6 deletions(-) create mode 100644 test/MC/ARM/neon-convert-encoding.ll diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index c103fab9a73..1fb99fe5b0d 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -194,6 +194,8 @@ namespace { Binary |= (Reg << 13); return Binary; } + unsigned getNEONVcvtImm32(const MachineInstr &MI, unsigned Op) const { + return 0; } /// getMovi32Value - Return binary encoding of operand for movw/movt. If the /// machine operand requires relocation, record the relocation and return diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ac7c165d4cf..7e8f4fff0fd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -299,6 +299,10 @@ def pclabel : Operand { let PrintMethod = "printPCLabel"; } +def neon_vcvt_imm32 : Operand { + string EncoderMethod = "getNEONVcvtImm32"; +} + // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. def rot_imm : Operand, PatLeaf<(i32 imm), [{ int32_t v = (int32_t)N->getZExtValue(); diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a746049a30c..5208c2502ca 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1764,16 +1764,16 @@ class N2VCvtD op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; + (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, + IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; class N2VCvtQ op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; + (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, + IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; //===----------------------------------------------------------------------===// // Multiclasses diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index 13a80bdffde..1259672b9b9 100644 --- a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -91,6 +91,10 @@ public: unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op) const { return MI.getOperand(Op).getImm() - 1; } + + unsigned getNEONVcvtImm32(const MCInst &MI, unsigned Op) const { + return 64 - MI.getOperand(Op).getImm(); + } unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op) const; diff --git a/test/MC/ARM/neon-convert-encoding.ll b/test/MC/ARM/neon-convert-encoding.ll new file mode 100644 index 00000000000..69e5409565b --- /dev/null +++ b/test/MC/ARM/neon-convert-encoding.ll @@ -0,0 +1,122 @@ +; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s + +define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind { + %tmp1 = load <2 x float>* %A +; CHECK: vcvt.s32.f32 d16, d16 @ encoding: [0x20,0x07,0xfb,0xf3] + %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind { + %tmp1 = load <2 x float>* %A +; CHECK: vcvt.u32.f32 d16, d16 @ encoding: [0xa0,0x07,0xfb,0xf3] + %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> + ret <2 x i32> %tmp2 +} + +define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind { + %tmp1 = load <2 x i32>* %A +; CHECK: vcvt.f32.s32 d16, d16 @ encoding: [0x20,0x06,0xfb,0xf3] + %tmp2 = sitofp <2 x i32> %tmp1 to <2 x float> + ret <2 x float> %tmp2 +} + +define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind { + %tmp1 = load <2 x i32>* %A +; CHECK: vcvt.f32.u32 d16, d16 @ encoding: [0xa0,0x06,0xfb,0xf3] + %tmp2 = uitofp <2 x i32> %tmp1 to <2 x float> + ret <2 x float> %tmp2 +} + +define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A +; CHECK: vcvt.s32.f32 q8, q8 @ encoding: [0x60,0x07,0xfb,0xf3] + %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A +; CHECK: vcvt.u32.f32 q8, q8 @ encoding: [0xe0,0x07,0xfb,0xf3] + %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> + ret <4 x i32> %tmp2 +} + +define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind { + %tmp1 = load <4 x i32>* %A +; CHECK: vcvt.f32.s32 q8, q8 @ encoding: [0x60,0x06,0xfb,0xf3] + %tmp2 = sitofp <4 x i32> %tmp1 to <4 x float> + ret <4 x float> %tmp2 +} + +define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind { + %tmp1 = load <4 x i32>* %A +; CHECK: vcvt.f32.u32 q8, q8 @ encoding: [0xe0,0x06,0xfb,0xf3] + %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float> + ret <4 x float> %tmp2 +} + +define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind { + %tmp1 = load <2 x float>* %A +; CHECK: vcvt.s32.f32 d16, d16, #1 @ encoding: [0x30,0x0f,0xff,0xf2 + %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1) + ret <2 x i32> %tmp2 +} + +define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind { + %tmp1 = load <2 x float>* %A +; CHECK: vcvt.u32.f32 d16, d16, #1 @ encoding: [0x30,0x0f,0xff,0xf3] + %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1) + ret <2 x i32> %tmp2 +} + +define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind { + %tmp1 = load <2 x i32>* %A +; CHECK: vcvt.f32.s32 d16, d16, #1 @ encoding: [0x30,0x0e,0xff,0xf2] + %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1) + ret <2 x float> %tmp2 +} + +define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind { + %tmp1 = load <2 x i32>* %A +; CHECK: vcvt.f32.u32 d16, d16, #1 @ encoding: [0x30,0x0e,0xff,0xf3] + %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1) + ret <2 x float> %tmp2 +} + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone +declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone +declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone + +define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A +; CHECK: vcvt.s32.f32 q8, q8, #1 @ encoding: [0x70,0x0f,0xff,0xf2] + %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1) + ret <4 x i32> %tmp2 +} + +define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind { + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1) + ret <4 x i32> %tmp2 +} + +define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind { + %tmp1 = load <4 x i32>* %A +; CHECK: vcvt.u32.f32 q8, q8, #1 @ encoding: [0x70,0x0f,0xff,0xf3] + %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1) + ret <4 x float> %tmp2 +} + +define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind { + %tmp1 = load <4 x i32>* %A +; CHECK: vcvt.f32.u32 q8, q8, #1 @ encoding: [0x70,0x0e,0xff,0xf3] + %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1) + ret <4 x float> %tmp2 +} + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone +declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone +declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index a18443685a4..b216a74b2e4 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -582,6 +582,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type, IMM("t_imm_s4"); IMM("pclabel"); IMM("shift_imm"); + IMM("neon_vcvt_imm32"); MISC("brtarget", "kOperandTypeARMBranchTarget"); // ? MISC("so_reg", "kOperandTypeARMSoReg"); // R, R, I -- 2.34.1