From: Jim Grosbach Date: Wed, 21 Dec 2011 19:40:55 +0000 (+0000) Subject: ARM NEON assembly parsing for VLD2 to all lanes instructions. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=e6949b13997e6d31aa4719a0e80c4b6b405e42a9 ARM NEON assembly parsing for VLD2 to all lanes instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147069 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3cb69828c6c..13f1e664516 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2584,9 +2584,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2DUPd8: case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: - case ARM::VLD2DUPd8_UPD: - case ARM::VLD2DUPd16_UPD: - case ARM::VLD2DUPd32_UPD: + case ARM::VLD2DUPd8wb_fixed: + case ARM::VLD2DUPd16wb_fixed: + case ARM::VLD2DUPd32wb_fixed: + case ARM::VLD2DUPd8wb_register: + case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_register: case ARM::VLD4DUPd8: case ARM::VLD4DUPd16: case ARM::VLD4DUPd32: @@ -2768,9 +2771,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8Pseudo_UPD: - case ARM::VLD2DUPd16Pseudo_UPD: - case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD2DUPd8PseudoWB_fixed: + case ARM::VLD2DUPd16PseudoWB_fixed: + case ARM::VLD2DUPd32PseudoWB_fixed: + case ARM::VLD2DUPd8PseudoWB_register: + case ARM::VLD2DUPd16PseudoWB_register: + case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD4DUPd8Pseudo: case ARM::VLD4DUPd16Pseudo: case ARM::VLD4DUPd32Pseudo: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 8e8d3ff2af0..1fb769754c0 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -162,11 +162,14 @@ static const 
NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true, true,SingleSpc,2,8,false}, { ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false}, -{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, true, SingleSpc, 2, 4,true}, +{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false}, +{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false}, { ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false}, -{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, true, SingleSpc, 2, 2,true}, +{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, +{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false}, { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false}, -{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, true, SingleSpc, 2, 8,true}, +{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, +{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, @@ -1163,9 +1166,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8Pseudo_UPD: - case ARM::VLD2DUPd16Pseudo_UPD: - case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD2DUPd8PseudoWB_fixed: + case ARM::VLD2DUPd16PseudoWB_fixed: + case ARM::VLD2DUPd32PseudoWB_fixed: + case ARM::VLD2DUPd8PseudoWB_register: + case ARM::VLD2DUPd16PseudoWB_register: + case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD3DUPd8Pseudo: case 
ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd32Pseudo: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 74731419fd9..7bd0bace909 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1595,6 +1595,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; + + case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register; + case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register; + case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } @@ -2043,8 +2047,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, Ops.push_back(MemAddr); Ops.push_back(Align); if (isUpdating) { + // fixed-stride update instructions don't have an explicit writeback + // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + if (!isa(Inc.getNode())) + Ops.push_back(Inc); + // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 
+ else if (NumVecs > 2) + Ops.push_back(Reg0); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2798,8 +2808,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD, - ARM::VLD2DUPd32Pseudo_UPD }; + unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed, + ARM::VLD2DUPd16PseudoWB_fixed, + ARM::VLD2DUPd32PseudoWB_fixed }; return SelectVLDDup(N, true, 2, Opcodes); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 2a3c736eef5..5c4dcde90db 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1254,25 +1254,42 @@ def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>; def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>; // ...with address register writeback: -class VLD2DUPWB op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; +multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { + def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; -def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">; -def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; -def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">; -def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">; -def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; -def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo; -def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo; -def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo; +def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo ; +def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo; +def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo ; +def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo; +def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo ; +def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP op7_4, string Dt> diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s index d61fb77447e..f747e739683 100644 --- a/test/MC/ARM/neon-vld-encoding.s +++ b/test/MC/ARM/neon-vld-encoding.s @@ -238,6 +238,10 @@ vld2.8 {d2[4], d3[4]}, [r2] vld2.32 {d22[], d23[]}, [r1] vld2.32 {d22[], d24[]}, 
[r1] + vld2.32 {d10[ ],d11[ ]}, [r3]! + vld2.32 {d14[ ],d16[ ]}, [r4]! + vld2.32 {d22[ ],d23[ ]}, [r5], r4 + vld2.32 {d22[ ],d24[ ]}, [r6], r4 @ CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4] @ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4] @@ -250,6 +254,10 @@ @ CHECK: vld2.8 {d2[4], d3[4]}, [r2] @ encoding: [0x8f,0x21,0xa2,0xf4] @ CHECK: vld2.32 {d22[], d23[]}, [r1] @ encoding: [0x8f,0x6d,0xe1,0xf4] @ CHECK: vld2.32 {d22[], d24[]}, [r1] @ encoding: [0xaf,0x6d,0xe1,0xf4] +@ CHECK: vld2.32 {d10[], d11[]}, [r3]! @ encoding: [0x8d,0xad,0xa3,0xf4] +@ CHECK: vld2.32 {d14[], d16[]}, [r4]! @ encoding: [0xad,0xed,0xa4,0xf4] +@ CHECK: vld2.32 {d22[], d23[]}, [r5], r4 @ encoding: [0x84,0x6d,0xe5,0xf4] +@ CHECK: vld2.32 {d22[], d24[]}, [r6], r4 @ encoding: [0xa4,0x6d,0xe6,0xf4] @ vld3.8 {d16[1], d17[1], d18[1]}, [r0]