Revert r163298 "Optimize codegen for VSETLNi{8,16,32} operating on Q registers."

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Fri, 26 Oct 2012 23:39:46 +0000 (23:39 +0000)

committer Jakob Stoklund Olesen <stoklund@2pi.dk>

Fri, 26 Oct 2012 23:39:46 +0000 (23:39 +0000)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Fri, 26 Oct 2012 23:39:46 +0000 (23:39 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>
Fri, 26 Oct 2012 23:39:46 +0000 (23:39 +0000)
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp

index c130b2eede5e67faa7190d25ff3dbf3240cfeb6a..f7c8a9c2dcc2d95d8677323f8249da097bce8d4d 100644 (file)
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1208,57 +1208,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
        ExpandLaneOp(MBBI);
        return true;
  
-    case ARM::VSETLNi8Q:
-    case ARM::VSETLNi16Q: {
-      // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting
-      // on the respective D register.
-
-      unsigned QReg  = MI.getOperand(1).getReg();
-      unsigned QLane = MI.getOperand(3).getImm();
-
-      unsigned NewOpcode, DLane, DSubReg;
-      switch (Opcode) {
-      default: llvm_unreachable("Invalid opcode!");
-      case ARM::VSETLNi8Q:
-        // 4 possible 8-bit lanes per DPR:
-        NewOpcode = ARM::VSETLNi8;
-        DLane = QLane % 8;
-        DSubReg  = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0;
-        break;
-      case ARM::VSETLNi16Q:
-        // 4 possible 16-bit lanes per DPR.
-        NewOpcode = ARM::VSETLNi16;
-        DLane = QLane % 4;
-        DSubReg  = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0;
-        break;
-      }
-
-      MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode));
-
-      unsigned DReg = TRI->getSubReg(QReg, DSubReg);
-
-      MIB.addReg(DReg, RegState::Define); // Output DPR
-      MIB.addReg(DReg);                   // Input DPR
-      MIB.addOperand(MI.getOperand(2));   // Input GPR
-      MIB.addImm(DLane);                  // Lane
-
-      // Add the predicate operands.
-      MIB.addOperand(MI.getOperand(4));
-      MIB.addOperand(MI.getOperand(5));
-
-      if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register.
-        MIB->addRegisterKilled(QReg, TRI, true);
-      // And an implicit def of the output register (which should always be the
-      // same as the input register).
-      MIB->addRegisterDefined(QReg, TRI);
-
-      TransferImpOps(MI, MIB, MIB);
-
-      MI.eraseFromParent();
-      return true;
-    }
-
      case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
      case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
      case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index ede4def2b73c5861bd8756b959a6d0ea8d2a09f0..3cf213cbffee2de9f0ce029ae23f20f3132680a6 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -5140,23 +5140,25 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                                             GPR:$R, imm:$lane))]> {
    let Inst{21} = lane{0};
  }
-
-def VSETLNi8Q : PseudoNeonI<(outs QPR:$V),
-                             (ins QPR:$src1, GPR:$R, VectorIndex8:$lane),
-                             IIC_VMOVISL, "",
-                             [(set QPR:$V, (vector_insert (v16i8 QPR:$src1),
-                                           GPR:$R, imm:$lane))]>;
-def VSETLNi16Q : PseudoNeonI<(outs QPR:$V),
-                             (ins QPR:$src1, GPR:$R, VectorIndex16:$lane),
-                             IIC_VMOVISL, "",
-                             [(set QPR:$V, (vector_insert (v8i16 QPR:$src1),
-                                           GPR:$R, imm:$lane))]>;
  }
-
+def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
+          (v16i8 (INSERT_SUBREG QPR:$src1,
+                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
+                                   (DSubReg_i8_reg imm:$lane))),
+                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
+                  (DSubReg_i8_reg imm:$lane)))>;
+def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
+          (v8i16 (INSERT_SUBREG QPR:$src1,
+                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+                                     (DSubReg_i16_reg imm:$lane))),
+                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
+                  (DSubReg_i16_reg imm:$lane)))>;
  def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
-         (v4i32 (INSERT_SUBREG QPR:$src1,
-                 GPR:$src2,
-                 (SSubReg_f32_reg imm:$lane)))>;
+          (v4i32 (INSERT_SUBREG QPR:$src1,
+                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
+                                     (DSubReg_i32_reg imm:$lane))),
+                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
+                  (DSubReg_i32_reg imm:$lane)))>;
  
  def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
            (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
diff --git a/test/CodeGen/ARM/integer_insertelement.ll b/test/CodeGen/ARM/integer_insertelement.ll

index 4f2d7e3f73ef16f3bd61cc3c6a4192f033cf422f..1d72afefb5b8d9d53cd2b61c13ddf2ba8a72e16e 100644 (file)
--- a/test/CodeGen/ARM/integer_insertelement.ll
+++ b/test/CodeGen/ARM/integer_insertelement.ll
@@ -6,7 +6,7 @@
  
  ; CHECK: @f
  ; CHECK-NOT: vorr d
-; CHECK: vmov s
+; CHECK: vmov.32 d
  ; CHECK-NOT: vorr d
  ; CHECK: mov pc, lr
  define <4 x i32> @f(<4 x i32> %in) {
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll

index 2ed65c9aeed8e891d7514c0b2de549facf1c4ff0..c9ce3b7450b64583fe14f121fbdf337d5bd34892 100644 (file)
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -200,7 +200,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
  
  define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
  ;CHECK: vsetQ_lane32:
-;CHECK: vmov s
+;CHECK: vmov.32 d{{.*}}[1], r1
         %tmp1 = load <4 x i32>* %A
         %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
         ret <4 x i32> %tmp2
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Fri, 26 Oct 2012 23:39:46 +0000 (23:39 +0000)
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Fri, 26 Oct 2012 23:39:46 +0000 (23:39 +0000)
lib/Target/ARM/ARMExpandPseudoInsts.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrNEON.td		patch \| blob \| history
test/CodeGen/ARM/integer_insertelement.ll		patch \| blob \| history
test/CodeGen/ARM/vget_lane.ll		patch \| blob \| history