Implement ARMBaseInstrInfo::commuteInstruction() for MOVCCr.

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Wed, 4 Apr 2012 18:23:42 +0000 (18:23 +0000)

committer Jakob Stoklund Olesen <stoklund@2pi.dk>

Wed, 4 Apr 2012 18:23:42 +0000 (18:23 +0000)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Wed, 4 Apr 2012 18:23:42 +0000 (18:23 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>
Wed, 4 Apr 2012 18:23:42 +0000 (18:23 +0000)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index 56cd6e56e71d7b571668ba12915479637962169d..c6280f819a4f2d0c6d6cdd99db93a7367be4ebfc 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1500,6 +1500,29 @@ int llvm::getMatchingCondBranchOpcode(int Opc) {
    llvm_unreachable("Unknown unconditional branch opcode!");
  }
  
+/// commuteInstruction - Commute MI; for MOVCC also invert the condition code.
+MachineInstr *
+ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+  switch (MI->getOpcode()) {
+  case ARM::MOVCCr:
+  case ARM::t2MOVCCr: {
+    // MOVCC is commuted by swapping its operands and inverting the condition.
+    unsigned PredReg = 0;
+    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+    // AL can't be inverted (shouldn't happen); the predicate must use CPSR.
+    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+      return NULL;
+    MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    if (!MI)
+      return NULL;
+    // After swapping the MOVCC operands, store the opposite condition code.
+    MI->getOperand(MI->findFirstPredOperandIdx())
+      .setImm(ARMCC::getOppositeCondition(CC));
+    return MI;
+  }
+  }
+  return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+}
  
  /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
  /// instruction is encoded with an 'S' bit is determined by the optional CPSR
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h

index 314e3177bec29fb960d08a2ccd8d54511b129a6d..2fe85072a33080bccdc72f51ec834f90ae2fd4e7 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -139,6 +139,8 @@ public:
  
    MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
  
+  MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+
    virtual bool produceSameValue(const MachineInstr *MI0,
                                  const MachineInstr *MI1,
                                  const MachineRegisterInfo *MRI) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td

index c0bd237f2ed3d77ee01c355015e86c2c549ce782..37d53b09a79a6f5cccab9e86e28c08da3753a554 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -4044,10 +4044,13 @@ def BCCZi64 : PseudoInst<(outs),
  // FIXME: should be able to write a pattern for ARMcmov, but can't use
  // a two-value operand where a dag node expects two operands. :(
  let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
  def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
                             4, IIC_iCMOVr,
    [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
        RegConstraint<"$false = $Rd">;
+
  def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
                             (ins GPR:$false, so_reg_imm:$shift, pred:$p),
                             4, IIC_iCMOVsr,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td

index 63d3a63c73715aaaa04867fb5a2a00b906843576..41d4e206491bf70d22fe7613a4aac7903a7f4b34 100644 (file)
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2872,6 +2872,8 @@ defm t2TEQ  : T2I_cmp_irs<0b0100, "teq",
  // FIXME: should be able to write a pattern for ARMcmov, but can't use
  // a two-value operand where a dag node expects two operands. :(
  let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
  def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
                              (ins rGPR:$false, rGPR:$Rm, pred:$p),
                              4, IIC_iCMOVr,
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt

index 4fcaecfcb2359389b4e28004b3e62a3823913573..3eddda812f848a7def9198456a992a1ebaa99078 100644 (file)
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions.
  
  //===---------------------------------------------------------------------===//
  
-ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
-ARMInstrInfo::commuteInstruction() to support it.
-
-//===---------------------------------------------------------------------===//
-
  Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting
  LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g.
  ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg)
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll

new file mode 100644 (file)

index 0000000..f53ac17
--- /dev/null
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+
+; LLVM IR optimizers canonicalize icmp+select this way.
+; Make sure that TwoAddressInstructionPass can commute the corresponding
+; MOVCC instructions to avoid excessive copies in one of the if blocks.
+;
+; CHECK: %if.then
+; CHECK-NOT: mov
+; CHECK: movlo
+; CHECK: movlo
+; CHECK-NOT: mov
+
+; CHECK: %if.else
+; CHECK-NOT: mov
+; CHECK: movls
+; CHECK: movls
+; CHECK-NOT: mov
+
+; CHECK: %if.end8
+
+define i32 @f(i32* nocapture %a, i32 %Pref) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %if.end8
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ]
+  %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
+  %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %mul = mul i32 %0, %0
+  %sub = add nsw i32 %i.012, -5
+  %cmp2 = icmp eq i32 %sub, %Pref
+  br i1 %cmp2, label %if.else, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %cmp3 = icmp ult i32 %mul, %BestCost.011
+  %i.0.BestIdx.0 = select i1 %cmp3, i32 %i.012, i32 %BestIdx.010
+  %mul.BestCost.0 = select i1 %cmp3, i32 %mul, i32 %BestCost.011
+  br label %if.end8
+
+if.else:                                          ; preds = %for.body
+  %cmp5 = icmp ugt i32 %mul, %BestCost.011
+  %BestIdx.0.i.0 = select i1 %cmp5, i32 %BestIdx.010, i32 %i.012
+  %BestCost.0.mul = select i1 %cmp5, i32 %BestCost.011, i32 %mul
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then
+  %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
+  %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  %inc = add i32 %i.012, 1
+  %cmp = icmp eq i32 %inc, 11
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end8
+  ret i32 %BestIdx.1
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll

index f566974c2351ec5516f30e4093c80a67a90c69a3..3ac7d77d6f79940b685f8ca40e4d54ed210a77be 100644 (file)
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -10,7 +10,7 @@ entry:
  ; CHECK: cmp
  ; CHECK: moveq
  ; CHECK-NOT: cmp
-; CHECK: moveq
+; CHECK: mov{{eq|ne}}
      %tmp1 = icmp eq i32 %cond1, 0
      %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
      %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll

index e927b39be59754bc4baf7b40de3968046f9de655..c9ac66acbfd870606c294722aa10b932af29226c 100644 (file)
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -64,7 +64,7 @@ define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
  entry:
  ; ARM: t4:
  ; ARM: ldr
-; ARM: movlt
+; ARM: mov{{lt|ge}}
  
  ; ARMT2: t4:
  ; ARMT2: movwlt [[R0:r[0-9]+]], #65365
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Wed, 4 Apr 2012 18:23:42 +0000 (18:23 +0000)
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Wed, 4 Apr 2012 18:23:42 +0000 (18:23 +0000)
lib/Target/ARM/ARMBaseInstrInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMBaseInstrInfo.h		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
lib/Target/ARM/ARMInstrThumb2.td		patch \| blob \| history
lib/Target/ARM/README.txt		patch \| blob \| history
test/CodeGen/ARM/commute-movcc.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/machine-cse-cmp.ll		patch \| blob \| history
test/CodeGen/ARM/select-imm.ll		patch \| blob \| history