From: Evan Cheng Date: Thu, 3 May 2012 01:45:13 +0000 (+0000) Subject: Fix two-address pass's aggressive instruction commuting heuristics. It's meant X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=d99d68bcee8ce91a18c397756c702363e030fd83 Fix two-address pass's aggressive instruction commuting heuristics. It's meant to catch cases like: %reg1024 = MOV r1 %reg1025 = MOV r0 %reg1026 = ADD %reg1024, %reg1025 r0 = MOV %reg1026 By commuting ADD, it let coalescer eliminate all of the copies. However, there was a bug in the heuristics where it ended up commuting the ADD in: %reg1024 = MOV r0 %reg1025 = MOV 0 %reg1026 = ADD %reg1024, %reg1025 r0 = MOV %reg1026 That did no benefit but rather ensure the last MOV would not be coalesced. rdar://11355268 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156048 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index f98e059ed83..1277df3dc70 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -102,7 +102,7 @@ namespace { MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist); - bool isProfitableToCommute(unsigned regB, unsigned regC, + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist); @@ -567,7 +567,8 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { /// isProfitableToReMat - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool -TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, +TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, + unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist) { if (OptLevel == CodeGenOpt::None) @@ -604,15 +605,15 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, // %reg1026 = ADD %reg1024, %reg1025 // r0 = MOV %reg1026 // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. - unsigned FromRegB = getMappedReg(regB, SrcRegMap); - unsigned FromRegC = getMappedReg(regC, SrcRegMap); - unsigned ToRegB = getMappedReg(regB, DstRegMap); - unsigned ToRegC = getMappedReg(regC, DstRegMap); - if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) && - ((!FromRegC && !ToRegC) || - regsAreCompatible(FromRegB, ToRegC, TRI) || - regsAreCompatible(FromRegC, ToRegB, TRI))) - return true; + unsigned ToRegA = getMappedReg(regA, DstRegMap); + if (ToRegA) { + unsigned FromRegB = getMappedReg(regB, SrcRegMap); + unsigned FromRegC = getMappedReg(regC, SrcRegMap); + bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI); + bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI); + if (BComp != CComp) + return !BComp && CComp; + } // If there is a use of regC between its last def (could be livein) and this // instruction, then bail. @@ -1211,6 +1212,9 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return true; // Done with this instruction. } + if (TargetRegisterInfo::isVirtualRegister(regA)) + ScanUses(regA, &*mbbi, Processed); + // Check if it is profitable to commute the operands. unsigned SrcOp1, SrcOp2; unsigned regC = 0; @@ -1230,7 +1234,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) { + else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -1252,9 +1256,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return true; } - if (TargetRegisterInfo::isVirtualRegister(regA)) - ScanUses(regA, &*mbbi, Processed); - if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. diff --git a/test/CodeGen/X86/4char-promote.ll b/test/CodeGen/X86/4char-promote.ll index 386057f0a3b..6d7bc0cc702 100644 --- a/test/CodeGen/X86/4char-promote.ll +++ b/test/CodeGen/X86/4char-promote.ll @@ -1,11 +1,12 @@ ; A test for checking PR 9623 -;RUN: llc -march=x86-64 -mcpu=corei7 -promote-elements < %s | FileCheck %s +; RUN: llc -march=x86-64 -mcpu=corei7 -promote-elements < %s | FileCheck %s target triple = "x86_64-apple-darwin" ; CHECK: pmulld ; CHECK: paddd -; CHECK: movdqa +; CHECK-NOT: movdqa +; CHECK: ret define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y) { entry: diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll index 35f69c95683..94cbe5d1937 100644 --- a/test/CodeGen/X86/jump_sign.ll +++ b/test/CodeGen/X86/jump_sign.ll @@ -22,6 +22,7 @@ declare i32 @bar(...) declare i32 @baz(...) ; rdar://10633221 +; rdar://11355268 define i32 @g(i32 %a, i32 %b) nounwind { entry: ; CHECK: g: @@ -39,6 +40,8 @@ entry: ; CHECK: h: ; CHECK-NOT: cmp ; CHECK: cmov +; CHECK-NOT: movl +; CHECK: ret %cmp = icmp slt i32 %b, %a %sub = sub nsw i32 %a, %b %cond = select i1 %cmp, i32 %sub, i32 0 @@ -49,6 +52,8 @@ entry: ; CHECK: i: ; CHECK-NOT: cmp ; CHECK: cmov +; CHECK-NOT: movl +; CHECK: ret %cmp = icmp sgt i32 %a, %b %sub = sub nsw i32 %a, %b %cond = select i1 %cmp, i32 %sub, i32 0 @@ -59,6 +64,8 @@ entry: ; CHECK: j: ; CHECK-NOT: cmp ; CHECK: cmov +; CHECK-NOT: movl +; CHECK: ret %cmp = icmp ugt i32 %a, %b %sub = sub i32 %a, %b %cond = select i1 %cmp, i32 %sub, i32 0 @@ -69,6 +76,8 @@ entry: ; CHECK: k: ; CHECK-NOT: cmp ; CHECK: cmov +; CHECK-NOT: movl +; CHECK: ret %cmp = icmp ult i32 %b, %a %sub = sub i32 %a, %b %cond = select i1 %cmp, i32 %sub, i32 0