From e7082383732df492d8fabd17c31300db08de7627 Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Wed, 26 Feb 2014 11:27:28 +0000 Subject: [PATCH] ARMv8 IfConversion must skip narrow instructions that a) define CPSR and b) wouldn't affect CPSR in an IT block git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202257 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 14 +++ lib/Target/ARM/ARMFeatures.h | 37 ++++---- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 4 + test/CodeGen/Thumb2/v8_IT_6.ll | 100 ++++++++++++++++++++++ 4 files changed, 139 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/Thumb2/v8_IT_6.ll diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index a244c5fb8a3..320f54a327e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -535,6 +535,20 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { return true; } +template<> bool IsCPSRDead(MachineInstr* MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + if (MO.getReg() != ARM::CPSR) + continue; + if (!MO.isDead()) + return false; + } + // all definitions of CPSR are dead + return true; +} + /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. 
LLVM_ATTRIBUTE_NOINLINE static unsigned getNumJTEntries(const std::vector &JT, diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h index 2580f7bf7f4..e2a228c18ab 100644 --- a/lib/Target/ARM/ARMFeatures.h +++ b/lib/Target/ARM/ARMFeatures.h @@ -16,6 +16,9 @@ #include "ARM.h" +template<typename InstrType> // could be MachineInstr or MCInst +bool IsCPSRDead(InstrType *Instr); + namespace llvm { template<typename InstrType> // could be MachineInstr or MCInst @@ -26,25 +29,12 @@ inline bool isV8EligibleForIT(InstrType *Instr) { case ARM::tADC: case ARM::tADDi3: case ARM::tADDi8: - case ARM::tADDrSPi: case ARM::tADDrr: case ARM::tAND: case ARM::tASRri: case ARM::tASRrr: case ARM::tBIC: - case ARM::tCMNz: - case ARM::tCMPi8: - case ARM::tCMPr: case ARM::tEOR: - case ARM::tLDRBi: - case ARM::tLDRBr: - case ARM::tLDRHi: - case ARM::tLDRHr: - case ARM::tLDRSB: - case ARM::tLDRSH: - case ARM::tLDRi: - case ARM::tLDRr: - case ARM::tLDRspi: case ARM::tLSLri: case ARM::tLSLrr: case ARM::tLSRri: @@ -56,6 +46,24 @@ inline bool isV8EligibleForIT(InstrType *Instr) { case ARM::tROR: case ARM::tRSB: case ARM::tSBC: + case ARM::tSUBi3: + case ARM::tSUBi8: + case ARM::tSUBrr: + // Outside of an IT block, these set CPSR. 
+ return IsCPSRDead(Instr); + case ARM::tADDrSPi: + case ARM::tCMNz: + case ARM::tCMPi8: + case ARM::tCMPr: + case ARM::tLDRBi: + case ARM::tLDRBr: + case ARM::tLDRHi: + case ARM::tLDRHr: + case ARM::tLDRSB: + case ARM::tLDRSH: + case ARM::tLDRi: + case ARM::tLDRr: + case ARM::tLDRspi: case ARM::tSTRBi: case ARM::tSTRBr: case ARM::tSTRHi: @@ -63,9 +71,6 @@ inline bool isV8EligibleForIT(InstrType *Instr) { case ARM::tSTRi: case ARM::tSTRr: case ARM::tSTRspi: - case ARM::tSUBi3: - case ARM::tSUBi8: - case ARM::tSUBrr: case ARM::tTST: return true; // there are some "conditionally deprecated" opcodes diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 9bb1cf2b4c2..7dc4d18a072 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7850,6 +7850,10 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } +template<> inline bool IsCPSRDead(MCInst* Instr) { + return true; // In an assembly source, no need to second-guess +} + static const char *getSubtargetFeatureName(unsigned Val); bool ARMAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, diff --git a/test/CodeGen/Thumb2/v8_IT_6.ll b/test/CodeGen/Thumb2/v8_IT_6.ll new file mode 100644 index 00000000000..b12c4797d24 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_6.ll @@ -0,0 +1,100 @@ +; RUN: llc < %s -mtriple=thumbv8 -show-mc-encoding | FileCheck %s +; CHECK-NOT: orrsne r0, r1 @ encoding: [0x08,0x43] +; Narrow tORR cannot be predicated and set CPSR at the same time! 
+ +declare void @f(i32) + +define void @initCloneLookups() #1 { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc24, %entry + %cmp108 = phi i1 [ true, %entry ], [ %cmp, %for.inc24 ] + %y.0105 = phi i32 [ 1, %entry ], [ %inc25, %for.inc24 ] + %notlhs = icmp slt i32 %y.0105, 6 + %notlhs69 = icmp sgt i32 %y.0105, 4 + %sub = add nsw i32 %y.0105, -1 + %cmp1.i = icmp sgt i32 %sub, 5 + %cmp1.i54 = icmp sgt i32 %y.0105, 5 + br i1 %cmp108, label %if.then.us, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge + +for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader + br i1 %notlhs, label %for.inc.us101, label %for.inc + +if.then.us: ; preds = %for.cond1.preheader, %for.inc.us + %x.071.us = phi i32 [ %inc.us.pre-phi, %for.inc.us ], [ 1, %for.cond1.preheader ] + %notrhs.us = icmp sge i32 %x.071.us, %y.0105 + %or.cond44.not.us = or i1 %notrhs.us, %notlhs + %notrhs70.us = icmp sle i32 %x.071.us, %y.0105 + %tobool.us = or i1 %notrhs70.us, %notlhs69 + %or.cond66.us = and i1 %or.cond44.not.us, %tobool.us + br i1 %or.cond66.us, label %getHexxagonIndex.exit52.us, label %if.then.us.for.inc.us_crit_edge + +if.then.us.for.inc.us_crit_edge: ; preds = %if.then.us + %inc.us.pre = add nsw i32 %x.071.us, 1 + br label %for.inc.us + +getHexxagonIndex.exit52.us: ; preds = %if.then.us + %cmp3.i.us = icmp slt i32 %x.071.us, 5 + %or.cond.i.us = and i1 %cmp1.i, %cmp3.i.us + %..i.us = sext i1 %or.cond.i.us to i32 + tail call void @f(i32 %..i.us) #3 + %add.us = add nsw i32 %x.071.us, 1 + %cmp3.i55.us = icmp slt i32 %add.us, 5 + %or.cond.i56.us = and i1 %cmp1.i54, %cmp3.i55.us + %..i57.us = sext i1 %or.cond.i56.us to i32 + tail call void @f(i32 %..i57.us) #3 + %or.cond.i48.us = and i1 %notlhs69, %cmp3.i55.us + %..i49.us = sext i1 %or.cond.i48.us to i32 + tail call void @f(i32 %..i49.us) #3 + br label %for.inc.us + +for.inc.us: ; preds = %if.then.us.for.inc.us_crit_edge, %getHexxagonIndex.exit52.us + %inc.us.pre-phi = phi 
i32 [ %inc.us.pre, %if.then.us.for.inc.us_crit_edge ], [ %add.us, %getHexxagonIndex.exit52.us ] + %exitcond109 = icmp eq i32 %inc.us.pre-phi, 10 + br i1 %exitcond109, label %for.inc24, label %if.then.us + +for.inc.us101: ; preds = %for.cond1.preheader.for.cond1.preheader.split_crit_edge, %for.inc.us101 + %x.071.us74 = phi i32 [ %add.us89, %for.inc.us101 ], [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ] + %cmp3.i.us84 = icmp slt i32 %x.071.us74, 5 + %or.cond.i.us85 = and i1 %cmp1.i, %cmp3.i.us84 + %..i.us86 = sext i1 %or.cond.i.us85 to i32 + tail call void @f(i32 %..i.us86) #3 + %add.us89 = add nsw i32 %x.071.us74, 1 + %cmp3.i55.us93 = icmp slt i32 %add.us89, 5 + %or.cond.i56.us94 = and i1 %cmp1.i54, %cmp3.i55.us93 + %..i57.us95 = sext i1 %or.cond.i56.us94 to i32 + tail call void @f(i32 %..i57.us95) #3 + %or.cond.i48.us97 = and i1 %notlhs69, %cmp3.i55.us93 + %..i49.us98 = sext i1 %or.cond.i48.us97 to i32 + tail call void @f(i32 %..i49.us98) #3 + %exitcond110 = icmp eq i32 %add.us89, 10 + br i1 %exitcond110, label %for.inc24, label %for.inc.us101 + +for.inc: ; preds = %for.cond1.preheader.for.cond1.preheader.split_crit_edge, %for.inc + %x.071 = phi i32 [ %add, %for.inc ], [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ] + %cmp3.i = icmp slt i32 %x.071, 5 + %or.cond.i = and i1 %cmp1.i, %cmp3.i + %..i = sext i1 %or.cond.i to i32 + tail call void @f(i32 %..i) #3 + %add = add nsw i32 %x.071, 1 + %cmp3.i55 = icmp slt i32 %add, 5 + %or.cond.i56 = and i1 %cmp1.i54, %cmp3.i55 + %..i57 = sext i1 %or.cond.i56 to i32 + tail call void @f(i32 %..i57) #3 + %or.cond.i48 = and i1 %notlhs69, %cmp3.i55 + %..i49 = sext i1 %or.cond.i48 to i32 + tail call void @f(i32 %..i49) #3 + %exitcond = icmp eq i32 %add, 10 + br i1 %exitcond, label %for.inc24, label %for.inc + +for.inc24: ; preds = %for.inc, %for.inc.us101, %for.inc.us + %inc25 = add nsw i32 %y.0105, 1 + %cmp = icmp slt i32 %inc25, 10 + %exitcond111 = icmp eq i32 %inc25, 10 + br i1 %exitcond111, label 
%for.end26, label %for.cond1.preheader + +for.end26: ; preds = %for.inc24 + ret void +} + -- 2.34.1