From 150cef218fc3a25046c0f35761806e5a056cd430 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Thu, 21 Aug 2014 01:59:30 +0000 Subject: [PATCH] Revert r216066, "Optimize ZERO_EXTEND and SIGN_EXTEND in both SelectionDAG Builder and type". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216147 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 23 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 28 +- test/CodeGen/AArch64/atomic-ops.ll | 24 +- test/CodeGen/AArch64/rm_redundant_cmp.ll | 254 ------------------ 4 files changed, 15 insertions(+), 314 deletions(-) delete mode 100644 test/CodeGen/AArch64/rm_redundant_cmp.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 23d88e55dc4..0b7a3cbaa9f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -861,28 +861,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, switch (CCCode) { default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: - case ISD::SETNE: { - SDValue OpL, OpR; - - OpL = GetPromotedInteger(NewLHS); - OpR = GetPromotedInteger(NewRHS); - - // We would prefer to promote the comparison operand with sign extension, - // if we find the operand is actually to truncate an AssertSext. With this - // optimization, we can avoid inserting real truncate instruction, which - // is redudant eventually. - if (OpL->getOpcode() == ISD::AssertSext && - cast(OpL->getOperand(1))->getVT() == NewLHS.getValueType() && - OpR->getOpcode() == ISD::AssertSext && - cast(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) { - NewLHS = OpL; - NewRHS = OpR; - } else { - NewLHS = ZExtPromotedInteger(NewLHS); - NewRHS = ZExtPromotedInteger(NewRHS); - } - break; - } + case ISD::SETNE: case ISD::SETUGE: case ISD::SETUGT: case ISD::SETULE: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1ca496bc3cf..e264cd9ae4a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -757,28 +757,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } -static ISD::NodeType getPreferredExtendForValue(const Value *V) { - // For the users of the source value being used for compare instruction, if - // the number of signed predicate is greater than unsigned predicate, we - // prefer to use SIGN_EXTEND. - // - // With this optimization, we would be able to reduce some redundant sign or - // zero extension instruction, and eventually more machine CSE opportunities - // can be exposed. - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - unsigned int NumOfSigned = 0, NumOfUnsigned = 0; - for (const User *U : V->users()) { - if (const CmpInst *CI = dyn_cast(U)) { - NumOfSigned += CI->isSigned(); - NumOfUnsigned += CI->isUnsigned(); - } - } - if (NumOfSigned > NumOfUnsigned) - ExtendKind = ISD::SIGN_EXTEND; - - return ExtendKind; -} - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. @@ -787,7 +765,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - ISD::NodeType ExtendKind = getPreferredExtendForValue(V); // Get the list of the values's legal parts. unsigned NumRegs = Regs.size(); @@ -796,9 +773,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; - - if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) - ExtendKind = ISD::ZERO_EXTEND; + ISD::NodeType ExtendKind = + TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT, V, ExtendKind); diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index eb78da4ec8a..26301b92f9f 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -493,7 +493,6 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8: %old = atomicrmw min i8* @var8, i8 %offset acquire ; CHECK-NOT: dmb -; CHECK: sxtb w[[TMP:[0-9]+]], w0 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 @@ -503,13 +502,14 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]] -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], le +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } @@ -517,7 +517,6 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16: %old = atomicrmw min i16* @var16, i16 %offset release ; CHECK-NOT: dmb -; CHECK: sxth w[[TMP:[0-9]+]], w0 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 @@ -527,14 +526,15 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]] -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], le +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } @@ -590,7 +590,6 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8: %old = atomicrmw max i8* @var8, i8 %offset seq_cst ; CHECK-NOT: dmb -; CHECK: sxtb w[[TMP:[0-9]+]], w0 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 @@ -600,14 +599,15 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; function there. ; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]] -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], gt +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i8 %old } @@ -615,7 +615,6 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16: %old = atomicrmw max i16* @var16, i16 %offset acquire ; CHECK-NOT: dmb -; CHECK: sxth w[[TMP:[0-9]+]], w0 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 @@ -625,14 +624,15 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; function there. ; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] -; CHECK-NEXT: cmp w[[OLD_EXT]], w[[TMP]] -; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w[[TMP]], gt +; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 ; CHECK-NOT: dmb +; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i16 %old } diff --git a/test/CodeGen/AArch64/rm_redundant_cmp.ll b/test/CodeGen/AArch64/rm_redundant_cmp.ll deleted file mode 100644 index 36dc118ed1a..00000000000 --- a/test/CodeGen/AArch64/rm_redundant_cmp.ll +++ /dev/null @@ -1,254 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s - -; The following cases are for i16 - -%struct.s_signed_i16 = type { i16, i16, i16 } -%struct.s_unsigned_i16 = type { i16, i16, i16 } - -@cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer, align 2 -@cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer, align 2 - -define void @test_i16_2cmp_signed_1() { -; CHECK-LABEL: test_i16_2cmp_signed_1 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.gt -; CHECK-NOT: cmp -; CHECK: b.ne -entry: - %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2 - %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2 - %cmp = icmp sgt i16 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp eq i16 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -define void @test_i16_2cmp_signed_2() { -; CHECK-LABEL: test_i16_2cmp_signed_2 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.le -; CHECK-NOT: cmp -; CHECK: b.ge -entry: - %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2 - %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2 - %cmp = icmp sgt i16 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp slt i16 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -define void @test_i16_2cmp_unsigned_1() { -; CHECK-LABEL: test_i16_2cmp_unsigned_1 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.hi -; CHECK-NOT: cmp -; CHECK: b.ne -entry: - %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2 - %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2 - %cmp = icmp ugt i16 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp eq i16 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -define void @test_i16_2cmp_unsigned_2() { -; CHECK-LABEL: test_i16_2cmp_unsigned_2 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.ls -; CHECK-NOT: cmp -; CHECK: b.hs -entry: - %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2 - %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2 - %cmp = icmp ugt i16 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp ult i16 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -; The following cases are for i8 - -%struct.s_signed_i8 = type { i8, i8, i8 } -%struct.s_unsigned_i8 = type { i8, i8, i8 } - -@cost_s = common global %struct.s_signed_i8 zeroinitializer, align 2 -@cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align 2 - - -define void @test_i8_2cmp_signed_1() { -; CHECK-LABEL: test_i8_2cmp_signed_1 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.gt -; CHECK-NOT: cmp -; CHECK: b.ne -entry: - %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2 - %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2 - %cmp = icmp sgt i8 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp eq i8 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -define void @test_i8_2cmp_signed_2() { -; CHECK-LABEL: test_i8_2cmp_signed_2 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.le -; CHECK-NOT: cmp -; CHECK: b.ge -entry: - %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2 - %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2 - %cmp = icmp sgt i8 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp slt i8 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -define void @test_i8_2cmp_unsigned_1() { -; CHECK-LABEL: test_i8_2cmp_unsigned_1 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.hi -; CHECK-NOT: cmp -; CHECK: b.ne -entry: - %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2 - %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2 - %cmp = icmp ugt i8 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp eq i8 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -define void @test_i8_2cmp_unsigned_2() { -; CHECK-LABEL: test_i8_2cmp_unsigned_2 -; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.ls -; CHECK-NOT: cmp -; CHECK: b.hs -entry: - %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2 - %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2 - %cmp = icmp ugt i8 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2 - br label %if.end8 - -if.else: ; preds = %entry - %cmp5 = icmp ult i8 %0, %1 - br i1 %cmp5, label %if.then7, label %if.end8 - -if.then7: ; preds = %if.else - store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2 - br label %if.end8 - -if.end8: ; preds = %if.else, %if.then7, %if.then - ret void -} - -; Make sure the case below won't crash. - -; The optimization of ZERO_EXTEND and SIGN_EXTEND in type legalization stage can't assert -; the operand of a set_cc is always a TRUNCATE. - -define i1 @foo(float %inl, float %inr) { - %lval = fptosi float %inl to i8 - %rval = fptosi float %inr to i8 - %sum = icmp eq i8 %lval, %rval - ret i1 %sum -} -- 2.34.1