SDValue SetCC;
const ConstantSDNode* C = 0;
bool needOppositeCond = (CC == X86::COND_E);
+ bool checkAgainstTrue = false; // Is it a comparison against 1?
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
else // Quit if all operands are not constants.
return SDValue();
- if (C->getZExtValue() == 1)
+ if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
- else if (C->getZExtValue() != 0)
+ checkAgainstTrue = true;
+ } else if (C->getZExtValue() != 0)
// Quit if the constant is neither 0 or 1.
return SDValue();
- // Skip 'zext' or 'trunc' node.
- if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
- SetCC.getOpcode() == ISD::TRUNCATE)
- SetCC = SetCC.getOperand(0);
+ bool truncatedToBoolWithAnd = false;
+ // Skip (zext $x), (trunc $x), or (and $x, 1) node.
+ while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+ SetCC.getOpcode() == ISD::TRUNCATE ||
+ SetCC.getOpcode() == ISD::AND) {
+ if (SetCC.getOpcode() == ISD::AND) {
+ int OpIdx = -1;
+ ConstantSDNode *CS;
+ if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
+ CS->getZExtValue() == 1)
+ OpIdx = 1;
+ if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
+ CS->getZExtValue() == 1)
+ OpIdx = 0;
+ if (OpIdx == -1)
+ break;
+ SetCC = SetCC.getOperand(OpIdx);
+ truncatedToBoolWithAnd = true;
+ } else
+ SetCC = SetCC.getOperand(0);
+ }
switch (SetCC.getOpcode()) {
+ case X86ISD::SETCC_CARRY:
+ // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
+ // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
+ // i.e. it's a comparison against true but the result of SETCC_CARRY is not
+ // truncated to i1 using 'and'.
+ if (checkAgainstTrue && !truncatedToBoolWithAnd)
+ break;
+ assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
+ "Invalid use of SETCC_CARRY!");
+ // FALL THROUGH
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
; X64: test8:
; X64: addq
-; X64-NEXT: sbbq
-; X64-NEXT: testb
+; X64-NEXT: setb
+; X64: ret
define i32 @test9(i32 %x, i32 %y) nounwind readnone {
%cmp = icmp eq i32 %x, 10
--- /dev/null
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+
+; Branches on whether the i32 result of @llvm.x86.avx.ptestz.256(%a, %a) is
+; non-zero. The expected assembly is a vptest with jne on the very next
+; line, i.e. nothing emitted between the test and the conditional jump.
+define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test1:
+; CHECK: vptest
+; CHECK-NEXT: jne
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
+ ; Condition under test: %res != 0.
+ %one = icmp ne i32 %res, 0
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on the i32 result of @llvm.x86.avx.ptestz.256(%a, %a) after it has
+; been truncated to i1 (no explicit compare). The expected assembly is a
+; vptest with jne on the very next line.
+define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test3:
+; CHECK: vptest
+; CHECK-NEXT: jne
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
+ ; Condition under test: %res narrowed to i1 by trunc.
+ %one = trunc i32 %res to i1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on whether the i32 result of @llvm.x86.avx.ptestc.256(%a, %a) is
+; non-zero. The expected assembly is a vptest with jae on the very next
+; line, i.e. nothing emitted between the test and the conditional jump.
+define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test4:
+; CHECK: vptest
+; CHECK-NEXT: jae
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind
+ ; Condition under test: %res != 0.
+ %one = icmp ne i32 %res, 0
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on the i32 result of @llvm.x86.avx.ptestc.256(%a, %a) after it has
+; been truncated to i1 (no explicit compare). The expected assembly is a
+; vptest with jae on the very next line.
+define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test6:
+; CHECK: vptest
+; CHECK-NEXT: jae
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind
+ ; Condition under test: %res narrowed to i1 by trunc.
+ %one = trunc i32 %res to i1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on whether the i32 result of @llvm.x86.avx.ptestz.256(%a, %a) is
+; equal to 1 (comparison against true rather than against zero). The expected
+; assembly is a vptest with jne on the very next line.
+define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: vptest
+; CHECK-NEXT: jne
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
+ ; Condition under test: %res == 1.
+ %one = icmp eq i32 %res, 1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on whether the i32 result of @llvm.x86.avx.ptestz.256(%a, %a) is
+; different from 1 (negated comparison against true). The expected assembly
+; is a vptest with je on the very next line.
+define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: vptest
+; CHECK-NEXT: je
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
+ ; Condition under test: %res != 1.
+ %one = icmp ne i32 %res, 1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+
ret float %.0
}
+declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind
+declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind
+
+; Branches on whether the i32 result of @llvm.x86.sse41.ptestz(%a, %a) is
+; non-zero. The expected assembly is a ptest with jne on the very next
+; line, i.e. nothing emitted between the test and the conditional jump.
+define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test5:
+; CHECK: ptest
+; CHECK-NEXT: jne
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
+ ; Condition under test: %res != 0.
+ %one = icmp ne i32 %res, 0
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on the i32 result of @llvm.x86.sse41.ptestz(%a, %a) after it has
+; been truncated to i1 (no explicit compare). The expected assembly is a
+; ptest with jne on the very next line.
+define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: ptest
+; CHECK-NEXT: jne
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
+ ; Condition under test: %res narrowed to i1 by trunc.
+ %one = trunc i32 %res to i1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on whether the i32 result of @llvm.x86.sse41.ptestc(%a, %a) is
+; non-zero. The expected assembly is a ptest with jae on the very next
+; line, i.e. nothing emitted between the test and the conditional jump.
+define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: ptest
+; CHECK-NEXT: jae
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind
+ ; Condition under test: %res != 0.
+ %one = icmp ne i32 %res, 0
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on the i32 result of @llvm.x86.sse41.ptestc(%a, %a) after it has
+; been truncated to i1 (no explicit compare). The expected assembly is a
+; ptest with jae on the very next line.
+define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test10:
+; CHECK: ptest
+; CHECK-NEXT: jae
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind
+ ; Condition under test: %res narrowed to i1 by trunc.
+ %one = trunc i32 %res to i1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on whether the i32 result of @llvm.x86.sse41.ptestz(%a, %a) is
+; equal to 1 (comparison against true rather than against zero). The expected
+; assembly is a ptest with jne on the very next line.
+define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test11:
+; CHECK: ptest
+; CHECK-NEXT: jne
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
+ ; Condition under test: %res == 1.
+ %one = icmp eq i32 %res, 1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+
+; Branches on whether the i32 result of @llvm.x86.sse41.ptestz(%a, %a) is
+; different from 1 (negated comparison against true). The expected assembly
+; is a ptest with je on the very next line.
+define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test12:
+; CHECK: ptest
+; CHECK-NEXT: je
+; CHECK: ret
+
+ %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
+ ; Condition under test: %res != 1.
+ %one = icmp ne i32 %res, 1
+ br i1 %one, label %bb1, label %bb2
+
+; Reached when %one is true: adds a constant vector to %b.
+bb1:
+ %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Reached when %one is false: divides %b by the same constant vector.
+bb2:
+ %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+ br label %return
+
+; Join point: picks %c or %d depending on the path taken and returns it.
+return:
+ %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+ ret <4 x float> %e
+}
+