From: Richard Sandiford Date: Fri, 12 Jul 2013 09:17:10 +0000 (+0000) Subject: [SystemZ] Optimize sign-extends of vector setccs X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=9bcad42c3aadab118b6ed5f30f2ea0d87228fd3f;p=oota-llvm.git [SystemZ] Optimize sign-extends of vector setccs Normal (sext (setcc ...)) sequences are optimised into (select_cc ..., -1, 0) by DAGCombiner::visitSIGN_EXTEND. However, this is deliberately not done for vectors, and after vector type legalization we have (sext_inreg (setcc ...)) instead. I wondered about trying to extend DAGCombiner to handle this case too, but it seemed to be a loss on some other targets I tried, even those for which SETCC isn't "legal" and SELECT_CC is. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186149 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 5e13c7f3461..c47e04b3cfb 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1073,3 +1073,12 @@ def : Pat<(add GR64:$src1, imm64zx32n:$src2), (SLGFI GR64:$src1, imm64zx32n:$src2)>; def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), (SLGF GR64:$src1, bdxaddr20only:$addr)>; + +// Optimize sign-extended 1/0 selects to -1/0 selects. This is important +// for vector legalization. +def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm:$cc)), (i32 31)), (i32 31)), + (Select32 (LHI -1), (LHI 0), imm:$cc)>; +def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm:$cc)))), + (i32 63)), + (i32 63)), + (Select64 (LGHI -1), (LGHI 0), imm:$cc)>; diff --git a/test/CodeGen/SystemZ/branch-07.ll b/test/CodeGen/SystemZ/branch-07.ll index 00e8b8a95f7..d009af2f1f0 100644 --- a/test/CodeGen/SystemZ/branch-07.ll +++ b/test/CodeGen/SystemZ/branch-07.ll @@ -4,6 +4,7 @@ declare i64 @foo() +; Test EQ. 
define void @f1(i64 %target) { ; CHECK: f1: ; CHECK: .cfi_def_cfa_offset @@ -18,6 +19,7 @@ exit: ret void } +; Test NE. define void @f2(i64 %target) { ; CHECK: f2: ; CHECK: .cfi_def_cfa_offset @@ -32,6 +34,7 @@ exit: ret void } +; Test SLE. define void @f3(i64 %target) { ; CHECK: f3: ; CHECK: .cfi_def_cfa_offset @@ -46,6 +49,7 @@ exit: ret void } +; Test SLT. define void @f4(i64 %target) { ; CHECK: f4: ; CHECK: .cfi_def_cfa_offset @@ -60,6 +64,7 @@ exit: ret void } +; Test SGT. define void @f5(i64 %target) { ; CHECK: f5: ; CHECK: .cfi_def_cfa_offset @@ -74,6 +79,7 @@ exit: ret void } +; Test SGE. define void @f6(i64 %target) { ; CHECK: f6: ; CHECK: .cfi_def_cfa_offset @@ -87,3 +93,67 @@ loop: exit: ret void } + +; Test a vector of 0/-1 results for i32 EQ. +define i64 @f7(i64 %a, i64 %b) { +; CHECK: f7: +; CHECK: lhi [[REG:%r[0-5]]], -1 +; CHECK: crje {{%r[0-5]}} +; CHECK: lhi [[REG]], 0 +; CHECK-NOT: sra +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp eq <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i32> + %ret = bitcast <2 x i32> %ext to i64 + ret i64 %ret +} + +; Test a vector of 0/-1 results for i32 NE. +define i64 @f8(i64 %a, i64 %b) { +; CHECK: f8: +; CHECK: lhi [[REG:%r[0-5]]], -1 +; CHECK: crjlh {{%r[0-5]}} +; CHECK: lhi [[REG]], 0 +; CHECK-NOT: sra +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp ne <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i32> + %ret = bitcast <2 x i32> %ext to i64 + ret i64 %ret +} + +; Test a vector of i64 0/-1 results for an i32 EQ comparison. 
+define void @f9(i64 %a, i64 %b, <2 x i64> *%dest) { +; CHECK: f9: +; CHECK: lghi [[REG:%r[0-5]]], -1 +; CHECK: crje {{%r[0-5]}} +; CHECK: lghi [[REG]], 0 +; CHECK-NOT: sra +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp eq <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, <2 x i64> *%dest + ret void +} + +; Test a vector of i64 0/-1 results for an i32 NE comparison. +define void @f10(i64 %a, i64 %b, <2 x i64> *%dest) { +; CHECK: f10: +; CHECK: lghi [[REG:%r[0-5]]], -1 +; CHECK: crjlh {{%r[0-5]}} +; CHECK: lghi [[REG]], 0 +; CHECK-NOT: sra +; CHECK: br %r14 + %avec = bitcast i64 %a to <2 x i32> + %bvec = bitcast i64 %b to <2 x i32> + %cmp = icmp ne <2 x i32> %avec, %bvec + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, <2 x i64> *%dest + ret void +}