DAGCombiner: make sure or/shl/srl really has zero high bits before forming bswap

author Tim Northover <tnorthover@apple.com>

Tue, 27 Aug 2013 13:46:45 +0000 (13:46 +0000)

committer Tim Northover <tnorthover@apple.com>

Tue, 27 Aug 2013 13:46:45 +0000 (13:46 +0000)
author Tim Northover <tnorthover@apple.com>
Tue, 27 Aug 2013 13:46:45 +0000 (13:46 +0000)
committer Tim Northover <tnorthover@apple.com>
Tue, 27 Aug 2013 13:46:45 +0000 (13:46 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index bd127b5a64691ada56ccfa7450f65f7eb152fa2c..47c332a5be49df5149185a71069a627cc6081acc 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2861,6 +2861,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
      }
    }
  
+  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+                                       N0.getOperand(1), false);
+    if (BSwap.getNode())
+      return BSwap;
+  }
+
    return SDValue();
  }
  
@@ -2945,13 +2953,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
    if (N00 != N10)
      return SDValue();
  
-  // Make sure everything beyond the low halfword is zero since the SRL 16
-  // will clear the top bits.
+  // Make sure everything beyond the low halfword gets set to zero since the SRL
+  // 16 will clear the top bits.
    unsigned OpSizeInBits = VT.getSizeInBits();
-  if (DemandHighBits && OpSizeInBits > 16 &&
-      (!LookPassAnd0 || !LookPassAnd1) &&
-      !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
-    return SDValue();
+  if (DemandHighBits && OpSizeInBits > 16) {
+    // If the left-shift isn't masked out then the only way this is a bswap is
+    // if all bits beyond the low 8 are 0. In that case the entire pattern
+    // reduces to a left shift anyway: leave it for other parts of the combiner.
+    if (!LookPassAnd0)
+      return SDValue();
+
+    // However, if the right shift isn't masked out then it might be because
+    // it's not needed. See if we can spot that too.
+    if (!LookPassAnd1 &&
+        !DAG.MaskedValueIsZero(
+            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+      return SDValue();
+  }
  
    SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
    if (OpSizeInBits > 16)
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll

index 9e46592a4ac289758e68a90aef7800f7a3fc2c56..e6a456c39ddd0efa63d4bd47fd3a7b458d7a8df6 100644 (file)
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -1,6 +1,7 @@
  ; bswap should be constant folded when it is passed a constant argument
  
  ; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64
  
  declare i16 @llvm.bswap.i16(i16)
  
@@ -11,6 +12,9 @@ declare i64 @llvm.bswap.i64(i64)
  define i16 @W(i16 %A) {
  ; CHECK-LABEL: W:
  ; CHECK: rolw $8, %ax
+
+; CHECK64-LABEL: W:
+; CHECK64: rolw $8, %
          %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
          ret i16 %Z
  }
@@ -18,6 +22,9 @@ define i16 @W(i16 %A) {
  define i32 @X(i32 %A) {
  ; CHECK-LABEL: X:
  ; CHECK: bswapl %eax
+
+; CHECK64-LABEL: X:
+; CHECK64: bswapl %
          %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
          ret i32 %Z
  }
@@ -26,6 +33,9 @@ define i64 @Y(i64 %A) {
  ; CHECK-LABEL: Y:
  ; CHECK: bswapl %eax
  ; CHECK: bswapl %edx
+
+; CHECK64-LABEL: Y:
+; CHECK64: bswapq %
          %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
          ret i64 %Z
  }
@@ -33,9 +43,13 @@ define i64 @Y(i64 %A) {
  ; rdar://9164521
  define i32 @test1(i32 %a) nounwind readnone {
  entry:
-; CHECK: test1
-; CHECK: bswapl %eax
-; CHECK: shrl $16, %eax
+; CHECK-LABEL: test1:
+; CHECK: bswapl [[REG:%.*]]
+; CHECK: shrl $16, [[REG]]
+
+; CHECK64-LABEL: test1:
+; CHECK64: bswapl [[REG:%.*]]
+; CHECK64: shrl $16, [[REG]]
    %and = lshr i32 %a, 8
    %shr3 = and i32 %and, 255
    %and2 = shl i32 %a, 8
@@ -46,9 +60,13 @@ entry:
  
  define i32 @test2(i32 %a) nounwind readnone {
  entry:
-; CHECK: test2
-; CHECK: bswapl %eax
-; CHECK: sarl $16, %eax
+; CHECK-LABEL: test2:
+; CHECK: bswapl [[REG:%.*]]
+; CHECK: sarl $16, [[REG]]
+
+; CHECK64-LABEL: test2:
+; CHECK64: bswapl [[REG:%.*]]
+; CHECK64: sarl $16, [[REG]]
    %and = lshr i32 %a, 8
    %shr4 = and i32 %and, 255
    %and2 = shl i32 %a, 8
@@ -57,3 +75,80 @@ entry:
    %conv3 = ashr exact i32 %sext, 16
    ret i32 %conv3
  }
+
+@var8 = global i8 0
+@var16 = global i16 0
+
+; The "shl" below can move bits into the high parts of the value, so the
+; operation is not a "bswap, shr" pair.
+
+; rdar://problem/14814049
+define i64 @not_bswap() {
+; CHECK-LABEL: not_bswap:
+; CHECK-NOT: bswapl
+; CHECK: ret
+
+; CHECK64-LABEL: not_bswap:
+; CHECK64-NOT: bswapq
+; CHECK64: ret
+  %init = load i16* @var16
+  %big = zext i16 %init to i64
+
+  %hishifted = lshr i64 %big, 8
+  %loshifted = shl i64 %big, 8
+
+  %notswapped = or i64 %hishifted, %loshifted
+
+  ret i64 %notswapped
+}
+
+; This time, the lshr (and subsequent or) is completely useless. While it's
+; technically correct to convert this into a "bswap, shr", it's suboptimal. A
+; simple shl works better.
+
+define i64 @not_useful_bswap() {
+; CHECK-LABEL: not_useful_bswap:
+; CHECK-NOT: bswapl
+; CHECK: ret
+
+; CHECK64-LABEL: not_useful_bswap:
+; CHECK64-NOT: bswapq
+; CHECK64: ret
+
+  %init = load i8* @var8
+  %big = zext i8 %init to i64
+
+  %hishifted = lshr i64 %big, 8
+  %loshifted = shl i64 %big, 8
+
+  %notswapped = or i64 %hishifted, %loshifted
+
+  ret i64 %notswapped
+}
+
+; Finally, it *is* OK to just mask off the shl if we know that the value is zero
+; beyond 16 bits anyway. This is a legitimate bswap.
+
+define i64 @finally_useful_bswap() {
+; CHECK-LABEL: finally_useful_bswap:
+; CHECK: bswapl [[REG:%.*]]
+; CHECK: shrl $16, [[REG]]
+; CHECK: ret
+
+; CHECK64-LABEL: finally_useful_bswap:
+; CHECK64: bswapq [[REG:%.*]]
+; CHECK64: shrq $48, [[REG]]
+; CHECK64: ret
+
+  %init = load i16* @var16
+  %big = zext i16 %init to i64
+
+  %hishifted = lshr i64 %big, 8
+  %lomasked = and i64 %big, 255
+  %loshifted = shl i64 %lomasked, 8
+
+  %swapped = or i64 %hishifted, %loshifted
+
+  ret i64 %swapped
+}
+
author	Tim Northover <tnorthover@apple.com>
	Tue, 27 Aug 2013 13:46:45 +0000 (13:46 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Tue, 27 Aug 2013 13:46:45 +0000 (13:46 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/X86/bswap.ll		patch \| blob \| history