DAGCombine: fold (or (and X, M), (and X, N)) -> (and X, (or M, N))

author Tim Northover <tnorthover@apple.com>

Wed, 21 Jan 2015 15:43:28 +0000 (15:43 +0000)

committer Tim Northover <tnorthover@apple.com>

Wed, 21 Jan 2015 15:43:28 +0000 (15:43 +0000)
author Tim Northover <tnorthover@apple.com>
Wed, 21 Jan 2015 15:43:28 +0000 (15:43 +0000)
committer Tim Northover <tnorthover@apple.com>
Wed, 21 Jan 2015 15:43:28 +0000 (15:43 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 3bde991879396a03a32d18934e6f7b6e89c75242..849508891d3846a7f2e8df4eaff37a7b81934a4c 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3527,6 +3527,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
      }
    }
  
+  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+  if (N0.getOpcode() == ISD::AND &&
+      N1.getOpcode() == ISD::AND &&
+      N0.getOperand(0) == N1.getOperand(0) &&
+      // Don't increase # computations: one AND must have no other users.
+      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+                            N0.getOperand(1), N1.getOperand(1));
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X);
+  }
+
    // See if this is some rotate idiom.
    if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
      return SDValue(Rot, 0);
diff --git a/test/CodeGen/AArch64/or-combine.ll b/test/CodeGen/AArch64/or-combine.ll

new file mode 100644 (file)

index 0000000..c6c343a
--- /dev/null
+++ b/test/CodeGen/AArch64/or-combine.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define i32 @test_consts(i32 %in) {
+; CHECK-LABEL: test_consts:
+; CHECK-NOT: bfxil
+; CHECK-NOT: and
+; CHECK-NOT: orr
+; CHECK: ret
+
+  %lo = and i32 %in, 65535
+  %hi = and i32 %in, -65536
+  %res = or i32 %lo, %hi
+  ret i32 %res
+}
+
+define i32 @test_generic(i32 %in, i32 %mask1, i32 %mask2) {
+; CHECK-LABEL: test_generic:
+; CHECK: orr [[FULL_MASK:w[0-9]+]], w1, w2
+; CHECK: and w0, w0, [[FULL_MASK]]
+
+  %lo = and i32 %in, %mask1
+  %hi = and i32 %in, %mask2
+  %res = or i32 %lo, %hi
+  ret i32 %res
+}
+
+; In this case the transformation isn't profitable: both %lo and %hi have
+; other uses, so neither AND would be removed by the fold.
+define [3 x i32] @test_reuse(i32 %in, i32 %mask1, i32 %mask2) {
+; CHECK-LABEL: test_reuse:
+; CHECK-DAG: and w1, w0, w1
+; CHECK-DAG: and w2, w0, w2
+; CHECK-DAG: orr w0, w1, w2
+
+  %lo = and i32 %in, %mask1
+  %hi = and i32 %in, %mask2
+  %recombine = or i32 %lo, %hi
+
+  %res.tmp0 = insertvalue [3 x i32] undef, i32 %recombine, 0
+  %res.tmp1 = insertvalue [3 x i32] %res.tmp0, i32 %lo, 1
+  %res = insertvalue [3 x i32] %res.tmp1, i32 %hi, 2
+
+  ret [3 x i32] %res
+}
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll

index 4a94acaba0b541c914cafaad2cd0b442276969f7..45c5d44992a55e4491c2be6fc2b23254a7bae323 100644 (file)
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -2,8 +2,9 @@
  ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
  
  ; FUNC-LABEL: {{^}}anyext_load_i8:
-; EG: AND_INT
-; EG: 255
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
+; EG: VTX_READ_32 [[VAL]]
+
  define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
    %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
    %load = load i32 addrspace(1)* %cast, align 1
@@ -14,10 +15,9 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
  }
  
  ; FUNC-LABEL: {{^}}anyext_load_i16:
-; EG: AND_INT
-; EG: AND_INT
-; EG-DAG: 65535
-; EG-DAG: -65536
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
+; EG: VTX_READ_32 [[VAL]]
+
  define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
    %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
    %load = load i32 addrspace(1)* %cast, align 1
@@ -28,8 +28,8 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
  }
  
  ; FUNC-LABEL: {{^}}anyext_load_lds_i8:
-; EG: AND_INT
-; EG: 255
+; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
+; EG: LDS_WRITE * [[VAL]]
  define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
    %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
    %load = load i32 addrspace(3)* %cast, align 1
@@ -40,10 +40,8 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
  }
  
  ; FUNC-LABEL: {{^}}anyext_load_lds_i16:
-; EG: AND_INT
-; EG: AND_INT
-; EG-DAG: 65535
-; EG-DAG: -65536
+; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
+; EG: LDS_WRITE * [[VAL]]
  define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
    %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
    %load = load i32 addrspace(3)* %cast, align 1
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll

index 3bd1dc4cb97239533db989e3fe3bcd4298ae1e8c..ad734100f7ba0e0a86d615fcf162bf3ab6ffe355 100644 (file)
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -53,10 +53,9 @@ define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
  define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
  ; CHECK-LABEL: test6:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [0,65535,0,65535,0,65535,0,65535]
-; CHECK-NEXT:    andps %xmm0, %xmm1
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
+; CHECK-NEXT:    orps {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    andps %xmm1, %xmm0
  ; CHECK-NEXT:    retq
    %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
    ret <8 x i16> %1
author	Tim Northover <tnorthover@apple.com>
	Wed, 21 Jan 2015 15:43:28 +0000 (15:43 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Wed, 21 Jan 2015 15:43:28 +0000 (15:43 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/AArch64/or-combine.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/extload.ll		patch \| blob \| history
test/CodeGen/X86/vselect.ll		patch \| blob \| history