Teach DAG combine to handle vector logical operations with vectors of all 1s or all 0s.

author Craig Topper <craig.topper@gmail.com>

Sat, 8 Dec 2012 22:49:19 +0000 (22:49 +0000)

committer Craig Topper <craig.topper@gmail.com>

Sat, 8 Dec 2012 22:49:19 +0000 (22:49 +0000)
author Craig Topper <craig.topper@gmail.com>
Sat, 8 Dec 2012 22:49:19 +0000 (22:49 +0000)
committer Craig Topper <craig.topper@gmail.com>
Sat, 8 Dec 2012 22:49:19 +0000 (22:49 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 6cd07c2c729b06686a4e7cea70b75769c9d9d4d1..4a17dc0d2c3253671ae4ce2a219e4100b673d9d0 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2427,6 +2427,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (and x, 0) -> 0, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+
+    // fold (and x, -1) -> x, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N0;
    }
  
    // fold (and x, undef) -> 0
@@ -3025,6 +3037,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (or x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+
+    // fold (or x, -1) -> -1, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N1;
    }
  
    // fold (or x, undef) -> -1
@@ -3334,6 +3358,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (xor x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
    }
  
    // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll

index 0a949eb29b898fa2f9d08d3123c527b5880f93c0..f66248bc5a7571b75379e9ce3d4d27dbd8651250 100644 (file)
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -11,12 +11,12 @@ target triple = "x86_64-apple-macosx10.6.6"
  define void @select_func() {
  entry:
    %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-  %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
    %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
    %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
-  %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
-  %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
-  %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+  %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %and.i.i.i = and <8 x i16> %neg.i.i.i.i, <i16 45, i16 15, i16 95, i16 8, i16 25, i16 65, i16 8, i16 25>
+  %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
+  %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
    %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
    store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
    ret void
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll

index 9cf4607cf5b2393e2b1f03b9e51267121b6c60a7..6cd27618de1df0e73befffb619cf822544a870ff 100644 (file)
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -43,21 +43,21 @@ forbody:            ; preds = %forcond
         %mul171.i = fmul <4 x float> %add167.i, %sub140.i               ; <<4 x float>> [#uses=1]
         %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >              ; <<4 x float>> [#uses=1]
         %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>              ; <<4 x i32>> [#uses=1]
-       %andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer             ; <<4 x i32>> [#uses=1]
+       %andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer             ; <<4 x i32>> [#uses=1]
         %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>           ; <<4 x float>> [#uses=1]
         %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer             ; <<4 x float>> [#uses=1]
         %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>              ; <<4 x i32>> [#uses=1]
-       %andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer             ; <<4 x i32>> [#uses=1]
+       %andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer             ; <<4 x i32>> [#uses=1]
         %xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >            ; <<4 x i32>> [#uses=1]
-       %orps203.i = or <4 x i32> %andnps192.i, %xorps.i                ; <<4 x i32>> [#uses=1]
+       %orps203.i = add <4 x i32> %andnps192.i, %xorps.i               ; <<4 x i32>> [#uses=1]
         %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>             ; <<4 x float>> [#uses=1]
         %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer            ; <<4 x float>> [#uses=2]
         %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer               ; <<4 x float>> [#uses=1]
         %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind          ; <<4 x float>> [#uses=1]
         %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>          ; <<4 x i32>> [#uses=2]
-       %andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13               ; <<4 x i32>> [#uses=1]
+       %andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13               ; <<4 x i32>> [#uses=1]
         %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >              ; <<4 x i32>> [#uses=1]
-       %andnps.i17 = and <4 x i32> zeroinitializer, %not.i16           ; <<4 x i32>> [#uses=1]
+       %andnps.i17 = add <4 x i32> zeroinitializer, %not.i16           ; <<4 x i32>> [#uses=1]
         %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14                ; <<4 x i32>> [#uses=1]
         %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>             ; <<4 x float>> [#uses=1]
         %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind               ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll

index 2f4317bf294c252f278745026bc209e5c74682b0..67ce1be135254ff1d93c56e9e94147eda6800707 100644 (file)
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -28,33 +28,31 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
  
  ; Without forcing instructions, fall back to the preferred PS domain.
  ; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
  ; CHECK: andnps
+; CHECK: andps
  ; CHECK: orps
  ; CHECK: ret
  
-define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
-  %A = load <4 x i64>* %v1
-  %B = load <4 x i64>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
-  store <4 x i64 > %vsel, <4 x i64>* %v1
+define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
+  %A = load <2 x i64>* %v1
+  %B = load <2 x i64>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
+  store <2 x i64 > %vsel, <2 x i64>* %v1
    ret void
  }
  
  ; Without forcing instructions, fall back to the preferred PS domain.
  ; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
  ; CHECK: andnps
+; CHECK: andps
  ; CHECK: orps
  ; CHECK: ret
  
-define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
-  %A = load <4 x double>* %v1
-  %B = load <4 x double>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B
-  store <4 x double > %vsel, <4 x double>* %v1
+define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
+  %A = load <2 x double>* %v1
+  %B = load <2 x double>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
+  store <2 x double > %vsel, <2 x double>* %v1
    ret void
  }
author	Craig Topper <craig.topper@gmail.com>
	Sat, 8 Dec 2012 22:49:19 +0000 (22:49 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Sat, 8 Dec 2012 22:49:19 +0000 (22:49 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/X86/2011-11-30-or.ll		patch | blob | history
test/CodeGen/X86/fold-pcmpeqd-2.ll		patch | blob | history
test/CodeGen/X86/sse2-blend.ll		patch | blob | history