[SKX] Enable lowering of integer CMP operations.

author Robert Khasanov <rob.khasanov@gmail.com>

Fri, 29 Aug 2014 08:46:04 +0000 (08:46 +0000)

committer Robert Khasanov <rob.khasanov@gmail.com>

Fri, 29 Aug 2014 08:46:04 +0000 (08:46 +0000)
author Robert Khasanov <rob.khasanov@gmail.com>
Fri, 29 Aug 2014 08:46:04 +0000 (08:46 +0000)
committer Robert Khasanov <rob.khasanov@gmail.com>
Fri, 29 Aug 2014 08:46:04 +0000 (08:46 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index b3a02726ebfd8d4f1a8ae6763eb07e71561a09e4..7c0bfeef947fca3dfce0a99202bccd3be29e5b3c 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1526,8 +1526,39 @@ void X86TargetLowering::resetOperationActions() {
    }// has  AVX-512
  
    if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
+    addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
+    addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
+
      addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
      addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
+
+    setOperationAction(ISD::LOAD,               MVT::v32i16, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v64i8, Legal);
+    setOperationAction(ISD::SETCC,              MVT::v32i1, Custom);
+    setOperationAction(ISD::SETCC,              MVT::v64i1, Custom);
+
+    for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
+      const MVT VT = (MVT::SimpleValueType)i;
+
+      const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+      // Only 512-bit vectors are configured here; skip every other type.
+      if (!VT.is512BitVector())
+        continue;
+
+      if ( EltSize < 32) {
+        setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
+        setOperationAction(ISD::VSELECT,             VT, Legal);
+      }
+    }
+  }
+
+  if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
+    addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
+    addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
+
+    setOperationAction(ISD::SETCC,              MVT::v4i1, Custom);
+    setOperationAction(ISD::SETCC,              MVT::v2i1, Custom);
    }
  
    // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
@@ -1665,10 +1696,40 @@ EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
    if (!VT.isVector())
      return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
  
-  if (Subtarget->hasAVX512())
-    switch(VT.getVectorNumElements()) {
-    case  8: return MVT::v8i1;
-    case 16: return MVT::v16i1;
+  const unsigned NumElts = VT.getVectorNumElements();
+  const EVT EltVT = VT.getVectorElementType();
+  if (VT.is512BitVector()) {
+    if (Subtarget->hasAVX512())
+      if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+          EltVT == MVT::f32 || EltVT == MVT::f64)
+        switch(NumElts) {
+        case  8: return MVT::v8i1;
+        case 16: return MVT::v16i1;
+      }
+    if (Subtarget->hasBWI())
+      if (EltVT == MVT::i8 || EltVT == MVT::i16)
+        switch(NumElts) {
+        case 32: return MVT::v32i1;
+        case 64: return MVT::v64i1;
+      }
+  }
+
+  if (VT.is256BitVector() || VT.is128BitVector()) {
+    if (Subtarget->hasVLX())
+      if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+          EltVT == MVT::f32 || EltVT == MVT::f64)
+        switch(NumElts) {
+        case 2: return MVT::v2i1;
+        case 4: return MVT::v4i1;
+        case 8: return MVT::v8i1;
+      }
+    if (Subtarget->hasBWI() && Subtarget->hasVLX())
+      if (EltVT == MVT::i8 || EltVT == MVT::i16)
+        switch(NumElts) {
+        case  8: return MVT::v8i1;
+        case 16: return MVT::v16i1;
+        case 32: return MVT::v32i1;
+      }
    }
  
    return VT.changeVectorElementTypeToInteger();
@@ -10435,6 +10496,8 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
      break;
    case MVT::v8i16:
    case MVT::v16i16:
+    if (Subtarget->hasBWI() && Subtarget->hasVLX())
+      break;
      return SDValue();
    }
  
@@ -12829,7 +12892,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
    MVT VT = Op.getSimpleValueType();
    SDLoc dl(Op);
  
-  assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
+  assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 &&
           Op.getValueType().getScalarType() == MVT::i1 &&
           "Cannot set masked compare for this operation");
  
@@ -12943,11 +13006,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
    EVT OpVT = Op1.getValueType();
    if (Subtarget->hasAVX512()) {
      if (Op1.getValueType().is512BitVector() ||
+        (Subtarget->hasBWI() && Subtarget->hasVLX()) ||
          (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
        return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
  
      // In AVX-512 architecture setcc returns mask with i1 elements,
-    // But there is no compare instruction for i8 and i16 elements.
+    // but KNL has no compare instruction for i8 and i16 elements.
      // We are not talking about 512-bit operands in this case, these
      // types are illegal.
      if (MaskResult &&
@@ -20218,13 +20282,15 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
    if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
        CondVT.getVectorElementType() == MVT::i1) {
      // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
-    // lowering on AVX-512. In this case we convert it to
+    // lowering on KNL. In this case we convert it to
      // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
-    // The same situation for all 128 and 256-bit vectors of i8 and i16
+    // The same applies to all 128- and 256-bit vectors of i8 and i16.
+    // With both BWI and VLX (SKX) these selects have a proper lowering.
      EVT OpVT = LHS.getValueType();
      if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
          (OpVT.getVectorElementType() == MVT::i8 ||
-         OpVT.getVectorElementType() == MVT::i16)) {
+         OpVT.getVectorElementType() == MVT::i16) &&
+        !(Subtarget->hasBWI() && Subtarget->hasVLX())) {
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
        DCI.AddToWorklist(Cond.getNode());
        return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll

index 950e43fea676630c776defad53e83f1bb38fb188..d9acc1d325f546d59f42ac93607f881bf00316ee 100644 (file)
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -162,3 +162,151 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
    ret <8 x i64>%res
  }
  
+; CHECK-LABEL: @test16
+; CHECK: vpcmpled
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y) nounwind {
+  %mask = icmp sge <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: @test17
+; CHECK: vpcmpgtd (%rdi)
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask = icmp sgt <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: @test18
+; CHECK: vpcmpled (%rdi)
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask = icmp sle <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: @test19
+; CHECK: vpcmpleud (%rdi)
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask = icmp ule <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: @test20
+; CHECK: vpcmpeqd %zmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
+  %mask1 = icmp eq <16 x i32> %x1, %y1
+  %mask0 = icmp eq <16 x i32> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: @test21
+; CHECK: vpcmpleq %zmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
+  %mask1 = icmp sge <8 x i64> %x1, %y1
+  %mask0 = icmp sle <8 x i64> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
+
+; CHECK-LABEL: @test22
+; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
+  %mask1 = icmp sgt <8 x i64> %x1, %y1
+  %y = load <8 x i64>* %y.ptr, align 4
+  %mask0 = icmp sgt <8 x i64> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
+
+; CHECK-LABEL: @test23
+; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
+  %mask1 = icmp sge <16 x i32> %x1, %y1
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask0 = icmp ule <16 x i32> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: test24
+; CHECK: vpcmpeqq (%rdi){1to8}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
+  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
+  %mask = icmp eq <8 x i64> %x, %y
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
+
+; CHECK-LABEL: test25
+; CHECK: vpcmpled (%rdi){1to16}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
+  %mask = icmp sle <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: test26
+; CHECK: vpcmpgtd (%rdi){1to16}{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
+  %mask1 = icmp sge <16 x i32> %x1, %y1
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
+  %mask0 = icmp sgt <16 x i32> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+; CHECK-LABEL: test27
+; CHECK: vpcmpleq (%rdi){1to8}{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
+  %mask1 = icmp sge <8 x i64> %x1, %y1
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
+  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
+  %mask0 = icmp sle <8 x i64> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
diff --git a/test/CodeGen/X86/avx512bw-vec-cmp.ll b/test/CodeGen/X86/avx512bw-vec-cmp.ll

new file mode 100644 (file)

index 0000000..d2b1724
--- /dev/null
+++ b/test/CodeGen/X86/avx512bw-vec-cmp.ll
@@ -0,0 +1,135 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: vpcmpeqb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind {
+  %mask = icmp eq <64 x i8> %x, %y
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: test2
+; CHECK: vpcmpgtb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y) nounwind {
+  %mask = icmp sgt <64 x i8> %x, %y
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: @test3
+; CHECK: vpcmplew {{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind {
+  %mask = icmp sge <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: test4
+; CHECK: vpcmpnleub {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y) nounwind {
+  %mask = icmp ugt <64 x i8> %x, %y
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: test5
+; CHECK: vpcmpeqw  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwind {
+  %y = load <32 x i16>* %yp, align 4
+  %mask = icmp eq <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test6
+; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask = icmp sgt <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test7
+; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask = icmp sle <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test8
+; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask = icmp ule <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test9
+; CHECK: vpcmpeqw %zmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16> %y1) nounwind {
+  %mask1 = icmp eq <32 x i16> %x1, %y1
+  %mask0 = icmp eq <32 x i16> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %y
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test10
+; CHECK: vpcmpleb %zmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y1) nounwind {
+  %mask1 = icmp sge <64 x i8> %x1, %y1
+  %mask0 = icmp sle <64 x i8> %x, %y
+  %mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: @test11
+; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i8> %y1) nounwind {
+  %mask1 = icmp sgt <64 x i8> %x1, %y1
+  %y = load <64 x i8>* %y.ptr, align 4
+  %mask0 = icmp sgt <64 x i8> %x, %y
+  %mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: @test12
+; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 x i16> %y1) nounwind {
+  %mask1 = icmp sge <32 x i16> %x1, %y1
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask0 = icmp ule <32 x i16> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
diff --git a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll

new file mode 100644 (file)

index 0000000..2d13a16
--- /dev/null
+++ b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
@@ -0,0 +1,269 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+; CHECK-LABEL: test256_1
+; CHECK: vpcmpeqb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind {
+  %mask = icmp eq <32 x i8> %x, %y
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: test256_2
+; CHECK: vpcmpgtb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
+  %mask = icmp sgt <32 x i8> %x, %y
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: @test256_3
+; CHECK: vpcmplew {{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounwind {
+  %mask = icmp sge <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: test256_4
+; CHECK: vpcmpnleub {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
+  %mask = icmp ugt <32 x i8> %x, %y
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: test256_5
+; CHECK: vpcmpeqw  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nounwind {
+  %y = load <16 x i16>* %yp, align 4
+  %mask = icmp eq <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_6
+; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask = icmp sgt <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_7
+; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask = icmp sle <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_8
+; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask = icmp ule <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_9
+; CHECK: vpcmpeqw %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x i16> %y1) nounwind {
+  %mask1 = icmp eq <16 x i16> %x1, %y1
+  %mask0 = icmp eq <16 x i16> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %y
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_10
+; CHECK: vpcmpleb %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8> %y1) nounwind {
+  %mask1 = icmp sge <32 x i8> %x1, %y1
+  %mask0 = icmp sle <32 x i8> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: @test256_11
+; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 x i8> %y1) nounwind {
+  %mask1 = icmp sgt <32 x i8> %x1, %y1
+  %y = load <32 x i8>* %y.ptr, align 4
+  %mask0 = icmp sgt <32 x i8> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: @test256_12
+; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, <16 x i16> %y1) nounwind {
+  %mask1 = icmp sge <16 x i16> %x1, %y1
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask0 = icmp ule <16 x i16> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: test128_1
+; CHECK: vpcmpeqb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <16 x i8> @test128_1(<16 x i8> %x, <16 x i8> %y) nounwind {
+  %mask = icmp eq <16 x i8> %x, %y
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: test128_2
+; CHECK: vpcmpgtb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
+  %mask = icmp sgt <16 x i8> %x, %y
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: @test128_3
+; CHECK: vpcmplew {{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind {
+  %mask = icmp sge <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: test128_4
+; CHECK: vpcmpnleub {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
+  %mask = icmp ugt <16 x i8> %x, %y
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: test128_5
+; CHECK: vpcmpeqw  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwind {
+  %y = load <8 x i16>* %yp, align 4
+  %mask = icmp eq <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_6
+; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask = icmp sgt <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_7
+; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask = icmp sle <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_8
+; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask = icmp ule <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_9
+; CHECK: vpcmpeqw %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> %y1) nounwind {
+  %mask1 = icmp eq <8 x i16> %x1, %y1
+  %mask0 = icmp eq <8 x i16> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_10
+; CHECK: vpcmpleb %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8> %y1) nounwind {
+  %mask1 = icmp sge <16 x i8> %x1, %y1
+  %mask0 = icmp sle <16 x i8> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: @test128_11
+; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 x i8> %y1) nounwind {
+  %mask1 = icmp sgt <16 x i8> %x1, %y1
+  %y = load <16 x i8>* %y.ptr, align 4
+  %mask0 = icmp sgt <16 x i8> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: @test128_12
+; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 x i16> %y1) nounwind {
+  %mask1 = icmp sge <8 x i16> %x1, %y1
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask0 = icmp ule <8 x i16> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
diff --git a/test/CodeGen/X86/avx512vl-vec-cmp.ll b/test/CodeGen/X86/avx512vl-vec-cmp.ll

new file mode 100644 (file)

index 0000000..9c64c03
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -0,0 +1,381 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+; CHECK-LABEL: test256_1
+; CHECK: vpcmpeqq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind { ; <4 x i64> equality compare whose mask drives a blend
+  %mask = icmp eq <4 x i64> %x, %y ; per-lane %x == %y, yields a <4 x i1> mask
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y ; %x where the mask is set, otherwise %y
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test256_2
+; CHECK: vpcmpgtq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y) nounwind { ; <4 x i64> signed greater-than compare whose mask drives a blend
+  %mask = icmp sgt <4 x i64> %x, %y ; per-lane signed %x > %y
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y ; %x where the mask is set, otherwise %y
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: @test256_3
+; CHECK: vpcmpled {{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind { ; <8 x i32> signed greater-or-equal compare; blend picks between %x1 and %y
+  %mask = icmp sge <8 x i32> %x, %y ; per-lane signed %x >= %y. NOTE(review): the pattern above names an "le" compare, presumably with operands swapped - confirm
+  %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y ; %x1 where the mask is set, otherwise %y
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_4
+; CHECK: vpcmpnleuq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y) nounwind { ; <4 x i64> unsigned greater-than compare whose mask drives a blend
+  %mask = icmp ugt <4 x i64> %x, %y ; per-lane unsigned %x > %y; the pattern above expects the "nleu" form for it
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y ; %x where the mask is set, otherwise %y
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test256_5
+; CHECK: vpcmpeqd  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind { ; <8 x i32> equality compare against a vector loaded from %yp
+  %y = load <8 x i32>* %yp, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp eq <8 x i32> %x, %y ; per-lane %x == loaded %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_6
+; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind { ; <8 x i32> signed greater-than compare against a vector loaded from %y.ptr
+  %y = load <8 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp sgt <8 x i32> %x, %y ; per-lane signed %x > loaded %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_7
+; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind { ; <8 x i32> signed less-or-equal compare against a vector loaded from %y.ptr
+  %y = load <8 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp sle <8 x i32> %x, %y ; per-lane signed %x <= loaded %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_8
+; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind { ; <8 x i32> unsigned less-or-equal compare against a vector loaded from %y.ptr
+  %y = load <8 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp ule <8 x i32> %x, %y ; per-lane unsigned %x <= loaded %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_9
+; CHECK: vpcmpeqd %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> %y1) nounwind { ; two i32 equality masks combined, then used to blend %x and %y
+  %mask1 = icmp eq <8 x i32> %x1, %y1 ; per-lane %x1 == %y1
+  %mask0 = icmp eq <8 x i32> %x, %y ; per-lane %x == %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ; %x where %mask is set, otherwise %y
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_10
+; CHECK: vpcmpleq %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) nounwind { ; signed i64 compares (sge and sle) combined into one blend mask
+  %mask1 = icmp sge <4 x i64> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %mask0 = icmp sle <4 x i64> %x, %y ; per-lane signed %x <= %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1 ; %x where %mask is set, otherwise %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: @test256_11
+; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind { ; signed i64 greater-than against a vector loaded from %y.ptr, combined with a second mask
+  %mask1 = icmp sgt <4 x i64> %x1, %y1 ; per-lane signed %x1 > %y1
+  %y = load <4 x i64>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask0 = icmp sgt <4 x i64> %x, %y ; per-lane signed %x > loaded %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1 ; %x where %mask is set, otherwise %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: @test256_12
+; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind { ; unsigned i32 less-or-equal against a loaded vector, combined with a signed sge mask
+  %mask1 = icmp sge <8 x i32> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %y = load <8 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask0 = icmp ule <8 x i32> %x, %y ; per-lane unsigned %x <= loaded %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ; %x where %mask is set, otherwise %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_13
+; CHECK: vpcmpeqq  (%rdi){1to4}, %ymm
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind { ; <4 x i64> equality compare against a scalar loaded from %yb.ptr and splatted
+  %yb = load i64* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 ; place the scalar in lane 0
+  %y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into every lane; the pattern above expects a {1to4} broadcast operand
+  %mask = icmp eq <4 x i64> %x, %y ; per-lane %x == splat
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1 ; %x where the mask is set, otherwise %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test256_14
+; CHECK: vpcmpled  (%rdi){1to8}, %ymm
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind { ; <8 x i32> signed less-or-equal compare against a scalar loaded from %yb.ptr and splatted
+  %yb = load i32* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 ; place the scalar in lane 0
+  %y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into every lane; the pattern above expects a {1to8} broadcast operand
+  %mask = icmp sle <8 x i32> %x, %y ; per-lane signed %x <= splat
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_15
+; CHECK: vpcmpgtd  (%rdi){1to8}, %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind { ; signed i32 greater-than against a splatted scalar, combined with a second mask
+  %mask1 = icmp sge <8 x i32> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %yb = load i32* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 ; place the scalar in lane 0
+  %y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into every lane; the pattern above expects a {1to8} broadcast operand
+  %mask0 = icmp sgt <8 x i32> %x, %y ; per-lane signed %x > splat
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ; %x where %mask is set, otherwise %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_16
+; CHECK: vpcmpgtq  (%rdi){1to4}, %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind { ; signed i64 greater-than against a splatted scalar, combined with a second mask
+  %mask1 = icmp sge <4 x i64> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %yb = load i64* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 ; place the scalar in lane 0
+  %y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into every lane; the pattern above expects a {1to4} broadcast operand
+  %mask0 = icmp sgt <4 x i64> %x, %y ; per-lane signed %x > splat
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1 ; %x where %mask is set, otherwise %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test128_1
+; CHECK: vpcmpeqq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind { ; <2 x i64> equality compare whose mask drives a blend
+  %mask = icmp eq <2 x i64> %x, %y ; per-lane %x == %y, yields a <2 x i1> mask
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y ; %x where the mask is set, otherwise %y
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: test128_2
+; CHECK: vpcmpgtq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y) nounwind { ; <2 x i64> signed greater-than compare whose mask drives a blend
+  %mask = icmp sgt <2 x i64> %x, %y ; per-lane signed %x > %y
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y ; %x where the mask is set, otherwise %y
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: @test128_3
+; CHECK: vpcmpled {{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind { ; <4 x i32> signed greater-or-equal compare; blend picks between %x1 and %y
+  %mask = icmp sge <4 x i32> %x, %y ; per-lane signed %x >= %y. NOTE(review): the pattern above names an "le" compare, presumably with operands swapped - confirm
+  %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y ; %x1 where the mask is set, otherwise %y
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_4
+; CHECK: vpcmpnleuq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y) nounwind { ; <2 x i64> unsigned greater-than compare whose mask drives a blend
+  %mask = icmp ugt <2 x i64> %x, %y ; per-lane unsigned %x > %y; the pattern above expects the "nleu" form for it
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y ; %x where the mask is set, otherwise %y
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: test128_5
+; CHECK: vpcmpeqd  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwind { ; <4 x i32> equality compare against a vector loaded from %yp
+  %y = load <4 x i32>* %yp, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp eq <4 x i32> %x, %y ; per-lane %x == loaded %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_6
+; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind { ; <4 x i32> signed greater-than compare against a vector loaded from %y.ptr
+  %y = load <4 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp sgt <4 x i32> %x, %y ; per-lane signed %x > loaded %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_7
+; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind { ; <4 x i32> signed less-or-equal compare against a vector loaded from %y.ptr
+  %y = load <4 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp sle <4 x i32> %x, %y ; per-lane signed %x <= loaded %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_8
+; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind { ; <4 x i32> unsigned less-or-equal compare against a vector loaded from %y.ptr
+  %y = load <4 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask = icmp ule <4 x i32> %x, %y ; per-lane unsigned %x <= loaded %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_9
+; CHECK: vpcmpeqd %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> %y1) nounwind { ; two i32 equality masks combined, then used to blend %x and %y
+  %mask1 = icmp eq <4 x i32> %x1, %y1 ; per-lane %x1 == %y1
+  %mask0 = icmp eq <4 x i32> %x, %y ; per-lane %x == %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y ; %x where %mask is set, otherwise %y
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_10
+; CHECK: vpcmpleq %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) nounwind { ; signed i64 compares (sge and sle) combined into one blend mask
+  %mask1 = icmp sge <2 x i64> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %mask0 = icmp sle <2 x i64> %x, %y ; per-lane signed %x <= %y
+  %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1 ; %x where %mask is set, otherwise %x1
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: @test128_11
+; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind { ; signed i64 greater-than against a vector loaded from %y.ptr, combined with a second mask
+  %mask1 = icmp sgt <2 x i64> %x1, %y1 ; per-lane signed %x1 > %y1
+  %y = load <2 x i64>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask0 = icmp sgt <2 x i64> %x, %y ; per-lane signed %x > loaded %y
+  %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1 ; %x where %mask is set, otherwise %x1
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: @test128_12
+; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind { ; unsigned i32 less-or-equal against a loaded vector, combined with a signed sge mask
+  %mask1 = icmp sge <4 x i32> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %y = load <4 x i32>* %y.ptr, align 4 ; only 4-byte alignment is declared; the pattern above expects a (%rdi) memory operand on the compare
+  %mask0 = icmp ule <4 x i32> %x, %y ; per-lane unsigned %x <= loaded %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ; %x where %mask is set, otherwise %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_13
+; CHECK: vpcmpeqq  (%rdi){1to2}, %xmm
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind { ; <2 x i64> equality compare against a scalar loaded from %yb.ptr and placed in both lanes
+  %yb = load i64* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0 ; place the scalar in lane 0
+  %y = insertelement <2 x i64> %y.0, i64 %yb, i32 1 ; and in lane 1, so both lanes hold %yb; the pattern above expects a {1to2} broadcast operand
+  %mask = icmp eq <2 x i64> %x, %y ; per-lane %x == splat
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1 ; %x where the mask is set, otherwise %x1
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: test128_14
+; CHECK: vpcmpled  (%rdi){1to4}, %xmm
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind { ; <4 x i32> signed less-or-equal compare against a scalar loaded from %yb.ptr and splatted
+  %yb = load i32* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0 ; place the scalar in lane 0
+  %y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into every lane; the pattern above expects a {1to4} broadcast operand
+  %mask = icmp sle <4 x i32> %x, %y ; per-lane signed %x <= splat
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ; %x where the mask is set, otherwise %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_15
+; CHECK: vpcmpgtd  (%rdi){1to4}, %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind { ; signed i32 greater-than against a splatted scalar, combined with a second mask
+  %mask1 = icmp sge <4 x i32> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %yb = load i32* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0 ; place the scalar in lane 0
+  %y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask copies lane 0 into every lane; the pattern above expects a {1to4} broadcast operand
+  %mask0 = icmp sgt <4 x i32> %x, %y ; per-lane signed %x > splat
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ; %x where %mask is set, otherwise %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_16
+; CHECK: vpcmpgtq  (%rdi){1to2}, %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind { ; signed i64 greater-than against a scalar placed in both lanes, combined with a second mask
+  %mask1 = icmp sge <2 x i64> %x1, %y1 ; per-lane signed %x1 >= %y1
+  %yb = load i64* %yb.ptr, align 4 ; scalar element to splat
+  %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0 ; place the scalar in lane 0
+  %y = insertelement <2 x i64> %y.0, i64 %yb, i32 1 ; and in lane 1, so both lanes hold %yb; the pattern above expects a {1to2} broadcast operand
+  %mask0 = icmp sgt <2 x i64> %x, %y ; per-lane signed %x > splat
+  %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer ; lane-wise AND: %mask1 where %mask0 is set, else 0
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1 ; %x where %mask is set, otherwise %x1
+  ret <2 x i64> %max
+}
author	Robert Khasanov <rob.khasanov@gmail.com>
	Fri, 29 Aug 2014 08:46:04 +0000 (08:46 +0000)
committer	Robert Khasanov <rob.khasanov@gmail.com>
	Fri, 29 Aug 2014 08:46:04 +0000 (08:46 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/avx512-vec-cmp.ll		patch | blob | history
test/CodeGen/X86/avx512bw-vec-cmp.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/X86/avx512bwvl-vec-cmp.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/X86/avx512vl-vec-cmp.ll	[new file with mode: 0644]	patch | blob