AVX-512: Optimized BUILD_VECTOR pattern; fixed encoding of VEXTRACTPS instruction.

author Elena Demikhovsky <elena.demikhovsky@intel.com>

Tue, 11 Feb 2014 07:25:59 +0000 (07:25 +0000)

committer Elena Demikhovsky <elena.demikhovsky@intel.com>

Tue, 11 Feb 2014 07:25:59 +0000 (07:25 +0000)
author Elena Demikhovsky <elena.demikhovsky@intel.com>
Tue, 11 Feb 2014 07:25:59 +0000 (07:25 +0000)
committer Elena Demikhovsky <elena.demikhovsky@intel.com>
Tue, 11 Feb 2014 07:25:59 +0000 (07:25 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 2e9d5744073f4ca42fcdf0e78f0b98c9850a9e34..32e41c36802baa49a8677bd12e916ceb45125923 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6070,8 +6070,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  
    // For AVX-length vectors, build the individual 128-bit pieces and use
    // shuffles to put them in place.
-  if (VT.is256BitVector()) {
-    SmallVector<SDValue, 32> V;
+  if (VT.is256BitVector() || VT.is512BitVector()) {
+    SmallVector<SDValue, 64> V;
      for (unsigned i = 0; i != NumElems; ++i)
        V.push_back(Op.getOperand(i));
  
@@ -6083,7 +6083,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
                                  NumElems/2);
  
      // Recreate the wider vector with the lower and upper part.
-    return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
+    if (VT.is256BitVector())
+      return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
+    return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
    }
  
    // Let legalizer expand 2-wide build_vectors.
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td

index edcc32751db343ba00edc2dfeb680af2e6341d1a..7990d6abcd22a440719188b362c5a3129d78c7cc 100644 (file)
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -361,7 +361,7 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
        (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
        "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
-                          addr:$dst)]>, EVEX;
+                          addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
  
  //===---------------------------------------------------------------------===//
  // AVX-512 BROADCAST
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll

index bc4560b3f3fcaf3a93e718eac488b5ad5313a9fd..b5a2aa80ce1667e5672c1ca6fdc8ac0394a3748b 100644 (file)
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -15,4 +15,16 @@ define <16 x i32> @test1(i32* %x) {
  define <16 x i32> @test2(<16 x i32> %x) {
     %res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
     ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test3
+; CHECK: vinsertf128
+; CHECK: vinsertf64x4
+; CHECK: ret
+define <16 x float> @test3(<4 x float> %a) {
+  %b = extractelement <4 x float> %a, i32 2
+  %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
+  %b1 = extractelement <4 x float> %a, i32 0
+  %c1 = insertelement <16 x float> %c, float %b1, i32 6
+  ret <16 x float>%c1
  }
 \ No newline at end of file
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s

index b2f1d11e081540dc06d78684df03a9ae7c876a7f..97c64a4dd2e5bb2becdf2eece98debdad319ab66 100644 (file)
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -46,4 +46,8 @@ vmovdqu64 %zmm0, %zmm1 {%k3}
  
  // CHECK: vmovd
  // CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0xb4,0x24,0xac,0xff,0xff,0xff]
-vmovd %xmm22, -84(%rsp)
-\ No newline at end of file
+vmovd %xmm22, -84(%rsp)
+
+// CHECK: vextractps
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0x61,0x1f,0x02]
+vextractps      $2, %xmm20, 124(%rcx)
author	Elena Demikhovsky <elena.demikhovsky@intel.com>
	Tue, 11 Feb 2014 07:25:59 +0000 (07:25 +0000)
committer	Elena Demikhovsky <elena.demikhovsky@intel.com>
	Tue, 11 Feb 2014 07:25:59 +0000 (07:25 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Target/X86/X86InstrAVX512.td		patch | blob | history
test/CodeGen/X86/avx512-build-vector.ll		patch | blob | history
test/MC/X86/avx512-encodings.s		patch | blob | history