From: Nadav Rotem Date: Wed, 11 Jan 2012 14:07:51 +0000 (+0000) Subject: Fix a bug in the lowering of BUILD_VECTOR for AVX. SCALAR_TO_VECTOR does not zero... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=394a1f53b90698486ac7c75724a6bda349cd0353;p=oota-llvm.git Fix a bug in the lowering of BUILD_VECTOR for AVX. SCALAR_TO_VECTOR does not zero untouched elements. Use INSERT_VECTOR_ELT instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147948 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 07451a5e749..4409389e7c4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5161,11 +5161,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { if (VT.getSizeInBits() == 256) { - EVT VT128 = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems / 2); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Item); SDValue ZeroVec = getZeroVector(VT, true, DAG, dl); - return Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32), - DAG, dl); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, + Item, DAG.getIntPtrConstant(0)); } assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll index 4e7cbc6c14e..ff6be369dc5 100644 --- a/test/CodeGen/X86/2012-1-10-buildvector.ll +++ b/test/CodeGen/X86/2012-1-10-buildvector.ll @@ -1,13 +1,26 @@ -; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 +; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32" target triple = "i686-pc-win32" +;CHECK: bad_cast define void @bad_cast() { entry: %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32 - unreachable +;CHECK: ret + ret void +} + + +;CHECK: bad_insert +define void @bad_insert(i32 %t) { +entry: +;CHECK: vpinsrd + %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0 + store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32 +;CHECK: ret + ret void } diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index 07a63efd71f..c9fc66a8a79 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -25,20 +25,26 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>) ;; ;; The two tests below check that we must fold load + scalar_to_vector -;; + ins_subvec+ zext into only a single vmovss or vmovsd +;; + ins_subvec+ zext into only a single vmovss or vmovsd or vinsertps from memory -; CHECK: vmovss (% +; CHECK: mov00 define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind { %val = load float* %ptr +; CHECK: vinsertps +; CHECK: vinsertf128 %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0 ret <8 x float> %i0 +; CHECK: ret } -; CHECK: vmovsd (% +; CHECK: mov01 define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind { %val = load double* %ptr +; CHECK: vmovlpd +; CHECK: vinsertf128 %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0 ret <4 x double> %i0 +; CHECK: ret } ; CHECK: vmovaps %ymm