Splats for v8i32/v8f32 can be handled by VPERMILPSY. This was causing

author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Thu, 11 Aug 2011 02:49:44 +0000 (02:49 +0000)

committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Thu, 11 Aug 2011 02:49:44 +0000 (02:49 +0000)
author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Thu, 11 Aug 2011 02:49:44 +0000 (02:49 +0000)
committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Thu, 11 Aug 2011 02:49:44 +0000 (02:49 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index ae573284d7dc6a0af11c7e329d9d8ba819346c1e..be3ecd7e531b2c8fef6da040acc7c69aba085751 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4066,11 +4066,11 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
    return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
  }
  
-// PromoteSplatv8v16 - All i16 and i8 vector types can't be used directly by
+// PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by
  // a generic shuffle instruction because the target has no such instructions.
  // Generate shuffles which repeat i16 and i8 several times until they can be
  // represented by v4f32 and then be manipulated by target suported shuffles.
-static SDValue PromoteSplatv8v16(SDValue V, SelectionDAG &DAG, int &EltNo) {
+static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
    EVT VT = V.getValueType();
    int NumElems = VT.getVectorNumElements();
    DebugLoc dl = V.getDebugLoc();
@@ -4162,8 +4162,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
    }
  
    // Make this 128-bit vector duplicate i8 and i16 elements
-  if (NumElems > 4)
-    V1 = PromoteSplatv8v16(V1, DAG, EltNo);
+  EVT EltVT = SrcVT.getVectorElementType();
+  if (NumElems > 4 && (EltVT == MVT::i8 || EltVT == MVT::i16))
+    V1 = PromoteSplati8i16(V1, DAG, EltNo);
  
    // Recreate the 256-bit vector and place the same 128-bit vector
    // into the low and high part. This is necessary because we want
@@ -6027,8 +6028,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
        return PromoteVectorToScalarSplat(SVOp, DAG);
  
      // Handle splats by matching through known shuffle masks
-    if ((VT.is128BitVector() && NumElem <= 4) ||
-        (VT.is256BitVector() && NumElem <= 8))
+    if (VT.is128BitVector() && NumElem <= 4)
        return SDValue();
  
      // All i16 and i8 vector types can't be used directly by a generic shuffle
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll

index 243ab9ba318f8a389787add8f46b7e2d512c07ab..ca35b7f265c591c1b7c35fddfb3ae71efd42b0da 100644 (file)
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -51,8 +51,9 @@ entry:
  ; To:
  ;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
  ; CHECK: vmovaps
-; CHECK-NEXT: vpextrd
-define void @funcE() nounwind {
+; CHECK-NEXT: vinsertf128  $1
+; CHECK-NEXT: vpermilps $-1
+define <8 x float> @funcE() nounwind {
  allocas:
    %udx495 = alloca [18 x [18 x float]], align 32
    br label %for_test505.preheader
@@ -74,7 +75,7 @@ load.i1247:                                       ; preds = %for_exit499
  
  __load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
    %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
-  ret void
+  ret <8 x float> %load_broadcast12281250
  }
  
  ; CHECK: vpshufd  $0
@@ -87,3 +88,20 @@ define <8 x float> @funcF(i32* %ptr) nounwind {
    ret <8 x float> %tmp
  }
  
+; CHECK: vinsertf128  $1
+; CHECK-NEXT: vpermilps  $0
+define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vextractf128  $1
+; CHECK-NEXT: vinsertf128  $1
+; CHECK-NEXT: vpermilps  $85
+define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x float> %shuffle
+}
+
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Thu, 11 Aug 2011 02:49:44 +0000 (02:49 +0000)
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Thu, 11 Aug 2011 02:49:44 +0000 (02:49 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/avx-splat.ll		patch | blob | history