[X86][AVX] Add support for i64 broadcast loads on 32-bit targets

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index d32146b32cf63baa5ddb697fad1bd745cef06590..7b97e79de5db1a82acbe33b8ee626933e5fe3249 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8173,6 +8173,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
              DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
        return TruncBroadcast;
  
+  MVT BroadcastVT = VT;
+
    // Also check the simpler case, where we can directly reuse the scalar.
    if (V.getOpcode() == ISD::BUILD_VECTOR ||
        (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
@@ -8183,12 +8185,16 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
      if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
        return SDValue();
    } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+    // 32-bit targets need to load i64 as an f64 and then bitcast the result.
+    if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64)
+      BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
+
      // If we are broadcasting a load that is only used by the shuffle
      // then we can reduce the vector load to the broadcasted scalar load.
      LoadSDNode *Ld = cast<LoadSDNode>(V);
      SDValue BaseAddr = Ld->getOperand(1);
      EVT AddrVT = BaseAddr.getValueType();
-    EVT SVT = VT.getScalarType();
+    EVT SVT = BroadcastVT.getScalarType();
      unsigned Offset = BroadcastIdx * SVT.getStoreSize();
      SDValue NewAddr = DAG.getNode(
          ISD::ADD, DL, AddrVT, BaseAddr,
@@ -8202,7 +8208,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
      return SDValue();
    }
  
-  return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V);
+  V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V);
+  return DAG.getBitcast(VT, V);
  }
  
  // Check for whether we can use INSERTPS to perform the shuffle. We only use