[X86] Fix indentation of case statements in switch. NFC

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 28a815bdc6e367835680b40f90bfc9f52c9e8140..1afb82c735cf0ede3b6d7ddd3c9e7ca674757a76 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4373,19 +4373,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
  
    // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
    unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
  
    // This is the index of the first element of the vectorWidth-bit chunk
-  // we want.
-  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
-                               * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2, just clear the low bits.
+  IdxVal &= ~(ElemsPerChunk - 1);
  
    // If the input is a buildvector just emit a smaller one.
    if (Vec.getOpcode() == ISD::BUILD_VECTOR)
      return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
-                       makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
-                                    ElemsPerChunk));
+                       makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
  
-  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
  }
  
@@ -4423,13 +4422,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,
  
    // Insert the relevant vectorWidth bits.
    unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
  
    // This is the index of the first element of the vectorWidth-bit chunk
-  // we want.
-  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
-                               * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2, just clear the low bits.
+  IdxVal &= ~(ElemsPerChunk - 1);
  
-  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
    return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
  }
  
@@ -6975,7 +6974,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
      // FALLTHROUGH
    case MVT::v16i8:
    case MVT::v32i8: {
-    assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) &&
+    assert((VT.is128BitVector() || Subtarget->hasAVX2()) &&
             "256-bit byte-blends require AVX2 support!");
  
      // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
@@ -7202,7 +7201,7 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
                          DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
    }
  
-  assert(VT.getSizeInBits() == 128 &&
+  assert(VT.is128BitVector() &&
           "Rotate-based lowering only supports 128-bit lowering!");
    assert(Mask.size() <= 16 &&
           "Can shuffle at most 16 bytes in a 128-bit vector!");
@@ -7490,7 +7489,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
    if (Subtarget->hasSSE41()) {
      // Not worth offseting 128-bit vectors if scale == 2, a pattern using
      // PUNPCK will catch this in a later shuffle match.
-    if (Offset && Scale == 2 && VT.getSizeInBits() == 128)
+    if (Offset && Scale == 2 && VT.is128BitVector())
        return SDValue();
      MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
                                   NumElements / Scale);
@@ -7498,7 +7497,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
      return DAG.getBitcast(VT, InputV);
    }
  
-  assert(VT.getSizeInBits() == 128 && "Only 128-bit vectors can be extended.");
+  assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
  
    // For any extends we can cheat for larger element sizes and use shuffle
    // instructions that can fold with a load and/or copy.
@@ -7528,7 +7527,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
    // to 64-bits.
    if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
      assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
-    assert(VT.getSizeInBits() == 128 && "Unexpected vector width!");
+    assert(VT.is128BitVector() && "Unexpected vector width!");
  
      int LoIdx = Offset * EltBits;
      SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
@@ -9917,7 +9916,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
                                                         ArrayRef<int> Mask,
                                                         SelectionDAG &DAG) {
    // FIXME: This should probably be generalized for 512-bit vectors as well.
-  assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!");
+  assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
    int LaneSize = Mask.size() / 2;
  
    // If there are only inputs from one 128-bit lane, splitting will in fact be
@@ -11120,13 +11119,13 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
    }
  
    // For each vector width, delegate to a specialized lowering routine.
-  if (VT.getSizeInBits() == 128)
+  if (VT.is128BitVector())
      return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
  
-  if (VT.getSizeInBits() == 256)
+  if (VT.is256BitVector())
      return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
  
-  if (VT.getSizeInBits() == 512)
+  if (VT.is512BitVector())
      return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
  
    if (Is1BitVector)
@@ -11390,10 +11389,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
      MVT EltVT = VecVT.getVectorElementType();
  
      unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
+    assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
  
-    //if (IdxVal >= NumElems/2)
-    //  IdxVal -= NumElems/2;
-    IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
+    // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
+    // this can be done with a mask.
+    IdxVal &= ElemsPerChunk - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
                         DAG.getConstant(IdxVal, dl, MVT::i32));
    }
@@ -11529,7 +11529,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
  
      // Insert the element into the desired chunk.
      unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
-    unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
+    assert(isPowerOf2_32(NumEltsIn128));
+    // Since NumEltsIn128 is a power of 2, we can use a mask instead of modulo.
+    unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
  
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
                      DAG.getConstant(IdxIn128, dl, MVT::i32));
@@ -15921,22 +15923,22 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
      }
  
      switch (Op.getOpcode()) {
-      default: break;
-      case X86ISD::PCMPEQM:
-      case X86ISD::PCMPGTM:
-      case X86ISD::CMPM:
-      case X86ISD::CMPMU:
-        return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
-      case X86ISD::VFPCLASS:
-        return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
-      case X86ISD::VTRUNC:
-      case X86ISD::VTRUNCS:
-      case X86ISD::VTRUNCUS:
-        // We can't use ISD::VSELECT here because it is not always "Legal"
-        // for the destination type. For example vpmovqb require only AVX512
-        // and vselect that can operate on byte element type require BWI
-        OpcodeSelect = X86ISD::SELECT;
-        break;
+    default: break;
+    case X86ISD::PCMPEQM:
+    case X86ISD::PCMPGTM:
+    case X86ISD::CMPM:
+    case X86ISD::CMPMU:
+      return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
+    case X86ISD::VFPCLASS:
+      return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
+    case X86ISD::VTRUNC:
+    case X86ISD::VTRUNCS:
+    case X86ISD::VTRUNCUS:
+      // We can't use ISD::VSELECT here because it is not always "Legal"
+      // for the destination type. For example, vpmovqb requires only AVX512,
+      // and a vselect that can operate on a byte element type requires BWI.
+      OpcodeSelect = X86ISD::SELECT;
+      break;
      }
      if (PreservedSrc.getOpcode() == ISD::UNDEF)
        PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
@@ -18777,10 +18779,10 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget,
    // +ve/-ve Amt = rotate left/right.
  
    // Split 256-bit integers.
-  if (VT.getSizeInBits() == 256)
+  if (VT.is256BitVector())
      return Lower256IntArith(Op, DAG);
  
-  assert(VT.getSizeInBits() == 128 && "Only rotate 128-bit vectors!");
+  assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
  
    // Attempt to rotate by immediate.
    if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
@@ -22234,7 +22236,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
    // doesn't preclude something switching to the shorter encoding post-RA.
    //
    // FIXME: Should teach these routines about AVX vector widths.
-  if (FloatDomain && VT.getSizeInBits() == 128) {
+  if (FloatDomain && VT.is128BitVector()) {
      if (Mask.equals({0, 0}) || Mask.equals({1, 1})) {
        bool Lo = Mask.equals({0, 0});
        unsigned Shuffle;
@@ -22298,7 +22300,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
    // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
    // variants as none of these have single-instruction variants that are
    // superior to the UNPCK formulation.
-  if (!FloatDomain && VT.getSizeInBits() == 128 &&
+  if (!FloatDomain && VT.is128BitVector() &&
        (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
         Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) ||
         Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) ||
@@ -23819,11 +23821,10 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
      if (VT.getScalarType() == MVT::i16)
        return SDValue();
      // Dynamic blending was only available from SSE4.1 onward.
-    if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+    if (VT.is128BitVector() && !Subtarget->hasSSE41())
        return SDValue();
      // Byte blends are only available in AVX2
-    if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
-        !Subtarget->hasAVX2())
+    if (VT == MVT::v32i8 && !Subtarget->hasAVX2())
        return SDValue();
  
      assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
@@ -26024,7 +26025,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
      }
    }
  
-  if (Subtarget->hasAVX() && VT.isVector() && VT.getSizeInBits() == 256)
+  if (Subtarget->hasAVX() && VT.is256BitVector())
      if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
        return R;