Use movaps to load a v4f32 build_vector of all-constant values into a register.

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 641ccb03dec96d5b2d93a1d6e537c85a65de3f6b..23f9e9500c2dde606a1cc11adde7835c8aaf3e29 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -42,6 +42,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
    X86ScalarSSE = Subtarget->hasSSE2();
    X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
  
+  RegInfo = TM.getRegisterInfo();
+
    // Set up the TargetLowering object.
  
    // X86 is weird, it always uses i8 for shift amounts and setcc results.
@@ -197,6 +199,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
    }
    // X86 ret instruction may pop stack.
    setOperationAction(ISD::RET             , MVT::Other, Custom);
+  if (!Subtarget->is64Bit())
+    setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
+
    // Darwin ABI issue.
    setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
    setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
@@ -331,6 +336,13 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
      setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    }
  
    if (Subtarget->hasMMX()) {
@@ -408,6 +420,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
      setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
      setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
      setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
+    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
+    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
      setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
      setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
@@ -435,6 +449,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
      setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
      setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
      setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
+    setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
+    setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
  
      setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
      setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
@@ -2479,6 +2495,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
    unsigned NumZero  = 0;
    unsigned NumNonZero = 0;
    unsigned NonZeros = 0;
+  unsigned NumNonZeroImms = 0;
    std::set<SDOperand> Values;
    for (unsigned i = 0; i < NumElems; ++i) {
      SDOperand Elt = Op.getOperand(i);
@@ -2489,6 +2506,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
        else {
          NonZeros |= (1 << i);
          NumNonZero++;
+        if (Elt.getOpcode() == ISD::Constant ||
+            Elt.getOpcode() == ISD::ConstantFP)
+          NumNonZeroImms++;
        }
      }
    }
@@ -2532,6 +2552,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
      }
    }
  
+  // A vector full of immediates; various special cases are already
+  // handled, so this is best done with a single constant-pool load.
+  if (NumNonZero == NumNonZeroImms)
+    return SDOperand();
+
    // Let legalizer expand 2-wide build_vectors.
    if (EVTBits == 64)
      return SDOperand();
@@ -3326,16 +3351,21 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  
  SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
    MVT::ValueType VT = Op.getValueType();
-  const Type *OpNTy =  MVT::getTypeForValueType(VT);
+  MVT::ValueType EltVT = VT;
+  if (MVT::isVector(VT))
+    EltVT = MVT::getVectorElementType(VT);
+  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);
    std::vector<Constant*> CV;
-  if (VT == MVT::f64) {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+  if (EltVT == MVT::f64) {
+    Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
+    CV.push_back(C);
+    CV.push_back(C);
    } else {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+    Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
    }
    Constant *CS = ConstantStruct::get(CV);
    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
@@ -3350,26 +3380,42 @@ SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
  
  SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
    MVT::ValueType VT = Op.getValueType();
-  const Type *OpNTy =  MVT::getTypeForValueType(VT);
+  MVT::ValueType EltVT = VT;  // element type: VT itself for scalars, the vector element type otherwise
+  unsigned EltNum = 1;  // NOTE(review): EltNum is assigned here and below but never read in this function
+  if (MVT::isVector(VT)) {
+    EltVT = MVT::getVectorElementType(VT);
+    EltNum = MVT::getVectorNumElements(VT);
+  }
+  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);  // constants are built with the element type
    std::vector<Constant*> CV;
-  if (VT == MVT::f64) {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+  if (EltVT == MVT::f64) {
+    Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));  // f64 whose only set bit is bit 63 (the sign bit)
+    CV.push_back(C);  // the same mask value fills both f64 entries
+    CV.push_back(C);
    } else {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+    Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));  // f32 whose only set bit is bit 31 (the sign bit)
+    CV.push_back(C);  // the same mask value fills all four f32 entries
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
    }
    Constant *CS = ConstantStruct::get(CV);
    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SmallVector<SDOperand, 3> Ops;
-  Ops.push_back(DAG.getEntryNode());
-  Ops.push_back(CPIdx);
-  Ops.push_back(DAG.getSrcValue(NULL));
-  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
-  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
+  if (MVT::isVector(VT)) {  // vector case: ordinary load of the mask, XOR performed on v2i64 bit-casts
+    SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+    return DAG.getNode(ISD::BIT_CONVERT, VT,  // cast the XOR result back to the original vector type
+                       DAG.getNode(ISD::XOR, MVT::v2i64,
+                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
+                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
+  } else {  // scalar case: mask loaded via X86ISD::LOAD_PACK and applied with X86ISD::FXOR
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SmallVector<SDOperand, 3> Ops;
+    Ops.push_back(DAG.getEntryNode());
+    Ops.push_back(CPIdx);
+    Ops.push_back(DAG.getSrcValue(NULL));
+    SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
+    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
+  }
  }
  
  SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
@@ -3602,8 +3648,9 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  // bytes in one go. Touching the stack at 4K increments is necessary to ensure
  // that the guard pages used by the OS virtual memory manager are allocated in
  // correct sequence.
-SDOperand X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
-                                                     SelectionDAG &DAG) {
+SDOperand
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
+                                           SelectionDAG &DAG) {
    assert(Subtarget->isTargetCygMing() &&
           "This should be used only on Cygwin/Mingw targets");
    
@@ -3612,27 +3659,29 @@ SDOperand X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
    SDOperand Size  = Op.getOperand(1);
    // FIXME: Ensure alignment here
  
-  TargetLowering::ArgListTy Args; 
-  TargetLowering::ArgListEntry Entry;
+  SDOperand Flag;
+  
    MVT::ValueType IntPtr = getPointerTy();
    MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
-  const Type *IntPtrTy = getTargetData()->getIntPtrType();
-  
-  Entry.Node    = Size;
-  Entry.Ty      = IntPtrTy;
-  Entry.isInReg = true; // Should pass in EAX
-  Args.push_back(Entry);
-  std::pair<SDOperand, SDOperand> CallResult =
-    LowerCallTo(Chain, IntPtrTy, false, false, CallingConv::C, false,
-                DAG.getExternalSymbol("_alloca", IntPtr), Args, DAG);
-
-  SDOperand SP = DAG.getCopyFromReg(CallResult.second, X86StackPtr, SPTy);
+
+  Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
+  Flag = Chain.getValue(1);
+
+  SDVTList  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDOperand Ops[] = { Chain,
+                      DAG.getTargetExternalSymbol("_alloca", IntPtr),
+                      DAG.getRegister(X86::EAX, IntPtr),
+                      Flag };
+  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
+  Flag = Chain.getValue(1);
+
+  Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
    
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(SPTy);
    Tys.push_back(MVT::Other);
-  SDOperand Ops[2] = { SP, CallResult.second };
-  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+  SDOperand Ops1[2] = { Chain.getValue(0), Chain };
+  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
  }
  
  SDOperand
@@ -4200,6 +4249,39 @@ SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
                       DAG.getConstant(4, getPointerTy()));
  }
  
+SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
+                                                       SelectionDAG &DAG) {
+  // Not yet supported on x86-64: an empty SDOperand is returned in that case.
+  if (Subtarget->is64Bit())
+    return SDOperand();
+  
+  return DAG.getConstant(8, getPointerTy());  // 32-bit: pointer-typed constant 8; NOTE(review): presumably saved frame pointer + return address, 4 bytes each -- confirm
+}
+
+SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
+{  // Custom lowering of an EH_RETURN node: stores the handler, then emits X86ISD::EH_RETURN.
+  assert(!Subtarget->is64Bit() &&
+         "Lowering of eh_return builtin is not supported yet on x86-64");
+    
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDOperand Chain     = Op.getOperand(0);  // operand 0: incoming chain
+  SDOperand Offset    = Op.getOperand(1);  // operand 1: added into the store address below
+  SDOperand Handler   = Op.getOperand(2);  // operand 2: the value that gets stored
+
+  SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
+                                    getPointerTy());  // this function's frame register as a pointer-typed node
+
+  SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
+                                    DAG.getConstant(-4UL, getPointerTy()));  // Frame - (-4), i.e. Frame + 4
+  StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);  // ... plus Offset
+  Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);  // write Handler to the computed address
+  Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);  // the same address is passed on in ECX
+  MF.addLiveOut(X86::ECX);  // record ECX as live-out of this function
+
+  return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
+                     Chain, DAG.getRegister(X86::ECX, getPointerTy()));
+}
+
  /// LowerOperation - Provide custom lowering hooks for some operations.
  ///
  SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -4237,7 +4319,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
    case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
    case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
+  case ISD::FRAME_TO_ARGS_OFFSET:
+                                return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
    case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
    }
    return SDOperand();
  }
@@ -4281,8 +4366,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::PINSRW:             return "X86ISD::PINSRW";
    case X86ISD::FMAX:               return "X86ISD::FMAX";
    case X86ISD::FMIN:               return "X86ISD::FMIN";
+  case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
+  case X86ISD::FRCP:               return "X86ISD::FRCP";
    case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
    case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
+  case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
    }
  }
  
@@ -4468,12 +4556,12 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
      unsigned Opc;
      switch (MI->getOpcode()) {
      default: assert(0 && "illegal opcode!");
-    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::FpIST16m32; break;
-    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::FpIST32m32; break;
-    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::FpIST64m32; break;
-    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::FpIST16m64; break;
-    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::FpIST32m64; break;
-    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::FpIST64m64; break;
+    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
      }
  
      X86AddressMode AM;
@@ -4597,8 +4685,8 @@ static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
    if (Loc.getOpcode() == ISD::FrameIndex) {
      if (BaseLoc.getOpcode() != ISD::FrameIndex)
        return false;
-    int FI  = dyn_cast<FrameIndexSDNode>(Loc)->getIndex();
-    int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
      int FS  = MFI->getObjectSize(FI);
      int BFS = MFI->getObjectSize(BFI);
      if (FS != BFS || FS != Size) return false;
@@ -4625,7 +4713,7 @@ static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
      return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
    else {
      assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
-    int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
      if (BFI < 0)
        // Fixed objects do not specify alignment, however the offsets are known.
        return ((Subtarget->getStackAlignment() % 16) == 0 &&