64-bit (MMX) vectors do not need restrictive alignment.

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 79ac36d5aa93a22a47d08af05acb96b61ff8b16e..e4454709ae87a32162665ad9e65df9927e8a5ff1 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -704,9 +704,6 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
    if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
      if (VTy->getBitWidth() == 128)
        MaxAlign = 16;
-    else if (VTy->getBitWidth() == 64)
-      if (MaxAlign < 8)
-        MaxAlign = 8;
    } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
      unsigned EltAlign = 0;
      getMaxByValAlign(ATy->getElementType(), EltAlign);
@@ -727,13 +724,14 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  
  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area. For X86, aggregates
-/// that contains are placed at 16-byte boundaries while the rest are at
-/// 4-byte boundaries.
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
  unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
    if (Subtarget->is64Bit())
      return getTargetData()->getABITypeAlignment(Ty);
    unsigned Align = 4;
-  getMaxByValAlign(Ty, Align);
+  if (Subtarget->hasSSE1())
+    getMaxByValAlign(Ty, Align);
    return Align;
  }
  
@@ -1091,7 +1089,7 @@ SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
    if (isByVal)
      return FIN;
    return DAG.getLoad(VA.getValVT(), Root, FIN,
-                     &PseudoSourceValue::getFixedStack(), FI);
+                     PseudoSourceValue::getFixedStack(), FI);
  }
  
  SDOperand
@@ -1221,7 +1219,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
          SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
          SDOperand Store =
            DAG.getStore(Val.getValue(1), Val, FIN,
-                       &PseudoSourceValue::getFixedStack(),
+                       PseudoSourceValue::getFixedStack(),
                         RegSaveFrameIndex);
          MemOps.push_back(Store);
          FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
@@ -1237,7 +1235,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
          SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
          SDOperand Store =
            DAG.getStore(Val.getValue(1), Val, FIN,
-                       &PseudoSourceValue::getFixedStack(),
+                       PseudoSourceValue::getFixedStack(),
                         RegSaveFrameIndex);
          MemOps.push_back(Store);
          FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
@@ -1290,14 +1288,16 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
                                      const CCValAssign &VA,
                                      SDOperand Chain,
                                      SDOperand Arg) {
-  SDOperand PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+  unsigned LocMemOffset = VA.getLocMemOffset();
+  SDOperand PtrOff = DAG.getIntPtrConstant(LocMemOffset);
    PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
    SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
    unsigned Flags    = cast<ConstantSDNode>(FlagsOp)->getValue();
    if (Flags & ISD::ParamFlags::ByVal) {
      return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
    }
-  return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
+  return DAG.getStore(Chain, Arg, PtrOff,
+                      PseudoSourceValue::getStack(), LocMemOffset);
  }
  
  /// ClassifyX86_64SRetCallReturn - Classify how to implement a x86-64
@@ -1569,7 +1569,7 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
            // Store relative to framepointer.
            MemOpChains2.push_back(
              DAG.getStore(Chain, Source, FIN,
-                         &PseudoSourceValue::getFixedStack(), FI));
+                         PseudoSourceValue::getFixedStack(), FI));
          }            
        }
      }
@@ -3799,7 +3799,7 @@ X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
    // The same applies for external symbols during PIC codegen
    if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
-                         &PseudoSourceValue::getGOT(), 0);
+                         PseudoSourceValue::getGOT(), 0);
  
    return Result;
  }
@@ -3858,7 +3858,7 @@ LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
  
    if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
      Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
-                         &PseudoSourceValue::getGOT(), 0);
+                         PseudoSourceValue::getGOT(), 0);
  
    // The address of the thread local variable is the add of the thread
    // pointer with the offset of the variable.
@@ -3991,7 +3991,7 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
                                   StackSlot,
-                                 &PseudoSourceValue::getFixedStack(),
+                                 PseudoSourceValue::getFixedStack(),
                                   SSFI);
  
    // These are really Legal; caller falls through into that case.
@@ -4034,7 +4034,7 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
-                         &PseudoSourceValue::getFixedStack(), SSFI);
+                         PseudoSourceValue::getFixedStack(), SSFI);
    }
  
    return Result;
@@ -4073,7 +4073,7 @@ FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
    if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
      assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
      Chain = DAG.getStore(Chain, Value, StackSlot,
-                         &PseudoSourceValue::getFixedStack(), SSFI);
+                         PseudoSourceValue::getFixedStack(), SSFI);
      SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
      SDOperand Ops[] = {
        Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
@@ -4133,7 +4133,7 @@ SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
    Constant *C = ConstantVector::get(CV);
    SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
    SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
-                               &PseudoSourceValue::getConstantPool(), 0,
+                               PseudoSourceValue::getConstantPool(), 0,
                                 false, 16);
    return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
  }
@@ -4162,7 +4162,7 @@ SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
    Constant *C = ConstantVector::get(CV);
    SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
    SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
-                               &PseudoSourceValue::getConstantPool(), 0,
+                               PseudoSourceValue::getConstantPool(), 0,
                                 false, 16);
    if (MVT::isVector(VT)) {
      return DAG.getNode(ISD::BIT_CONVERT, VT,
@@ -4211,7 +4211,7 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
    Constant *C = ConstantVector::get(CV);
    SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
    SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx,
-                                &PseudoSourceValue::getConstantPool(), 0,
+                                PseudoSourceValue::getConstantPool(), 0,
                                  false, 16);
    SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
  
@@ -4240,7 +4240,7 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
    C = ConstantVector::get(CV);
    CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
    SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
-                                &PseudoSourceValue::getConstantPool(), 0,
+                                PseudoSourceValue::getConstantPool(), 0,
                                  false, 16);
    SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
  
@@ -4946,9 +4946,9 @@ SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
      const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
  
      const unsigned char N86R10 =
-      ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
+      ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
      const unsigned char N86R11 =
-      ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
+      ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
  
      const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
  
@@ -5036,7 +5036,7 @@ SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
  
      const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
      const unsigned char N86Reg =
-      ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
+      ((const X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
      OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                  Trmp, TrmpAddr, 0);