simplify some code based on the fact that picstyles != none are only valid

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 9614e69e0f8946f1ccb8aff79e90223d53877730..d859423300117c26c769e6abdf81c3c998c64844 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -33,10 +33,12 @@
  #include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  
  static cl::opt<bool>
@@ -700,6 +702,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
        // Do not attempt to custom lower non-power-of-2 vectors
        if (!isPowerOf2_32(VT.getVectorNumElements()))
          continue;
+      // Do not attempt to custom lower non-128-bit vectors
+      if (!VT.is128BitVector())
+        continue;
        setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -718,17 +723,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      }
  
      // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
-    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
-      setOperationAction(ISD::AND,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::AND,    (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::OR,     (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::OR,     (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::XOR,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::XOR,    (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::LOAD,   (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::LOAD,   (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
+    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
+      MVT VT = (MVT::SimpleValueType)i;
+
+      // Do not attempt to promote non-128-bit vectors
+      if (!VT.is128BitVector()) {
+        continue;
+      }
+      setOperationAction(ISD::AND,    VT, Promote);
+      AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
+      setOperationAction(ISD::OR,     VT, Promote);
+      AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
+      setOperationAction(ISD::XOR,    VT, Promote);
+      AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
+      setOperationAction(ISD::LOAD,   VT, Promote);
+      AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
+      setOperationAction(ISD::SELECT, VT, Promote);
+      AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
      }
  
      setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -775,6 +786,114 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);
    }
  
+  if (!UseSoftFloat && Subtarget->hasAVX()) {
+    addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+    addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
+    addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+    addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+
+    setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v8i32, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
+    setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
+    setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
+    //setOperationAction(ISD::BUILD_VECTOR,       MVT::v8f32, Custom);
+    //setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8f32, Custom);
+    //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
+    //setOperationAction(ISD::SELECT,             MVT::v8f32, Custom);
+    //setOperationAction(ISD::VSETCC,             MVT::v8f32, Custom);
+
+    // Operations to consider commented out -v16i16 v32i8
+    //setOperationAction(ISD::ADD,                MVT::v16i16, Legal);
+    setOperationAction(ISD::ADD,                MVT::v8i32, Custom);
+    setOperationAction(ISD::ADD,                MVT::v4i64, Custom);
+    //setOperationAction(ISD::SUB,                MVT::v32i8, Legal);
+    //setOperationAction(ISD::SUB,                MVT::v16i16, Legal);
+    setOperationAction(ISD::SUB,                MVT::v8i32, Custom);
+    setOperationAction(ISD::SUB,                MVT::v4i64, Custom);
+    //setOperationAction(ISD::MUL,                MVT::v16i16, Legal);
+    setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
+    setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
+
+    setOperationAction(ISD::VSETCC,             MVT::v4f64, Custom);
+    // setOperationAction(ISD::VSETCC,             MVT::v32i8, Custom);
+    // setOperationAction(ISD::VSETCC,             MVT::v16i16, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v8i32, Custom);
+
+    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v32i8, Custom);
+    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i16, Custom);
+    // setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i16, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i32, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8f32, Custom);
+
+    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f64, Custom);
+    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i64, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f64, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i64, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f64, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);
+
+#if 0
+    // Not sure we want to do this since there are no 256-bit integer
+    // operations in AVX
+
+    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+    // This includes 256-bit vectors
+    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
+      MVT VT = (MVT::SimpleValueType)i;
+
+      // Do not attempt to custom lower non-power-of-2 vectors
+      if (!isPowerOf2_32(VT.getVectorNumElements()))
+        continue;
+
+      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    }
+
+    if (Subtarget->is64Bit()) {
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i64, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
+    }    
+#endif
+
+#if 0
+    // Not sure we want to do this since there are no 256-bit integer
+    // operations in AVX
+
+    // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
+    // Including 256-bit vectors
+    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
+      MVT VT = (MVT::SimpleValueType)i;
+
+      if (!VT.is256BitVector()) {
+        continue;
+      }
+      setOperationAction(ISD::AND,    VT, Promote);
+      AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
+      setOperationAction(ISD::OR,     VT, Promote);
+      AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
+      setOperationAction(ISD::XOR,    VT, Promote);
+      AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
+      setOperationAction(ISD::LOAD,   VT, Promote);
+      AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
+      setOperationAction(ISD::SELECT, VT, Promote);
+      AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+    }
+
+    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+#endif
+  }
+
    // We want to custom lower some of our intrinsics.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  
@@ -805,6 +924,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::STORE);
+  setTargetDAGCombine(ISD::MEMBARRIER);
    if (Subtarget->is64Bit())
      setTargetDAGCombine(ISD::MUL);
  
@@ -901,7 +1021,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                        SelectionDAG &DAG) const {
    if (usesGlobalOffsetTable())
      return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
-  if (!Subtarget->isPICStyleRIPRel())
+  if (!Subtarget->is64Bit())
      // This doesn't have DebugLoc associated with it, but is not really the
      // same as a Register.
      return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
@@ -909,6 +1029,11 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
    return Table;
  }
  
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
+  return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+}
+
  //===----------------------------------------------------------------------===//
  //               Return Value Calling Convention Implementation
  //===----------------------------------------------------------------------===//
@@ -1062,8 +1187,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
      // If this is x86-64, and we disabled SSE, we can't return FP values
      if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
          ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
-      cerr << "SSE register return with SSE disabled\n";
-      exit(1);
+      llvm_report_error("SSE register return with SSE disabled");
      }
  
      // If this is a call to a function that returns an fp value on the floating
@@ -1195,23 +1319,6 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
  }
  
  
-/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
-/// in a register before calling.
-bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
-  return !IsTailCall && !Is64Bit &&
-    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-    Subtarget->isPICStyleGOT();
-}
-
-/// CallRequiresFnAddressInReg - Check whether the call requires the function
-/// address to be loaded in a register.
-bool
-X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
-  return !Is64Bit && IsTailCall &&
-    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-    Subtarget->isPICStyleGOT();
-}
-
  /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
  /// by "Src" to address "Dst" with size and alignment information specified by
  /// the specific parameter attribute. The copy will be passed as a byval
@@ -1678,30 +1785,34 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
        InFlag = Chain.getValue(1);
      }
  
-  // ELF / PIC requires GOT in the EBX register before function calls via PLT
-  // GOT pointer.
-  if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
-    Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
-                             DAG.getNode(X86ISD::GlobalBaseReg,
-                                         DebugLoc::getUnknownLoc(),
-                                         getPointerTy()),
-                             InFlag);
-    InFlag = Chain.getValue(1);
-  }
-  // If we are tail calling and generating PIC/GOT style code load the address
-  // of the callee into ecx. The value in ecx is used as target of the tail
-  // jump. This is done to circumvent the ebx/callee-saved problem for tail
-  // calls on PIC/GOT architectures. Normally we would just put the address of
-  // GOT into ebx and then call target@PLT. But for tail callss ebx would be
-  // restored (since ebx is callee saved) before jumping to the target@PLT.
-  if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
-    // Note: The actual moving to ecx is done further down.
-    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-    if (G && !G->getGlobal()->hasHiddenVisibility() &&
-        !G->getGlobal()->hasProtectedVisibility())
-      Callee =  LowerGlobalAddress(Callee, DAG);
-    else if (isa<ExternalSymbolSDNode>(Callee))
-      Callee = LowerExternalSymbol(Callee,DAG);
+  
+  if (Subtarget->isPICStyleGOT()) {
+    // ELF / PIC requires GOT in the EBX register before function calls via PLT
+    // GOT pointer.
+    if (!IsTailCall) {
+      Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+                               DAG.getNode(X86ISD::GlobalBaseReg,
+                                           DebugLoc::getUnknownLoc(),
+                                           getPointerTy()),
+                               InFlag);
+      InFlag = Chain.getValue(1);
+    } else {
+      // If we are tail calling and generating PIC/GOT style code load the
+      // address of the callee into ECX. The value in ecx is used as target of
+      // the tail jump. This is done to circumvent the ebx/callee-saved problem
+      // for tail calls on PIC/GOT architectures. Normally we would just put the
+      // address of GOT into ebx and then call target@PLT. But for tail calls
+      // ebx would be restored (since ebx is callee saved) before jumping to the
+      // target@PLT.
+
+      // Note: The actual moving to ECX is done further down.
+      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+      if (G && !G->getGlobal()->hasHiddenVisibility() &&
+          !G->getGlobal()->hasProtectedVisibility())
+        Callee = LowerGlobalAddress(Callee, DAG);
+      else if (isa<ExternalSymbolSDNode>(Callee))
+        Callee = LowerExternalSymbol(Callee, DAG);
+    }
    }
  
    if (Is64Bit && isVarArg) {
@@ -1833,9 +1944,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
                                    RegsToPass[i].second.getValueType()));
  
    // Add an implicit use GOT pointer in EBX.
-  if (!IsTailCall && !Is64Bit &&
-      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      Subtarget->isPICStyleGOT())
+  if (!IsTailCall && Subtarget->isPICStyleGOT())
      Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
  
    // Add an implicit use of AL for x86 vararg functions.
@@ -1948,22 +2057,11 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
      return false;
  
    if (CheckTailCallReturnConstraints(TheCall, Ret)) {
-    MachineFunction &MF = DAG.getMachineFunction();
-    unsigned CallerCC = MF.getFunction()->getCallingConv();
-    unsigned CalleeCC= TheCall->getCallingConv();
-    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
-      SDValue Callee = TheCall->getCallee();
-      // On x86/32Bit PIC/GOT  tail calls are supported.
-      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
-          !Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit())
-        return true;
-
-      // Can only do local tail calls (in same module, hidden or protected) on
-      // x86_64 PIC/GOT at the moment.
-      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-        return G->getGlobal()->hasHiddenVisibility()
-            || G->getGlobal()->hasProtectedVisibility();
-    }
+    unsigned CallerCC =
+      DAG.getMachineFunction().getFunction()->getCallingConv();
+    unsigned CalleeCC = TheCall->getCallingConv();
+    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC)
+      return true;
    }
  
    return false;
@@ -4317,14 +4415,15 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
    // global base reg.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (Subtarget->isPICStyleStub())
-      OpFlag = X86II::MO_PIC_BASE_OFFSET;
-    else if (Subtarget->isPICStyleGOT())
-      OpFlag = X86II::MO_GOTOFF;
-    else if (Subtarget->isPICStyleRIPRel() &&
-             getTargetMachine().getCodeModel() == CodeModel::Small)
-      WrapperKind = X86ISD::WrapperRIP;
+  
+  if (Subtarget->is64Bit() &&
+      getTargetMachine().getCodeModel() == CodeModel::Small) {
+    WrapperKind = X86ISD::WrapperRIP;
+  } else if (Subtarget->isPICStyleGOT()) {
+    OpFlag = X86II::MO_GOTOFF;
+  } else if (Subtarget->isPICStyleStub() &&
+             getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    }
    
    SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
@@ -4350,13 +4449,14 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
    // global base reg.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (Subtarget->isPICStyleStub())
-      OpFlag = X86II::MO_PIC_BASE_OFFSET;
-    else if (Subtarget->isPICStyleGOT())
-      OpFlag = X86II::MO_GOTOFF;
-    else if (Subtarget->isPICStyleRIPRel())
-      WrapperKind = X86ISD::WrapperRIP;
+  
+  if (Subtarget->is64Bit()) {
+    WrapperKind = X86ISD::WrapperRIP;
+  } else if (Subtarget->isPICStyleGOT()) {
+    OpFlag = X86II::MO_GOTOFF;
+  } else if (Subtarget->isPICStyleStub() &&
+             getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    }
    
    SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
@@ -4383,13 +4483,13 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
    // global base reg.
    unsigned char OpFlag = 0;
    unsigned WrapperKind = X86ISD::Wrapper;
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
-    if (Subtarget->isPICStyleStub())
-      OpFlag = X86II::MO_PIC_BASE_OFFSET;
-    else if (Subtarget->isPICStyleGOT())
-      OpFlag = X86II::MO_GOTOFF;
-    else if (Subtarget->isPICStyleRIPRel())
-      WrapperKind = X86ISD::WrapperRIP;
+  if (Subtarget->is64Bit()) {
+    WrapperKind = X86ISD::WrapperRIP;
+  } else if (Subtarget->isPICStyleGOT()) {
+    OpFlag = X86II::MO_GOTOFF;
+  } else if (Subtarget->isPICStyleStub() &&
+             getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    }
    
    SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
@@ -4400,7 +4500,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
    
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->isPICStyleRIPRel()) {
+      !Subtarget->is64Bit()) {
      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg,
                                       DebugLoc::getUnknownLoc(),
@@ -4423,17 +4523,18 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
    // offset if it is legal.
    SDValue Result;
    if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+    // A direct static reference to a global.
      Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
      Offset = 0;
    } else {
      unsigned char OpFlags = 0;
      
-    if (Subtarget->isPICStyleRIPRel() &&
-        getTargetMachine().getRelocationModel() != Reloc::Static) {
+    if (GV->hasDLLImportLinkage())
+      OpFlags = X86II::MO_DLLIMPORT;
+    else if (Subtarget->isPICStyleRIPRel()) {
        if (ExtraLoadRequired)
          OpFlags = X86II::MO_GOTPCREL;
-    } else if (Subtarget->isPICStyleGOT() &&
-               getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    } else if (Subtarget->isPICStyleGOT()) {
        if (ExtraLoadRequired)
          OpFlags = X86II::MO_GOT;
        else
@@ -4443,14 +4544,14 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
      Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
    }
    
-  if (Subtarget->isPICStyleRIPRel() &&
+  if (Subtarget->is64Bit() &&
        getTargetMachine().getCodeModel() == CodeModel::Small)
      Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
    else
      Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
  
    // With PIC, the address is actually $g + Offset.
-  if (IsPic && !Subtarget->isPICStyleRIPRel()) {
+  if (IsPic && !Subtarget->is64Bit()) {
      Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
                           Result);
@@ -5690,7 +5791,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
        Args.push_back(Entry);
        std::pair<SDValue,SDValue> CallResult =
          LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
-                    CallingConv::C, false,
+                    0, CallingConv::C, false,
                      DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
        return CallResult.second;
      }
@@ -5931,8 +6032,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
    SDValue SrcPtr = Op.getOperand(1);
    SDValue SrcSV = Op.getOperand(2);
  
-  assert(0 && "VAArgInst is not yet implemented for x86-64!");
-  abort();
+  llvm_report_error("VAArgInst is not yet implemented for x86-64!");
    return SDValue();
  }
  
@@ -6133,7 +6233,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
        case Intrinsic::x86_mmx_psrai_d:
          NewIntNo = Intrinsic::x86_mmx_psra_d;
          break;
-      default: abort();  // Can't reach here.
+      default: LLVM_UNREACHABLE("Impossible intrinsic");  // Can't reach here.
        }
        break;
      }
@@ -6305,8 +6405,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
              InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
  
          if (InRegCount > 2) {
-          cerr << "Nest register in use - reduce number of inreg parameters!\n";
-          abort();
+          llvm_report_error("Nest register in use - reduce number of inreg parameters!");
          }
        }
        break;
@@ -8454,6 +8553,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
    return SDValue();
  }
  
+// On X86 and X86-64, atomic operations are lowered to locked instructions.
+// Locked instructions, in turn, have implicit fence semantics (all memory
+// operations are flushed before issuing the locked instruction, and the
+// are not buffered), so we can fold away the common pattern of 
+// fence-atomic-fence.
+static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
+  SDValue atomic = N->getOperand(0);
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      break;
+    default:
+      return SDValue();
+  }
+  
+  SDValue fence = atomic.getOperand(0);
+  if (fence.getOpcode() != ISD::MEMBARRIER)
+    return SDValue();
+  
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+      return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
+                                    atomic.getOperand(1), atomic.getOperand(2),
+                                    atomic.getOperand(3));
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
+                                    atomic.getOperand(1), atomic.getOperand(2));
+    default:
+      return SDValue();
+  }
+}
+
  SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
@@ -8472,6 +8623,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
    case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
    case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
+  case ISD::MEMBARRIER:     return PerformMEMBARRIERCombine(N, DAG);
    }
  
    return SDValue();
@@ -8627,10 +8779,15 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
            continue;
          }
        }
-      
+
        // Otherwise, this isn't something we can handle, reject it.
        return;
      }
+    // If we require an extra load to get this address, as in PIC mode, we
+    // can't accept it.
+    if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(),
+                                       getTargetMachine(), false))
+      return;
  
      if (hasMemory)
        Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG);