Remove some errant space characters in mnemonic strings.

[oota-llvm.git] / lib / Target / X86 / X86FastISel.cpp
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp

index 681ee6c735f69fc17cbe66f33382be7a558ec8f6..5bc34201aa4ea361ca4267efec8b507912096abb 100644 (file)
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -79,8 +79,10 @@ private:
  
    bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
  
-  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
-  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+                        bool Aligned = false);
+  bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
+                        bool Aligned = false);
  
    bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                           unsigned &ResultReg);
@@ -233,7 +235,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
  /// and a displacement offset, or a GlobalAddress,
  /// i.e. V. Return true if it is possible.
  bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
+                              const X86AddressMode &AM, bool Aligned) {
    // Get opcode and regclass of the output for the given store instruction.
    unsigned Opc = 0;
    switch (VT.getSimpleVT().SimpleTy) {
@@ -243,8 +246,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
      // Mask out all but lowest bit.
      unsigned AndResult = createResultReg(&X86::GR8RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
-    Val = AndResult;
+            TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+    ValReg = AndResult;
    }
    // FALLTHROUGH, handling i1 as i8.
    case MVT::i8:  Opc = X86::MOV8mr;  break;
@@ -260,26 +263,35 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
            (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
      break;
    case MVT::v4f32:
-    Opc = X86::MOVAPSmr;
+    if (Aligned)
+      Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+    else
+      Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
      break;
    case MVT::v2f64:
-    Opc = X86::MOVAPDmr;
+    if (Aligned)
+      Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
+    else
+      Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
      break;
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v8i16:
    case MVT::v16i8:
-    Opc = X86::MOVDQAmr;
+    if (Aligned)
+      Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
+    else
+      Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
      break;
    }
  
    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-                         DL, TII.get(Opc)), AM).addReg(Val);
+                         DL, TII.get(Opc)), AM).addReg(ValReg);
    return true;
  }
  
  bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
-                                   const X86AddressMode &AM) {
+                                   const X86AddressMode &AM, bool Aligned) {
    // Handle 'null' like i32/i64 0.
    if (isa<ConstantPointerNull>(Val))
      Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
@@ -314,7 +326,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
    if (ValReg == 0)
      return false;
  
-  return X86FastEmitStore(VT, ValReg, AM);
+  return X86FastEmitStore(VT, ValReg, AM, Aligned);
  }
  
  /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@@ -688,6 +700,10 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
    if (S->isAtomic())
      return false;
  
+  unsigned SABIAlignment =
+    TD.getABITypeAlignment(S->getValueOperand()->getType());
+  bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+
    MVT VT;
    if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
      return false;
@@ -696,7 +712,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
    if (!X86SelectAddress(I->getOperand(1), AM))
      return false;
  
-  return X86FastEmitStore(VT, I->getOperand(0), AM);
+  return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
  }
  
  /// X86SelectRet - Select and emit code to implement ret instructions.
@@ -712,10 +728,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
    CallingConv::ID CC = F.getCallingConv();
    if (CC != CallingConv::C &&
        CC != CallingConv::Fast &&
-      CC != CallingConv::X86_FastCall)
+      CC != CallingConv::X86_FastCall &&
+      CC != CallingConv::X86_64_SysV)
      return false;
  
-  if (Subtarget->isTargetWin64())
+  if (Subtarget->isCallingConvWin64(CC))
      return false;
  
    // Don't handle popping bytes on return for now.
@@ -1360,11 +1377,11 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
        // fit neatly into the table above.
        if (VT.SimpleTy == MVT::i16) {
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                TII.get(TargetOpcode::COPY), TypeEntry.HighInReg)
+                TII.get(Copy), TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
        } else if (VT.SimpleTy == MVT::i32) {
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                TII.get(TargetOpcode::COPY), TypeEntry.HighInReg)
+                TII.get(Copy), TypeEntry.HighInReg)
              .addReg(Zero32);
        } else if (VT.SimpleTy == MVT::i64) {
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -1376,10 +1393,37 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
    // Generate the DIV/IDIV instruction.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
-  // Copy output register into result register.
-  unsigned ResultReg = createResultReg(TypeEntry.RC);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-          TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+  // For i8 remainder, we can't reference AH directly, as we'll end
+  // up with bogus copies like %R9B = COPY %AH. Reference AX
+  // instead to prevent AH references in a REX instruction.
+  //
+  // The current assumption of the fast register allocator is that isel
+  // won't generate explicit references to the GPR8_NOREX registers. If
+  // the allocator and/or the backend get enhanced to be more robust in
+  // that regard, this can be, and should be, removed.
+  unsigned ResultReg = 0;
+  if ((I->getOpcode() == Instruction::SRem ||
+       I->getOpcode() == Instruction::URem) &&
+      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
+    unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
+    unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(Copy), SourceSuperReg).addReg(X86::AX);
+
+    // Shift AX right by 8 bits instead of using AH.
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SHR16ri),
+            ResultSuperReg).addReg(SourceSuperReg).addImm(8);
+
+    // Now reference the 8-bit subreg of the result.
+    ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+                                           /*Kill=*/true, X86::sub_8bit);
+  }
+  // Copy the result out of the physreg if we haven't already.
+  if (!ResultReg) {
+    ResultReg = createResultReg(TypeEntry.RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Copy), ResultReg)
+        .addReg(OpEntry.DivRemResultReg);
+  }
    UpdateValueMap(I, ResultReg);
  
    return true;
@@ -1678,9 +1722,6 @@ bool X86FastISel::FastLowerArguments() {
    if (!FuncInfo.CanLowerReturn)
      return false;
  
-  if (Subtarget->isTargetWin64())
-    return false;
-
    const Function *F = FuncInfo.Fn;
    if (F->isVarArg())
      return false;
@@ -1688,7 +1729,10 @@ bool X86FastISel::FastLowerArguments() {
    CallingConv::ID CC = F->getCallingConv();
    if (CC != CallingConv::C)
      return false;
-  
+
+  if (Subtarget->isCallingConvWin64(CC))
+    return false;
+
    if (!Subtarget->is64Bit())
      return false;
    
@@ -1732,8 +1776,6 @@ bool X86FastISel::FastLowerArguments() {
    const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I, ++Idx) {
-    if (I->use_empty())
-      continue;
      bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
      const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
      unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
@@ -1792,8 +1834,10 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
    // Handle only C and fastcc calling conventions for now.
    ImmutableCallSite CS(CI);
    CallingConv::ID CC = CS.getCallingConv();
+  bool isWin64 = Subtarget->isCallingConvWin64(CC);
    if (CC != CallingConv::C && CC != CallingConv::Fast &&
-      CC != CallingConv::X86_FastCall)
+      CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 &&
+      CC != CallingConv::X86_64_SysV)
      return false;
  
    // fastcc with -tailcallopt is intended to provide a guaranteed
@@ -1807,7 +1851,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
  
    // Don't know how to handle Win64 varargs yet.  Nothing special needed for
    // x86-32.  Special handling for x86-64 is implemented.
-  if (isVarArg && Subtarget->isTargetWin64())
+  if (isVarArg && isWin64)
      return false;
  
    // Fast-isel doesn't know about callee-pop yet.
@@ -1937,7 +1981,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
                   I->getParent()->getContext());
  
    // Allocate shadow area for Win64
-  if (Subtarget->isTargetWin64())
+  if (isWin64)
      CCInfo.AllocateStack(32, 8);
  
    CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
@@ -2053,7 +2097,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
              X86::EBX).addReg(Base);
    }
  
-  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+  if (Subtarget->is64Bit() && isVarArg && !isWin64) {
      // Count the number of XMM registers allocated.
      static const uint16_t XMMArgRegs[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
@@ -2122,7 +2166,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
    if (Subtarget->isPICStyleGOT())
      MIB.addReg(X86::EBX, RegState::Implicit);
  
-  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+  if (Subtarget->is64Bit() && isVarArg && !isWin64)
      MIB.addReg(X86::AL, RegState::Implicit);
  
    // Add implicit physical register uses to the call.