[AArch64][FastISel] Don't even try to select vector icmps.

[oota-llvm.git] / lib / Target / AArch64 / AArch64FastISel.cpp
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp

index 1f9882ed31ea300a6bd71aeb91aac77ec0e8f105..284f5263f90e9748c3a8b59c90231cddb619ad95 100644 (file)
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -523,7 +523,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
      U = C;
    }
  
-  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
      if (Ty->getAddressSpace() > 255)
        // Fast instruction selection doesn't support the special
        // address spaces.
@@ -969,7 +969,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  
    // Cannot encode an offset register and an immediate offset in the same
    // instruction. Fold the immediate offset into the load/store instruction and
-  // emit an additonal add to take care of the offset register.
+  // emit an additional add to take care of the offset register.
    if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
      RegisterOffsetNeedsLowering = true;
  
@@ -1058,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr,
      // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
      // and alignment should be based on the VT.
      MMO = FuncInfo.MF->getMachineMemOperand(
-      MachinePointerInfo::getFixedStack(FI, Offset), Flags,
-      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
      // Now add the rest of the operands.
      MIB.addFrameIndex(FI).addImm(Offset);
    } else {
@@ -1178,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
    }
  
    // Check if the mul can be folded into the instruction.
-  if (RHS->hasOneUse() && isValueAvailable(RHS))
+  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
      if (isMulPowOf2(RHS)) {
        const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
        const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
@@ -1193,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(MulLHS);
-      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
-                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
+      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
+                                WantResult);
+      if (ResultReg)
+        return ResultReg;
      }
+  }
  
    // Check if the shift can be folded into the instruction.
-  if (RHS->hasOneUse() && isValueAvailable(RHS))
+  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
      if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
        if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
          AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
@@ -1214,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
            if (!RHSReg)
              return 0;
            bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
-          return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
-                               RHSIsKill, ShiftType, ShiftVal, SetFlags,
-                               WantResult);
+          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
+                                    WantResult);
+          if (ResultReg)
+            return ResultReg;
          }
        }
      }
+  }
  
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
@@ -1323,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
    if (RetVT != MVT::i32 && RetVT != MVT::i64)
      return 0;
  
+  // Don't deal with undefined shifts.
+  if (ShiftImm >= RetVT.getSizeInBits())
+    return 0;
+
    static const unsigned OpcTable[2][2][2] = {
      { { AArch64::SUBWrs,  AArch64::SUBXrs  },
        { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
@@ -1360,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
    if (RetVT != MVT::i32 && RetVT != MVT::i64)
      return 0;
  
+  if (ShiftImm >= 4)
+    return 0;
+
    static const unsigned OpcTable[2][2][2] = {
      { { AArch64::SUBWrx,  AArch64::SUBXrx  },
        { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
@@ -1542,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
      return ResultReg;
  
    // Check if the mul can be folded into the instruction.
-  if (RHS->hasOneUse() && isValueAvailable(RHS))
+  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
      if (isMulPowOf2(RHS)) {
        const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
        const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
@@ -1558,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(MulLHS);
-      return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
-                              RHSIsKill, ShiftVal);
+      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+                                   RHSIsKill, ShiftVal);
+      if (ResultReg)
+        return ResultReg;
      }
+  }
  
    // Check if the shift can be folded into the instruction.
-  if (RHS->hasOneUse() && isValueAvailable(RHS))
+  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
      if (const auto *SI = dyn_cast<ShlOperator>(RHS))
        if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
          uint64_t ShiftVal = C->getZExtValue();
@@ -1571,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
-        return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
-                                RHSIsKill, ShiftVal);
+        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+                                     RHSIsKill, ShiftVal);
+        if (ResultReg)
+          return ResultReg;
        }
+  }
  
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
@@ -1646,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
      { AArch64::ORRWrs, AArch64::ORRXrs },
      { AArch64::EORWrs, AArch64::EORXrs }
    };
+
+  // Don't deal with undefined shifts.
+  if (ShiftImm >= RetVT.getSizeInBits())
+    return 0;
+
    const TargetRegisterClass *RC;
    unsigned Opc;
    switch (RetVT.SimpleTy) {
@@ -2235,14 +2260,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
      MIB.addImm(TestBit);
    MIB.addMBB(TBB);
  
-  // Obtain the branch weight and add the TrueBB to the successor list.
-  uint32_t BranchWeight = 0;
-  if (FuncInfo.BPI)
-    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                               TBB->getBasicBlock());
-  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-  fastEmitBranch(FBB, DbgLoc);
-
+  finishCondBranch(BI->getParent(), TBB, FBB);
    return true;
  }
  
@@ -2317,14 +2335,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
            .addImm(CC)
            .addMBB(TBB);
  
-      // Obtain the branch weight and add the TrueBB to the successor list.
-      uint32_t BranchWeight = 0;
-      if (FuncInfo.BPI)
-        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                  TBB->getBasicBlock());
-      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-      fastEmitBranch(FBB, DbgLoc);
+      finishCondBranch(BI->getParent(), TBB, FBB);
        return true;
      }
    } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -2355,14 +2366,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
            .addImm(CC)
            .addMBB(TBB);
  
-      // Obtain the branch weight and add the TrueBB to the successor list.
-      uint32_t BranchWeight = 0;
-      if (FuncInfo.BPI)
-        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                  TBB->getBasicBlock());
-      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-      fastEmitBranch(FBB, DbgLoc);
+      finishCondBranch(BI->getParent(), TBB, FBB);
        return true;
      }
    } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
@@ -2372,11 +2376,12 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
          .addMBB(Target);
  
      // Obtain the branch weight and add the target to the successor list.
-    uint32_t BranchWeight = 0;
-    if (FuncInfo.BPI)
-      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                 Target->getBasicBlock());
-    FuncInfo.MBB->addSuccessor(Target, BranchWeight);
+    if (FuncInfo.BPI) {
+      uint32_t BranchWeight =
+          FuncInfo.BPI->getEdgeWeight(BI->getParent(), Target->getBasicBlock());
+      FuncInfo.MBB->addSuccessor(Target, BranchWeight);
+    } else
+      FuncInfo.MBB->addSuccessorWithoutWeight(Target);
      return true;
    } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
      // Fake request the condition, otherwise the intrinsic might be completely
@@ -2390,14 +2395,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
        .addImm(CC)
        .addMBB(TBB);
  
-    // Obtain the branch weight and add the TrueBB to the successor list.
-    uint32_t BranchWeight = 0;
-    if (FuncInfo.BPI)
-      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                 TBB->getBasicBlock());
-    FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-    fastEmitBranch(FBB, DbgLoc);
+    finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  
@@ -2413,7 +2411,11 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
    // Regardless, the compare has been done in the predecessor block,
    // and it left a value for us in a virtual register.  Ergo, we test
    // the one-bit value left in the virtual register.
-  emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
+  //
+  // FIXME: Optimize this with TBZW/TBZNW.
+  unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondRegIsKill, 1);
+  assert(ANDReg && "Unexpected AND instruction emission failure.");
+  emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
  
    if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
      std::swap(TBB, FBB);
@@ -2424,14 +2426,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
        .addImm(CC)
        .addMBB(TBB);
  
-  // Obtain the branch weight and add the TrueBB to the successor list.
-  uint32_t BranchWeight = 0;
-  if (FuncInfo.BPI)
-    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                               TBB->getBasicBlock());
-  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-  fastEmitBranch(FBB, DbgLoc);
+  finishCondBranch(BI->getParent(), TBB, FBB);
    return true;
  }
  
@@ -2447,8 +2442,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
  
    // Make sure the CFG is up-to-date.
-  for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
-    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
+  for (auto *Succ : BI->successors())
+    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
  
    return true;
  }
@@ -2456,6 +2451,10 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
  bool AArch64FastISel::selectCmp(const Instruction *I) {
    const CmpInst *CI = cast<CmpInst>(I);
  
+  // Vectors of i1 are weird: bail out.
+  if (CI->getType()->isVectorTy())
+    return false;
+
    // Try to optimize or fold the cmp.
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    unsigned ResultReg = 0;
@@ -2954,8 +2953,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
      .addImm(NumBytes);
  
    // Process the args.
-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    CCValAssign &VA = ArgLocs[i];
+  for (CCValAssign &VA : ArgLocs) {
      const Value *ArgVal = CLI.OutVals[VA.getValNo()];
      MVT ArgVT = OutVTs[VA.getValNo()];
  
@@ -3018,8 +3016,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
  
        unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
        MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
-        MachinePointerInfo::getStack(Addr.getOffset()),
-        MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
+          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
  
        if (!emitStore(ArgVT, ArgReg, Addr, MMO))
          return false;
@@ -3318,8 +3316,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
      return false;
  
    // Make sure nothing is in the way
-  BasicBlock::const_iterator Start = I;
-  BasicBlock::const_iterator End = II;
+  BasicBlock::const_iterator Start(I);
+  BasicBlock::const_iterator End(II);
    for (auto Itr = std::prev(Start); Itr != End; --Itr) {
      // We only expect extractvalue instructions between the intrinsic and the
      // instruction to be selected.
@@ -3763,8 +3761,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
  
    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                      TII.get(AArch64::RET_ReallyLR));
-  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
-    MIB.addReg(RetRegs[i], RegState::Implicit);
+  for (unsigned RetReg : RetRegs)
+    MIB.addReg(RetReg, RegState::Implicit);
    return true;
  }
  
@@ -3795,34 +3793,41 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) {
      return false;
    bool SrcIsKill = hasTrivialKill(Op);
  
-  // If we're truncating from i64/i32 to a smaller non-legal type then generate
-  // an AND.
-  uint64_t Mask = 0;
-  switch (DestVT.SimpleTy) {
-  default:
-    // Trunc i64 to i32 is handled by the target-independent fast-isel.
-    return false;
-  case MVT::i1:
-    Mask = 0x1;
-    break;
-  case MVT::i8:
-    Mask = 0xff;
-    break;
-  case MVT::i16:
-    Mask = 0xffff;
-    break;
-  }
+  // If we're truncating from i64 to a smaller non-legal type then generate an
+  // AND. Otherwise, we know the high bits are undefined and a truncate only
+  // generate a COPY. We cannot mark the source register also as result
+  // register, because this can incorrectly transfer the kill flag onto the
+  // source register.
+  unsigned ResultReg;
    if (SrcVT == MVT::i64) {
+    uint64_t Mask = 0;
+    switch (DestVT.SimpleTy) {
+    default:
+      // Trunc i64 to i32 is handled by the target-independent fast-isel.
+      return false;
+    case MVT::i1:
+      Mask = 0x1;
+      break;
+    case MVT::i8:
+      Mask = 0xff;
+      break;
+    case MVT::i16:
+      Mask = 0xffff;
+      break;
+    }
      // Issue an extract_subreg to get the lower 32-bits.
-    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
-                                        AArch64::sub_32);
-    SrcIsKill = true;
+    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+                                                AArch64::sub_32);
+    // Create the AND instruction which performs the actual truncation.
+    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
+    assert(ResultReg && "Unexpected AND instruction emission failure.");
+  } else {
+    ResultReg = createResultReg(&AArch64::GPR32RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(SrcReg, getKillRegState(SrcIsKill));
    }
  
-  // Create the AND instruction which performs the actual truncation.
-  unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, SrcIsKill, Mask);
-  assert(ResultReg && "Unexpected AND instruction emission failure.");
-
    updateValueMap(I, ResultReg);
    return true;
  }