Add some testing for thumb1 and thumb2 inline asm immediate constraints

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index cc9656aa0b4facdb6d3917f41fbbe0954546c573..fc57d83ce1eaec0708a82778b62d617357b03eda 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -143,15 +143,10 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
  
-  if (VT.isInteger()) {
-    setOperationAction(ISD::SABSDIFF, VT, Legal);
-    setOperationAction(ISD::UABSDIFF, VT, Legal);
-  }
    if (!VT.isFloatingPoint() &&
        VT != MVT::v2i64 && VT != MVT::v1i64)
      for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
        setOperationAction(Opcode, VT, Legal);
-
  }
  
  void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -2535,6 +2530,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
    return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
  }
  
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+///
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+///     + "extern __thread" declaration.
+///     + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+///     movw rT1, :lower16:_var$non_lazy_ptr
+///     movt rT1, :upper16:_var$non_lazy_ptr
+///     ldr r0, [rT1]
+///     ldr rT2, [r0]
+///     blx rT2
+///     [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+  SDLoc DL(Op);
+
+  // First step is to get the address of the actual global symbol. This is where
+  // the TLS descriptor lives.
+  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+  // The first entry in the descriptor is a function pointer that we must call
+  // to obtain the address of the variable.
+  SDValue Chain = DAG.getEntryNode();
+  SDValue FuncTLVGet =
+      DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+                  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+                  false, true, true, 4);
+  Chain = FuncTLVGet.getValue(1);
+
+  MachineFunction &F = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = F.getFrameInfo();
+  MFI->setAdjustsStack(true);
+
+  // TLS calls preserve all registers except those that absolutely must be
+  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+  // silly).
+  auto TRI =
+      getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+  // Finally, we can make the call. This is just a degenerate version of a
+  // normal ARM call node: r0 takes the address of the descriptor, and
+  // returns the address of the variable in this thread.
+  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
+  Chain =
+      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
+                  DAG.getRegisterMask(Mask), Chain.getValue(1));
+  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
+}
+
  // Lower ISD::GlobalTLSAddress using the "general dynamic" model
  SDValue
  ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -2636,9 +2697,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
  
  SDValue
  ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+  if (Subtarget->isTargetDarwin())
+    return LowerGlobalTLSAddressDarwin(Op, DAG);
+
    // TODO: implement the "local dynamic" model
-  assert(Subtarget->isTargetELF() &&
-         "TLS not implemented for non-ELF targets");
+  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
    if (DAG.getTarget().Options.EmulatedTLS)
      return LowerToTLSEmulatedModel(GA, DAG);
@@ -7402,6 +7465,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
      }
  
      BB->addSuccessor(DispatchBB, BranchProbability::getZero());
+    BB->normalizeSuccProbs();
  
      // Find the invoke call and mark all of the callee-saved registers as
      // 'implicit defined' so that they're spilled. This prevents code from
@@ -9072,7 +9136,7 @@ static SDValue PerformXORCombine(SDNode *N,
  // their position in "to" (Rd).
  static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
    assert(N->getOpcode() == ARMISD::BFI);
-  
+
    SDValue From = N->getOperand(1);
    ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
    FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
@@ -9133,7 +9197,7 @@ static SDValue FindBFIToCombineWith(SDNode *N) {
      if (BitsProperlyConcatenate(NewToMask, ToMask) &&
          BitsProperlyConcatenate(NewFromMask, FromMask))
        return V;
-    
+
      // We've seen a write to some bits, so track it.
      CombinedToMask |= NewToMask;
      // Keep going...
@@ -9180,7 +9244,7 @@ static SDValue PerformBFICombine(SDNode *N,
      SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
      assert(From1 == From2);
      (void)From2;
-  
+
      // First, unlink CombineBFI.
      DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
      // Then create a new BFI, combining the two together.
@@ -10148,15 +10212,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
      // Don't do anything for most intrinsics.
      break;
  
-  case Intrinsic::arm_neon_vabds:
-    if (!N->getValueType(0).isInteger())
-      return SDValue();
-    return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-  case Intrinsic::arm_neon_vabdu:
-    return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-
    // Vector shifts: check for immediate versions and lower them.
    // Note: This is done during DAG combining instead of DAG legalizing because
    // the build_vectors for 64-bit vector element shift counts are generally
@@ -11420,7 +11475,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
          return;
  
        case 'J':
-        if (Subtarget->isThumb()) {  // FIXME thumb2
+        if (Subtarget->isThumb1Only()) {
            // This must be a constant between -255 and -1, for negated ADD
            // immediates. This can be used in GCC with an "n" modifier that
            // prints the negated value, for use with SUB instructions. It is
@@ -11489,7 +11544,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
          return;
  
        case 'M':
-        if (Subtarget->isThumb()) { // FIXME thumb2
+        if (Subtarget->isThumb1Only()) {
            // This must be a multiple of 4 between 0 and 1020, for
            // ADD sp + immediate.
            if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))