Style; no functional change.

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 82b475a44f38a2eb5a121476337bed1630522223..60a07a474faad0b15e051cc4bfae79883641a882 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -554,6 +554,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
      setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
  
+    // NEON only has FMA instructions as of VFP4.
+    if (!Subtarget->hasVFP4()) {
+      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
+      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
+    }
+
      setTargetDAGCombine(ISD::INTRINSIC_VOID);
      setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
      setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -835,21 +841,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setSchedulingPreference(Sched::Hybrid);
  
    //// temporary - rewrite interface to use type
-  maxStoresPerMemset = 8;
-  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
-  maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
-  maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
-  maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
-  maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+  MaxStoresPerMemset = 8;
+  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
  
    // On ARM arguments smaller than 4 bytes are extended, so all arguments
    // are at least 4 bytes aligned.
    setMinStackArgumentAlignment(4);
  
-  benefitFromCodePlacementOpt = true;
+  BenefitFromCodePlacementOpt = true;
  
    // Prefer likely predicted branches to selects on out-of-order cores.
-  predictableSelectIsExpensive = Subtarget->isLikeA9();
+  PredictableSelectIsExpensive = Subtarget->isLikeA9();
  
    setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
  }
@@ -1928,15 +1934,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                                 isVarArg));
  
-  // If this is the first return lowered for this function, add
-  // the regs to the liveout set for the function.
-  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
-    for (unsigned i = 0; i != RVLocs.size(); ++i)
-      if (RVLocs[i].isRegLoc())
-        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
-  }
-
    SDValue Flag;
+  SmallVector<SDValue, 4> RetOps;
+  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  
    // Copy the result values into the output registers.
    for (unsigned i = 0, realRVLocIdx = 0;
@@ -1965,10 +1965,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
  
          Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
          Flag = Chain.getValue(1);
+        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
          VA = RVLocs[++i]; // skip ahead to next loc
          Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                   HalfGPRs.getValue(1), Flag);
          Flag = Chain.getValue(1);
+        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
          VA = RVLocs[++i]; // skip ahead to next loc
  
          // Extract the 2nd half and fall through to handle it as an f64 value.
@@ -1981,6 +1983,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
                                    DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
        Flag = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                                 Flag);
@@ -1990,15 +1993,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
      // Guarantee that all emitted copies are
      // stuck together, avoiding something bad.
      Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  
-  SDValue result;
+  // Update chain and glue.
+  RetOps[0] = Chain;
    if (Flag.getNode())
-    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
-  else // Return Void
-    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+    RetOps.push_back(Flag);
  
-  return result;
+  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
+                     RetOps.data(), RetOps.size());
  }
  
  bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2403,7 +2407,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      EVT PtrVT = getPointerTy();
-    DebugLoc dl = Op.getDebugLoc();
      Reloc::Model RelocM = getTargetMachine().getRelocationModel();
      SDValue CPAddr;
      unsigned PCAdj = (RelocM != Reloc::PIC_)
@@ -2578,7 +2581,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
  }
  
  // The remaining GPRs hold either the beginning of variable-argument
-// data, or the beginning of an aggregate passed by value (usuall
+// data, or the beginning of an aggregate passed by value (usually
  // byval).  Either way, we allocate stack slots adjacent to the data
  // provided by our caller, and store the unallocated registers there.
  // If this is a variadic function, the va_list pointer will begin with
@@ -4296,6 +4299,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
    return true;
  }
  
+/// \return true if \p M reverses the elements of a vector of type \p VT.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+  unsigned NumElts = VT.getVectorNumElements();
+  // The mask must have exactly one entry per element of VT.
+  if (NumElts != M.size())
+      return false;
+
+  // Look for <15, ..., 3, -1, 1, 0>; negative entries match any position.
+  for (unsigned i = 0; i != NumElts; ++i)
+    if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+      return false;
+
+  return true;
+}
+
  // If N is an integer constant that can be moved into a register in one
  // instruction, return an SDValue of such a constant (will become a MOV
  // instruction).  Otherwise return null.
@@ -4691,7 +4709,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
            isVZIPMask(M, VT, WhichResult) ||
            isVTRN_v_undef_Mask(M, VT, WhichResult) ||
            isVUZP_v_undef_Mask(M, VT, WhichResult) ||
-          isVZIP_v_undef_Mask(M, VT, WhichResult));
+          isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+          ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
  }
  
  /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -4795,6 +4814,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
                                   &VTBLMask[0], 8));
  }
  
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+                                                      SelectionDAG &DAG) {
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue OpLHS = Op.getOperand(0);
+  EVT VT = OpLHS.getValueType();
+
+  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+         "Expect an v8i16/v16i8 type");
+  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+  // For a v16i8 type: after the VREV64 we have <7, ..., 0, 15, ..., 8>. Now
+  // use VEXT to move the first 8 bytes into the top double word and the last
+  // 8 bytes into the bottom double word. The v8i16 case is similar (4 lanes).
+  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+                     DAG.getConstant(ExtractNum, MVT::i32));
+}
+
  static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
    SDValue V1 = Op.getOperand(0);
    SDValue V2 = Op.getOperand(1);
@@ -4932,6 +4968,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
      return DAG.getNode(ISD::BITCAST, dl, VT, Val);
    }
  
+  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+    return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
    if (VT == MVT::v8i8) {
      SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
      if (NewOp.getNode())
@@ -6304,14 +6343,11 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
  
    MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
    unsigned trap_opcode;
-  if (Subtarget->isThumb()) {
+  if (Subtarget->isThumb())
      trap_opcode = ARM::tTRAP;
-  } else {
-    if (Subtarget->useNaClTrap())
-      trap_opcode = ARM::TRAPNaCl;
-    else
-      trap_opcode = ARM::TRAP;
-  }
+  else
+    trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+
    BuildMI(TrapBB, dl, TII->get(trap_opcode));
    DispatchBB->addSuccessor(TrapBB);