From: Juergen Ributzka
Date: Tue, 22 Jul 2014 23:14:58 +0000 (+0000)
Subject: [FastIsel][AArch64] Add support for the FastLowerCall and FastLowerIntrinsicCall...
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=7edf396977fc56c786e6375a4cec93ff6d791d20;p=oota-llvm.git

[FastIsel][AArch64] Add support for the FastLowerCall and FastLowerIntrinsicCall target-hooks.

This commit modifies the existing call lowering functions to be used as the
FastLowerCall and FastLowerIntrinsicCall target-hooks instead.

This enables patchpoint intrinsic lowering for AArch64.

This fixes

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213704 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 2164d77b790..f621246a7b3 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -89,6 +89,9 @@ class AArch64FastISel : public FastISel {
   const AArch64Subtarget *Subtarget;
   LLVMContext *Context;
 
+  bool FastLowerCall(CallLoweringInfo &CLI) override;
+  bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
+
 private:
   // Selection routines.
   bool SelectLoad(const Instruction *I);
@@ -102,8 +105,6 @@ private:
   bool SelectFPToInt(const Instruction *I, bool Signed);
   bool SelectIntToFP(const Instruction *I, bool Signed);
   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
-  bool SelectCall(const Instruction *I, const char *IntrMemName);
-  bool SelectIntrinsicCall(const IntrinsicInst &I);
   bool SelectRet(const Instruction *I);
   bool SelectTrunc(const Instruction *I);
   bool SelectIntExt(const Instruction *I);
@@ -135,14 +136,9 @@ private:
   // Call handling routines.
 private:
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
-  bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
-                       SmallVectorImpl<unsigned> &ArgRegs,
-                       SmallVectorImpl<MVT> &ArgVTs,
-                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
-                       SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
+  bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                        unsigned &NumBytes);
-  bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
-                  const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
+  bool FinishCall(CallLoweringInfo &CLI, unsigned NumBytes);
 
 public:
   // Backend specific FastISel code.
@@ -1192,14 +1188,13 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
   return true;
 }
 
-bool AArch64FastISel::ProcessCallArgs(
-    SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
-    SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
-    SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
-    unsigned &NumBytes) {
+bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
+                                      SmallVectorImpl<MVT> &OutVTs,
+                                      unsigned &NumBytes) {
+  CallingConv::ID CC = CLI.CallConv;
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
-  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
 
   // Get a count of how many bytes are to be pushed on the stack.
   NumBytes = CCInfo.getNextStackOffset();
@@ -1207,13 +1202,17 @@ bool AArch64FastISel::ProcessCallArgs(
   // Issue CALLSEQ_START
   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
-    .addImm(NumBytes);
+      .addImm(NumBytes);
 
   // Process the args.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    unsigned Arg = ArgRegs[VA.getValNo()];
-    MVT ArgVT = ArgVTs[VA.getValNo()];
+    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
+    MVT ArgVT = OutVTs[VA.getValNo()];
+
+    unsigned ArgReg = getRegForValue(ArgVal);
+    if (!ArgReg)
+      return false;
 
     // Handle arg promotion: SExt, ZExt, AExt.
     switch (VA.getLocInfo()) {
@@ -1222,8 +1221,8 @@ bool AArch64FastISel::ProcessCallArgs(
     case CCValAssign::SExt: {
       MVT DestVT = VA.getLocVT();
       MVT SrcVT = ArgVT;
-      Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
-      if (Arg == 0)
+      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
+      if (!ArgReg)
         return false;
       break;
     }
@@ -1232,8 +1231,8 @@ bool AArch64FastISel::ProcessCallArgs(
     case CCValAssign::ZExt: {
       MVT DestVT = VA.getLocVT();
       MVT SrcVT = ArgVT;
-      Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
-      if (Arg == 0)
+      ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
+      if (!ArgReg)
         return false;
       break;
     }
@@ -1244,8 +1243,8 @@ bool AArch64FastISel::ProcessCallArgs(
     // Now copy/store arg to correct locations.
     if (VA.isRegLoc() && !VA.needsCustom()) {
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
-      RegArgs.push_back(VA.getLocReg());
+              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+      CLI.OutRegs.push_back(VA.getLocReg());
     } else if (VA.needsCustom()) {
       // FIXME: Handle custom args.
       return false;
@@ -1264,21 +1263,21 @@ bool AArch64FastISel::ProcessCallArgs(
       Addr.setReg(AArch64::SP);
       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
 
-      if (!EmitStore(ArgVT, Arg, Addr))
+      if (!EmitStore(ArgVT, ArgReg, Addr))
         return false;
     }
   }
   return true;
 }
 
-bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
-                                 const Instruction *I, CallingConv::ID CC,
-                                 unsigned &NumBytes) {
+bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
+  CallingConv::ID CC = CLI.CallConv;
+  MVT RetVT = MVT::getVT(CLI.RetTy);
+
   // Issue CALLSEQ_END
   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
-    .addImm(NumBytes)
-    .addImm(0);
+      .addImm(NumBytes).addImm(0);
 
   // Now the return value.
   if (RetVT != MVT::isVoid) {
@@ -1294,134 +1293,84 @@ bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
     MVT CopyVT = RVLocs[0].getValVT();
     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(RVLocs[0].getLocReg());
-    UsedRegs.push_back(RVLocs[0].getLocReg());
+            TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(RVLocs[0].getLocReg());
+    CLI.InRegs.push_back(RVLocs[0].getLocReg());
 
-    // Finally update the result.
-    UpdateValueMap(I, ResultReg);
+    CLI.ResultReg = ResultReg;
+    CLI.NumResultRegs = 1;
   }
 
   return true;
 }
 
-bool AArch64FastISel::SelectCall(const Instruction *I,
-                                 const char *IntrMemName = nullptr) {
-  const CallInst *CI = cast<CallInst>(I);
-  const Value *Callee = CI->getCalledValue();
-
-  // Don't handle inline asm or intrinsics.
-  if (isa<InlineAsm>(Callee))
-    return false;
+bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
+  CallingConv::ID CC = CLI.CallConv;
+  bool IsVarArg = CLI.IsVarArg;
+  const Value *Callee = CLI.Callee;
+  const char *SymName = CLI.SymName;
 
   // Only handle global variable Callees.
   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
   if (!GV)
     return false;
 
-  // Check the calling convention.
-  ImmutableCallSite CS(CI);
-  CallingConv::ID CC = CS.getCallingConv();
-
-  // Let SDISel handle vararg functions.
-  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
-  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  if (FTy->isVarArg())
+  if (IsVarArg)
     return false;
 
-  // Handle *simple* calls for now.
+  // FIXME: Only handle *simple* calls for now.
   MVT RetVT;
-  Type *RetTy = I->getType();
-  if (RetTy->isVoidTy())
+  if (CLI.RetTy->isVoidTy())
     RetVT = MVT::isVoid;
-  else if (!isTypeLegal(RetTy, RetVT))
+  else if (!isTypeLegal(CLI.RetTy, RetVT))
     return false;
 
-  // Set up the argument vectors.
-  SmallVector<Value *, 8> Args;
-  SmallVector<unsigned, 8> ArgRegs;
-  SmallVector<MVT, 8> ArgVTs;
-  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
-  Args.reserve(CS.arg_size());
-  ArgRegs.reserve(CS.arg_size());
-  ArgVTs.reserve(CS.arg_size());
-  ArgFlags.reserve(CS.arg_size());
-
-  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
-       i != e; ++i) {
-    // If we're lowering a memory intrinsic instead of a regular call, skip the
-    // last two arguments, which shouldn't be passed to the underlying function.
-    if (IntrMemName && e - i <= 2)
-      break;
-
-    unsigned Arg = getRegForValue(*i);
-    if (Arg == 0)
+  for (auto Flag : CLI.OutFlags)
+    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
       return false;
 
-    ISD::ArgFlagsTy Flags;
-    unsigned AttrInd = i - CS.arg_begin() + 1;
-    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
-      Flags.setSExt();
-    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
-      Flags.setZExt();
-
-    // FIXME: Only handle *easy* calls for now.
-    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
-        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
-        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
-        CS.paramHasAttr(AttrInd, Attribute::ByVal))
-      return false;
+  // Set up the argument vectors.
+  SmallVector<MVT, 16> OutVTs;
+  OutVTs.reserve(CLI.OutVals.size());
 
-    MVT ArgVT;
-    Type *ArgTy = (*i)->getType();
-    if (!isTypeLegal(ArgTy, ArgVT) &&
-        !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
+  for (auto *Val : CLI.OutVals) {
+    MVT VT;
+    if (!isTypeLegal(Val->getType(), VT) &&
+        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
       return false;
 
     // We don't handle vector parameters yet.
-    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+    if (VT.isVector() || VT.getSizeInBits() > 64)
       return false;
 
-    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
-    Flags.setOrigAlign(OriginalAlignment);
-
-    Args.push_back(*i);
-    ArgRegs.push_back(Arg);
-    ArgVTs.push_back(ArgVT);
-    ArgFlags.push_back(Flags);
+    OutVTs.push_back(VT);
   }
 
   // Handle the arguments now that we've gotten them.
-  SmallVector<unsigned, 4> RegArgs;
   unsigned NumBytes;
-  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+  if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
     return false;
 
   // Issue the call.
   MachineInstrBuilder MIB;
   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
-  if (!IntrMemName)
+  CLI.Call = MIB;
+  if (!SymName)
     MIB.addGlobalAddress(GV, 0, 0);
   else
-    MIB.addExternalSymbol(IntrMemName, 0);
+    MIB.addExternalSymbol(SymName, 0);
 
   // Add implicit physical register uses to the call.
-  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
-    MIB.addReg(RegArgs[i], RegState::Implicit);
+  for (auto Reg : CLI.OutRegs)
+    MIB.addReg(Reg, RegState::Implicit);
 
   // Add a register mask with the call-preserved registers.
   // Proper defs for return values will be added by setPhysRegsDeadExcept().
-  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+  MIB.addRegMask(TRI.getCallPreservedMask(CC));
 
   // Finish off the call including any return values.
-  SmallVector<unsigned, 4> UsedRegs;
-  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
-    return false;
-
-  // Set all unused physreg defs as dead.
-  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
-
-  return true;
+  return FinishCall(CLI, NumBytes);
 }
 
 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
@@ -1486,62 +1435,62 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
   return true;
 }
 
-bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
   // FIXME: Handle more intrinsics.
-  switch (I.getIntrinsicID()) {
+  switch (II->getIntrinsicID()) {
   default: return false;
   case Intrinsic::memcpy:
   case Intrinsic::memmove: {
-    const MemTransferInst &MTI = cast<MemTransferInst>(I);
+    const auto *MTI = cast<MemTransferInst>(II);
     // Don't handle volatile.
-    if (MTI.isVolatile())
+    if (MTI->isVolatile())
      return false;
 
     // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
     // we would emit dead code because we don't currently handle memmoves.
-    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
-    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
+    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
       // Small memcpy's are common enough that we want to do them without a call
      // if possible.
-      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
-      unsigned Alignment = MTI.getAlignment();
+      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
+      unsigned Alignment = MTI->getAlignment();
       if (IsMemCpySmall(Len, Alignment)) {
         Address Dest, Src;
-        if (!ComputeAddress(MTI.getRawDest(), Dest) ||
-            !ComputeAddress(MTI.getRawSource(), Src))
+        if (!ComputeAddress(MTI->getRawDest(), Dest) ||
+            !ComputeAddress(MTI->getRawSource(), Src))
           return false;
         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
           return true;
       }
     }
 
-    if (!MTI.getLength()->getType()->isIntegerTy(64))
+    if (!MTI->getLength()->getType()->isIntegerTy(64))
       return false;
 
-    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
       // Fast instruction selection doesn't support the special
      // address spaces.
      return false;
 
-    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
-    return SelectCall(&I, IntrMemName);
+    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
+    return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
   }
   case Intrinsic::memset: {
-    const MemSetInst &MSI = cast<MemSetInst>(I);
+    const MemSetInst *MSI = cast<MemSetInst>(II);
     // Don't handle volatile.
-    if (MSI.isVolatile())
+    if (MSI->isVolatile())
       return false;
 
-    if (!MSI.getLength()->getType()->isIntegerTy(64))
+    if (!MSI->getLength()->getType()->isIntegerTy(64))
       return false;
 
-    if (MSI.getDestAddressSpace() > 255)
+    if (MSI->getDestAddressSpace() > 255)
       // Fast instruction selection doesn't support the special
       // address spaces.
       return false;
 
-    return SelectCall(&I, "memset");
+    return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
   }
   case Intrinsic::trap: {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
@@ -1966,10 +1915,6 @@ bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
     return SelectRem(I, ISD::SREM);
   case Instruction::URem:
     return SelectRem(I, ISD::UREM);
-  case Instruction::Call:
-    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-      return SelectIntrinsicCall(*II);
-    return SelectCall(I);
   case Instruction::Ret:
     return SelectRet(I);
   case Instruction::Trunc:
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
index 9137a36ae21..b48e3574756 100644
--- a/test/CodeGen/AArch64/arm64-abi.ll
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -78,7 +78,7 @@ entry:
 ; CHECK-LABEL: fixed_4i
 ; CHECK: str [[REG_1:q[0-9]+]], [sp, #16]
 ; FAST-LABEL: fixed_4i
-; FAST: sub sp, sp, #64
+; FAST: sub sp, sp
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
   %0 = load <4 x i32>* %in, align 16
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
index 8b7685f75db..23e8f42eb88 100644
--- a/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -487,7 +487,7 @@ entry:
 ; CHECK: str {{w[0-9]+}}, [sp, #16]
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
 ; FAST-LABEL: i128_split
-; FAST: sub sp, sp, #48
+; FAST: sub sp, sp
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
 ; Load/Store opt is disabled with -O0, so the i128 is split.
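
Editor's note (not part of the commit): the sketch below illustrates how a FastISel subclass of this era would wire up the two hooks the patch introduces. The class name MyTargetFastISel is hypothetical; the constructor and the other required overrides of a real backend (e.g. TargetSelectInstruction) are omitted, and only CallLoweringInfo fields and the LowerCallTo helper visible in the diff above are used.

#include "llvm/CodeGen/FastISel.h"
#include "llvm/IR/IntrinsicInst.h"

namespace {
// Hypothetical target selector; a real backend also implements
// TargetSelectInstruction and forwards a constructor to FastISel.
class MyTargetFastISel : public llvm::FastISel {
public:
  // Invoked by the generic selector for ordinary call instructions.
  bool FastLowerCall(CallLoweringInfo &CLI) override {
    // Returning false punts the call back to the default lowering path
    // (ultimately SelectionDAG at -O0).
    if (CLI.IsVarArg)
      return false;
    // A real implementation classifies CLI.OutVals / CLI.OutFlags, emits
    // the call, records it in CLI.Call, and reports results through
    // CLI.ResultReg / CLI.NumResultRegs.
    return false;
  }

  // Invoked for intrinsic calls; memory intrinsics can be forwarded to a
  // libcall with LowerCallTo, dropping the trailing alignment/volatile
  // operands just as the AArch64 code above does.
  bool FastLowerIntrinsicCall(const llvm::IntrinsicInst *II) override {
    if (II->getIntrinsicID() == llvm::Intrinsic::memset)
      return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
    return false;
  }
};
} // end anonymous namespace

Returning false from either hook simply falls back to the generic path, which is why the AArch64 implementation can bail out early for cases it does not model yet (varargs, byval/sret/nest/inreg arguments, vector parameters) without affecting correctness.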