/// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
/// registers. If this is not possible, return zero. Otherwise, return the first
/// register of the block that were allocated, marking the entire block as allocated.
- unsigned AllocateRegBlock(const uint16_t *Regs, unsigned NumRegs, unsigned RegsRequired) {
- for (unsigned StartIdx = 0; StartIdx <= NumRegs - RegsRequired; ++StartIdx) {
+ unsigned AllocateRegBlock(ArrayRef<const uint16_t> Regs,
+ unsigned RegsRequired) {
+ if (RegsRequired > Regs.size())
+ return 0;
+
+ for (unsigned StartIdx = 0; StartIdx <= Regs.size() - RegsRequired;
+ ++StartIdx) {
bool BlockAvailable = true;
// Check for already-allocated regs in this block
for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
}
+ unsigned getStackAlignment() const { return StackNaturalAlign; }
+
bool hasMicrosoftFastStdCallMangling() const {
return ManglingMode == MM_WINCOFF;
}
--- /dev/null
+//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the AArch64 Calling Convention
+// that aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace {
+using namespace llvm;
+
+static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+ AArch64::S3, AArch64::S4, AArch64::S5,
+ AArch64::S6, AArch64::S7};
+static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+ AArch64::D3, AArch64::D4, AArch64::D5,
+ AArch64::D6, AArch64::D7};
+static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
+
+static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
+ MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
+ CCState &State, unsigned SlotAlign) {
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned StackAlign = State.getMachineFunction()
+ .getSubtarget()
+ .getDataLayout()
+ ->getStackAlignment();
+ unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+ State.addLoc(It);
+ SlotAlign = 1;
+ }
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+ return true;
+}
+
+/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
+/// [N x Ty] type must still be contiguous in memory though.
+static bool CC_AArch64_Custom_Stack_Block(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+}
+
+/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
+/// registers. If no such sequence is available, mark the rest of the registers
+/// of that type as used and place the argument on the stack.
+static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ ArrayRef<const uint16_t> RegList;
+ if (LocVT.SimpleTy == MVT::i64)
+ RegList = XRegList;
+ else if (LocVT.SimpleTy == MVT::f32)
+ RegList = SRegList;
+ else if (LocVT.SimpleTy == MVT::f64)
+ RegList = DRegList;
+ else if (LocVT.SimpleTy == MVT::v2f64)
+ RegList = QRegList;
+ else {
+ // Not an array we want to split up after all.
+ return false;
+ }
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (auto &It : PendingMembers) {
+ It.convertToReg(RegResult);
+ State.addLoc(It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Mark all regs in the class as unavailable
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
+}
+
+}
+
+#endif
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,
+
// Handle all scalar types as either i64 or f64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
CCIfType<[f16, f32], CCPromoteToType<f64>>,
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
+#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
Val, Stxr->getFunctionType()->getParamType(0)),
Addr);
}
+
+bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ return Ty->isArrayTy();
+}
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+
+ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+ CallingConv::ID CallConv,
+ bool isVarArg) const;
};
namespace AArch64 {
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- const uint16_t *RegList;
- unsigned NumRegs;
+ ArrayRef<const uint16_t> RegList;
switch (LocVT.SimpleTy) {
case MVT::f32:
RegList = SRegList;
- NumRegs = 16;
break;
case MVT::f64:
RegList = DRegList;
- NumRegs = 8;
break;
case MVT::v2f64:
RegList = QRegList;
- NumRegs = 4;
break;
default:
llvm_unreachable("Unexpected member type for HA");
}
unsigned RegResult =
- State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
+ State.AllocateRegBlock(RegList, PendingHAMembers.size());
if (RegResult) {
for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
--- /dev/null
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
+
+declare void @callee(...)
+
+define float @test_hfa_regs(float, [2 x float] %in) {
+; CHECK-LABEL: test_hfa_regs:
+; CHECK: fadd s0, s1, s2
+
+ %lhs = extractvalue [2 x float] %in, 0
+ %rhs = extractvalue [2 x float] %in, 1
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+; Check that the array gets allocated to a contiguous block on the stack (rather
+; than the default of 2 8-byte slots).
+define float @test_hfa_block([7 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_block:
+; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+ %lhs = extractvalue [2 x float] %in, 0
+ %rhs = extractvalue [2 x float] %in, 1
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+; Check that an HFA prevents backfilling of VFP registers (i.e. %rhs must go on
+; the stack rather than in s7).
+define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
+; CHECK-LABEL: test_hfa_block_consume:
+; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+ %lhs = extractvalue [2 x float] %in, 0
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_stackalign:
+; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
+; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+ %lhs = extractvalue [2 x float] %in, 0
+ %rhs = extractvalue [2 x float] %in, 1
+ %sum = fadd float %lhs, %rhs
+ ret float %sum
+}
+
+; An HFA that ends up on the stack should not have any effect on where
+; integer-based arguments go.
+define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
+; CHECK-LABEL: test_hfa_ignores_gprs:
+; CHECK: mov x0, x1
+ ret i64 %res
+}
+
+; [2 x float] should not be promoted to double by the Darwin varargs handling,
+; but should go in an 8-byte aligned slot.
+define void @test_varargs_stackalign() {
+; CHECK-LABEL: test_varargs_stackalign:
+; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
+
+ call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
+ ret void
+}
+
+define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
+; CHECK-LABEL: test_smallstruct_block:
+; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
+; CHECK: add x0, [[LHS]], [[RHS]]
+ %lhs = extractvalue [2 x i64] %in, 0
+ %rhs = extractvalue [2 x i64] %in, 1
+ %sum = add i64 %lhs, %rhs
+ ret i64 %sum
+}
+
+; Check that a small struct prevents backfilling of registers (i.e. %rhs
+; must go on the stack rather than in x7).
+define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
+; CHECK-LABEL: test_smallstruct_block_consume:
+; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
+; CHECK: add x0, [[LHS]], [[RHS]]
+
+ %lhs = extractvalue [2 x i64] %in, 0
+ %sum = add i64 %lhs, %rhs
+ ret i64 %sum
+}
; If there are non-variadic arguments on the stack (here two i64s) then the
; __stack field should point just past them.
-define void @test_offsetstack([10 x i64], [3 x float], ...) {
+define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
; CHECK-LABEL: test_offsetstack:
; CHECK: sub sp, sp, #80
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96