From: Hal Finkel Date: Tue, 9 Jul 2013 06:34:51 +0000 (+0000) Subject: PPC: Allocate RS spill slot for unaligned i64 load/store X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=fa55969acb64da32acf6305064c9f6e3c237b74e;p=oota-llvm.git PPC: Allocate RS spill slot for unaligned i64 load/store This fixes another bug found by llvm-stress! If we happen to be doing an i64 load or store into a stack slot that has less than a 4-byte alignment, then the frame-index elimination may need to use an indexed load or store instruction (because the offset may not be a multiple of 4, a requirement of the STD/LD instructions). The extra register needed to hold the offset comes from the register scavenger, and it is possible that the scavenger will need to use an emergency spill slot. As a result, we need to make sure that a spill slot is allocated when doing an i64 load/store into a less-than-4-byte-aligned stack slot. Because test cases for things like this tend to be fairly fragile, I've concatenated a few small bugpoint-reduced test cases together to form the regression test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185907 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1759c0415e5..cf41c02e749 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1030,6 +1030,35 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, return false; } +// If we happen to be doing an i64 load or store into a stack slot that has +// less than a 4-byte alignment, then the frame-index elimination may need to +// use an indexed load or store instruction (because the offset may not be a +// multiple of 4). The extra register needed to hold the offset comes from the +// register scavenger, and it is possible that the scavenger will need to use +// an emergency spill slot. 
As a result, we need to make sure that a spill slot +// is allocated when doing an i64 load/store into a less-than-4-byte-aligned +// stack slot. +static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { + // FIXME: This does not handle the LWA case. + if (VT != MVT::i64) + return; + + // This should not be needed for negative FIs, which come from argument + // lowering, because the ABI should guarentee the necessary alignment. + if (FrameIdx < 0) + return; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + if (Align >= 4) + return; + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasNonRISpills(); +} + /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg. If Aligned is true, only accept displacements @@ -1051,6 +1080,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, Disp = DAG.getTargetConstant(imm, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } @@ -1115,9 +1145,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } Disp = DAG.getTargetConstant(0, getPointerTy()); - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); + } else Base = N; return true; // [r+0] } diff --git a/test/CodeGen/PowerPC/std-unal-fi.ll b/test/CodeGen/PowerPC/std-unal-fi.ll new file mode 100644 index 00000000000..8b9606e1624 --- /dev/null +++ b/test/CodeGen/PowerPC/std-unal-fi.ll @@ -0,0 +1,119 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s 
+target triple = "powerpc64-unknown-linux-gnu" + +define void @autogen_SD4932(i8) { +BB: + %A4 = alloca i8 + %A = alloca <1 x ppc_fp128> + %Shuff = shufflevector <16 x i32> , <16 x i32> , <16 x i32> + br label %CF + +CF: ; preds = %CF80, %CF, %BB + %L5 = load i64* undef + store i8 %0, i8* %A4 + %Shuff7 = shufflevector <16 x i32> , <16 x i32> %Shuff, <16 x i32> + %PC10 = bitcast i8* %A4 to ppc_fp128* + br i1 undef, label %CF, label %CF77 + +CF77: ; preds = %CF81, %CF83, %CF77, %CF + br i1 undef, label %CF77, label %CF82 + +CF82: ; preds = %CF82, %CF77 + %L19 = load i64* undef + store <1 x ppc_fp128> zeroinitializer, <1 x ppc_fp128>* %A + store i8 -65, i8* %A4 + br i1 undef, label %CF82, label %CF83 + +CF83: ; preds = %CF82 + %L34 = load i64* undef + br i1 undef, label %CF77, label %CF81 + +CF81: ; preds = %CF83 + %Shuff43 = shufflevector <16 x i32> %Shuff7, <16 x i32> undef, <16 x i32> + store ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128* %PC10 + br i1 undef, label %CF77, label %CF78 + +CF78: ; preds = %CF78, %CF81 + br i1 undef, label %CF78, label %CF79 + +CF79: ; preds = %CF79, %CF78 + br i1 undef, label %CF79, label %CF80 + +CF80: ; preds = %CF79 + store i64 %L19, i64* undef + %Cmp75 = icmp uge i32 206779, undef + br i1 %Cmp75, label %CF, label %CF76 + +CF76: ; preds = %CF80 + store i64 %L5, i64* undef + store i64 %L34, i64* undef + ret void +} + +define void @autogen_SD88042(i8*, i32*, i8) { +BB: + %A4 = alloca <2 x i1> + %A = alloca <16 x float> + %L = load i8* %0 + %Sl = select i1 false, <16 x float>* %A, <16 x float>* %A + %PC = bitcast <2 x i1>* %A4 to i64* + %Sl27 = select i1 false, i8 undef, i8 %L + br label %CF + +CF: ; preds = %CF78, %CF, %BB + %PC33 = bitcast i32* %1 to i32* + br i1 undef, label %CF, label %CF77 + +CF77: ; preds = %CF80, %CF77, %CF + store <16 x float> zeroinitializer, <16 x float>* %Sl + %L58 = load i32* %PC33 + store i8 0, i8* %0 + br i1 undef, label %CF77, label %CF80 + +CF80: ; preds = %CF77 + store i64 0, i64* %PC + 
%E67 = extractelement <8 x i1> zeroinitializer, i32 1 + br i1 %E67, label %CF77, label %CF78 + +CF78: ; preds = %CF80 + %Cmp73 = icmp eq i32 189865, %L58 + br i1 %Cmp73, label %CF, label %CF76 + +CF76: ; preds = %CF78 + store i8 %2, i8* %0 + store i8 %Sl27, i8* %0 + ret void +} + +define void @autogen_SD37497(i8*, i32*, i64*) { +BB: + %A1 = alloca i1 + %I8 = insertelement <1 x i32> , i32 454855, i32 0 + %Cmp = icmp ult <4 x i64> , undef + %L10 = load i64* %2 + %E11 = extractelement <4 x i1> %Cmp, i32 2 + br label %CF72 + +CF72: ; preds = %CF74, %CF72, %BB + store double 0xB47BB29A53790718, double* undef + %E18 = extractelement <1 x i32> , i32 0 + %FC22 = sitofp <1 x i32> %I8 to <1 x float> + br i1 undef, label %CF72, label %CF74 + +CF74: ; preds = %CF72 + store i8 0, i8* %0 + %PC = bitcast i1* %A1 to i64* + %L31 = load i64* %PC + store i64 477323, i64* %PC + %Sl37 = select i1 false, i32* undef, i32* %1 + %Cmp38 = icmp ugt i1 undef, undef + br i1 %Cmp38, label %CF72, label %CF73 + +CF73: ; preds = %CF74 + store i64 %L31, i64* %PC + %B55 = fdiv <1 x float> undef, %FC22 + %Sl63 = select i1 %E11, i32* undef, i32* %Sl37 + store i32 %E18, i32* %Sl63 + store i64 %L10, i64* %PC + ret void +}