From: Hal Finkel Date: Sat, 29 Mar 2014 05:29:01 +0000 (+0000) Subject: [PowerPC] Add subregister classes for f64 VSX values X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=44b2b9dc1a6192fda90990ec9eec922e3f8d2049;p=oota-llvm.git [PowerPC] Add subregister classes for f64 VSX values We had stored both f64 values and v2f64, etc. values in the VSX registers. This worked, but was suboptimal because we would always spill 16-byte values even though we almost always had scalar 8-byte values. This resulted in an increase in stack-size use, extra memory bandwidth, etc. To fix this, I've added 64-bit subregisters of the Altivec registers, and combined those with the existing scalar floating-point registers to form a class of VSX scalar floating-point registers. The ABI code has also been enhanced to use this register class and some other necessary improvements have been made. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205075 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 5f0109a1f32..8bb91cf0b9c 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -114,6 +114,25 @@ static unsigned VSRegs[64] = { PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 }; +static unsigned VSFRegs[64] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, 
PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; static unsigned CRBITRegs[32] = { PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, @@ -479,6 +498,11 @@ public: Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()])); } + void addRegVSFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()])); + } + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()])); diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 904f871b817..c4a7544d494 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -112,6 +112,26 @@ static const unsigned VSRegs[] = { PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 }; +static const unsigned VSFRegs[] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; + static const unsigned GPRegs[] = { PPC::R0, PPC::R1, PPC::R2, PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, @@ -189,6 +209,12 @@ static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, 
uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, VSRegs); } +static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VSFRegs); +} + static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6a43ecc2500..527430238cb 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -573,7 +573,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); - addRegisterClass(MVT::f64, &PPC::VSRCRegClass); + addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); @@ -2156,7 +2156,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::F4RCRegClass; break; case MVT::f64: - RC = &PPC::F8RCRegClass; + if (PPCSubTarget.hasVSX()) + RC = &PPC::VSFRCRegClass; + else + RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: @@ -2559,7 +2562,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ? + &PPC::VSFRCRegClass : + &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; @@ -8506,8 +8511,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } else if (Constraint == "wc") { // an individual CR bit. 
return std::make_pair(0U, &PPC::CRBITRCRegClass); } else if (Constraint == "wa" || Constraint == "wd" || - Constraint == "wf" || Constraint == "ws") { + Constraint == "wf") { return std::make_pair(0U, &PPC::VSRCRegClass); + } else if (Constraint == "ws") { + return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair R = diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 0f799017916..939bbdc6cc2 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -744,6 +744,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // copies are generated, they are close enough to some use that the // lower-latency form is preferable. Opc = PPC::XXLOR; + else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::XXLORf; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else @@ -815,6 +817,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); NonRI = true; + } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(TM.getSubtargetImpl()->isDarwin() && "VRSAVE only needs spill/restore on Darwin"); @@ -906,6 +914,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg), FrameIdx)); NonRI = true; + } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(TM.getSubtargetImpl()->isDarwin() && "VRSAVE only needs spill/restore on Darwin"); @@ -1638,7 +1650,7 @@ protected: // The addend and this instruction must be in the same block. 
- if (AddendMI->getParent() != MI->getParent()) + if (!AddendMI || AddendMI->getParent() != MI->getParent()) continue; // The addend must be a full copy within the same register class. @@ -1646,9 +1658,18 @@ protected: if (!AddendMI->isFullCopy()) continue; - if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != - MRI.getRegClass(AddendMI->getOperand(1).getReg())) - continue; + unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { + if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != + MRI.getRegClass(AddendSrcReg)) + continue; + } else { + // If AddendSrcReg is a physical register, make sure the destination + // register class contains it. + if (!MRI.getRegClass(AddendMI->getOperand(0).getReg()) + ->contains(AddendSrcReg)) + continue; + } // In theory, there could be other uses of the addend copy before this // fma. We could deal with this, but that would require additional @@ -1678,8 +1699,8 @@ protected: OtherProdOp = 2; } - // If there are no killed product operands, then this transformation is - // likely not profitable. + // If there are no killed product operands, then this transformation is + // likely not profitable. 
if (!KilledProdOp) continue; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 1ece55977a1..14f2e4636ce 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -18,6 +18,13 @@ def vsrc : RegisterOperand { let ParserMatchClass = PPCRegVSRCAsmOperand; } +def PPCRegVSFRCAsmOperand : AsmOperandClass { + let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber"; +} +def vsfrc : RegisterOperand { + let ParserMatchClass = PPCRegVSFRCAsmOperand; +} + multiclass XX3Form_Rcr opcode, bits<7> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { @@ -41,7 +48,7 @@ let Uses = [RM] in { // Load indexed instructions let mayLoad = 1, canFoldAsLoad = 1 in { def LXSDX : XForm_1<31, 588, - (outs vsrc:$XT), (ins memrr:$src), + (outs vsfrc:$XT), (ins memrr:$src), "lxsdx $XT, $src", IIC_LdStLFD, [(set f64:$XT, (load xoaddr:$src))]>; @@ -62,7 +69,7 @@ let Uses = [RM] in { // Store indexed instructions let mayStore = 1 in { def STXSDX : XX1Form<31, 716, - (outs), (ins vsrc:$XT, memrr:$dst), + (outs), (ins vsfrc:$XT, memrr:$dst), "stxsdx $XT, $dst", IIC_LdStSTFD, [(store f64:$XT, xoaddr:$dst)]>; @@ -79,11 +86,11 @@ let Uses = [RM] in { // Add/Mul Instructions let isCommutable = 1 in { def XSADDDP : XX3Form<60, 32, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsadddp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>; def XSMULDP : XX3Form<60, 48, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsmuldp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>; @@ -110,7 +117,7 @@ let Uses = [RM] in { // Subtract Instructions def XSSUBDP : XX3Form<60, 40, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xssubdp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>; @@ -127,14 +134,14 @@ let Uses = [RM] 
in { let BaseName = "XSMADDADP" in { let isCommutable = 1 in def XSMADDADP : XX3Form<60, 33, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmaddadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMADDMDP : XX3Form<60, 41, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; @@ -143,14 +150,14 @@ let Uses = [RM] in { let BaseName = "XSMSUBADP" in { let isCommutable = 1 in def XSMSUBADP : XX3Form<60, 49, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmsubadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMSUBMDP : XX3Form<60, 57, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; @@ -159,14 +166,14 @@ let Uses = [RM] in { let BaseName = "XSNMADDADP" in { let isCommutable = 1 in def XSNMADDADP : XX3Form<60, 161, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmaddadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMADDMDP : XX3Form<60, 169, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 
AltVSXFMARel; @@ -175,14 +182,14 @@ let Uses = [RM] in { let BaseName = "XSNMSUBADP" in { let isCommutable = 1 in def XSNMSUBADP : XX3Form<60, 177, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmsubadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMSUBMDP : XX3Form<60, 185, - (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; @@ -318,28 +325,28 @@ let Uses = [RM] in { // Division Instructions def XSDIVDP : XX3Form<60, 56, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsdivdp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>; def XSSQRTDP : XX2Form<60, 75, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xssqrtdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fsqrt f64:$XB))]>; def XSREDP : XX2Form<60, 90, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsredp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfre f64:$XB))]>; def XSRSQRTEDP : XX2Form<60, 74, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrsqrtedp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; def XSTDIVDP : XX3Form_1<60, 61, - (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xstdivdp $crD, $XA, $XB", IIC_VecFP, []>; def XSTSQRTDP : XX2Form_1<60, 106, - (outs crrc:$crD), (ins vsrc:$XB), + (outs crrc:$crD), (ins vsfrc:$XB), "xstsqrtdp $crD, $XB", IIC_VecFP, []>; def XVDIVDP : XX3Form<60, 120, @@ -394,10 +401,10 @@ let Uses = [RM] in { // Compare Instructions def XSCMPODP : XX3Form_1<60, 43, - (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + (outs 
crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xscmpodp $crD, $XA, $XB", IIC_VecFPCompare, []>; def XSCMPUDP : XX3Form_1<60, 35, - (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), "xscmpudp $crD, $XA, $XB", IIC_VecFPCompare, []>; defm XVCMPEQDP : XX3Form_Rcr<60, 99, @@ -421,19 +428,19 @@ let Uses = [RM] in { // Move Instructions def XSABSDP : XX2Form<60, 345, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsabsdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fabs f64:$XB))]>; def XSNABSDP : XX2Form<60, 361, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsnabsdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fneg (fabs f64:$XB)))]>; def XSNEGDP : XX2Form<60, 377, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsnegdp $XT, $XB", IIC_VecFP, [(set f64:$XT, (fneg f64:$XB))]>; def XSCPSGNDP : XX3Form<60, 176, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xscpsgndp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>; @@ -476,33 +483,33 @@ let Uses = [RM] in { // Conversion Instructions def XSCVDPSP : XX2Form<60, 265, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsp $XT, $XB", IIC_VecFP, []>; def XSCVDPSXDS : XX2Form<60, 344, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsxds $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfctidz f64:$XB))]>; def XSCVDPSXWS : XX2Form<60, 88, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpsxws $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfctiwz f64:$XB))]>; def XSCVDPUXDS : XX2Form<60, 328, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpuxds $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfctiduz f64:$XB))]>; def XSCVDPUXWS : XX2Form<60, 72, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvdpuxws $XT, $XB", IIC_VecFP, [(set f64:$XT, 
(PPCfctiwuz f64:$XB))]>; def XSCVSPDP : XX2Form<60, 329, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvspdp $XT, $XB", IIC_VecFP, []>; def XSCVSXDDP : XX2Form<60, 376, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvsxddp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfcfid f64:$XB))]>; def XSCVUXDDP : XX2Form<60, 360, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xscvuxddp $XT, $XB", IIC_VecFP, [(set f64:$XT, (PPCfcfidu f64:$XB))]>; @@ -568,23 +575,23 @@ let Uses = [RM] in { // Rounding Instructions def XSRDPI : XX2Form<60, 73, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpi $XT, $XB", IIC_VecFP, [(set f64:$XT, (frnd f64:$XB))]>; def XSRDPIC : XX2Form<60, 107, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpic $XT, $XB", IIC_VecFP, [(set f64:$XT, (fnearbyint f64:$XB))]>; def XSRDPIM : XX2Form<60, 121, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpim $XT, $XB", IIC_VecFP, [(set f64:$XT, (ffloor f64:$XB))]>; def XSRDPIP : XX2Form<60, 105, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpip $XT, $XB", IIC_VecFP, [(set f64:$XT, (fceil f64:$XB))]>; def XSRDPIZ : XX2Form<60, 89, - (outs vsrc:$XT), (ins vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XB), "xsrdpiz $XT, $XB", IIC_VecFP, [(set f64:$XT, (ftrunc f64:$XB))]>; @@ -633,10 +640,10 @@ let Uses = [RM] in { // Max/Min Instructions let isCommutable = 1 in { def XSMAXDP : XX3Form<60, 160, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>; def XSMINDP : XX3Form<60, 168, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), "xsmindp $XT, $XA, $XB", IIC_VecFP, []>; def XVMAXDP : XX3Form<60, 224, @@ -676,6 +683,10 @@ let Uses = [RM] in { (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlor $XT, $XA, $XB", 
IIC_VecGeneral, [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>; + let isCodeGenOnly = 1 in + def XXLORf: XX3Form<60, 146, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>; def XXLXOR : XX3Form<60, 154, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlxor $XT, $XA, $XB", IIC_VecGeneral, @@ -724,12 +735,12 @@ def : InstAlias<"xxswapd $XT, $XB", let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def : Pat<(v2f64 (scalar_to_vector f64:$A)), - (v2f64 (COPY_TO_REGCLASS $A, VSRC))>; + (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; def : Pat<(f64 (vector_extract v2f64:$S, 0)), - (f64 (COPY_TO_REGCLASS $S, VSRC))>; + (f64 (EXTRACT_SUBREG $S, sub_64))>; def : Pat<(f64 (vector_extract v2f64:$S, 1)), - (f64 (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSRC))>; + (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; // Additional fnmsub patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index c68e922355e..78c5a124fbb 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -229,16 +229,33 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::F8RCRegClassID: case PPC::F4RCRegClassID: case PPC::VRRCRegClassID: + case PPC::VFRCRegClassID: case PPC::VSLRCRegClassID: case PPC::VSHRCRegClassID: return 32 - DefaultSafety; case PPC::VSRCRegClassID: + case PPC::VSFRCRegClassID: return 64 - DefaultSafety; case PPC::CRRCRegClassID: return 8 - DefaultSafety; } } +const TargetRegisterClass* +PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const { + if (Subtarget.hasVSX()) { + // With VSX, we can inflate various sub-register classes to the full VSX + // register set. 
+ + if (RC == &PPC::F8RCRegClass) + return &PPC::VSFRCRegClass; + else if (RC == &PPC::VRRCRegClass) + return &PPC::VSRCRegClass; + } + + return TargetRegisterInfo::getLargestLegalSuperClass(RC); +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 4871834c26d..7a8c2aa4754 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -40,6 +40,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index dab222b7bb7..e11f7d4a800 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -49,9 +49,19 @@ class FPR num, string n> : PPCReg { let HWEncoding{4-0} = num; } -// VR - One of the 32 128-bit vector registers -class VR num, string n> : PPCReg { +// VF - One of the 32 64-bit floating-point subregisters of the vector +// registers (used by VSX). 
+class VF num, string n> : PPCReg { let HWEncoding{4-0} = num; + let HWEncoding{5} = 1; +} + +// VR - One of the 32 128-bit vector registers +class VR : PPCReg { + let HWEncoding{4-0} = SubReg.HWEncoding{4-0}; + let HWEncoding{5} = 0; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_64]; } // VSRL - One of the 32 128-bit VSX registers that overlap with the scalar @@ -99,9 +109,14 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; } +// Floating-point vector subregisters (for VSX) +foreach Index = 0-31 in { + def VF#Index : VF; +} + // Vector registers foreach Index = 0-31 in { - def V#Index : VR, + def V#Index : VR("VF"#Index), "v"#Index>, DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } @@ -235,18 +250,27 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, // VSX register classes (the allocation order mirrors that of the corresponding // subregister classes). -def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128, +def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, (add (sequence "VSL%u", 0, 13), (sequence "VSL%u", 31, 14))>; -def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128, +def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7, VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14, VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30, VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23, VSH22, VSH21, VSH20)>; -def VSRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128, +def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128, (add VSLRC, VSHRC)>; +// Register classes for the 64-bit "scalar" VSX subregisters. 
+def VFRC : RegisterClass<"PPC", [f64], 64, + (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7, + VF8, VF9, VF10, VF11, VF12, VF13, VF14, + VF15, VF16, VF17, VF18, VF19, VF31, VF30, + VF29, VF28, VF27, VF26, VF25, VF24, VF23, + VF22, VF21, VF20)>; +def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>; + def CRBITRC : RegisterClass<"PPC", [i1], 32, (add CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT, CR3EQ, CR3UN, diff --git a/test/CodeGen/PowerPC/vsx-fma-m.ll b/test/CodeGen/PowerPC/vsx-fma-m.ll index 1e123407d13..da4a20481e6 100644 --- a/test/CodeGen/PowerPC/vsx-fma-m.ll +++ b/test/CodeGen/PowerPC/vsx-fma-m.ll @@ -64,7 +64,7 @@ entry: ret void ; CHECK-LABEL: @test3 -; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1 +; CHECK-DAG: fmr [[F1:[0-9]+]], 1 ; CHECK-DAG: li [[C1:[0-9]+]], 24 ; CHECK-DAG: li [[C2:[0-9]+]], 16 ; CHECK-DAG: li [[C3:[0-9]+]], 8 @@ -80,7 +80,7 @@ entry: ; CHECK-DAG: stxsdx 2, 8, [[C1]] ; CHECK-DAG: stxsdx 1, 8, [[C2]] ; CHECK-DAG: stxsdx 4, 8, [[C3]] -; CHECK-DAG: blr +; CHECK: blr } define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 { @@ -99,7 +99,7 @@ entry: ret void ; CHECK-LABEL: @test4 -; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1 +; CHECK-DAG: fmr [[F1:[0-9]+]], 1 ; CHECK-DAG: li [[C1:[0-9]+]], 8 ; CHECK-DAG: li [[C2:[0-9]+]], 16 ; CHECK-DAG: xsmaddmdp 4, 2, 1 @@ -120,5 +120,119 @@ entry: declare double @llvm.fma.f64(double, double, double) #0 +define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 { +entry: + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a) + store <2 x double> %0, <2 x double>* %d, align 8 + %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a) + %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1 + store <2 x double> %1, <2 x double>* %arrayidx1, align 8 + ret void + +; CHECK-LABEL: @testv1 +; CHECK-DAG: xvmaddmdp 36, 35, 34 +; 
CHECK-DAG: xvmaddadp 34, 35, 37 +; CHECK-DAG: li [[C1:[0-9]+]], 16 +; CHECK-DAG: stxvd2x 36, 0, 3 +; CHECK-DAG: stxvd2x 34, 3, [[C1:[0-9]+]] +; CHECK: blr +} + +define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 { +entry: + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a) + store <2 x double> %0, <2 x double>* %d, align 8 + %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a) + %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1 + store <2 x double> %1, <2 x double>* %arrayidx1, align 8 + %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a) + %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2 + store <2 x double> %2, <2 x double>* %arrayidx2, align 8 + ret void + +; CHECK-LABEL: @testv2 +; CHECK-DAG: xvmaddmdp 36, 35, 34 +; CHECK-DAG: xvmaddmdp 37, 35, 34 +; CHECK-DAG: li [[C1:[0-9]+]], 16 +; CHECK-DAG: li [[C2:[0-9]+]], 32 +; CHECK-DAG: xvmaddadp 34, 35, 38 +; CHECK-DAG: stxvd2x 36, 0, 3 +; CHECK-DAG: stxvd2x 37, 3, [[C1:[0-9]+]] +; CHECK-DAG: stxvd2x 34, 3, [[C2:[0-9]+]] +; CHECK: blr +} + +define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 { +entry: + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a) + store <2 x double> %0, <2 x double>* %d, align 8 + %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a) + %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1) + %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3 + store <2 x double> %2, <2 x double>* %arrayidx1, align 8 + %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a) + %arrayidx2 = getelementptr inbounds <2 x double>* 
%d, i64 2 + store <2 x double> %3, <2 x double>* %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1 + store <2 x double> %1, <2 x double>* %arrayidx3, align 8 + ret void + +; CHECK-LABEL: @testv3 +; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34 +; CHECK-DAG: xvmaddmdp 37, 35, 34 +; CHECK-DAG: li [[C1:[0-9]+]], 48 +; CHECK-DAG: li [[C2:[0-9]+]], 32 +; CHECK-DAG: xvmaddadp 34, 35, 38 +; CHECK-DAG: li [[C3:[0-9]+]], 16 + +; Note: We could convert this next FMA to M-type as well, but it would require +; re-ordering the instructions. +; CHECK-DAG: xvmaddadp [[V1]], 35, 36 + +; CHECK-DAG: xvmaddmdp 35, 36, 37 +; CHECK-DAG: stxvd2x 32, 0, 3 +; CHECK-DAG: stxvd2x 35, 3, [[C1]] +; CHECK-DAG: stxvd2x 34, 3, [[C2]] +; CHECK-DAG: stxvd2x 37, 3, [[C3]] +; CHECK: blr +} + +define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 { +entry: + %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a) + store <2 x double> %0, <2 x double>* %d, align 8 + %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a) + %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1 + store <2 x double> %1, <2 x double>* %arrayidx1, align 8 + %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1) + %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3 + store <2 x double> %2, <2 x double>* %arrayidx3, align 8 + %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a) + %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2 + store <2 x double> %3, <2 x double>* %arrayidx4, align 8 + ret void + +; CHECK-LABEL: @testv4 +; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34 +; CHECK-DAG: xvmaddmdp 37, 35, 34 +; CHECK-DAG: li [[C1:[0-9]+]], 16 +; CHECK-DAG: li [[C2:[0-9]+]], 32 +; CHECK-DAG: xvmaddadp 34, 35, 38 + +; Note: We could convert this next 
FMA to M-type as well, but it would require +; re-ordering the instructions. +; CHECK-DAG: xvmaddadp [[V1]], 35, 36 + +; CHECK-DAG: stxvd2x 32, 0, 3 +; CHECK-DAG: stxvd2x 37, 3, [[C1]] +; CHECK-DAG: li [[C3:[0-9]+]], 48 +; CHECK-DAG: xvmaddadp 37, 35, 36 +; CHECK-DAG: stxvd2x 37, 3, [[C3]] +; CHECK-DAG: stxvd2x 34, 3, [[C2]] +; CHECK: blr +} + +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 + attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/PowerPC/vsx-spill.ll b/test/CodeGen/PowerPC/vsx-spill.ll new file mode 100644 index 00000000000..29bc6fcc710 --- /dev/null +++ b/test/CodeGen/PowerPC/vsx-spill.ll @@ -0,0 +1,49 @@ +; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @foo1(double %a) nounwind { +entry: + call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind + br label %return + +; CHECK: @foo1 +; CHECK: xxlor [[R1:[0-9]+]], 1, 1 +; CHECK: xxlor 1, [[R1]], [[R1]] +; CHECK: blr + +return: ; preds = %entry + ret double %a +} + +define double @foo2(double %a) nounwind { +entry: + %b = fadd double %a, %a + call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind + br label %return + +; CHECK: @foo2 +; CHECK: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1 +; CHECK: {{xxlor|xsadddp}} 1, [[R1]], [[R1]] +; CHECK: blr + +return: ; preds = %entry + ret double %b +} + +define double @foo3(double %a) nounwind { +entry: + call void asm sideeffect "", 
"~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31},~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind + br label %return + +; CHECK: @foo3 +; CHECK: stxsdx 1, +; CHECK: lxsdx [[R1:[0-9]+]], +; CHECK: xsadddp 1, [[R1]], [[R1]] +; CHECK: blr + +return: ; preds = %entry + %b = fadd double %a, %a + ret double %b +} +