From: Hal Finkel Date: Mon, 1 Apr 2013 17:52:07 +0000 (+0000) Subject: Add more PPC floating-point conversion instructions X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=46479197843ecb651adc9417c49bbd1b00acfcb6 Add more PPC floating-point conversion instructions The P7 and A2 have additional floating-point conversion instructions which allow a direct two-instruction sequence (plus load/store) to convert from all combinations (signed/unsigned i32/i64) <--> (float/double) (on previous cores, only some combinations were directly available). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178480 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 602e33cd290..a1ea2297bf5 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -63,6 +63,8 @@ def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true", "Enable the lfiwax instruction">; def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true", "Enable the fri[mnpz] instructions">; +def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true", + "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", @@ -79,10 +81,8 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // // CMPB p6, p6x, p7 cmpb // DFP p6, p6x, p7 decimal floating-point instructions -// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz // FRE p5 through p7 fre (vs. fres, available since p3) // FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3) -// LFIWZX p7 lfiwzx // POPCNTB p5 through p7 popcntb and related instructions // RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates // VSX p7 vector-scalar instruction set @@ -135,14 +135,15 @@ def : ProcessorModel<"e5500", PPCE5500Model, def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureISEL, FeaturePOPCNTD, - FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>; + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureISEL, FeaturePOPCNTD, - FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, - FeatureQPX]>; + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */, FeatureQPX]>; def : Processor<"pwr3", G5Itineraries, [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; @@ -168,9 +169,9 @@ def : Processor<"pwr6x", G5Itineraries, def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureLFIWAX, FeatureFPRND, FeatureISEL, - FeaturePOPCNTD, FeatureLDBRX, Feature64Bit - /*, Feature64BitRegs */]>; + FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, + FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 89b42af72e9..9c686cb9467 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -326,13 +326,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (Subtarget->isPPC64()) + if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64()) setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } + // With the instructions enabled under FPCVT, we can do everything. + if (PPCSubTarget.hasFPCVT()) { + if (Subtarget->has64BitSupport()) { + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + } + + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + } + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); @@ -4716,37 +4731,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : - PPCISD::FCTIDZ, + (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : + PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: - Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); + assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && + "i64 UINT_TO_FP is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); break; } // Convert the FP value to an int value through memory. - SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); + bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && + (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); + SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); + int FI = cast(FIPtr)->getIndex(); + MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Chain; + if (i32Stack) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); + SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; + Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + MVT::i32, MMO); + } else + Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, + MPI, false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. - if (Op.getValueType() == MVT::i32) + if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), + MPI = MachinePointerInfo(); + } + + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, false, false, false, 0); } -SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, +SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); + assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && + "UINT_TO_FP is supported only with FPCVT"); + + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-prcision and then round. + unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDUS : PPCISD::FCFIDS) : + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDU : PPCISD::FCFID); + MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + MVT::f32 : MVT::f64; + if (Op.getOperand(0).getValueType() == MVT::i64) { SDValue SINT = Op.getOperand(0); // When converting to single-precision, we actually need to convert @@ -4760,6 +4810,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && + !PPCSubTarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. (If this @@ -4793,16 +4844,18 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); - SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); + + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } assert(Op.getOperand(0).getValueType() == MVT::i32 && - "Unhandled SINT_TO_FP type in custom expander!"); + "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack @@ -4812,7 +4865,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue Ld; - if (PPCSubTarget.hasLFIWAX()) { + if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -4826,10 +4879,14 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOLoad, 4, 4); SDValue Ops[] = { Store, FIdx }; - Ld = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, - DAG.getVTList(MVT::f64, MVT::Other), Ops, 2, - MVT::i32, MMO); + Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::LFIWZX : PPCISD::LFIWAX, + dl, DAG.getVTList(MVT::f64, MVT::Other), + Ops, 2, MVT::i32, MMO); } else { + assert(PPCSubTarget.isPPC64() && + "i32->FP without LFIWAX supported only on PPC64"); + int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -4848,8 +4905,8 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, } // FCFID it and return it. - SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } @@ -5651,7 +5708,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, Op.getDebugLoc()); - case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index e93a183a6ad..6690899e5d2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -36,11 +36,19 @@ namespace llvm { /// was temporarily in the f64 operand. FCFID, + /// Newer FCFID[US] integer-to-floating-point conversion instructions for + /// unsigned integers and single-precision outputs. + FCFIDU, FCFIDS, FCFIDUS, + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation /// of that FP value. FCTIDZ, FCTIWZ, + /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for + /// unsigned integers. + FCTIDUZ, FCTIWUZ, + // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking // three v4f32 operands and producing a v4f32 result. VMADDFP, VNMSUBFP, @@ -247,6 +255,11 @@ namespace llvm { /// destination 64-bit register. LFIWAX, + /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point + /// load which zero-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWZX, + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to /// sym@toc@ha. @@ -494,7 +507,7 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 61702cf916a..fa5b65f0ba2 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -859,6 +859,22 @@ def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), "fctidz $frD, $frB", FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; + +def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB), + "fcfidu $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; +def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfids $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; +def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfidus $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; +def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiduz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; +def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiwuz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 478e6127ab0..067f5aacfaa 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -20,7 +20,7 @@ include "PPCInstrFormats.td" def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; -def SDT_PPClfiwax : SDTypeProfile<1, 1, [ // lfiwax +def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; @@ -62,12 +62,19 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ // PowerPC specific DAG Nodes. // -def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>; +def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; +def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; +def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; +def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; +def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; +def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; -def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwax, +def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad]>; +def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad]>; // Extract FPSCR (not modeled at the DAG level). @@ -853,6 +860,9 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src), "lfiwax $frD, $src", LdStLFD, [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; +def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), + "lfiwzx $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 57e18ed07ea..1b7150fd37d 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -41,6 +41,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasSTFIWX(false) , HasLFIWAX(false) , HasFPRND(false) + , HasFPCVT(false) , HasISEL(false) , HasPOPCNTD(false) , HasLDBRX(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 3958bc9c006..d5dfa1e8e14 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -80,6 +80,7 @@ protected: bool HasSTFIWX; bool HasLFIWAX; bool HasFPRND; + bool HasFPCVT; bool HasISEL; bool HasPOPCNTD; bool HasLDBRX; @@ -161,6 +162,7 @@ public: bool hasSTFIWX() const { return HasSTFIWX; } bool hasLFIWAX() const { return HasLFIWAX; } bool hasFPRND() const { return HasFPRND; } + bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasQPX() const { return HasQPX; } bool hasMFOCRF() const { return HasMFOCRF; } diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll new file mode 100644 index 00000000000..ebd0903358d --- /dev/null +++ b/test/CodeGen/PowerPC/float-to-int.ll @@ -0,0 +1,92 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i64 @foo(float %a) nounwind { + %x = fptosi float %a to i64 + ret i64 %x + +; CHECK: @foo +; CHECK: fctidz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo2(double %a) nounwind { + %x = fptosi double %a to i64 + ret i64 %x + +; CHECK: @foo2 +; CHECK: fctidz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo3(float %a) nounwind { + %x = fptoui float %a to i64 + ret i64 %x + +; CHECK: @foo3 +; CHECK: fctiduz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo4(double %a) nounwind { + %x = fptoui double %a to i64 + ret i64 %x + +; CHECK: @foo4 +; CHECK: fctiduz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i32 @goo(float %a) nounwind { + %x = fptosi float %a to i32 + ret i32 %x + +; CHECK: @goo +; CHECK: fctiwz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo2(double %a) nounwind { + %x = fptosi double %a to i32 + ret i32 %x + +; CHECK: @goo2 +; CHECK: fctiwz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo3(float %a) nounwind { + %x = fptoui float %a to i32 + ret i32 %x + +; CHECK: @goo3 +; CHECK: fctiwuz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo4(double %a) nounwind { + %x = fptoui double %a to i32 + ret i32 %x + +; CHECK: @goo4 +; CHECK: fctiwuz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll index bed940c5de0..2707d0352de 100644 --- a/test/CodeGen/PowerPC/i32-to-float.ll +++ b/test/CodeGen/PowerPC/i32-to-float.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr6 | FileCheck -check-prefix=CHECK-PWR6 %s ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -16,11 +17,17 @@ entry: ; CHECK: frsp 1, [[REG3]] ; CHECK: blr +; CHECK-PWR6: @foo +; CHECK-PWR6: stw 3, +; CHECK-PWR6: lfiwax [[REG:[0-9]+]], +; CHECK-PWR6: fcfid [[REG2:[0-9]+]], [[REG]] +; CHECK-PWR6: frsp 1, [[REG2]] +; CHECK-PWR6: blr + ; CHECK-A2: @foo ; CHECK-A2: stw 3, ; CHECK-A2: lfiwax [[REG:[0-9]+]], -; CHECK-A2: fcfid [[REG2:[0-9]+]], [[REG]] -; CHECK-A2: frsp 1, [[REG2]] +; CHECK-A2: fcfids 1, [[REG]] ; CHECK-A2: blr } @@ -36,6 +43,12 @@ entry: ; CHECK: fcfid 1, [[REG2]] ; CHECK: blr +; CHECK-PWR6: @goo +; CHECK-PWR6: stw 3, +; CHECK-PWR6: lfiwax [[REG:[0-9]+]], +; CHECK-PWR6: fcfid 1, [[REG]] +; CHECK-PWR6: blr + ; CHECK-A2: @goo ; CHECK-A2: stw 3, ; CHECK-A2: lfiwax [[REG:[0-9]+]], @@ -43,3 +56,27 @@ entry: ; CHECK-A2: blr } +define float @foou(i32 %a) nounwind { +entry: + %x = uitofp i32 %a to float + ret float %x + +; CHECK-A2: @foou +; CHECK-A2: stw 3, +; CHECK-A2: lfiwzx [[REG:[0-9]+]], +; CHECK-A2: fcfidus 1, [[REG]] +; CHECK-A2: blr +} + +define double @goou(i32 %a) nounwind { +entry: + %x = uitofp i32 %a to double + ret double %x + +; CHECK-A2: @goou +; CHECK-A2: stw 3, +; CHECK-A2: lfiwzx [[REG:[0-9]+]], +; CHECK-A2: fcfidu 1, [[REG]] +; CHECK-A2: blr +} + diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll new file mode 100644 index 00000000000..b81d109e7f4 --- /dev/null +++ b/test/CodeGen/PowerPC/i64-to-float.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @foo(i64 %a) nounwind { +entry: + %x = sitofp i64 %a to float + ret float %x + +; CHECK: @foo +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfids 1, [[REG]] +; CHECK: blr +} + +define double @goo(i64 %a) nounwind { +entry: + %x = sitofp i64 %a to double + ret double %x + +; CHECK: @goo +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfid 1, [[REG]] +; CHECK: blr +} + +define float @foou(i64 %a) nounwind { +entry: + %x = uitofp i64 %a to float + ret float %x + +; CHECK: @foou +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfidus 1, [[REG]] +; CHECK: blr +} + +define double @goou(i64 %a) nounwind { +entry: + %x = uitofp i64 %a to double + ret double %x + +; CHECK: @goou +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfidu 1, [[REG]] +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll index 5ae1be8953c..d2a3239ab86 100644 --- a/test/CodeGen/PowerPC/i64_fp_round.ll +++ b/test/CodeGen/PowerPC/i64_fp_round.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -22,6 +22,6 @@ entry: ; Also check that with -enable-unsafe-fp-math we do not get that extra ; code sequence. Simply verify that there is no "isel" present. -; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE +; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE ; CHECK-UNSAFE-NOT: isel diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll index c49b25cc230..345f1c93e8d 100644 --- a/test/CodeGen/PowerPC/stfiwx-2.ll +++ b/test/CodeGen/PowerPC/stfiwx-2.ll @@ -1,11 +1,14 @@ -; This cannot be a stfiwx -; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb -; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx +; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s define void @test(float %F, i8* %P) { %I = fptosi float %F to i32 %X = trunc i32 %I to i8 store i8 %X, i8* %P ret void +; CHECK: fctiwz 0, 1 +; CHECK: stfiwx 0, 0, 4 +; CHECK: lwz 4, 12(1) +; CHECK: stb 4, 0(3) +; CHECK: blr }