From: Hal Finkel Date: Mon, 17 Oct 2011 04:03:55 +0000 (+0000) Subject: Add PPC 440 scheduler and some associated tests (new files) X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=b31d3d271f75c17593e917575dc324b50615a630;p=oota-llvm.git Add PPC 440 scheduler and some associated tests (new files) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142171 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td new file mode 100644 index 00000000000..604d5c8afed --- /dev/null +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -0,0 +1,568 @@ +//===- PPCSchedule440.td - PPC 440 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// PowerPC 440x6 Embedded Processor Core User’s Manual. +// IBM (as updated in) 2010. + +// The basic PPC 440 does not include a floating-point unit; the pipeline +// timings here are constructed to match the FP2 unit shipped with the +// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers. +// References: +// S. Chatterjee, et al. Design and exploitation of a high-performance +// SIMD floating-point unit for Blue Gene/L. +// IBM J. Res. & Dev. 49 (2/3) March/May 2005. +// also: +// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution: +// Blue Gene/P Application Development. +// IBM (as updated in) 2009. 
+ +//===----------------------------------------------------------------------===// +// Functional units on the PowerPC 440/450 chip sets +// Each pipeline stage listed below is modeled as its own FuncUnit. +// +def IFTH1 : FuncUnit; // Fetch unit 1 +def IFTH2 : FuncUnit; // Fetch unit 2 +def PDCD1 : FuncUnit; // Decode unit 1 +def PDCD2 : FuncUnit; // Decode unit 2 +def DISS1 : FuncUnit; // Issue unit 1 +def DISS2 : FuncUnit; // Issue unit 2 +def LRACC : FuncUnit; // Register access and dispatch for + // the simple integer (J-pipe) and + // load/store (L-pipe) pipelines +def IRACC : FuncUnit; // Register access and dispatch for + // the complex integer (I-pipe) pipeline +def FRACC : FuncUnit; // Register access and dispatch for + // the floating-point execution (F-pipe) pipeline +def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline +def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline +def IWB : FuncUnit; // Write-back unit for the I pipeline +def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline +def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline +def JWB : FuncUnit; // Write-back unit for the J pipeline +def AGEN : FuncUnit; // Address generation for the L pipeline +def CRD : FuncUnit; // D-cache access for the L pipeline +def LWB : FuncUnit; // Write-back unit for the L pipeline +def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline +def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline +def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline +def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline +def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline +def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline +def FWB : FuncUnit; // Write-back unit for the F pipeline + +def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used + // to make sure that no lwarx/stwcx. + // instructions are issued while another + // lwarx/stwcx. is in the L pipe. + +def GPR_Bypass : Bypass; // The bypass for general-purpose regs. 
+def FPR_Bypass : Bypass; // The bypass for floating-point regs. + +// Notes: +// Instructions are held in the FRACC, LRACC and IRACC pipeline +// stages until their source operands become ready. Exceptions: +// - Store instructions will hold in the AGEN stage +// - The integer multiply-accumulate instruction will hold in +// the IEXE1 stage +// +// For most I-pipe operations, the result is available at the end of +// the IEXE1 stage. Operations such as multiply and divide must +// continue to execute in IEXE2 and IWB. Divide resides in IWB for +// 33 cycles (multiply also calculates its result in IWB). For all +// J-pipe instructions, the result is available +// at the end of the JEXE1 stage. Loads have a 3-cycle latency +// (data is not available until after the LWB stage). +// +// The L1 cache hit latency is four cycles for floating point loads +// and three cycles for integer loads. +// +// The stwcx. instruction requires both the LRACC and the IRACC +// dispatch stages. It must be issued from DISS0. +// NOTE(review): no unit named DISS0 is defined in this file (only +// DISS1 and DISS2); the itineraries that reserve LWARX_Hold issue +// only from DISS1, which presumably models that slot -- confirm. +// +// All lwarx/stwcx. instructions hold in LRACC if another +// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB. +// +// msync (a.k.a. sync) and mbar will hold in LWB until all load/store +// resources are empty. AGEN and CRD are held empty until the msync/mbar +// commits. +// +// Most floating-point instructions, computational and move, +// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that +// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above, +// loads take 4 cycles (for L1 hit). + +// +// This file defines the itinerary class data for the PPC 440 processor. 
+// +//===----------------------------------------------------------------------===// + + +def PPC440Itineraries : ProcessorItineraries< + [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC, + IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB, + FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold], + [GPR_Bypass, FPR_Bypass], [ + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<33, [IWB]>], + [40, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [7, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [7, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, 
+ InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [6, 4], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, 
GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5], // FIXME: should be [9, 5] for loads and + // [8, 5] for stores. + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [9, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + 
InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<3, [AGEN], 1>, + InstrStage<2, [CRD], 1>, + InstrStage<1, [LWB]>]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC], 0>, + InstrStage<1, [LRACC], 0>, + InstrStage<1, [IRACC]>, + InstrStage<1, [FEXE1], 0>, + InstrStage<1, [AGEN], 0>, + InstrStage<1, [JEXE1], 0>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [FEXE2], 0>, + InstrStage<1, [CRD], 0>, + InstrStage<1, [JEXE2], 0>, + InstrStage<1, [IEXE2]>, + InstrStage<6, [FEXE3], 0>, + InstrStage<6, [LWB], 0>, + InstrStage<6, [JWB], 0>, + InstrStage<6, [IWB]>]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [6, 4], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [6, 4], + 
[GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [9, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [7, 4], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, 
GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<25, [FWB]>], + [35, 4, 4], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<13, [FWB]>], + [23, 4, 4], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData, + 
InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4], + [FPR_Bypass, FPR_Bypass]> +]>; diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll new file mode 100644 index 00000000000..51c14378637 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=ppc32 -mcpu=440 | grep fmadd + +%0 = type { double, double } + +define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind { +entry: + %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0 + %a.real = load double* %a.realp + %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1 + %a.imag = load double* %a.imagp + %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0 + %b.real = load double* %b.realp + %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1 + %b.imag = load double* %b.imagp + %mul.rl = fmul double %a.real, %b.real + %mul.rr = fmul double %a.imag, %b.imag + %mul.r = fsub double %mul.rl, %mul.rr + %mul.il = fmul double %a.imag, %b.real + %mul.ir = fmul double %a.real, %b.imag + %mul.i = fadd double %mul.il, %mul.ir + %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0 + %c.real = load double* %c.realp + %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1 + %c.imag = load double* %c.imagp + %add.r = fadd double %mul.r, %c.real + %add.i = fadd double %mul.i, %c.imag + %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0 + %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1 + store double %add.r, double* %real + store double %add.i, double* %imag + ret void +} diff --git a/test/CodeGen/PowerPC/ppc440-msync.ll b/test/CodeGen/PowerPC/ppc440-msync.ll new file mode 100644 index 00000000000..4d663bc56ce --- /dev/null +++ 
b/test/CodeGen/PowerPC/ppc440-msync.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=ppc32 -o %t +; RUN: grep sync %t +; RUN: not grep msync %t +; RUN: llc < %s -march=ppc32 -mcpu=440 | grep msync + +define i32 @has_a_fence(i32 %a, i32 %b) nounwind { +entry: + fence acquire + %cond = icmp eq i32 %a, %b + br i1 %cond, label %IfEqual, label %IfUnequal + +IfEqual: + fence release + br label %end + +IfUnequal: + fence release + ret i32 0 + +end: + ret i32 1 +} +