From bd9a54888185890e30835394f786c47c64811ffa Mon Sep 17 00:00:00 2001 From: Kit Barton Date: Wed, 25 Mar 2015 19:36:23 +0000 Subject: [PATCH] Add Hardware Transactional Memory (HTM) Support This patch adds Hardware Transactional Memory (HTM) support as provided by ISA 2.07 (POWER8). The intrinsic support is based on the GCC one [1], but currently only the 'PowerPC HTM Low Level Built-in Functions' are implemented. The HTM instructions follow the RC ones and the transaction initiation result is set in CR0 (with the exception of tcheck). The current approach is to create a register copy from CR0 to a GPR and compare it. Although this is suboptimal, since the branch could be taken directly by comparing the CR0 value, it generates correct code both for test-and-branch and for simply returning the value. A possible future optimization could be to eliminate the MFCR instruction and branch directly. HTM usage requires a recent kernel with PPC HTM enabled. Tested on powerpc64 and powerpc64le. This is sent along with a clang patch to enable the builtins and option switch. 
[1] https://gcc.gnu.org/onlinedocs/gcc/PowerPC-Hardware-Transactional-Memory-Built-in-Functions.html Phabricator Review: http://reviews.llvm.org/D8247 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233204 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsPowerPC.td | 59 ++++++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 1 + .../PowerPC/Disassembler/PPCDisassembler.cpp | 6 + .../PowerPC/InstPrinter/PPCInstPrinter.cpp | 7 + .../PowerPC/InstPrinter/PPCInstPrinter.h | 1 + lib/Target/PowerPC/PPC.td | 4 +- lib/Target/PowerPC/PPCISelLowering.cpp | 6 + lib/Target/PowerPC/PPCInstr64Bit.td | 6 + lib/Target/PowerPC/PPCInstrFormats.td | 54 ++++++ lib/Target/PowerPC/PPCInstrHTM.td | 172 ++++++++++++++++++ lib/Target/PowerPC/PPCInstrInfo.cpp | 53 ++++++ lib/Target/PowerPC/PPCInstrInfo.td | 13 ++ lib/Target/PowerPC/PPCRegisterInfo.cpp | 31 ---- lib/Target/PowerPC/PPCRegisterInfo.h | 33 ++++ lib/Target/PowerPC/PPCRegisterInfo.td | 2 + lib/Target/PowerPC/PPCSubtarget.cpp | 1 + lib/Target/PowerPC/PPCSubtarget.h | 2 + test/CodeGen/PowerPC/htm.ll | 125 +++++++++++++ test/MC/PowerPC/htm.s | 53 ++++++ 19 files changed, 597 insertions(+), 32 deletions(-) create mode 100644 lib/Target/PowerPC/PPCInstrHTM.td create mode 100644 test/CodeGen/PowerPC/htm.ll create mode 100644 test/MC/PowerPC/htm.s diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index c60fb34d84b..95fc3e5ecec 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -796,3 +796,62 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>; } +//===----------------------------------------------------------------------===// +// PowerPC HTM Intrinsic Definitions. + +let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
+ +def int_ppc_tbegin : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; +def int_ppc_tend : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; +def int_ppc_tabortwc : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_ppc_tabortwci : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_ppc_tabortdc : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_ppc_tabortdci : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_ppc_tcheck : GCCBuiltin<"__builtin_tcheck">, + Intrinsic<[llvm_i32_ty], [], []>; +def int_ppc_treclaim : GCCBuiltin<"__builtin_treclaim">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; +def int_ppc_trechkpt : GCCBuiltin<"__builtin_trechkpt">, + Intrinsic<[llvm_i32_ty], [], []>; +def int_ppc_tsr : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_ppc_get_texasr : GCCBuiltin<"__builtin_get_texasr">, + Intrinsic<[llvm_i64_ty], [], []>; +def int_ppc_get_texasru : GCCBuiltin<"__builtin_get_texasru">, + Intrinsic<[llvm_i64_ty], [], []>; +def int_ppc_get_tfhar : GCCBuiltin<"__builtin_get_tfhar">, + Intrinsic<[llvm_i64_ty], [], []>; +def int_ppc_get_tfiar : GCCBuiltin<"__builtin_get_tfiar">, + Intrinsic<[llvm_i64_ty], [], []>; + +def int_ppc_set_texasr : GCCBuiltin<"__builtin_set_texasr">, + Intrinsic<[], [llvm_i64_ty], []>; +def int_ppc_set_texasru : GCCBuiltin<"__builtin_set_texasru">, + Intrinsic<[], [llvm_i64_ty], []>; +def int_ppc_set_tfhar : GCCBuiltin<"__builtin_set_tfhar">, + Intrinsic<[], [llvm_i64_ty], []>; +def int_ppc_set_tfiar : GCCBuiltin<"__builtin_set_tfiar">, + Intrinsic<[], [llvm_i64_ty], []>; + +// Extended mnemonics +def int_ppc_tendall : GCCBuiltin<"__builtin_tendall">, + Intrinsic<[llvm_i32_ty], [], []>; +def int_ppc_tresume : GCCBuiltin<"__builtin_tresume">, + Intrinsic<[llvm_i32_ty], [], []>; +def 
int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">, + Intrinsic<[llvm_i32_ty], [], []>; + +def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">, + Intrinsic<[llvm_i64_ty], [], []>; +} diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 9e2d9acefb7..99a16338a84 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -427,6 +427,7 @@ public: bool isImm() const override { return Kind == Immediate || Kind == Expression; } bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } + bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); } bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 0ed07239327..a9f5fc79459 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -189,6 +189,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, CRRegs); } +static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRRegs); +} + static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 5d1aa1ad540..311a4f2a0ff 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -228,6 +228,13 @@ void 
PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned int)Value; } +void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 7 && "Invalid u3imm argument!"); // 3-bit field: 0..7 + O << (unsigned int)Value; +} + void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 22934b8f4fb..8718743e108 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -45,6 +45,7 @@ public: void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 25160b5c469..f175f6def78 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -125,6 +125,8 @@ def FeatureInvariantFunctionDescriptors : SubtargetFeature<"invariant-function-descriptors", "HasInvariantFunctionDescriptors", "true", "Assume function descriptors are invariant">; +def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", + "Enable Hardware Transactional Memory instructions">; def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", "Treat mftb as deprecated">; @@ -261,7 +263,7 @@ def ProcessorFeatures { [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, 
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, Feature64Bit /*, Feature64BitRegs */, FeatureICBT, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index c35ab7a49b1..871531ece89 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8782,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); + } else if (MI->getOpcode() == PPC::TCHECK_RET) { + DebugLoc Dl = MI->getDebugLoc(); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); + return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 36479e2ff41..183d0887262 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -329,6 +329,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { Requires<[In64BitMode]>; } +def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR), + "mfspr $RT, $SPR", IIC_SprMFSPR>; +def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT), + "mtspr $SPR, $RT", IIC_SprMTSPR>; + + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. 
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 2d3e43c6f20..b7a7a1f50b4 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -693,6 +693,60 @@ class XForm_16b opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let A = 0; } +class XForm_htm0 opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list pattern> + : I { + bit R; + + bit RC = 1; + + let Inst{6-9} = 0; + let Inst{10} = R; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm1 opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list pattern> + : I { + bit A; + + bit RC = 1; + + let Inst{6} = A; + let Inst{7-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm2 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bit L; + + bit RC = 0; // set by isDOT + + let Inst{7-9} = 0; + let Inst{10} = L; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm3 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<3> BF; + + bit RC = 0; + + let Inst{6-8} = BF; + let Inst{9-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + // XX*-Form (VSX) class XX1Form opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td new file mode 100644 index 00000000000..20e6a628632 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrHTM.td @@ -0,0 +1,172 @@ +//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes the Hardware Transactional Memory extension to the +// PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + + + +def HasHTM : Predicate<"PPCSubTarget->hasHTM()">; + +def HTM_get_imm : SDNodeXFormgetZExtValue()); +}]>; + +let hasSideEffects = 1, usesCustomInserter = 1 in { +def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>; +} + + +let Predicates = [HasHTM] in { + +def TBEGIN : XForm_htm0 <31, 654, + (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>; + +def TEND : XForm_htm1 <31, 686, + (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>; + +def TABORT : XForm_base_r3xo <31, 910, + (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR, + []>, isDOT { + let RST = 0; + let B = 0; +} + +def TABORTWC : XForm_base_r3xo <31, 782, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTWCI : XForm_base_r3xo <31, 846, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDC : XForm_base_r3xo <31, 814, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDCI : XForm_base_r3xo <31, 878, + (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TSR : XForm_htm2 <31, 750, + (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>, + isDOT; + +def TCHECK : XForm_htm3 <31, 718, + (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>; + + +def TRECLAIM : XForm_base_r3xo <31, 942, + (outs crrc:$ret), (ins gprc:$A), "treclaim. 
$A", + IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let B = 0; +} + +def TRECHKPT : XForm_base_r3xo <31, 1006, + (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let A = 0; + let B = 0; +} + +// Builtins + +// All HTM instructions, with the exception of tcheck, set CR0 with the +// value of the MSR Transaction State (TS) bits that exist before the +// instruction is executed. For tbegin., the EQ bit in CR0 can be used +// to determine whether the transaction was successfully started (0) or +// failed (1). We use an XORI pattern to 'flip' the bit to match the +// tbegin builtin API which defines a return value of 1 as success. + +def : Pat<(int_ppc_tbegin i32:$R), + (XORI + (EXTRACT_SUBREG ( + TBEGIN (HTM_get_imm imm:$R)), sub_eq), + 1)>; + +def : Pat<(int_ppc_tend i32:$R), + (TEND (HTM_get_imm imm:$R))>; + + +def : Pat<(int_ppc_tabort i32:$R), + (TABORT $R)>; + +def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB), + (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI), + (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB), + (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI), + (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tcheck), + (TCHECK_RET)>; + +def : Pat<(int_ppc_treclaim i32:$RA), + (TRECLAIM $RA)>; + +def : Pat<(int_ppc_trechkpt), + (TRECHKPT)>; + +def : Pat<(int_ppc_tsr i32:$L), + (TSR (HTM_get_imm imm:$L))>; + +def : Pat<(int_ppc_get_texasr), + (MFSPR8 130)>; + +def : Pat<(int_ppc_get_texasru), + (MFSPR8 131)>; + +def : Pat<(int_ppc_get_tfhar), + (MFSPR8 128)>; + +def : Pat<(int_ppc_get_tfiar), + (MFSPR8 129)>; + + +def : Pat<(int_ppc_set_texasr i64:$V), + (MTSPR8 130, $V)>; + +def : Pat<(int_ppc_set_texasru i64:$V), + (MTSPR8 131, $V)>; + +def : Pat<(int_ppc_set_tfhar i64:$V), + (MTSPR8 128, $V)>; + +def 
: Pat<(int_ppc_set_tfiar i64:$V), + (MTSPR8 129, $V)>; + + +// Extended mnemonics +def : Pat<(int_ppc_tendall), + (TEND 1)>; + +def : Pat<(int_ppc_tresume), + (TSR 1)>; + +def : Pat<(int_ppc_tsuspend), + (TSR 0)>; + +def : Pat<(i64 (int_ppc_ttest)), + (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>; + +} // [HasHTM] diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 5d461d5394e..c9c2949dc6c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -696,6 +696,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, .addReg(Cond[1].getReg(), 0, SubIdx); } +static unsigned getCRBitValue(unsigned CRBit) { + unsigned Ret = 4; + if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT || + CRBit == PPC::CR2LT || CRBit == PPC::CR3LT || + CRBit == PPC::CR4LT || CRBit == PPC::CR5LT || + CRBit == PPC::CR6LT || CRBit == PPC::CR7LT) + Ret = 3; + if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT || + CRBit == PPC::CR2GT || CRBit == PPC::CR3GT || + CRBit == PPC::CR4GT || CRBit == PPC::CR5GT || + CRBit == PPC::CR6GT || CRBit == PPC::CR7GT) + Ret = 2; + if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ || + CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ || + CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ || + CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ) + Ret = 1; + if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN || + CRBit == PPC::CR2UN || CRBit == PPC::CR3UN || + CRBit == PPC::CR4UN || CRBit == PPC::CR5UN || + CRBit == PPC::CR6UN || CRBit == PPC::CR7UN) + Ret = 0; + + assert(Ret != 4 && "Invalid CR bit register"); + return Ret; +} + void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -741,6 +768,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SrcReg = SuperReg; } + // Different class register copy + if (PPC::CRBITRCRegClass.contains(SrcReg) && + PPC::GPRCRegClass.contains(DestReg)) { + unsigned CRReg = 
getCRFromCRBit(SrcReg); + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) + .addReg(CRReg); // no kill flag: KillSrc covers only the bit, not the field + // Rotate the CR bit in the CR fields to be the least significant bit and + // then mask with 0x1 (MB = ME = 31). + BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg))) + .addImm(31) + .addImm(31); + return; + } else if (PPC::CRRCRegClass.contains(SrcReg) && + PPC::G8RCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); // flags go inside addReg + return; + } else if (PPC::CRRCRegClass.contains(SrcReg) && + PPC::GPRCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); // flags go inside addReg + return; + } + unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 17e7eb39305..5eff1565ce3 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -432,6 +432,9 @@ def PPCRegCRRCAsmOperand : AsmOperandClass { def crrc : RegisterOperand { let ParserMatchClass = PPCRegCRRCAsmOperand; } +def crrc0 : RegisterOperand { + let ParserMatchClass = PPCRegCRRCAsmOperand; +} def PPCU1ImmAsmOperand : AsmOperandClass { let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; @@ -451,6 +454,15 @@ def u2imm : Operand { let ParserMatchClass = PPCU2ImmAsmOperand; } +def PPCU3ImmAsmOperand : AsmOperandClass { + let Name = "U3Imm"; let PredicateMethod = "isU3Imm"; + let RenderMethod = "addImmOperands"; +} +def u3imm : Operand { + let PrintMethod = "printU3ImmOperand"; + let ParserMatchClass = PPCU3ImmAsmOperand; +} + def PPCU4ImmAsmOperand : AsmOperandClass { let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; let RenderMethod = "addImmOperands"; @@ -2708,6 +2720,7 @@ include "PPCInstrSPE.td" include "PPCInstr64Bit.td" include 
"PPCInstrVSX.td" include "PPCInstrQPX.td" +include "PPCInstrHTM.td" def crnot : OutPatFrag<(ops node:$in), (CRNOR $in, $in)>; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 8653734c8c6..0e568d3278e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -524,37 +524,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } -static unsigned getCRFromCRBit(unsigned SrcReg) { - unsigned Reg = 0; - if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || - SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || - SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || - SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || - SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || - SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || - SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || - SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || - SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) - Reg = PPC::CR7; - - assert(Reg != 0 && "Invalid CR bit register"); - return Reg; -} - void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const { // Get the instruction. 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 765d5745628..d304e1d8b5e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -22,6 +22,39 @@ #include "PPCGenRegisterInfo.inc" namespace llvm { + +inline static unsigned getCRFromCRBit(unsigned SrcReg) { + unsigned Reg = 0; + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) + Reg = PPC::CR0; + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) + Reg = PPC::CR1; + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) + Reg = PPC::CR2; + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) + Reg = PPC::CR3; + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) + Reg = PPC::CR4; + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) + Reg = PPC::CR5; + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) + Reg = PPC::CR6; + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) + Reg = PPC::CR7; + + assert(Reg != 0 && "Invalid CR bit register"); + return Reg; +} + + class PPCRegisterInfo : public PPCGenRegisterInfo { DenseMap ImmToIdxMap; const PPCTargetMachine &TM; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 9a7df9615cc..6ca68ed49b7 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -341,6 +341,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32, def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, CR7, CR2, CR3, CR4)>; +def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>; + // The CTR registers are not 
allocatable because they're used by the // decrement-and-branch instructions, and thus need to stay live across // multiple basic blocks. diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index b10e291fbf7..ed888038a10 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -97,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() { HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; IsQPXStackUnaligned = false; + HasHTM = false; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index b91e887a7c4..b4c1bb1dcce 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -115,6 +115,7 @@ protected: bool HasICBT; bool HasInvariantFunctionDescriptors; bool HasPartwordAtomics; + bool HasHTM; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment @@ -246,6 +247,7 @@ public: return 16; } + bool hasHTM() const { return HasHTM; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/test/CodeGen/PowerPC/htm.ll b/test/CodeGen/PowerPC/htm.ll new file mode 100644 index 00000000000..0e4304dc163 --- /dev/null +++ b/test/CodeGen/PowerPC/htm.ll @@ -0,0 +1,125 @@ +; RUN: llc -mcpu=pwr8 -mattr=+htm < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define zeroext i32 @test1() { +entry: + %0 = tail call i32 @llvm.ppc.tbegin(i32 0) + ret i32 %0 + +; CHECK-LABEL: @test1 +; CHECK: tbegin. 
0 +; CHECK: mfocrf [[REGISTER1:[0-9]+]], 128 +; CHECK: rlwinm [[REGISTER2:[0-9]+]], [[REGISTER1]], 3, 31, 31 +; CHECK: xori {{[0-9]+}}, [[REGISTER2]], 1 +} + +declare i32 @llvm.ppc.tbegin(i32) #1 + + +define zeroext i32 @test2() { +entry: + %0 = tail call i32 @llvm.ppc.tend(i32 0) + ret i32 %0 +; CHECK-LABEL: @test2 +; CHECK: tend. 0 +; CHECK: mfocrf {{[0-9]+}}, 128 +} + +declare i32 @llvm.ppc.tend(i32) + + +define void @test3() { +entry: + %0 = tail call i32 @llvm.ppc.tabort(i32 0) + %1 = tail call i32 @llvm.ppc.tabortdc(i32 0, i32 1, i32 2) + %2 = tail call i32 @llvm.ppc.tabortdci(i32 0, i32 1, i32 2) + %3 = tail call i32 @llvm.ppc.tabortwc(i32 0, i32 1, i32 2) + %4 = tail call i32 @llvm.ppc.tabortwci(i32 0, i32 1, i32 2) + ret void +; CHECK-LABEL: @test3 +; CHECK: tabort. {{[0-9]+}} +; CHECK: tabortdc. 0, {{[0-9]+}}, {{[0-9]+}} +; CHECK: tabortdci. 0, {{[0-9]+}}, 2 +; CHECK: tabortwc. 0, {{[0-9]+}}, {{[0-9]+}} +; CHECK: tabortwci. 0, {{[0-9]+}}, 2 +} + +declare i32 @llvm.ppc.tabort(i32) +declare i32 @llvm.ppc.tabortdc(i32, i32, i32) +declare i32 @llvm.ppc.tabortdci(i32, i32, i32) +declare i32 @llvm.ppc.tabortwc(i32, i32, i32) +declare i32 @llvm.ppc.tabortwci(i32, i32, i32) + + +define void @test4() { +entry: + %0 = tail call i32 @llvm.ppc.tendall() + %1 = tail call i32 @llvm.ppc.tresume() + %2 = tail call i32 @llvm.ppc.tsuspend() + ret void +; CHECK-LABEL: @test4 +; CHECK: tend. 1 +; CHECK: tsr. 1 +; CHECK: tsr. 
0 +} + +declare i32 @llvm.ppc.tendall() +declare i32 @llvm.ppc.tresume() +declare i32 @llvm.ppc.tsuspend() + + +define void @test5(i64 %v) { +entry: + tail call void @llvm.ppc.set.texasr(i64 %v) + tail call void @llvm.ppc.set.texasru(i64 %v) + tail call void @llvm.ppc.set.tfhar(i64 %v) + tail call void @llvm.ppc.set.tfiar(i64 %v) + ret void +; CHECK-LABEL: @test5 +; CHECK: mtspr 130, [[REG1:[0-9]+]] +; CHECK: mtspr 131, [[REG2:[0-9]+]] +; CHECK: mtspr 128, [[REG3:[0-9]+]] +; CHECK: mtspr 129, [[REG4:[0-9]+]] +} + +define i64 @test6() { +entry: + %0 = tail call i64 @llvm.ppc.get.texasr() + ret i64 %0 +; CHECK-LABEL: @test6 +; CHECK: mfspr [[REG1:[0-9]+]], 130 +} + +define i64 @test7() { +entry: + %0 = tail call i64 @llvm.ppc.get.texasru() + ret i64 %0 +; CHECK-LABEL: @test7 +; CHECK: mfspr [[REG1:[0-9]+]], 131 +} + +define i64 @test8() { +entry: + %0 = tail call i64 @llvm.ppc.get.tfhar() + ret i64 %0 +; CHECK-LABEL: @test8 +; CHECK: mfspr [[REG1:[0-9]+]], 128 +} + +define i64 @test9() { +entry: + %0 = tail call i64 @llvm.ppc.get.tfiar() + ret i64 %0 +; CHECK-LABEL: @test9 +; CHECK: mfspr [[REG1:[0-9]+]], 129 +} + +declare void @llvm.ppc.set.texasr(i64) +declare void @llvm.ppc.set.texasru(i64) +declare void @llvm.ppc.set.tfhar(i64) +declare void @llvm.ppc.set.tfiar(i64) +declare i64 @llvm.ppc.get.texasr() +declare i64 @llvm.ppc.get.texasru() +declare i64 @llvm.ppc.get.tfhar() +declare i64 @llvm.ppc.get.tfiar() diff --git a/test/MC/PowerPC/htm.s b/test/MC/PowerPC/htm.s new file mode 100644 index 00000000000..f99ff3cd536 --- /dev/null +++ b/test/MC/PowerPC/htm.s @@ -0,0 +1,53 @@ +# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s + +# CHECK-BE: tbegin. 0 # encoding: [0x7c,0x00,0x05,0x1d] +# CHECK-LE: tbegin. 0 # encoding: [0x1d,0x05,0x00,0x7c] + tbegin. 0 +# CHECK-BE: tbegin. 
1 # encoding: [0x7c,0x20,0x05,0x1d] +# CHECK-LE: tbegin. 1 # encoding: [0x1d,0x05,0x20,0x7c] + tbegin. 1 + +# CHECK-BE: tend. 0 # encoding: [0x7c,0x00,0x05,0x5d] +# CHECK-LE: tend. 0 # encoding: [0x5d,0x05,0x00,0x7c] + tend. 0 +# CHECK-BE: tend. 1 # encoding: [0x7e,0x00,0x05,0x5d] +# CHECK-LE: tend. 1 # encoding: [0x5d,0x05,0x00,0x7e] + tend. 1 + +# CHECK-BE: tabort. 9 # encoding: [0x7c,0x09,0x07,0x1d] +# CHECK-LE: tabort. 9 # encoding: [0x1d,0x07,0x09,0x7c] + tabort. 9 +# CHECK-BE: tabortdc. 0, 9, 9 # encoding: [0x7c,0x09,0x4e,0x5d] +# CHECK-LE: tabortdc. 0, 9, 9 # encoding: [0x5d,0x4e,0x09,0x7c] + tabortdc. 0, 9, 9 +# CHECK-BE: tabortdci. 0, 9, 0 # encoding: [0x7c,0x09,0x06,0xdd] +# CHECK-LE: tabortdci. 0, 9, 0 # encoding: [0xdd,0x06,0x09,0x7c] + tabortdci. 0, 9, 0 +# CHECK-BE: tabortwc. 0, 9, 9 # encoding: [0x7c,0x09,0x4e,0x1d] +# CHECK-LE: tabortwc. 0, 9, 9 # encoding: [0x1d,0x4e,0x09,0x7c] + tabortwc. 0, 9, 9 +# CHECK-BE: tabortwci. 0, 9, 0 # encoding: [0x7c,0x09,0x06,0x9d] +# CHECK-LE: tabortwci. 0, 9, 0 # encoding: [0x9d,0x06,0x09,0x7c] + tabortwci. 0, 9, 0 + +# CHECK-BE: tsr. 0 # encoding: [0x7c,0x00,0x05,0xdd] +# CHECK-LE: tsr. 0 # encoding: [0xdd,0x05,0x00,0x7c] + tsr. 0 +# CHECK-BE: tsr. 1 # encoding: [0x7c,0x20,0x05,0xdd] +# CHECK-LE: tsr. 1 # encoding: [0xdd,0x05,0x20,0x7c] + tsr. 1 + +# CHECK-BE: tcheck 0 # encoding: [0x7c,0x00,0x05,0x9c] +# CHECK-LE: tcheck 0 # encoding: [0x9c,0x05,0x00,0x7c] + tcheck 0 +# CHECK-BE: tcheck 3 # encoding: [0x7d,0x80,0x05,0x9c] +# CHECK-LE: tcheck 3 # encoding: [0x9c,0x05,0x80,0x7d] + tcheck 3 + +# CHECK-BE: treclaim. 9 # encoding: [0x7c,0x09,0x07,0x5d] +# CHECK-LE: treclaim. 9 # encoding: [0x5d,0x07,0x09,0x7c] + treclaim. 9 +# CHECK-BE: trechkpt. # encoding: [0x7c,0x00,0x07,0xdd] +# CHECK-LE: trechkpt. # encoding: [0xdd,0x07,0x00,0x7c] + trechkpt. -- 2.34.1