lib/Target/SystemZ/SystemZISelLowering.h

   1 //===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that SystemZ uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
  16 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
  17
  18 #include "SystemZ.h"
  19 #include "llvm/CodeGen/MachineBasicBlock.h"
  20 #include "llvm/CodeGen/SelectionDAG.h"
  21 #include "llvm/Target/TargetLowering.h"
  22
  23 namespace llvm {
  24 namespace SystemZISD {
  25 enum { // SystemZ selection DAG opcodes, numbered from ISD::BUILTIN_OP_END.
  26   FIRST_NUMBER = ISD::BUILTIN_OP_END,
  27
  28   // Return with a flag operand.  Operand 0 is the chain operand.
  29   RET_FLAG,
  30
  31   // Calls a function.  Operand 0 is the chain operand and operand 1
  32   // is the target address.  The arguments start at operand 2.
  33   // There is an optional glue operand at the end.
  34   CALL,
  35   SIBCALL,
  36
  37   // TLS calls.  Like regular calls, except operand 1 is the TLS symbol.
  38   // (The call target is implicitly __tls_get_offset.)
  39   TLS_GDCALL,
  40   TLS_LDCALL,
  41
  42   // Wraps a TargetGlobalAddress that should be loaded using PC-relative
  43   // accesses (LARL).  Operand 0 is the address.
  44   PCREL_WRAPPER,
  45
  46   // Used in cases where an offset is applied to a TargetGlobalAddress.
  47   // Operand 0 is the full TargetGlobalAddress and operand 1 is a
  48   // PCREL_WRAPPER for an anchor point.  This is used so that we can
  49   // cheaply refer to either the full address or the anchor point
  50   // as a register base.
  51   PCREL_OFFSET,
  52
  53   // Integer absolute.
  54   IABS,
  55
  56   // Integer comparisons.  There are three operands: the two values
  57   // to compare, and an integer of type SystemZICMP.
  58   ICMP,
  59
  60   // Floating-point comparisons.  The two operands are the values to compare.
  61   FCMP,
  62
  63   // Test under mask.  The first operand is ANDed with the second operand
  64   // and the condition codes are set on the result.  The third operand is
  65   // a boolean that is true if the condition codes need to distinguish
  66   // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
  67   // register forms do but the memory forms don't).
  68   TM,
  69
  70   // Branches if a condition is true.  Operand 0 is the chain operand;
  71   // operand 1 is the 4-bit condition-code mask, with bit N in
  72   // big-endian order meaning "branch if CC=N"; operand 2 is the
  73   // target block and operand 3 is the flag operand.
  74   BR_CCMASK,
  75
  76   // Selects between operand 0 and operand 1.  Operand 2 is the
  77   // mask of condition-code values for which operand 0 should be
  78   // chosen over operand 1; it has the same form as BR_CCMASK.
  79   // Operand 3 is the flag operand.
  80   SELECT_CCMASK,
  81
  82   // Evaluates to the gap between the stack pointer and the
  83   // base of the dynamically-allocatable area.
  84   ADJDYNALLOC,
  85
  86   // Extracts the value of a 32-bit access register.  Operand 0 is
  87   // the number of the register.
  88   EXTRACT_ACCESS,
  89
  90   // Count number of bits set in operand 0 per byte.
  91   POPCNT,
  92
  93   // Wrappers around the ISD opcodes of the same name.  The output and
  94   // first input operands are GR128s.  The trailing numbers are the
  95   // widths of the second operand in bits.
  96   UMUL_LOHI64,
  97   SDIVREM32,
  98   SDIVREM64,
  99   UDIVREM32,
 100   UDIVREM64,
 101
 102   // Use a series of MVCs to copy bytes from one memory location to another.
 103   // The operands are:
 104   // - the target address
 105   // - the source address
 106   // - the constant length
 107   //
 108   // This isn't a memory opcode because we'd need to attach two
 109   // MachineMemOperands rather than one.
 110   MVC,
 111
 112   // Like MVC, but implemented as a loop that handles X*256 bytes
 113   // followed by straight-line code to handle the rest (if any).
 114   // The value of X is passed as an additional operand.
 115   MVC_LOOP,
 116
 117   // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
 118   NC,
 119   NC_LOOP,
 120   OC,
 121   OC_LOOP,
 122   XC,
 123   XC_LOOP,
 124
 125   // Use CLC to compare two blocks of memory, with the same comments
 126   // as for MVC and MVC_LOOP.
 127   CLC,
 128   CLC_LOOP,
 129
 130   // Use an MVST-based sequence to implement stpcpy().
 131   STPCPY,
 132
 133   // Use a CLST-based sequence to implement strcmp().  The two input operands
 134   // are the addresses of the strings to compare.
 135   STRCMP,
 136
 137   // Use an SRST-based sequence to search a block of memory.  The first
 138   // operand is the end address, the second is the start, and the third
 139   // is the character to search for.  CC is set to 1 on success and 2
 140   // on failure.
 141   SEARCH_STRING,
 142
 143   // Store the CC value in bits 29 and 28 of an integer.
 144   IPM,
 145
 146   // Perform a serialization operation.  (BCR 15,0 or BCR 14,0.)
 147   SERIALIZE,
 148
 149   // Transaction begin.  The first operand is the chain, the second
 150   // the TDB pointer, and the third the immediate control field.
 151   // Returns chain and glue.
 152   TBEGIN,
 153   TBEGIN_NOFLOAT,
 154
 155   // Transaction end.  Just the chain operand.  Returns chain and glue.
 156   TEND,
 157
 158   // Create a vector constant by filling byte N of the result with bit
 159   // 15-N of the single operand.
 160   BYTE_MASK,
 161
 162   // Create a vector constant by replicating an element-sized RISBG-style mask.
 163   // The first operand specifies the starting set bit and the second operand
 164   // specifies the ending set bit.  Both operands count from the MSB of the
 165   // element.
 166   ROTATE_MASK,
 167
 168   // Replicate a GPR scalar value into all elements of a vector.
 169   REPLICATE,
 170
 171   // Create a vector from two i64 GPRs.
 172   JOIN_DWORDS,
 173
 174   // Replicate one element of a vector into all elements.  The first operand
 175   // is the vector and the second is the index of the element to replicate.
 176   SPLAT,
 177
 178   // Interleave elements from the high half of operand 0 and the high half
 179   // of operand 1.
 180   MERGE_HIGH,
 181
 182   // Likewise for the low halves.
 183   MERGE_LOW,
 184
 185   // Concatenate the vectors in the first two operands, shift them left
 186   // by the third operand, and take the first half of the result.
 187   SHL_DOUBLE,
 188
 189   // Take one element of the first v2i64 operand and one element of
 190   // the second v2i64 operand and concatenate them to form a v2i64 result.
 191   // The third operand is a 4-bit value of the form 0A0B, where A and B
 192   // are the element selectors for the first and second operands
 193   // respectively.
 194   PERMUTE_DWORDS,
 195
 196   // Perform a general vector permute on vector operands 0 and 1.
 197   // Each byte of operand 2 controls the corresponding byte of the result,
 198   // in the same way as a byte-level VECTOR_SHUFFLE mask.
 199   PERMUTE,
 200
 201   // Pack vector operands 0 and 1 into a single vector with half-sized elements.
 202   PACK,
 203
 204   // Unpack the first half of vector operand 0 into double-sized elements.
 205   // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
 206   UNPACK_HIGH,
 207   UNPACKL_HIGH,
 208
 209   // Likewise for the second half.
 210   UNPACK_LOW,
 211   UNPACKL_LOW,
 212
 213   // Shift each element of vector operand 0 by the number of bits specified
 214   // by scalar operand 1.
 215   VSHL_BY_SCALAR,
 216   VSRL_BY_SCALAR,
 217   VSRA_BY_SCALAR,
 218
 219   // For each element of the output type, sum across all sub-elements of
 220   // operand 0 belonging to the corresponding element, and add in the
 221   // rightmost sub-element of the corresponding element of operand 1.
 222   VSUM,
 223
 224   // Compare integer vector operands 0 and 1 to produce the usual 0/-1
 225   // vector result.  VICMPE is for equality, VICMPH for "signed greater than"
 226   // and VICMPHL for "unsigned greater than".
 227   VICMPE,
 228   VICMPH,
 229   VICMPHL,
 230
 231   // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
 232   // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
 233   // greater than" and VFCMPHE for "ordered and greater than or equal to".
 234   VFCMPE,
 235   VFCMPH,
 236   VFCMPHE,
 237
 238   // Extend the even f32 elements of vector operand 0 to produce a vector
 239   // of f64 elements.
 240   VEXTEND,
 241
 242   // Round the f64 elements of vector operand 0 to f32s and store them in the
 243   // even elements of the result.
 244   VROUND,
 245
 246   // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
 247   // ATOMIC_LOAD_<op>.
 248   //
 249   // Operand 0: the address of the containing 32-bit-aligned field
 250   // Operand 1: the second operand of <op>, in the high bits of an i32
 251   //            for everything except ATOMIC_SWAPW
 252   // Operand 2: how many bits to rotate the i32 left to bring the first
 253   //            operand into the high bits
 254   // Operand 3: the negative of operand 2, for rotating the other way
 255   // Operand 4: the width of the field in bits (8 or 16)
 256   ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
 257   ATOMIC_LOADW_ADD,
 258   ATOMIC_LOADW_SUB,
 259   ATOMIC_LOADW_AND,
 260   ATOMIC_LOADW_OR,
 261   ATOMIC_LOADW_XOR,
 262   ATOMIC_LOADW_NAND,
 263   ATOMIC_LOADW_MIN,
 264   ATOMIC_LOADW_MAX,
 265   ATOMIC_LOADW_UMIN,
 266   ATOMIC_LOADW_UMAX,
 267
 268   // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
 269   //
 270   // Operand 0: the address of the containing 32-bit-aligned field
 271   // Operand 1: the compare value, in the low bits of an i32
 272   // Operand 2: the swap value, in the low bits of an i32
 273   // Operand 3: how many bits to rotate the i32 left to bring the first
 274   //            operand into the high bits
 275   // Operand 4: the negative of operand 3, for rotating the other way
 276   // Operand 5: the width of the field in bits (8 or 16)
 277   ATOMIC_CMP_SWAPW,
 278
 279   // Prefetch from the second operand using the 4-bit control code in
 280   // the first operand.  The code is 1 for a load prefetch and 2 for
 281   // a store prefetch.
 282   PREFETCH
 283 };
 284
 285 // True for the PC-relative address opcodes PCREL_WRAPPER and PCREL_OFFSET.
 286 inline bool isPCREL(unsigned Opcode) {
 287   return Opcode == PCREL_OFFSET || Opcode == PCREL_WRAPPER;
 288 }
 289 } // end namespace SystemZISD
 290
 291 namespace SystemZICMP {
 292 // Describes whether an integer comparison needs to be signed or unsigned,
 293 // or whether either type is OK.
 294 enum {
 295   Any,          // Either a signed or an unsigned comparison is OK.
 296   UnsignedOnly, // The comparison needs to be unsigned.
 297   SignedOnly    // The comparison needs to be signed.
 298 };
 299 } // end namespace SystemZICMP
 300
 301 class SystemZSubtarget;
 302 class SystemZTargetMachine;
 303
 304 class SystemZTargetLowering : public TargetLowering {
 305 public:
 306   explicit SystemZTargetLowering(const TargetMachine &TM,
 307                                  const SystemZSubtarget &STI);
 308
 309   // Override TargetLowering.
 310   MVT getScalarShiftAmountTy(EVT LHSTy) const override {
 311     return MVT::i32; // The same shift-amount type for every LHSTy.
 312   }
 313   MVT getVectorIdxTy() const override {
 314     // Only the lower 12 bits of an element index are used, so we don't
 315     // want to clobber the upper 32 bits of a GPR unnecessarily.
 316     return MVT::i32;
 317   }
 318   TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
 319     const override {
 320     // Widen subvectors of byte-multiple elements to the full width rather
 321     // than promoting integer elements.  This is better because:
 322     //
 323     // (a) it means that we can handle the ABI for passing and returning
 324     //     sub-128 vectors without having to handle them as legal types.
 325     //
 326     // (b) we don't have instructions to extend on load and truncate on store,
 327     //     so promoting the integers is less efficient.
 328     //
 329     // (c) there are no multiplication instructions for the widest integer
 330     //     type (v2i64).
 331     if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
 332       return TypeWidenVector;
 333     return TargetLoweringBase::getPreferredVectorAction(VT);
 334   }
 335   EVT getSetCCResultType(LLVMContext &, EVT) const override;
 336   bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 337   bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
 338   bool isLegalICmpImmediate(int64_t Imm) const override;
 339   bool isLegalAddImmediate(int64_t Imm) const override;
 340   bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
 341   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
 342                                       unsigned Align,
 343                                       bool *Fast) const override;
 344   bool isTruncateFree(Type *, Type *) const override;
 345   bool isTruncateFree(EVT, EVT) const override;
 346   const char *getTargetNodeName(unsigned Opcode) const override;
 347   std::pair<unsigned, const TargetRegisterClass *>
 348   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 349                                const std::string &Constraint,
 350                                MVT VT) const override;
 351   TargetLowering::ConstraintType
 352     getConstraintType(const std::string &Constraint) const override;
 353   TargetLowering::ConstraintWeight
 354     getSingleConstraintMatchWeight(AsmOperandInfo &info,
 355                                    const char *constraint) const override;
 356   void LowerAsmOperandForConstraint(SDValue Op,
 357                                     std::string &Constraint,
 358                                     std::vector<SDValue> &Ops,
 359                                     SelectionDAG &DAG) const override;
 360
 361   unsigned getInlineAsmMemConstraint(
 362       const std::string &ConstraintCode) const override {
 363     if (ConstraintCode.size() == 1) { // Only one-letter codes are mapped here.
 364       switch(ConstraintCode[0]) {
 365       default:
 366         break; // Not Q, R, S or T: defer to TargetLowering below.
 367       case 'Q':
 368         return InlineAsm::Constraint_Q;
 369       case 'R':
 370         return InlineAsm::Constraint_R;
 371       case 'S':
 372         return InlineAsm::Constraint_S;
 373       case 'T':
 374         return InlineAsm::Constraint_T;
 375       }
 376     }
 377     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 378   }
 379
 380   MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
 381                                                  MachineBasicBlock *BB) const
 382     override;
 383   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 384   bool allowTruncateForTailCall(Type *, Type *) const override;
 385   bool mayBeEmittedAsTailCall(CallInst *CI) const override;
 386   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
 387                                bool isVarArg,
 388                                const SmallVectorImpl<ISD::InputArg> &Ins,
 389                                SDLoc DL, SelectionDAG &DAG,
 390                                SmallVectorImpl<SDValue> &InVals) const override;
 391   SDValue LowerCall(CallLoweringInfo &CLI,
 392                     SmallVectorImpl<SDValue> &InVals) const override;
 393
 394   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 395                       const SmallVectorImpl<ISD::OutputArg> &Outs,
 396                       const SmallVectorImpl<SDValue> &OutVals,
 397                       SDLoc DL, SelectionDAG &DAG) const override;
 398   SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
 399                                       SelectionDAG &DAG) const override;
 400   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 401
 402 private:
 403   const SystemZSubtarget &Subtarget;
 404
 405   // Implement LowerOperation for individual opcodes.
 406   SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
 407   SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
 408   SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
 409   SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
 410                              SelectionDAG &DAG) const;
 411   SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
 412                             SelectionDAG &DAG, unsigned Opcode,
 413                             SDValue GOTOffset) const;
 414   SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
 415                                 SelectionDAG &DAG) const;
 416   SDValue lowerBlockAddress(BlockAddressSDNode *Node,
 417                             SelectionDAG &DAG) const;
 418   SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
 419   SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
 420   SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
 421   SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
 422   SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
 423   SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
 424   SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
 425   SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
 426   SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
 427   SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
 428   SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
 429   SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
 430   SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
 431   SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
 432   SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
 433                               unsigned Opcode) const;
 434   SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
 435   SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
 436   SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
 437   SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
 438   SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
 439   SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
 440   SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
 441   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
 442   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 443   SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
 444   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
 445   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
 446   SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
 447                                  unsigned UnpackHigh) const;
 448   SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
 449
 450   SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
 451                          unsigned Index, DAGCombinerInfo &DCI,
 452                          bool Force) const;
 453   SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
 454                                  DAGCombinerInfo &DCI) const;
 455
 456   // If the last instruction before MBBI in MBB was some form of COMPARE,
 457   // try to replace it with a COMPARE AND BRANCH just before MBBI.
 458   // CCMask and Target are the BRC-like operands for the branch.
 459   // Return true if the change was made.
 460   bool convertPrevCompareToBranch(MachineBasicBlock *MBB,
 461                                   MachineBasicBlock::iterator MBBI,
 462                                   unsigned CCMask,
 463                                   MachineBasicBlock *Target) const;
 464
 465   // Implement EmitInstrWithCustomInserter for individual operation types.
 466   MachineBasicBlock *emitSelect(MachineInstr *MI,
 467                                 MachineBasicBlock *BB) const;
 468   MachineBasicBlock *emitCondStore(MachineInstr *MI,
 469                                    MachineBasicBlock *BB,
 470                                    unsigned StoreOpcode, unsigned STOCOpcode,
 471                                    bool Invert) const;
 472   MachineBasicBlock *emitExt128(MachineInstr *MI,
 473                                 MachineBasicBlock *MBB,
 474                                 bool ClearEven, unsigned SubReg) const;
 475   MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI,
 476                                           MachineBasicBlock *BB,
 477                                           unsigned BinOpcode, unsigned BitSize,
 478                                           bool Invert = false) const;
 479   MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI,
 480                                           MachineBasicBlock *MBB,
 481                                           unsigned CompareOpcode,
 482                                           unsigned KeepOldMask,
 483                                           unsigned BitSize) const;
 484   MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
 485                                         MachineBasicBlock *BB) const;
 486   MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
 487                                        MachineBasicBlock *BB,
 488                                        unsigned Opcode) const;
 489   MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
 490                                        MachineBasicBlock *BB,
 491                                        unsigned Opcode) const;
 492   MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
 493                                           MachineBasicBlock *MBB,
 494                                           unsigned Opcode,
 495                                           bool NoFloat) const;
 496 };
 497 } // end namespace llvm
 498
 499 #endif