1 //===-- PPC32ISelPattern.cpp - A pattern matching inst selector for PPC32 -===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Nate Begeman and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a pattern matching instruction selector for 32 bit PowerPC.
11 // Magic number generation for integer divide from the PowerPC Compiler Writer's
12 // Guide, section 3.2.3.5
14 //===----------------------------------------------------------------------===//
17 #include "PowerPCInstrBuilder.h"
18 #include "PowerPCInstrInfo.h"
19 #include "PPC32TargetMachine.h"
20 #include "llvm/Constants.h"
21 #include "llvm/Function.h"
22 #include "llvm/CodeGen/MachineConstantPool.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/CodeGen/SSARegMap.h"
28 #include "llvm/Target/TargetData.h"
29 #include "llvm/Target/TargetLowering.h"
30 #include "llvm/Target/TargetOptions.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/ADT/Statistic.h"
39 //===----------------------------------------------------------------------===//
40 // PPC32TargetLowering - PPC32 Implementation of the TargetLowering interface
42 class PPC32TargetLowering : public TargetLowering {
43 int VarArgsFrameIndex; // FrameIndex for start of varargs area.
44 int ReturnAddrIndex; // FrameIndex for return slot.
46 PPC32TargetLowering(TargetMachine &TM) : TargetLowering(TM) {
47 // Fold away setcc operations if possible.
48 setSetCCIsExpensive();
50 // Set up the register classes.
51 addRegisterClass(MVT::i32, PPC32::GPRCRegisterClass);
52 addRegisterClass(MVT::f32, PPC32::FPRCRegisterClass);
53 addRegisterClass(MVT::f64, PPC32::FPRCRegisterClass);
55 // PowerPC has no intrinsics for these particular operations
56 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
57 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
58 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
60 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
61 setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
62 setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
64 // PowerPC has no SREM/UREM instructions
65 setOperationAction(ISD::SREM, MVT::i32, Expand);
66 setOperationAction(ISD::UREM, MVT::i32, Expand);
68 // We don't support sin/cos/sqrt/fmod
69 setOperationAction(ISD::FSIN , MVT::f64, Expand);
70 setOperationAction(ISD::FCOS , MVT::f64, Expand);
71 setOperationAction(ISD::SREM , MVT::f64, Expand);
72 setOperationAction(ISD::FSIN , MVT::f32, Expand);
73 setOperationAction(ISD::FCOS , MVT::f32, Expand);
74 setOperationAction(ISD::SREM , MVT::f32, Expand);
76 // If we're enabling GP optimizations, use hardware square root
77 if (!TM.getSubtarget<PPCSubtarget>().isGigaProcessor()) {
78 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
79 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
82 // PowerPC does not have CTPOP or CTTZ
83 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
84 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
86 // PowerPC does not have Select
87 setOperationAction(ISD::SELECT, MVT::i32, Expand);
88 setOperationAction(ISD::SELECT, MVT::f32, Expand);
89 setOperationAction(ISD::SELECT, MVT::f64, Expand);
91 // PowerPC does not have FP_TO_UINT
92 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
94 setSetCCResultContents(ZeroOrOneSetCCResult);
95 addLegalFPImmediate(+0.0); // Necessary for FSEL
96 addLegalFPImmediate(-0.0); //
98 computeRegisterProperties();
101 /// LowerArguments - This hook must be implemented to indicate how we should
102 /// lower the arguments for the specified function, into the specified DAG.
103 virtual std::vector<SDOperand>
104 LowerArguments(Function &F, SelectionDAG &DAG);
106 /// LowerCallTo - This hook lowers an abstract call to a function into an
108 virtual std::pair<SDOperand, SDOperand>
109 LowerCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg, unsigned CC,
110 bool isTailCall, SDOperand Callee, ArgListTy &Args,
113 virtual SDOperand LowerVAStart(SDOperand Chain, SDOperand VAListP,
114 Value *VAListV, SelectionDAG &DAG);
116 virtual std::pair<SDOperand,SDOperand>
117 LowerVAArg(SDOperand Chain, SDOperand VAListP, Value *VAListV,
118 const Type *ArgTy, SelectionDAG &DAG);
120 virtual std::pair<SDOperand, SDOperand>
121 LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth,
127 std::vector<SDOperand>
128 PPC32TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
130 // add beautiful description of PPC stack frame format, or at least some docs
132 MachineFunction &MF = DAG.getMachineFunction();
133 MachineFrameInfo *MFI = MF.getFrameInfo();
134 MachineBasicBlock& BB = MF.front();
135 std::vector<SDOperand> ArgValues;
137 // Due to the rather complicated nature of the PowerPC ABI, rather than a
138 // fixed size array of physical args, for the sake of simplicity let the STL
139 // handle tracking them for us.
140 std::vector<unsigned> argVR, argPR, argOp;
141 unsigned ArgOffset = 24;
142 unsigned GPR_remaining = 8;
143 unsigned FPR_remaining = 13;
144 unsigned GPR_idx = 0, FPR_idx = 0;
145 static const unsigned GPR[] = {
146 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
147 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
149 static const unsigned FPR[] = {
150 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
151 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
154 // Add DAG nodes to load the arguments... On entry to a function on PPC,
155 // the arguments start at offset 24, although they are likely to be passed
157 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
158 SDOperand newroot, argt;
160 bool needsLoad = false;
161 bool ArgLive = !I->use_empty();
162 MVT::ValueType ObjectVT = getValueType(I->getType());
165 default: assert(0 && "Unhandled argument type!");
172 if (GPR_remaining > 0) {
173 MF.addLiveIn(GPR[GPR_idx]);
174 argt = newroot = DAG.getCopyFromReg(GPR[GPR_idx], MVT::i32,
176 if (ObjectVT != MVT::i32)
177 argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, newroot);
182 case MVT::i64: ObjSize = 8;
184 if (GPR_remaining > 0) {
185 SDOperand argHi, argLo;
186 MF.addLiveIn(GPR[GPR_idx]);
187 argHi = DAG.getCopyFromReg(GPR[GPR_idx], MVT::i32, DAG.getRoot());
188 // If we have two or more remaining argument registers, then both halves
189 // of the i64 can be sourced from there. Otherwise, the lower half will
190 // have to come off the stack. This can happen when an i64 is preceded
191 // by 28 bytes of arguments.
192 if (GPR_remaining > 1) {
193 MF.addLiveIn(GPR[GPR_idx+1]);
194 argLo = DAG.getCopyFromReg(GPR[GPR_idx+1], MVT::i32, argHi);
196 int FI = MFI->CreateFixedObject(4, ArgOffset+4);
197 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
198 argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
199 DAG.getSrcValue(NULL));
201 // Build the outgoing arg thingy
202 argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
210 ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
212 if (FPR_remaining > 0) {
213 MF.addLiveIn(FPR[FPR_idx]);
214 argt = newroot = DAG.getCopyFromReg(FPR[FPR_idx], ObjectVT,
224 // We need to load the argument to a virtual register if we determined above
225 // that we ran out of physical registers of the appropriate type
227 unsigned SubregOffset = 0;
228 if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
229 if (ObjectVT == MVT::i16) SubregOffset = 2;
230 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
231 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
232 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
233 DAG.getConstant(SubregOffset, MVT::i32));
234 argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
235 DAG.getSrcValue(NULL));
238 // Every 4 bytes of argument space consumes one of the GPRs available for
240 if (GPR_remaining > 0) {
241 unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
242 GPR_remaining -= delta;
245 ArgOffset += ObjSize;
247 DAG.setRoot(newroot.getValue(1));
249 ArgValues.push_back(argt);
252 // If the function takes variable number of arguments, make a frame index for
253 // the start of the first vararg value... for expansion of llvm.va_start.
255 VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
256 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
257 // If this function is vararg, store any remaining integer argument regs
258 // to their spots on the stack so that they may be loaded by deferencing the
259 // result of va_next.
260 std::vector<SDOperand> MemOps;
261 for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
262 MF.addLiveIn(GPR[GPR_idx]);
263 SDOperand Val = DAG.getCopyFromReg(GPR[GPR_idx], MVT::i32, DAG.getRoot());
264 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
265 Val, FIN, DAG.getSrcValue(NULL));
266 MemOps.push_back(Store);
267 // Increment the address by four for the next argument to store
268 SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
269 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
271 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
274 // Finally, inform the code generator which regs we return values in.
275 switch (getValueType(F.getReturnType())) {
276 default: assert(0 && "Unknown type!");
277 case MVT::isVoid: break;
282 MF.addLiveOut(PPC::R3);
285 MF.addLiveOut(PPC::R3);
286 MF.addLiveOut(PPC::R4);
290 MF.addLiveOut(PPC::F1);
297 std::pair<SDOperand, SDOperand>
298 PPC32TargetLowering::LowerCallTo(SDOperand Chain,
299 const Type *RetTy, bool isVarArg,
300 unsigned CallingConv, bool isTailCall,
301 SDOperand Callee, ArgListTy &Args,
303 // args_to_use will accumulate outgoing args for the ISD::CALL case in
304 // SelectExpr to use to put the arguments in the appropriate registers.
305 std::vector<SDOperand> args_to_use;
307 // Count how many bytes are to be pushed on the stack, including the linkage
308 // area, and parameter passing area.
309 unsigned NumBytes = 24;
312 Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
313 DAG.getConstant(NumBytes, getPointerTy()));
315 for (unsigned i = 0, e = Args.size(); i != e; ++i)
316 switch (getValueType(Args[i].second)) {
317 default: assert(0 && "Unknown value type!");
331 // Just to be safe, we'll always reserve the full 24 bytes of linkage area
332 // plus 32 bytes of argument space in case any called code gets funky on us.
333 // (Required by ABI to support var arg)
334 if (NumBytes < 56) NumBytes = 56;
336 // Adjust the stack pointer for the new arguments...
337 // These operations are automatically eliminated by the prolog/epilog pass
338 Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
339 DAG.getConstant(NumBytes, getPointerTy()));
341 // Set up a copy of the stack pointer for use loading and storing any
342 // arguments that may not fit in the registers available for argument
344 SDOperand StackPtr = DAG.getCopyFromReg(PPC::R1, MVT::i32,
347 // Figure out which arguments are going to go in registers, and which in
348 // memory. Also, if this is a vararg function, floating point operations
349 // must be stored to our stack, and loaded into integer regs as well, if
350 // any integer regs are available for argument passing.
351 unsigned ArgOffset = 24;
352 unsigned GPR_remaining = 8;
353 unsigned FPR_remaining = 13;
355 std::vector<SDOperand> MemOps;
356 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
357 // PtrOff will be used to store the current argument to the stack if a
358 // register cannot be found for it.
359 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
360 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
361 MVT::ValueType ArgVT = getValueType(Args[i].second);
364 default: assert(0 && "Unexpected ValueType for argument!");
368 // Promote the integer to 32 bits. If the input type is signed use a
369 // sign extend, otherwise use a zero extend.
370 if (Args[i].second->isSigned())
371 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
373 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
376 if (GPR_remaining > 0) {
377 args_to_use.push_back(Args[i].first);
380 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
381 Args[i].first, PtrOff,
382 DAG.getSrcValue(NULL)));
387 // If we have one free GPR left, we can place the upper half of the i64
388 // in it, and store the other half to the stack. If we have two or more
389 // free GPRs, then we can pass both halves of the i64 in registers.
390 if (GPR_remaining > 0) {
391 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
392 Args[i].first, DAG.getConstant(1, MVT::i32));
393 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
394 Args[i].first, DAG.getConstant(0, MVT::i32));
395 args_to_use.push_back(Hi);
397 if (GPR_remaining > 0) {
398 args_to_use.push_back(Lo);
401 SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
402 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
403 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
404 Lo, PtrOff, DAG.getSrcValue(NULL)));
407 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
408 Args[i].first, PtrOff,
409 DAG.getSrcValue(NULL)));
415 if (FPR_remaining > 0) {
416 args_to_use.push_back(Args[i].first);
419 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
420 Args[i].first, PtrOff,
421 DAG.getSrcValue(NULL));
422 MemOps.push_back(Store);
423 // Float varargs are always shadowed in available integer registers
424 if (GPR_remaining > 0) {
425 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
426 DAG.getSrcValue(NULL));
427 MemOps.push_back(Load);
428 args_to_use.push_back(Load);
431 if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
432 SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
433 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
434 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
435 DAG.getSrcValue(NULL));
436 MemOps.push_back(Load);
437 args_to_use.push_back(Load);
441 // If we have any FPRs remaining, we may also have GPRs remaining.
442 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
444 if (GPR_remaining > 0) {
445 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
448 if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
449 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
454 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
455 Args[i].first, PtrOff,
456 DAG.getSrcValue(NULL)));
458 ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
463 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
466 std::vector<MVT::ValueType> RetVals;
467 MVT::ValueType RetTyVT = getValueType(RetTy);
468 if (RetTyVT != MVT::isVoid)
469 RetVals.push_back(RetTyVT);
470 RetVals.push_back(MVT::Other);
472 SDOperand TheCall = SDOperand(DAG.getCall(RetVals,
473 Chain, Callee, args_to_use), 0);
474 Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
475 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
476 DAG.getConstant(NumBytes, getPointerTy()));
477 return std::make_pair(TheCall, Chain);
480 SDOperand PPC32TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
481 Value *VAListV, SelectionDAG &DAG) {
482 // vastart just stores the address of the VarArgsFrameIndex slot into the
483 // memory location argument.
484 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
485 return DAG.getNode(ISD::STORE, MVT::Other, Chain, FR, VAListP,
486 DAG.getSrcValue(VAListV));
489 std::pair<SDOperand,SDOperand>
490 PPC32TargetLowering::LowerVAArg(SDOperand Chain,
491 SDOperand VAListP, Value *VAListV,
492 const Type *ArgTy, SelectionDAG &DAG) {
493 MVT::ValueType ArgVT = getValueType(ArgTy);
496 DAG.getLoad(MVT::i32, Chain, VAListP, DAG.getSrcValue(VAListV));
497 SDOperand Result = DAG.getLoad(ArgVT, Chain, VAList, DAG.getSrcValue(NULL));
499 if (ArgVT == MVT::i32 || ArgVT == MVT::f32)
502 assert((ArgVT == MVT::i64 || ArgVT == MVT::f64) &&
503 "Other types should have been promoted for varargs!");
506 VAList = DAG.getNode(ISD::ADD, VAList.getValueType(), VAList,
507 DAG.getConstant(Amt, VAList.getValueType()));
508 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain,
509 VAList, VAListP, DAG.getSrcValue(VAListV));
510 return std::make_pair(Result, Chain);
514 std::pair<SDOperand, SDOperand> PPC32TargetLowering::
515 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
517 assert(0 && "LowerFrameReturnAddress unimplemented");
522 Statistic<>Recorded("ppc-codegen", "Number of recording ops emitted");
523 Statistic<>FusedFP("ppc-codegen", "Number of fused fp operations");
524 Statistic<>FrameOff("ppc-codegen", "Number of frame idx offsets collapsed");
526 //===--------------------------------------------------------------------===//
527 /// ISel - PPC32 specific code to select PPC32 machine instructions for
528 /// SelectionDAG operations.
529 //===--------------------------------------------------------------------===//
530 class ISel : public SelectionDAGISel {
531 PPC32TargetLowering PPC32Lowering;
532 SelectionDAG *ISelDAG; // Hack to support us having a dag->dag transform
533 // for sdiv and udiv until it is put into the future
536 /// ExprMap - As shared expressions are codegen'd, we keep track of which
537 /// vreg the value is produced in, so we only emit one copy of each compiled
539 std::map<SDOperand, unsigned> ExprMap;
541 unsigned GlobalBaseReg;
542 bool GlobalBaseInitialized;
545 ISel(TargetMachine &TM) : SelectionDAGISel(PPC32Lowering), PPC32Lowering(TM),
548 /// runOnFunction - Override this function in order to reset our per-function
550 virtual bool runOnFunction(Function &Fn) {
551 // Make sure we re-emit a set of the global base reg if necessary
552 GlobalBaseInitialized = false;
553 return SelectionDAGISel::runOnFunction(Fn);
556 /// InstructionSelectBasicBlock - This callback is invoked by
557 /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
558 virtual void InstructionSelectBasicBlock(SelectionDAG &DAG) {
560 // Codegen the basic block.
562 Select(DAG.getRoot());
564 // Clear state used for selection.
569 // convenience functions for virtual register creation
570 inline unsigned MakeIntReg() {
571 return RegMap->createVirtualRegister(PPC32::GPRCRegisterClass);
573 inline unsigned MakeFPReg() {
574 return RegMap->createVirtualRegister(PPC32::FPRCRegisterClass);
577 // dag -> dag expanders for integer divide by constant
578 SDOperand BuildSDIVSequence(SDOperand N);
579 SDOperand BuildUDIVSequence(SDOperand N);
581 unsigned getGlobalBaseReg();
582 unsigned getConstDouble(double floatVal, unsigned Result);
583 void MoveCRtoGPR(unsigned CCReg, ISD::CondCode CC, unsigned Result);
584 bool SelectBitfieldInsert(SDOperand OR, unsigned Result);
585 unsigned FoldIfWideZeroExtend(SDOperand N);
586 unsigned SelectCC(SDOperand LHS, SDOperand RHS, ISD::CondCode CC);
587 bool SelectIntImmediateExpr(SDOperand N, unsigned Result,
588 unsigned OCHi, unsigned OCLo,
589 bool IsArithmetic = false, bool Negate = false);
590 unsigned SelectExpr(SDOperand N, bool Recording=false);
591 void Select(SDOperand N);
593 unsigned SelectAddr(SDOperand N, unsigned& Reg, int& offset);
594 void SelectBranchCC(SDOperand N);
596 virtual const char *getPassName() const {
597 return "PowerPC Pattern Instruction Selection";
601 // isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
602 // any number of 0s on either side. The 1s are allowed to wrap from LSB to
603 // MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
604 // not, since all 1s are not contiguous.
605 static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
606 if (isShiftedMask_32(Val)) {
607 // look for the first non-zero bit
608 MB = CountLeadingZeros_32(Val);
609 // look for the first zero bit after the run of ones
610 ME = CountLeadingZeros_32((Val - 1) ^ Val);
612 } else if (isShiftedMask_32(Val = ~Val)) { // invert mask
613 // effectively look for the first zero bit
614 ME = CountLeadingZeros_32(Val) - 1;
615 // effectively look for the first one bit after the run of zeros
616 MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
623 // isRotateAndMask - Returns true if Mask and Shift can be folded in to a rotate
624 // and mask opcode and mask operation.
625 static bool isRotateAndMask(unsigned Opcode, unsigned Shift, unsigned Mask,
627 unsigned &SH, unsigned &MB, unsigned &ME) {
628 if (Shift > 31) return false;
629 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
631 if (Opcode == ISD::SHL) { // shift left
632 // apply shift to mask if it comes first
633 if (IsShiftMask) Mask = Mask << Shift;
634 // determine which bits are made indeterminant by shift
635 Indeterminant = ~(0xFFFFFFFFu << Shift);
636 } else if (Opcode == ISD::SRA || Opcode == ISD::SRL) { // shift rights
637 // apply shift to mask if it comes first
638 if (IsShiftMask) Mask = Mask >> Shift;
639 // determine which bits are made indeterminant by shift
640 Indeterminant = ~(0xFFFFFFFFu >> Shift);
641 // adjust for the left rotate
645 // if the mask doesn't intersect any Indeterminant bits
646 if (Mask && !(Mask & Indeterminant)) {
648 // make sure the mask is still a mask (wrap arounds may not be)
649 return isRunOfOnes(Mask, MB, ME);
656 // isIntImmediate - This method tests to see if a constant operand.
657 // If so Imm will receive the 32 bit value.
658 static bool isIntImmediate(SDOperand N, unsigned& Imm) {
660 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
662 Imm = (unsigned)CN->getSignExtended();
670 // isOpcWithIntImmediate - This method tests to see if the node is a specific
671 // opcode and that it has a immediate integer right operand.
672 // If so Imm will receive the 32 bit value.
673 static bool isOpcWithIntImmediate(SDOperand N, unsigned Opc, unsigned& Imm) {
674 return N.getOpcode() == Opc && isIntImmediate(N.getOperand(1), Imm);
677 // isOprShiftImm - Returns true if the specified operand is a shift opcode with
678 // a immediate shift count less than 32.
679 static bool isOprShiftImm(SDOperand N, unsigned& Opc, unsigned& SH) {
681 return (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
682 isIntImmediate(N.getOperand(1), SH) && SH < 32;
685 // isOprNot - Returns true if the specified operand is an xor with immediate -1.
686 static bool isOprNot(SDOperand N) {
688 return isOpcWithIntImmediate(N, ISD::XOR, Imm) && (signed)Imm == -1;
// Immediate constant composers.
// Lo16 - grabs the lo 16 bits from a 32 bit constant.
// Hi16 - grabs the hi 16 bits from a 32 bit constant.
// HA16 - computes the hi bits required if the lo bits are add/subtracted in
// arithmetic, i.e. when the low half is later applied as a sign-extended
// 16 bit immediate.
static unsigned Lo16(unsigned x)  { return x & 0x0000FFFF; }
static unsigned Hi16(unsigned x)  { return Lo16(x >> 16); }
// Subtracting the sign-extended low half equals adding 0x8000 before taking
// the high half.  Done in unsigned arithmetic so values such as 0x7FFF8000
// wrap instead of overflowing a signed int.
static unsigned HA16(unsigned x)  { return Hi16(x + 0x8000u); }
700 /// NodeHasRecordingVariant - If SelectExpr can always produce code for
701 /// NodeOpcode that also sets CR0 as a side effect, return true. Otherwise,
703 static bool NodeHasRecordingVariant(unsigned NodeOpcode) {
705 default: return false;
712 /// getBCCForSetCC - Returns the PowerPC condition branch mnemonic corresponding
714 static unsigned getBCCForSetCC(ISD::CondCode CC) {
716 default: assert(0 && "Unknown condition!"); abort();
717 case ISD::SETEQ: return PPC::BEQ;
718 case ISD::SETNE: return PPC::BNE;
720 case ISD::SETLT: return PPC::BLT;
722 case ISD::SETLE: return PPC::BLE;
724 case ISD::SETGT: return PPC::BGT;
726 case ISD::SETGE: return PPC::BGE;
731 /// getCROpForOp - Return the condition register opcode (or inverted opcode)
732 /// associated with the SelectionDAG opcode.
733 static unsigned getCROpForSetCC(unsigned Opcode, bool Inv1, bool Inv2) {
735 default: assert(0 && "Unknown opcode!"); abort();
737 if (Inv1 && Inv2) return PPC::CRNOR; // De Morgan's Law
738 if (!Inv1 && !Inv2) return PPC::CRAND;
739 if (Inv1 ^ Inv2) return PPC::CRANDC;
741 if (Inv1 && Inv2) return PPC::CRNAND; // De Morgan's Law
742 if (!Inv1 && !Inv2) return PPC::CROR;
743 if (Inv1 ^ Inv2) return PPC::CRORC;
748 /// getCRIdxForSetCC - Return the index of the condition register field
749 /// associated with the SetCC condition, and whether or not the field is
750 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
751 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool& Inv) {
753 default: assert(0 && "Unknown condition!"); abort();
755 case ISD::SETLT: Inv = false; return 0;
757 case ISD::SETGE: Inv = true; return 0;
759 case ISD::SETGT: Inv = false; return 1;
761 case ISD::SETLE: Inv = true; return 1;
762 case ISD::SETEQ: Inv = false; return 2;
763 case ISD::SETNE: Inv = true; return 2;
768 /// IndexedOpForOp - Return the indexed variant for each of the PowerPC load
769 /// and store immediate instructions.
770 static unsigned IndexedOpForOp(unsigned Opcode) {
772 default: assert(0 && "Unknown opcode!"); abort();
773 case PPC::LBZ: return PPC::LBZX; case PPC::STB: return PPC::STBX;
774 case PPC::LHZ: return PPC::LHZX; case PPC::STH: return PPC::STHX;
775 case PPC::LHA: return PPC::LHAX; case PPC::STW: return PPC::STWX;
776 case PPC::LWZ: return PPC::LWZX; case PPC::STFS: return PPC::STFSX;
777 case PPC::LFS: return PPC::LFSX; case PPC::STFD: return PPC::STFDX;
778 case PPC::LFD: return PPC::LFDX;
// Structure used to return the necessary information to codegen an SDIV as
// a multiply.
struct ms {
  int m; // magic number
  int s; // shift amount
};

// Structure used to return the necessary information to codegen a UDIV as
// a multiply.
struct mu {
  unsigned int m; // magic number
  int a;          // add indicator
  int s;          // shift amount
};

/// magic - calculate the magic numbers required to codegen an integer sdiv as
/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
/// or -1.  All intermediate arithmetic is unsigned, so d == INT_MIN is
/// handled without signed overflow.
static struct ms magic(int d) {
  assert(d != 0 && "magic() requires a non-zero divisor");
  const unsigned int two31 = 0x80000000U;
  struct ms mag;

  // |d| computed without abs(), which overflows for INT_MIN.
  const unsigned int ad = d < 0 ? 0U - (unsigned int)d : (unsigned int)d;
  const unsigned int t = two31 + ((unsigned int)d >> 31);
  const unsigned int anc = t - 1 - t%ad;   // absolute value of nc
  int p = 31;                              // initialize p
  unsigned int q1 = two31/anc;             // initialize q1 = 2p/abs(nc)
  unsigned int r1 = two31 - q1*anc;        // initialize r1 = rem(2p,abs(nc))
  unsigned int q2 = two31/ad;              // initialize q2 = 2p/abs(d)
  unsigned int r2 = two31 - q2*ad;         // initialize r2 = rem(2p,abs(d))
  unsigned int delta;
  do {
    p = p + 1;
    q1 = 2*q1;        // update q1 = 2p/abs(nc)
    r1 = 2*r1;        // update r1 = rem(2p/abs(nc))
    if (r1 >= anc) {  // must be unsigned comparison
      q1 = q1 + 1;
      r1 = r1 - anc;
    }
    q2 = 2*q2;        // update q2 = 2p/abs(d)
    r2 = 2*r2;        // update r2 = rem(2p/abs(d))
    if (r2 >= ad) {   // must be unsigned comparison
      q2 = q2 + 1;
      r2 = r2 - ad;
    }
    delta = ad - r2;
  } while (q1 < delta || (q1 == delta && r1 == 0));

  // Negate in unsigned arithmetic, then reinterpret as the signed magic
  // number.
  unsigned int m = q2 + 1;
  if (d < 0) m = 0U - m;
  mag.m = (int)m;            // resulting magic number
  mag.s = p - 32;            // resulting shift
  return mag;
}

/// magicu - calculate the magic numbers required to codegen an integer udiv as
/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
static struct mu magicu(unsigned d)
{
  assert(d != 0 && "magicu() requires a non-zero divisor");
  int p;
  unsigned int nc, delta, q1, r1, q2, r2;
  struct mu magu;
  magu.a = 0;               // initialize "add" indicator
  nc = 0U - 1 - (0U - d)%d;
  p = 31;                   // initialize p
  q1 = 0x80000000/nc;       // initialize q1 = 2p/nc
  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2p,nc)
  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2p-1)/d
  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2p-1),d)
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1; // update q1
      r1 = 2*r1; // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFF) magu.a = 1;
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x80000000) magu.a = 1;
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 32;  // resulting shift
  return magu;
}
878 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
879 /// return a DAG expression to select that will generate the same value by
880 /// multiplying by a magic number. See:
881 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
882 SDOperand ISel::BuildSDIVSequence(SDOperand N) {
883 int d = (int)cast<ConstantSDNode>(N.getOperand(1))->getSignExtended();
884 ms magics = magic(d);
885 // Multiply the numerator (operand 0) by the magic value
886 SDOperand Q = ISelDAG->getNode(ISD::MULHS, MVT::i32, N.getOperand(0),
887 ISelDAG->getConstant(magics.m, MVT::i32));
888 // If d > 0 and m < 0, add the numerator
889 if (d > 0 && magics.m < 0)
890 Q = ISelDAG->getNode(ISD::ADD, MVT::i32, Q, N.getOperand(0));
891 // If d < 0 and m > 0, subtract the numerator.
892 if (d < 0 && magics.m > 0)
893 Q = ISelDAG->getNode(ISD::SUB, MVT::i32, Q, N.getOperand(0));
894 // Shift right algebraic if shift value is nonzero
896 Q = ISelDAG->getNode(ISD::SRA, MVT::i32, Q,
897 ISelDAG->getConstant(magics.s, MVT::i32));
898 // Extract the sign bit and add it to the quotient
900 ISelDAG->getNode(ISD::SRL, MVT::i32, Q, ISelDAG->getConstant(31, MVT::i32));
901 return ISelDAG->getNode(ISD::ADD, MVT::i32, Q, T);
904 /// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
905 /// return a DAG expression to select that will generate the same value by
906 /// multiplying by a magic number. See:
907 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
908 SDOperand ISel::BuildUDIVSequence(SDOperand N) {
910 (unsigned)cast<ConstantSDNode>(N.getOperand(1))->getSignExtended();
911 mu magics = magicu(d);
912 // Multiply the numerator (operand 0) by the magic value
913 SDOperand Q = ISelDAG->getNode(ISD::MULHU, MVT::i32, N.getOperand(0),
914 ISelDAG->getConstant(magics.m, MVT::i32));
916 Q = ISelDAG->getNode(ISD::SRL, MVT::i32, Q,
917 ISelDAG->getConstant(magics.s, MVT::i32));
919 SDOperand NPQ = ISelDAG->getNode(ISD::SUB, MVT::i32, N.getOperand(0), Q);
920 NPQ = ISelDAG->getNode(ISD::SRL, MVT::i32, NPQ,
921 ISelDAG->getConstant(1, MVT::i32));
922 NPQ = ISelDAG->getNode(ISD::ADD, MVT::i32, NPQ, Q);
923 Q = ISelDAG->getNode(ISD::SRL, MVT::i32, NPQ,
924 ISelDAG->getConstant(magics.s-1, MVT::i32));
929 /// getGlobalBaseReg - Output the instructions required to put the
930 /// base address to use for accessing globals into a register.
932 unsigned ISel::getGlobalBaseReg() {
933 if (!GlobalBaseInitialized) {
934 // Insert the set of GlobalBaseReg into the first MBB of the function
935 MachineBasicBlock &FirstMBB = BB->getParent()->front();
936 MachineBasicBlock::iterator MBBI = FirstMBB.begin();
937 GlobalBaseReg = MakeIntReg();
938 BuildMI(FirstMBB, MBBI, PPC::MovePCtoLR, 0, PPC::LR);
939 BuildMI(FirstMBB, MBBI, PPC::MFLR, 1, GlobalBaseReg).addReg(PPC::LR);
940 GlobalBaseInitialized = true;
942 return GlobalBaseReg;
945 /// getConstDouble - Loads a floating point value into a register, via the
946 /// Constant Pool. Optionally takes a register in which to load the value.
947 unsigned ISel::getConstDouble(double doubleVal, unsigned Result=0) {
948 unsigned Tmp1 = MakeIntReg();
949 if (0 == Result) Result = MakeFPReg();
950 MachineConstantPool *CP = BB->getParent()->getConstantPool();
951 ConstantFP *CFP = ConstantFP::get(Type::DoubleTy, doubleVal);
952 unsigned CPI = CP->getConstantPoolIndex(CFP);
954 BuildMI(BB, PPC::ADDIS, 2, Tmp1).addReg(getGlobalBaseReg())
955 .addConstantPoolIndex(CPI);
957 BuildMI(BB, PPC::LIS, 1, Tmp1).addConstantPoolIndex(CPI);
958 BuildMI(BB, PPC::LFD, 2, Result).addConstantPoolIndex(CPI).addReg(Tmp1);
962 /// MoveCRtoGPR - Move CCReg[Idx] to the least significant bit of Result. If
963 /// Inv is true, then invert the result.
/// Idx and Inv are not parameters: both are obtained from
/// getCRIdxForSetCC(CC, Inv) for the condition code CC passed in.
/// \param CCReg   condition register holding the comparison result.
/// \param CC      condition code used to pick the CR bit and the inversion.
/// \param Result  integer register that receives the single-bit value.
964 void ISel::MoveCRtoGPR(unsigned CCReg, ISD::CondCode CC, unsigned Result){
966 unsigned IntCR = MakeIntReg();
967 unsigned Idx = getCRIdxForSetCC(CC, Inv);
// Copy the condition field into CR7, then move it into the integer register
// IntCR using MFOCRF when the subtarget reports isGigaProcessor(), else MFCR.
968 BuildMI(BB, PPC::MCRF, 1, PPC::CR7).addReg(CCReg);
970 TLI.getTargetMachine().getSubtarget<PPCSubtarget>().isGigaProcessor();
971 BuildMI(BB, GPOpt ? PPC::MFOCRF : PPC::MFCR, 1, IntCR).addReg(PPC::CR7);
// Two forms follow.  The first rotates the selected bit into a temporary
// with a 31..31 mask and flips it with XORI 1; the second writes the rotated
// bit straight into Result.
// NOTE(review): presumably the first form is the Inv case -- confirm.
973 unsigned Tmp1 = MakeIntReg();
974 BuildMI(BB, PPC::RLWINM, 4, Tmp1).addReg(IntCR).addImm(32-(3-Idx))
975 .addImm(31).addImm(31);
976 BuildMI(BB, PPC::XORI, 2, Result).addReg(Tmp1).addImm(1);
978 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(IntCR).addImm(32-(3-Idx))
979 .addImm(31).addImm(31);
983 /// SelectBitfieldInsert - turn an or of two masked values into
984 /// the rotate left word immediate then mask insert (rlwimi) instruction.
985 /// Returns true on success, false if the caller still needs to select OR.
/// \param OR      the OR node; its operands 0 and 1 are the two candidates.
/// \param Result  destination register of the emitted RLWINM or RLWIMI.
987 /// Patterns matched:
988 /// 1. or shl, and 5. or and, and
989 /// 2. or and, shl 6. or shl, shr
990 /// 3. or shr, and 7. or shr, shl
992 bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) {
993 bool IsRotate = false;
// Both masks start as all-ones and are narrowed by the constant shift or AND
// found on each operand; Amount becomes the rotate immediate.
994 unsigned TgtMask = 0xFFFFFFFF, InsMask = 0xFFFFFFFF, Amount = 0;
997 SDOperand Op0 = OR.getOperand(0);
998 SDOperand Op1 = OR.getOperand(1);
1000 unsigned Op0Opc = Op0.getOpcode();
1001 unsigned Op1Opc = Op1.getOpcode();
1003 // Verify that we have the correct opcodes
1004 if (ISD::SHL != Op0Opc && ISD::SRL != Op0Opc && ISD::AND != Op0Opc)
1006 if (ISD::SHL != Op1Opc && ISD::SRL != Op1Opc && ISD::AND != Op1Opc)
1009 // Generate Mask value for Target
1010 if (isIntImmediate(Op0.getOperand(1), Value)) {
1012 case ISD::SHL: TgtMask <<= Value; break;
1013 case ISD::SRL: TgtMask >>= Value; break;
1014 case ISD::AND: TgtMask &= Value; break;
1020 // Generate Mask value for Insert
1021 if (isIntImmediate(Op1.getOperand(1), Value)) {
// IsRotate only records which shift opcode Op0 has; it takes effect further
// down, and only when the masks cover the whole word and both OR operands
// share the same input.
1026 if (Op0Opc == ISD::SRL) IsRotate = true;
1032 if (Op0Opc == ISD::SHL) IsRotate = true;
1044 // If both of the inputs are ANDs and one of them has a logical shift by
1045 // constant as its input, make that the inserted value so that we can combine
1046 // the shift into the rotate part of the rlwimi instruction
1047 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
1048 if (Op1.getOperand(0).getOpcode() == ISD::SHL ||
1049 Op1.getOperand(0).getOpcode() == ISD::SRL) {
1050 if (isIntImmediate(Op1.getOperand(0).getOperand(1), Value)) {
1051 Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ?
// Tmp3 holds the already-selected input of the folded shift.
1053 Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0));
1055 } else if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
1056 Op0.getOperand(0).getOpcode() == ISD::SRL) {
1057 if (isIntImmediate(Op0.getOperand(0).getOperand(1), Value)) {
// The shifted AND is Op0 here, so swap operands and masks to make it the
// inserted value (Op1) before folding its shift.
1058 std::swap(Op0, Op1);
1059 std::swap(TgtMask, InsMask);
1060 Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ?
1062 Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0));
1067 // Verify that the Target mask and Insert mask together form a full word mask
1068 // and that the Insert mask is a run of set bits (which implies both are runs
1069 // of set bits). Given that, Select the arguments and generate the rlwimi
1072 if (((TgtMask & InsMask) == 0) && isRunOfOnes(InsMask, MB, ME)) {
1073 unsigned Tmp1, Tmp2;
1074 bool fullMask = (TgtMask ^ InsMask) == 0xFFFFFFFF;
1075 // Check for rotlwi / rotrwi here, a special case of bitfield insert
1076 // where both bitfield halves are sourced from the same value.
1077 if (IsRotate && fullMask &&
1078 OR.getOperand(0).getOperand(0) == OR.getOperand(1).getOperand(0)) {
1079 Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0));
1080 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(Amount)
1081 .addImm(0).addImm(31);
// When Op0 is an AND and the two masks cover the whole word, Op0's input is
// selected directly instead of the AND itself; otherwise Op0 is selected
// as-is.
// NOTE(review): presumably the AND is redundant under rlwimi's mask -- confirm.
1084 if (Op0Opc == ISD::AND && fullMask)
1085 Tmp1 = SelectExpr(Op0.getOperand(0));
1087 Tmp1 = SelectExpr(Op0);
// Prefer the pre-selected shift input (Tmp3) when a shift was folded above.
1088 Tmp2 = Tmp3 ? Tmp3 : SelectExpr(Op1.getOperand(0));
1089 BuildMI(BB, PPC::RLWIMI, 5, Result).addReg(Tmp1).addReg(Tmp2)
1090 .addImm(Amount).addImm(MB).addImm(ME);
1096 /// FoldIfWideZeroExtend - 32 bit PowerPC implicit masks shift amounts to the
1097 /// low six bits. If the shift amount is an ISD::AND node with a mask that is
1098 /// wider than the implicit mask, then we can get rid of the AND and let the
1099 /// shift do the mask.
/// \param N  the shift-amount operand.
/// \returns  the register from SelectExpr, applied to the AND's input when the
///           fold applies and to N itself otherwise.
1100 unsigned ISel::FoldIfWideZeroExtend(SDOperand N) {
// The fold requires an AND with a constant C for which isMask_32(C) holds and
// C > 63.
// NOTE(review): a mask of exactly 63 equals the six-bit implicit mask and is
// not folded by this test.
1102 if (isOpcWithIntImmediate(N, ISD::AND, C) && isMask_32(C) && C > 63)
1103 return SelectExpr(N.getOperand(0));
1105 return SelectExpr(N);
/// SelectCC - Emit a comparison of LHS against RHS for condition code CC into
/// a newly created condition register of class CRRC.
/// NOTE(review): callers use the return value as that condition register --
/// confirm against the return statement.
/// \param LHS  left operand of the comparison.
/// \param RHS  right operand; a 16-bit immediate (unsigned range for unsigned
///             condition codes, signed range otherwise) takes the
///             compare-immediate path.
/// \param CC   condition code; only its signedness is consulted here.
1108 unsigned ISel::SelectCC(SDOperand LHS, SDOperand RHS, ISD::CondCode CC) {
1109 unsigned Result, Tmp1, Tmp2;
1110 bool AlreadySelected = false;
// Indexed by 2 * IsInteger + U: floating point uses FCMPU for either
// signedness, integers use CMPW (signed) or CMPLW (unsigned).
1111 static const unsigned CompareOpcodes[] =
1112 { PPC::FCMPU, PPC::FCMPU, PPC::CMPW, PPC::CMPLW };
1114 // Allocate a condition register for this expression
1115 Result = RegMap->createVirtualRegister(PPC32::CRRCRegisterClass);
1117 // Use U to determine whether the SETCC immediate range is signed or not.
1118 bool U = ISD::isUnsignedIntSetCC(CC);
1119 if (isIntImmediate(RHS, Tmp2) &&
1120 ((U && isUInt16(Tmp2)) || (!U && isInt16(Tmp2)))) {
1122 // For comparisons against zero, we can implicitly set CR0 if a recording
1123 // variant (e.g. 'or.' instead of 'or') of the instruction that defines
1124 // operand zero of the SetCC node is available.
1126 NodeHasRecordingVariant(LHS.getOpcode()) && LHS.Val->hasOneUse()) {
// RecordSuccess is cleared before selecting LHS in recording mode and tested
// afterwards to see whether a recording instruction was actually emitted.
1127 RecordSuccess = false;
1128 Tmp1 = SelectExpr(LHS, true);
1129 if (RecordSuccess) {
// Copy the implicitly-set CR0 into the condition register allocated above.
1131 BuildMI(BB, PPC::MCRF, 1, Result).addReg(PPC::CR0);
// LHS has been selected either way, so it must not be selected again below.
1134 AlreadySelected = true;
1136 // If we could not implicitly set CR0, then emit a compare immediate
1138 if (!AlreadySelected) Tmp1 = SelectExpr(LHS);
// CMPLWI takes the immediate via addImm, CMPWI via addSImm.
// NOTE(review): presumably chosen by U -- confirm.
1140 BuildMI(BB, PPC::CMPLWI, 2, Result).addReg(Tmp1).addImm(Tmp2);
1142 BuildMI(BB, PPC::CMPWI, 2, Result).addReg(Tmp1).addSImm(Tmp2);
// Register-register compare: pick the opcode from the table by operand type
// and signedness.
1144 bool IsInteger = MVT::isInteger(LHS.getValueType());
1145 unsigned CompareOpc = CompareOpcodes[2 * IsInteger + U];
1146 Tmp1 = SelectExpr(LHS);
1147 Tmp2 = SelectExpr(RHS);
1148 BuildMI(BB, CompareOpc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1153 /// Check to see if the load is a constant offset from a base register.
/// \param N       the address expression to classify.
/// \param Reg     out: a frame index (when operand 0 of an ADD is a
///                FrameIndex) or a register produced by SelectExpr.
/// \param offset  out: in the register+register ADD case this receives the
///                register selected for operand 1 rather than an immediate.
/// NOTE(review): the load selection site labels the return values as
/// 0 = imm offset, 1 = imm offset + frame index, 2 = base+index, plus a
/// further case used with a global address -- confirm against the returns.
1154 unsigned ISel::SelectAddr(SDOperand N, unsigned& Reg, int& offset)
1156 unsigned imm = 0, opcode = N.getOpcode();
1157 if (N.getOpcode() == ISD::ADD) {
1158 bool isFrame = N.getOperand(0).getOpcode() == ISD::FrameIndex;
// ADD with an immediate that fits in 16 bits (isInt16): Reg is the frame
// index or the selected base register.
1159 if (isIntImmediate(N.getOperand(1), imm) && isInt16(imm)) {
1163 Reg = cast<FrameIndexSDNode>(N.getOperand(0))->getIndex();
1166 Reg = SelectExpr(N.getOperand(0));
// Otherwise both ADD operands are selected into registers.
1170 Reg = SelectExpr(N.getOperand(0));
1171 offset = SelectExpr(N.getOperand(1));
1175 // Now check if we're dealing with a global, and whether or not we should emit
1176 // an optimized load or store for statics.
1177 if(GlobalAddressSDNode *GN = dyn_cast<GlobalAddressSDNode>(N)) {
1178 GlobalValue *GV = GN->getGlobal();
// Only globals that are neither weak nor external take this path.
1179 if (!GV->hasWeakLinkage() && !GV->isExternal()) {
1180 unsigned GlobalHi = MakeIntReg();
// GlobalHi is defined by ADDIS off the global base register or by LIS.
// NOTE(review): presumably selected by a PIC-mode check -- confirm.
1182 BuildMI(BB, PPC::ADDIS, 2, GlobalHi).addReg(getGlobalBaseReg())
1183 .addGlobalAddress(GV);
1185 BuildMI(BB, PPC::LIS, 1, GlobalHi).addGlobalAddress(GV);
// Fallback: select the whole address expression into a register.
1191 Reg = SelectExpr(N);
/// SelectBranchCC - Emit a conditional branch for node N.  Operand 0 is the
/// chain, operand 1 the condition, operand 2 the destination block, and for
/// ISD::BRCONDTWOWAY operand 3 is the fallthrough block.
1196 void ISel::SelectBranchCC(SDOperand N)
1198 MachineBasicBlock *Dest =
1199 cast<BasicBlockSDNode>(N.getOperand(2))->getBasicBlock();
1201 Select(N.getOperand(0)); //chain
1203 // FIXME: Until we have Branch_CC and Branch_Twoway_CC, we're going to have to
1204 // Fake it up by hand by checking to see if op 1 is a SetCC, or a boolean.
1207 SDOperand Cond = N.getOperand(1);
// A SETCC condition is compared directly using its own operands and
// condition code; any other condition is compared against the constant 0.
1208 if (Cond.getOpcode() == ISD::SETCC) {
1209 CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1210 CCReg = SelectCC(Cond.getOperand(0), Cond.getOperand(1), CC);
1213 CCReg = SelectCC(Cond, ISelDAG->getConstant(0, Cond.getValueType()), CC);
1215 unsigned Opc = getBCCForSetCC(CC);
1217 // Iterate to the next basic block
1218 ilist<MachineBasicBlock>::iterator It = BB;
1221 // If this is a two way branch, then grab the fallthrough basic block argument
1222 // and build a PowerPC branch pseudo-op, suitable for long branch conversion
1223 // if necessary by the branch selection pass. Otherwise, emit a standard
1224 // conditional branch.
1225 if (N.getOpcode() == ISD::BRCONDTWOWAY) {
1226 MachineBasicBlock *Fallthrough =
1227 cast<BasicBlockSDNode>(N.getOperand(3))->getBasicBlock();
// Form 1: branch to Dest on the condition, then add an unconditional branch
// to Fallthrough unless Fallthrough is the block at It.
1229 BuildMI(BB, PPC::COND_BRANCH, 4).addReg(CCReg).addImm(Opc)
1230 .addMBB(Dest).addMBB(Fallthrough);
1231 if (Fallthrough != It)
1232 BuildMI(BB, PPC::B, 1).addMBB(Fallthrough);
// Form 2: invert the branch opcode so the conditional branch targets
// Fallthrough and Dest becomes the second block operand.
// NOTE(review): the condition selecting between the two forms is not shown
// here -- confirm.
1234 if (Fallthrough != It) {
1235 Opc = PPC32InstrInfo::invertPPCBranchOpcode(Opc);
1236 BuildMI(BB, PPC::COND_BRANCH, 4).addReg(CCReg).addImm(Opc)
1237 .addMBB(Fallthrough).addMBB(Dest);
1241 // If the fallthrough path is off the end of the function, which would be
1242 // undefined behavior, set it to be the same as the current block because
1243 // we have nothing better to set it to, and leaving it alone will cause the
1244 // PowerPC Branch Selection pass to crash.
1245 if (It == BB->getParent()->end()) It = Dest;
1246 BuildMI(BB, PPC::COND_BRANCH, 4).addReg(CCReg).addImm(Opc)
1247 .addMBB(Dest).addMBB(It);
1252 // SelectIntImmediateExpr - Choose code for opcodes with immediate value.
// The constant in operand 1 of N is split into 16-bit halves and applied to
// operand 0 using OCLo for the low half and OCHi for the high half.
//   N            - node whose operand 1 must be a ConstantSDNode.
//   Result       - destination register of the final instruction.
//   OCHi, OCLo   - opcodes taking the high / low 16-bit immediate.
//   IsArithmetic - selects HA16 instead of Hi16 for the high half and
//                  addSImm instead of addImm for both immediates.
//   Negate       - request negation of the constant (used for ISD::SUB).
// Returns false when operand 1 is not a constant or when the use-count check
// below rejects the expansion.
// NOTE(review): presumably returns true once the instructions are emitted --
// confirm.
1253 bool ISel::SelectIntImmediateExpr(SDOperand N, unsigned Result,
1254 unsigned OCHi, unsigned OCLo,
1255 bool IsArithmetic, bool Negate) {
1257 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1));
1258 // exit if not a constant
1259 if (!CN) return false;
1260 // extract immediate
1261 unsigned C = (unsigned)CN->getSignExtended();
1262 // negate if required (ISD::SUB)
1264 // get the hi and lo portions of constant
1265 unsigned Hi = IsArithmetic ? HA16(C) : Hi16(C);
1266 unsigned Lo = Lo16(C);
1267 // assume no intermediate result from lo instruction (same as final result)
1268 unsigned Tmp = Result;
1269 // check if two instructions are needed
1271 // exit if usage indicates it would be better to load immediate into a
1273 if (CN->use_size() > 2) return false;
1274 // need intermediate result for two instructions
1277 // get first operand
1278 unsigned Opr0 = SelectExpr(N.getOperand(0));
1279 // is a lo instruction needed
1281 // generate instruction for lo portion
1282 const MachineInstrBuilder &MIBLo = BuildMI(BB, OCLo, 2, Tmp).addReg(Opr0);
1283 if (IsArithmetic) MIBLo.addSImm(Lo); else MIBLo.addImm(Lo);
1284 // need to switch out first operand for hi instruction
1287 // is a hi instruction needed
1289 // generate instruction for hi portion
1290 const MachineInstrBuilder &MIBHi = BuildMI(BB, OCHi, 2, Result).addReg(Opr0);
1291 if (IsArithmetic) MIBHi.addSImm(Hi); else MIBHi.addImm(Hi);
1296 unsigned ISel::SelectExpr(SDOperand N, bool Recording) {
1298 unsigned Tmp1, Tmp2, Tmp3;
1300 unsigned opcode = N.getOpcode();
1302 SDNode *Node = N.Val;
1303 MVT::ValueType DestType = N.getValueType();
1305 if (Node->getOpcode() == ISD::CopyFromReg &&
1306 (MRegisterInfo::isVirtualRegister(cast<RegSDNode>(Node)->getReg()) ||
1307 cast<RegSDNode>(Node)->getReg() == PPC::R1))
1308 // Just use the specified register as our input.
1309 return cast<RegSDNode>(Node)->getReg();
1311 unsigned &Reg = ExprMap[N];
1312 if (Reg) return Reg;
1314 switch (N.getOpcode()) {
1316 Reg = Result = (N.getValueType() != MVT::Other) ?
1317 MakeReg(N.getValueType()) : 1;
1321 // If this is a call instruction, make sure to prepare ALL of the result
1322 // values as well as the chain.
1323 if (Node->getNumValues() == 1)
1324 Reg = Result = 1; // Void call, just a chain.
1326 Result = MakeReg(Node->getValueType(0));
1327 ExprMap[N.getValue(0)] = Result;
1328 for (unsigned i = 1, e = N.Val->getNumValues()-1; i != e; ++i)
1329 ExprMap[N.getValue(i)] = MakeReg(Node->getValueType(i));
1330 ExprMap[SDOperand(Node, Node->getNumValues()-1)] = 1;
1333 case ISD::ADD_PARTS:
1334 case ISD::SUB_PARTS:
1335 case ISD::SHL_PARTS:
1336 case ISD::SRL_PARTS:
1337 case ISD::SRA_PARTS:
1338 Result = MakeReg(Node->getValueType(0));
1339 ExprMap[N.getValue(0)] = Result;
1340 for (unsigned i = 1, e = N.Val->getNumValues(); i != e; ++i)
1341 ExprMap[N.getValue(i)] = MakeReg(Node->getValueType(i));
1347 Node->dump(); std::cerr << '\n';
1348 assert(0 && "Node not handled!\n");
1350 BuildMI(BB, PPC::IMPLICIT_DEF, 0, Result);
1352 case ISD::DYNAMIC_STACKALLOC:
1353 // Generate both result values. FIXME: Need a better commment here?
1355 ExprMap[N.getValue(1)] = 1;
1357 Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
1359 // FIXME: We are currently ignoring the requested alignment for handling
1360 // greater than the stack alignment. This will need to be revisited at some
1361 // point. Align = N.getOperand(2);
1362 if (!isa<ConstantSDNode>(N.getOperand(2)) ||
1363 cast<ConstantSDNode>(N.getOperand(2))->getValue() != 0) {
1364 std::cerr << "Cannot allocate stack object with greater alignment than"
1365 << " the stack alignment yet!";
1368 Select(N.getOperand(0));
1369 Tmp1 = SelectExpr(N.getOperand(1));
1370 // Subtract size from stack pointer, thereby allocating some space.
1371 BuildMI(BB, PPC::SUBF, 2, PPC::R1).addReg(Tmp1).addReg(PPC::R1);
1372 // Put a pointer to the space into the result register by copying the SP
1373 BuildMI(BB, PPC::OR, 2, Result).addReg(PPC::R1).addReg(PPC::R1);
1376 case ISD::ConstantPool:
1377 Tmp1 = cast<ConstantPoolSDNode>(N)->getIndex();
1378 Tmp2 = MakeIntReg();
1380 BuildMI(BB, PPC::ADDIS, 2, Tmp2).addReg(getGlobalBaseReg())
1381 .addConstantPoolIndex(Tmp1);
1383 BuildMI(BB, PPC::LIS, 1, Tmp2).addConstantPoolIndex(Tmp1);
1384 BuildMI(BB, PPC::LA, 2, Result).addReg(Tmp2).addConstantPoolIndex(Tmp1);
1387 case ISD::FrameIndex:
1388 Tmp1 = cast<FrameIndexSDNode>(N)->getIndex();
1389 addFrameReference(BuildMI(BB, PPC::ADDI, 2, Result), (int)Tmp1, 0, false);
1392 case ISD::GlobalAddress: {
1393 GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
1394 Tmp1 = MakeIntReg();
1396 BuildMI(BB, PPC::ADDIS, 2, Tmp1).addReg(getGlobalBaseReg())
1397 .addGlobalAddress(GV);
1399 BuildMI(BB, PPC::LIS, 1, Tmp1).addGlobalAddress(GV);
1400 if (GV->hasWeakLinkage() || GV->isExternal()) {
1401 BuildMI(BB, PPC::LWZ, 2, Result).addGlobalAddress(GV).addReg(Tmp1);
1403 BuildMI(BB, PPC::LA, 2, Result).addReg(Tmp1).addGlobalAddress(GV);
1411 case ISD::SEXTLOAD: {
1412 MVT::ValueType TypeBeingLoaded = (ISD::LOAD == opcode) ?
1413 Node->getValueType(0) : cast<VTSDNode>(Node->getOperand(3))->getVT();
1414 bool sext = (ISD::SEXTLOAD == opcode);
1416 // Make sure we generate both values.
1418 ExprMap[N.getValue(1)] = 1; // Generate the token
1420 Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
1422 SDOperand Chain = N.getOperand(0);
1423 SDOperand Address = N.getOperand(1);
1426 switch (TypeBeingLoaded) {
1427 default: Node->dump(); assert(0 && "Cannot load this type!");
1428 case MVT::i1: Opc = PPC::LBZ; break;
1429 case MVT::i8: Opc = PPC::LBZ; break;
1430 case MVT::i16: Opc = sext ? PPC::LHA : PPC::LHZ; break;
1431 case MVT::i32: Opc = PPC::LWZ; break;
1432 case MVT::f32: Opc = PPC::LFS; break;
1433 case MVT::f64: Opc = PPC::LFD; break;
1436 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Address)) {
1437 Tmp1 = MakeIntReg();
1438 int CPI = CP->getIndex();
1440 BuildMI(BB, PPC::ADDIS, 2, Tmp1).addReg(getGlobalBaseReg())
1441 .addConstantPoolIndex(CPI);
1443 BuildMI(BB, PPC::LIS, 1, Tmp1).addConstantPoolIndex(CPI);
1444 BuildMI(BB, Opc, 2, Result).addConstantPoolIndex(CPI).addReg(Tmp1);
1445 } else if (Address.getOpcode() == ISD::FrameIndex) {
1446 Tmp1 = cast<FrameIndexSDNode>(Address)->getIndex();
1447 addFrameReference(BuildMI(BB, Opc, 2, Result), (int)Tmp1);
1450 switch(SelectAddr(Address, Tmp1, offset)) {
1451 default: assert(0 && "Unhandled return value from SelectAddr");
1452 case 0: // imm offset, no frame, no index
1453 BuildMI(BB, Opc, 2, Result).addSImm(offset).addReg(Tmp1);
1455 case 1: // imm offset + frame index
1456 addFrameReference(BuildMI(BB, Opc, 2, Result), (int)Tmp1, offset);
1458 case 2: // base+index addressing
1459 Opc = IndexedOpForOp(Opc);
1460 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(offset);
1463 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Address);
1464 GlobalValue *GV = GN->getGlobal();
1465 BuildMI(BB, Opc, 2, Result).addGlobalAddress(GV).addReg(Tmp1);
1474 unsigned GPR_idx = 0, FPR_idx = 0;
1475 static const unsigned GPR[] = {
1476 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1477 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1479 static const unsigned FPR[] = {
1480 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1481 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
1484 // Lower the chain for this call.
1485 Select(N.getOperand(0));
1486 ExprMap[N.getValue(Node->getNumValues()-1)] = 1;
1488 MachineInstr *CallMI;
1489 // Emit the correct call instruction based on the type of symbol called.
1490 if (GlobalAddressSDNode *GASD =
1491 dyn_cast<GlobalAddressSDNode>(N.getOperand(1))) {
1492 CallMI = BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(GASD->getGlobal(),
1494 } else if (ExternalSymbolSDNode *ESSDN =
1495 dyn_cast<ExternalSymbolSDNode>(N.getOperand(1))) {
1496 CallMI = BuildMI(PPC::CALLpcrel, 1).addExternalSymbol(ESSDN->getSymbol(),
1499 Tmp1 = SelectExpr(N.getOperand(1));
1500 BuildMI(BB, PPC::OR, 2, PPC::R12).addReg(Tmp1).addReg(Tmp1);
1501 BuildMI(BB, PPC::MTCTR, 1).addReg(PPC::R12);
1502 CallMI = BuildMI(PPC::CALLindirect, 3).addImm(20).addImm(0)
1506 // Load the register args to virtual regs
1507 std::vector<unsigned> ArgVR;
1508 for(int i = 2, e = Node->getNumOperands(); i < e; ++i)
1509 ArgVR.push_back(SelectExpr(N.getOperand(i)));
1511 // Copy the virtual registers into the appropriate argument register
1512 for(int i = 0, e = ArgVR.size(); i < e; ++i) {
1513 switch(N.getOperand(i+2).getValueType()) {
1514 default: Node->dump(); assert(0 && "Unknown value type for call");
1519 assert(GPR_idx < 8 && "Too many int args");
1520 if (N.getOperand(i+2).getOpcode() != ISD::UNDEF) {
1521 BuildMI(BB, PPC::OR,2,GPR[GPR_idx]).addReg(ArgVR[i]).addReg(ArgVR[i]);
1522 CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use);
1528 assert(FPR_idx < 13 && "Too many fp args");
1529 BuildMI(BB, PPC::FMR, 1, FPR[FPR_idx]).addReg(ArgVR[i]);
1530 CallMI->addRegOperand(FPR[FPR_idx], MachineOperand::Use);
1536 // Put the call instruction in the correct place in the MachineBasicBlock
1537 BB->push_back(CallMI);
1539 switch (Node->getValueType(0)) {
1540 default: assert(0 && "Unknown value type for call result!");
1541 case MVT::Other: return 1;
1546 if (Node->getValueType(1) == MVT::i32) {
1547 BuildMI(BB, PPC::OR, 2, Result+1).addReg(PPC::R3).addReg(PPC::R3);
1548 BuildMI(BB, PPC::OR, 2, Result).addReg(PPC::R4).addReg(PPC::R4);
1550 BuildMI(BB, PPC::OR, 2, Result).addReg(PPC::R3).addReg(PPC::R3);
1555 BuildMI(BB, PPC::FMR, 1, Result).addReg(PPC::F1);
1558 return Result+N.ResNo;
1561 case ISD::SIGN_EXTEND:
1562 case ISD::SIGN_EXTEND_INREG:
1563 Tmp1 = SelectExpr(N.getOperand(0));
1564 switch(cast<VTSDNode>(Node->getOperand(1))->getVT()) {
1565 default: Node->dump(); assert(0 && "Unhandled SIGN_EXTEND type"); break;
1567 BuildMI(BB, PPC::EXTSH, 1, Result).addReg(Tmp1);
1570 BuildMI(BB, PPC::EXTSB, 1, Result).addReg(Tmp1);
1573 BuildMI(BB, PPC::SUBFIC, 2, Result).addReg(Tmp1).addSImm(0);
1578 case ISD::CopyFromReg:
1579 DestType = N.getValue(0).getValueType();
1581 Result = ExprMap[N.getValue(0)] = MakeReg(DestType);
1582 Tmp1 = dyn_cast<RegSDNode>(Node)->getReg();
1583 if (MVT::isInteger(DestType))
1584 BuildMI(BB, PPC::OR, 2, Result).addReg(Tmp1).addReg(Tmp1);
1586 BuildMI(BB, PPC::FMR, 1, Result).addReg(Tmp1);
1590 if (isIntImmediate(N.getOperand(1), Tmp2)) {
1591 unsigned SH, MB, ME;
1592 if (isOpcWithIntImmediate(N.getOperand(0), ISD::AND, Tmp3) &&
1593 isRotateAndMask(ISD::SHL, Tmp2, Tmp3, true, SH, MB, ME)) {
1594 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1595 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(SH)
1596 .addImm(MB).addImm(ME);
1599 Tmp1 = SelectExpr(N.getOperand(0));
1601 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(Tmp2).addImm(0)
1604 Tmp1 = SelectExpr(N.getOperand(0));
1605 Tmp2 = FoldIfWideZeroExtend(N.getOperand(1));
1606 BuildMI(BB, PPC::SLW, 2, Result).addReg(Tmp1).addReg(Tmp2);
1611 if (isIntImmediate(N.getOperand(1), Tmp2)) {
1612 unsigned SH, MB, ME;
1613 if (isOpcWithIntImmediate(N.getOperand(0), ISD::AND, Tmp3) &&
1614 isRotateAndMask(ISD::SRL, Tmp2, Tmp3, true, SH, MB, ME)) {
1615 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1616 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(SH)
1617 .addImm(MB).addImm(ME);
1620 Tmp1 = SelectExpr(N.getOperand(0));
1622 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(32-Tmp2)
1623 .addImm(Tmp2).addImm(31);
1625 Tmp1 = SelectExpr(N.getOperand(0));
1626 Tmp2 = FoldIfWideZeroExtend(N.getOperand(1));
1627 BuildMI(BB, PPC::SRW, 2, Result).addReg(Tmp1).addReg(Tmp2);
1632 if (isIntImmediate(N.getOperand(1), Tmp2)) {
1633 unsigned SH, MB, ME;
1634 if (isOpcWithIntImmediate(N.getOperand(0), ISD::AND, Tmp3) &&
1635 isRotateAndMask(ISD::SRA, Tmp2, Tmp3, true, SH, MB, ME)) {
1636 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1637 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(SH)
1638 .addImm(MB).addImm(ME);
1641 Tmp1 = SelectExpr(N.getOperand(0));
1643 BuildMI(BB, PPC::SRAWI, 2, Result).addReg(Tmp1).addImm(Tmp2);
1645 Tmp1 = SelectExpr(N.getOperand(0));
1646 Tmp2 = FoldIfWideZeroExtend(N.getOperand(1));
1647 BuildMI(BB, PPC::SRAW, 2, Result).addReg(Tmp1).addReg(Tmp2);
1652 Tmp1 = SelectExpr(N.getOperand(0));
1653 BuildMI(BB, PPC::CNTLZW, 1, Result).addReg(Tmp1);
1657 if (!MVT::isInteger(DestType)) {
1658 if (!NoExcessFPPrecision && N.getOperand(0).getOpcode() == ISD::MUL &&
1659 N.getOperand(0).Val->hasOneUse()) {
1660 ++FusedFP; // Statistic
1661 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1662 Tmp2 = SelectExpr(N.getOperand(0).getOperand(1));
1663 Tmp3 = SelectExpr(N.getOperand(1));
1664 Opc = DestType == MVT::f64 ? PPC::FMADD : PPC::FMADDS;
1665 BuildMI(BB, Opc, 3, Result).addReg(Tmp1).addReg(Tmp2).addReg(Tmp3);
1668 if (!NoExcessFPPrecision && N.getOperand(1).getOpcode() == ISD::MUL &&
1669 N.getOperand(1).Val->hasOneUse()) {
1670 ++FusedFP; // Statistic
1671 Tmp1 = SelectExpr(N.getOperand(1).getOperand(0));
1672 Tmp2 = SelectExpr(N.getOperand(1).getOperand(1));
1673 Tmp3 = SelectExpr(N.getOperand(0));
1674 Opc = DestType == MVT::f64 ? PPC::FMADD : PPC::FMADDS;
1675 BuildMI(BB, Opc, 3, Result).addReg(Tmp1).addReg(Tmp2).addReg(Tmp3);
1678 Opc = DestType == MVT::f64 ? PPC::FADD : PPC::FADDS;
1679 Tmp1 = SelectExpr(N.getOperand(0));
1680 Tmp2 = SelectExpr(N.getOperand(1));
1681 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1684 if (SelectIntImmediateExpr(N, Result, PPC::ADDIS, PPC::ADDI, true))
1686 Tmp1 = SelectExpr(N.getOperand(0));
1687 Tmp2 = SelectExpr(N.getOperand(1));
1688 BuildMI(BB, PPC::ADD, 2, Result).addReg(Tmp1).addReg(Tmp2);
1692 if (isIntImmediate(N.getOperand(1), Tmp2)) {
1693 if (isShiftedMask_32(Tmp2) || isShiftedMask_32(~Tmp2)) {
1694 unsigned SH, MB, ME;
1695 Opc = Recording ? PPC::RLWINMo : PPC::RLWINM;
1697 if (isOprShiftImm(N.getOperand(0), OprOpc, Tmp3) &&
1698 isRotateAndMask(OprOpc, Tmp3, Tmp2, false, SH, MB, ME)) {
1699 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1701 Tmp1 = SelectExpr(N.getOperand(0));
1702 isRunOfOnes(Tmp2, MB, ME);
1705 BuildMI(BB, Opc, 4, Result).addReg(Tmp1).addImm(SH)
1706 .addImm(MB).addImm(ME);
1707 RecordSuccess = true;
1709 } else if (isUInt16(Tmp2)) {
1711 Tmp1 = SelectExpr(N.getOperand(0));
1712 BuildMI(BB, PPC::ANDIo, 2, Result).addReg(Tmp1).addImm(Tmp2);
1713 RecordSuccess = true;
1715 } else if (isUInt16(Tmp2)) {
1717 Tmp1 = SelectExpr(N.getOperand(0));
1718 BuildMI(BB, PPC::ANDISo, 2, Result).addReg(Tmp1).addImm(Tmp2);
1719 RecordSuccess = true;
1723 if (isOprNot(N.getOperand(1))) {
1724 Tmp1 = SelectExpr(N.getOperand(0));
1725 Tmp2 = SelectExpr(N.getOperand(1).getOperand(0));
1726 BuildMI(BB, PPC::ANDC, 2, Result).addReg(Tmp1).addReg(Tmp2);
1727 RecordSuccess = false;
1730 if (isOprNot(N.getOperand(0))) {
1731 Tmp1 = SelectExpr(N.getOperand(1));
1732 Tmp2 = SelectExpr(N.getOperand(0).getOperand(0));
1733 BuildMI(BB, PPC::ANDC, 2, Result).addReg(Tmp1).addReg(Tmp2);
1734 RecordSuccess = false;
1737 // emit a regular and
1738 Tmp1 = SelectExpr(N.getOperand(0));
1739 Tmp2 = SelectExpr(N.getOperand(1));
1740 Opc = Recording ? PPC::ANDo : PPC::AND;
1741 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1742 RecordSuccess = true;
1746 if (SelectBitfieldInsert(N, Result))
1748 if (SelectIntImmediateExpr(N, Result, PPC::ORIS, PPC::ORI))
1750 if (isOprNot(N.getOperand(1))) {
1751 Tmp1 = SelectExpr(N.getOperand(0));
1752 Tmp2 = SelectExpr(N.getOperand(1).getOperand(0));
1753 BuildMI(BB, PPC::ORC, 2, Result).addReg(Tmp1).addReg(Tmp2);
1754 RecordSuccess = false;
1757 if (isOprNot(N.getOperand(0))) {
1758 Tmp1 = SelectExpr(N.getOperand(1));
1759 Tmp2 = SelectExpr(N.getOperand(0).getOperand(0));
1760 BuildMI(BB, PPC::ORC, 2, Result).addReg(Tmp1).addReg(Tmp2);
1761 RecordSuccess = false;
1765 Tmp1 = SelectExpr(N.getOperand(0));
1766 Tmp2 = SelectExpr(N.getOperand(1));
1767 Opc = Recording ? PPC::ORo : PPC::OR;
1768 RecordSuccess = true;
1769 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1773 // Check for EQV: xor, (xor a, -1), b
1774 if (isOprNot(N.getOperand(0))) {
1775 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1776 Tmp2 = SelectExpr(N.getOperand(1));
1777 BuildMI(BB, PPC::EQV, 2, Result).addReg(Tmp1).addReg(Tmp2);
1780 // Check for NOT, NOR, EQV, and NAND: xor (copy, or, xor, and), -1
1782 switch(N.getOperand(0).getOpcode()) {
1784 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1785 Tmp2 = SelectExpr(N.getOperand(0).getOperand(1));
1786 BuildMI(BB, PPC::NOR, 2, Result).addReg(Tmp1).addReg(Tmp2);
1789 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1790 Tmp2 = SelectExpr(N.getOperand(0).getOperand(1));
1791 BuildMI(BB, PPC::NAND, 2, Result).addReg(Tmp1).addReg(Tmp2);
1794 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1795 Tmp2 = SelectExpr(N.getOperand(0).getOperand(1));
1796 BuildMI(BB, PPC::EQV, 2, Result).addReg(Tmp1).addReg(Tmp2);
1799 Tmp1 = SelectExpr(N.getOperand(0));
1800 BuildMI(BB, PPC::NOR, 2, Result).addReg(Tmp1).addReg(Tmp1);
1805 if (SelectIntImmediateExpr(N, Result, PPC::XORIS, PPC::XORI))
1808 Tmp1 = SelectExpr(N.getOperand(0));
1809 Tmp2 = SelectExpr(N.getOperand(1));
1810 BuildMI(BB, PPC::XOR, 2, Result).addReg(Tmp1).addReg(Tmp2);
1815 if (!MVT::isInteger(DestType)) {
1816 if (!NoExcessFPPrecision && N.getOperand(0).getOpcode() == ISD::MUL &&
1817 N.getOperand(0).Val->hasOneUse()) {
1818 ++FusedFP; // Statistic
1819 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
1820 Tmp2 = SelectExpr(N.getOperand(0).getOperand(1));
1821 Tmp3 = SelectExpr(N.getOperand(1));
1822 Opc = DestType == MVT::f64 ? PPC::FMSUB : PPC::FMSUBS;
1823 BuildMI(BB, Opc, 3, Result).addReg(Tmp1).addReg(Tmp2).addReg(Tmp3);
1826 if (!NoExcessFPPrecision && N.getOperand(1).getOpcode() == ISD::MUL &&
1827 N.getOperand(1).Val->hasOneUse()) {
1828 ++FusedFP; // Statistic
1829 Tmp1 = SelectExpr(N.getOperand(1).getOperand(0));
1830 Tmp2 = SelectExpr(N.getOperand(1).getOperand(1));
1831 Tmp3 = SelectExpr(N.getOperand(0));
1832 Opc = DestType == MVT::f64 ? PPC::FNMSUB : PPC::FNMSUBS;
1833 BuildMI(BB, Opc, 3, Result).addReg(Tmp1).addReg(Tmp2).addReg(Tmp3);
1836 Opc = DestType == MVT::f64 ? PPC::FSUB : PPC::FSUBS;
1837 Tmp1 = SelectExpr(N.getOperand(0));
1838 Tmp2 = SelectExpr(N.getOperand(1));
1839 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1842 if (isIntImmediate(N.getOperand(0), Tmp1) && isInt16(Tmp1)) {
1844 Tmp2 = SelectExpr(N.getOperand(1));
1845 BuildMI(BB, PPC::SUBFIC, 2, Result).addReg(Tmp2).addSImm(Tmp1);
1848 if (SelectIntImmediateExpr(N, Result, PPC::ADDIS, PPC::ADDI, true, true))
1850 Tmp1 = SelectExpr(N.getOperand(0));
1851 Tmp2 = SelectExpr(N.getOperand(1));
1852 BuildMI(BB, PPC::SUBF, 2, Result).addReg(Tmp2).addReg(Tmp1);
1856 Tmp1 = SelectExpr(N.getOperand(0));
1857 if (isIntImmediate(N.getOperand(1), Tmp2) && isInt16(Tmp2)) {
1859 BuildMI(BB, PPC::MULLI, 2, Result).addReg(Tmp1).addSImm(Tmp2);
1861 Tmp2 = SelectExpr(N.getOperand(1));
1863 default: assert(0 && "Unknown type to ISD::MUL"); break;
1864 case MVT::i32: Opc = PPC::MULLW; break;
1865 case MVT::f32: Opc = PPC::FMULS; break;
1866 case MVT::f64: Opc = PPC::FMUL; break;
1868 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1874 Tmp1 = SelectExpr(N.getOperand(0));
1875 Tmp2 = SelectExpr(N.getOperand(1));
1876 Opc = (ISD::MULHU == opcode) ? PPC::MULHWU : PPC::MULHW;
1877 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1881 if (isIntImmediate(N.getOperand(1), Tmp3)) {
1882 if ((signed)Tmp3 > 0 && isPowerOf2_32(Tmp3)) {
1883 Tmp3 = Log2_32(Tmp3);
1884 Tmp1 = MakeIntReg();
1885 Tmp2 = SelectExpr(N.getOperand(0));
1886 BuildMI(BB, PPC::SRAWI, 2, Tmp1).addReg(Tmp2).addImm(Tmp3);
1887 BuildMI(BB, PPC::ADDZE, 1, Result).addReg(Tmp1);
1889 } else if ((signed)Tmp3 < 0 && isPowerOf2_32(-Tmp3)) {
1890 Tmp3 = Log2_32(-Tmp3);
1891 Tmp2 = SelectExpr(N.getOperand(0));
1892 Tmp1 = MakeIntReg();
1893 unsigned Tmp4 = MakeIntReg();
1894 BuildMI(BB, PPC::SRAWI, 2, Tmp1).addReg(Tmp2).addImm(Tmp3);
1895 BuildMI(BB, PPC::ADDZE, 1, Tmp4).addReg(Tmp1);
1896 BuildMI(BB, PPC::NEG, 1, Result).addReg(Tmp4);
1902 // If this is a divide by constant, we can emit code using some magic
1903 // constants to implement it as a multiply instead.
1904 if (isIntImmediate(N.getOperand(1), Tmp3)) {
1905 if (opcode == ISD::SDIV) {
1906 if ((signed)Tmp3 < -1 || (signed)Tmp3 > 1) {
1908 return SelectExpr(BuildSDIVSequence(N));
1911 if ((signed)Tmp3 > 1) {
1913 return SelectExpr(BuildUDIVSequence(N));
1917 Tmp1 = SelectExpr(N.getOperand(0));
1918 Tmp2 = SelectExpr(N.getOperand(1));
1920 default: assert(0 && "Unknown type to ISD::SDIV"); break;
1921 case MVT::i32: Opc = (ISD::UDIV == opcode) ? PPC::DIVWU : PPC::DIVW; break;
1922 case MVT::f32: Opc = PPC::FDIVS; break;
1923 case MVT::f64: Opc = PPC::FDIV; break;
1925 BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1928 case ISD::ADD_PARTS:
1929 case ISD::SUB_PARTS: {
1930 assert(N.getNumOperands() == 4 && N.getValueType() == MVT::i32 &&
1931 "Not an i64 add/sub!");
1932 // Emit all of the operands.
1933 std::vector<unsigned> InVals;
1934 for (unsigned i = 0, e = N.getNumOperands(); i != e; ++i)
1935 InVals.push_back(SelectExpr(N.getOperand(i)));
1936 if (N.getOpcode() == ISD::ADD_PARTS) {
1937 BuildMI(BB, PPC::ADDC, 2, Result).addReg(InVals[0]).addReg(InVals[2]);
1938 BuildMI(BB, PPC::ADDE, 2, Result+1).addReg(InVals[1]).addReg(InVals[3]);
1940 BuildMI(BB, PPC::SUBFC, 2, Result).addReg(InVals[2]).addReg(InVals[0]);
1941 BuildMI(BB, PPC::SUBFE, 2, Result+1).addReg(InVals[3]).addReg(InVals[1]);
1943 return Result+N.ResNo;
1946 case ISD::SHL_PARTS:
1947 case ISD::SRA_PARTS:
1948 case ISD::SRL_PARTS: {
1949 assert(N.getNumOperands() == 3 && N.getValueType() == MVT::i32 &&
1950 "Not an i64 shift!");
1951 unsigned ShiftOpLo = SelectExpr(N.getOperand(0));
1952 unsigned ShiftOpHi = SelectExpr(N.getOperand(1));
1953 unsigned SHReg = FoldIfWideZeroExtend(N.getOperand(2));
1954 Tmp1 = MakeIntReg();
1955 Tmp2 = MakeIntReg();
1956 Tmp3 = MakeIntReg();
1957 unsigned Tmp4 = MakeIntReg();
1958 unsigned Tmp5 = MakeIntReg();
1959 unsigned Tmp6 = MakeIntReg();
1960 BuildMI(BB, PPC::SUBFIC, 2, Tmp1).addReg(SHReg).addSImm(32);
1961 if (ISD::SHL_PARTS == opcode) {
1962 BuildMI(BB, PPC::SLW, 2, Tmp2).addReg(ShiftOpHi).addReg(SHReg);
1963 BuildMI(BB, PPC::SRW, 2, Tmp3).addReg(ShiftOpLo).addReg(Tmp1);
1964 BuildMI(BB, PPC::OR, 2, Tmp4).addReg(Tmp2).addReg(Tmp3);
1965 BuildMI(BB, PPC::ADDI, 2, Tmp5).addReg(SHReg).addSImm(-32);
1966 BuildMI(BB, PPC::SLW, 2, Tmp6).addReg(ShiftOpLo).addReg(Tmp5);
1967 BuildMI(BB, PPC::OR, 2, Result+1).addReg(Tmp4).addReg(Tmp6);
1968 BuildMI(BB, PPC::SLW, 2, Result).addReg(ShiftOpLo).addReg(SHReg);
1969 } else if (ISD::SRL_PARTS == opcode) {
1970 BuildMI(BB, PPC::SRW, 2, Tmp2).addReg(ShiftOpLo).addReg(SHReg);
1971 BuildMI(BB, PPC::SLW, 2, Tmp3).addReg(ShiftOpHi).addReg(Tmp1);
1972 BuildMI(BB, PPC::OR, 2, Tmp4).addReg(Tmp2).addReg(Tmp3);
1973 BuildMI(BB, PPC::ADDI, 2, Tmp5).addReg(SHReg).addSImm(-32);
1974 BuildMI(BB, PPC::SRW, 2, Tmp6).addReg(ShiftOpHi).addReg(Tmp5);
1975 BuildMI(BB, PPC::OR, 2, Result).addReg(Tmp4).addReg(Tmp6);
1976 BuildMI(BB, PPC::SRW, 2, Result+1).addReg(ShiftOpHi).addReg(SHReg);
1978 MachineBasicBlock *TmpMBB = new MachineBasicBlock(BB->getBasicBlock());
1979 MachineBasicBlock *PhiMBB = new MachineBasicBlock(BB->getBasicBlock());
1980 MachineBasicBlock *OldMBB = BB;
1981 MachineFunction *F = BB->getParent();
1982 ilist<MachineBasicBlock>::iterator It = BB; ++It;
1983 F->getBasicBlockList().insert(It, TmpMBB);
1984 F->getBasicBlockList().insert(It, PhiMBB);
1985 BB->addSuccessor(TmpMBB);
1986 BB->addSuccessor(PhiMBB);
1987 BuildMI(BB, PPC::SRW, 2, Tmp2).addReg(ShiftOpLo).addReg(SHReg);
1988 BuildMI(BB, PPC::SLW, 2, Tmp3).addReg(ShiftOpHi).addReg(Tmp1);
1989 BuildMI(BB, PPC::OR, 2, Tmp4).addReg(Tmp2).addReg(Tmp3);
1990 BuildMI(BB, PPC::ADDICo, 2, Tmp5).addReg(SHReg).addSImm(-32);
1991 BuildMI(BB, PPC::SRAW, 2, Tmp6).addReg(ShiftOpHi).addReg(Tmp5);
1992 BuildMI(BB, PPC::SRAW, 2, Result+1).addReg(ShiftOpHi).addReg(SHReg);
1993 BuildMI(BB, PPC::BLE, 2).addReg(PPC::CR0).addMBB(PhiMBB);
1994 // Select correct least significant half if the shift amount > 32
1996 unsigned Tmp7 = MakeIntReg();
1997 BuildMI(BB, PPC::OR, 2, Tmp7).addReg(Tmp6).addReg(Tmp6);
1998 TmpMBB->addSuccessor(PhiMBB);
2000 BuildMI(BB, PPC::PHI, 4, Result).addReg(Tmp4).addMBB(OldMBB)
2001 .addReg(Tmp7).addMBB(TmpMBB);
2003 return Result+N.ResNo;
2006 case ISD::FP_TO_SINT: {
2007 Tmp1 = SelectExpr(N.getOperand(0));
2009 BuildMI(BB, PPC::FCTIWZ, 1, Tmp2).addReg(Tmp1);
2010 int FrameIdx = BB->getParent()->getFrameInfo()->CreateStackObject(8, 8);
2011 addFrameReference(BuildMI(BB, PPC::STFD, 3).addReg(Tmp2), FrameIdx);
2012 addFrameReference(BuildMI(BB, PPC::LWZ, 2, Result), FrameIdx, 4);
2017 ISD::CondCode CC = cast<CondCodeSDNode>(Node->getOperand(2))->get();
2018 if (isIntImmediate(Node->getOperand(1), Tmp3)) {
2019 // We can codegen setcc op, imm very efficiently compared to a brcond.
2020 // Check for those cases here.
2023 Tmp1 = SelectExpr(Node->getOperand(0));
2025 default: Node->dump(); assert(0 && "Unhandled SetCC condition"); abort();
2027 Tmp2 = MakeIntReg();
2028 BuildMI(BB, PPC::CNTLZW, 1, Tmp2).addReg(Tmp1);
2029 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp2).addImm(27)
2030 .addImm(5).addImm(31);
2033 Tmp2 = MakeIntReg();
2034 BuildMI(BB, PPC::ADDIC, 2, Tmp2).addReg(Tmp1).addSImm(-1);
2035 BuildMI(BB, PPC::SUBFE, 2, Result).addReg(Tmp2).addReg(Tmp1);
2038 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(1)
2039 .addImm(31).addImm(31);
2042 Tmp2 = MakeIntReg();
2043 Tmp3 = MakeIntReg();
2044 BuildMI(BB, PPC::NEG, 2, Tmp2).addReg(Tmp1);
2045 BuildMI(BB, PPC::ANDC, 2, Tmp3).addReg(Tmp2).addReg(Tmp1);
2046 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp3).addImm(1)
2047 .addImm(31).addImm(31);
2051 } else if (Tmp3 == ~0U) { // setcc op, -1
2052 Tmp1 = SelectExpr(Node->getOperand(0));
2054 default: assert(0 && "Unhandled SetCC condition"); abort();
2056 Tmp2 = MakeIntReg();
2057 Tmp3 = MakeIntReg();
2058 BuildMI(BB, PPC::ADDIC, 2, Tmp2).addReg(Tmp1).addSImm(1);
2059 BuildMI(BB, PPC::LI, 1, Tmp3).addSImm(0);
2060 BuildMI(BB, PPC::ADDZE, 1, Result).addReg(Tmp3);
2063 Tmp2 = MakeIntReg();
2064 Tmp3 = MakeIntReg();
2065 BuildMI(BB, PPC::NOR, 2, Tmp2).addReg(Tmp1).addReg(Tmp1);
2066 BuildMI(BB, PPC::ADDIC, 2, Tmp3).addReg(Tmp2).addSImm(-1);
2067 BuildMI(BB, PPC::SUBFE, 2, Result).addReg(Tmp3).addReg(Tmp2);
2070 Tmp2 = MakeIntReg();
2071 Tmp3 = MakeIntReg();
2072 BuildMI(BB, PPC::ADDI, 2, Tmp2).addReg(Tmp1).addSImm(1);
2073 BuildMI(BB, PPC::AND, 2, Tmp3).addReg(Tmp2).addReg(Tmp1);
2074 BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp3).addImm(1)
2075 .addImm(31).addImm(31);
2078 Tmp2 = MakeIntReg();
2079 BuildMI(BB, PPC::RLWINM, 4, Tmp2).addReg(Tmp1).addImm(1)
2080 .addImm(31).addImm(31);
2081 BuildMI(BB, PPC::XORI, 2, Result).addReg(Tmp2).addImm(1);
2088 unsigned CCReg = SelectCC(N.getOperand(0), N.getOperand(1), CC);
2089 MoveCRtoGPR(CCReg, CC, Result);
2093 case ISD::SELECT_CC: {
2094 ISD::CondCode CC = cast<CondCodeSDNode>(N.getOperand(4))->get();
2095 if (!MVT::isInteger(N.getOperand(0).getValueType()) &&
2096 !MVT::isInteger(N.getOperand(2).getValueType()) &&
2097 CC != ISD::SETEQ && CC != ISD::SETNE) {
2098 MVT::ValueType VT = N.getOperand(0).getValueType();
2099 unsigned TV = SelectExpr(N.getOperand(2)); // Use if TRUE
2100 unsigned FV = SelectExpr(N.getOperand(3)); // Use if FALSE
2102 ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(1));
2103 if (CN && (CN->isExactlyValue(-0.0) || CN->isExactlyValue(0.0))) {
2105 default: assert(0 && "Invalid FSEL condition"); abort();
2108 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
2111 Tmp1 = SelectExpr(N.getOperand(0)); // Val to compare against
2112 BuildMI(BB, PPC::FSEL, 3, Result).addReg(Tmp1).addReg(TV).addReg(FV);
2116 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
2119 if (N.getOperand(0).getOpcode() == ISD::FNEG) {
2120 Tmp2 = SelectExpr(N.getOperand(0).getOperand(0));
2123 Tmp1 = SelectExpr(N.getOperand(0)); // Val to compare against
2124 BuildMI(BB, PPC::FNEG, 1, Tmp2).addReg(Tmp1);
2126 BuildMI(BB, PPC::FSEL, 3, Result).addReg(Tmp2).addReg(TV).addReg(FV);
2131 Opc = (MVT::f64 == VT) ? PPC::FSUB : PPC::FSUBS;
2132 Tmp1 = SelectExpr(N.getOperand(0)); // Val to compare against
2133 Tmp2 = SelectExpr(N.getOperand(1));
2136 default: assert(0 && "Invalid FSEL condition"); abort();
2139 BuildMI(BB, Opc, 2, Tmp3).addReg(Tmp1).addReg(Tmp2);
2140 BuildMI(BB, PPC::FSEL, 3, Result).addReg(Tmp3).addReg(FV).addReg(TV);
2144 BuildMI(BB, Opc, 2, Tmp3).addReg(Tmp1).addReg(Tmp2);
2145 BuildMI(BB, PPC::FSEL, 3, Result).addReg(Tmp3).addReg(TV).addReg(FV);
2149 BuildMI(BB, Opc, 2, Tmp3).addReg(Tmp2).addReg(Tmp1);
2150 BuildMI(BB, PPC::FSEL, 3, Result).addReg(Tmp3).addReg(FV).addReg(TV);
2154 BuildMI(BB, Opc, 2, Tmp3).addReg(Tmp2).addReg(Tmp1);
2155 BuildMI(BB, PPC::FSEL, 3, Result).addReg(Tmp3).addReg(TV).addReg(FV);
2159 assert(0 && "Should never get here");
2162 // If the False value only has one use, we can generate better code by
2163 // selecting it in the fallthrough basic block rather than here, which
2164 // increases register pressure.
2165 bool FalseHasOneUse = N.getOperand(3).Val->hasOneUse();
2166 unsigned TrueValue = SelectExpr(N.getOperand(2));
2167 unsigned FalseValue = FalseHasOneUse ? 0 : SelectExpr(N.getOperand(3));
2168 unsigned CCReg = SelectCC(N.getOperand(0), N.getOperand(1), CC);
2169 Opc = getBCCForSetCC(CC);
2171 // Create an iterator with which to insert the MBB for copying the false
2172 // value and the MBB to hold the PHI instruction for this SetCC.
2173 MachineBasicBlock *thisMBB = BB;
2174 const BasicBlock *LLVM_BB = BB->getBasicBlock();
2175 ilist<MachineBasicBlock>::iterator It = BB;
2181 // cmpTY ccX, r1, r2
2183 // fallthrough --> copy0MBB
2184 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
2185 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
2186 BuildMI(BB, Opc, 2).addReg(CCReg).addMBB(sinkMBB);
2187 MachineFunction *F = BB->getParent();
2188 F->getBasicBlockList().insert(It, copy0MBB);
2189 F->getBasicBlockList().insert(It, sinkMBB);
2190 // Update machine-CFG edges
2191 BB->addSuccessor(copy0MBB);
2192 BB->addSuccessor(sinkMBB);
2195 // %FalseValue = ...
2196 // # fallthrough to sinkMBB
2198 if (FalseHasOneUse) FalseValue = SelectExpr(N.getOperand(3));
2199 // Update machine-CFG edges
2200 BB->addSuccessor(sinkMBB);
2203 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
2206 BuildMI(BB, PPC::PHI, 4, Result).addReg(FalseValue)
2207 .addMBB(copy0MBB).addReg(TrueValue).addMBB(thisMBB);
2212 switch (N.getValueType()) {
2213 default: assert(0 && "Cannot use constants of this type!");
2215 BuildMI(BB, PPC::LI, 1, Result)
2216 .addSImm(!cast<ConstantSDNode>(N)->isNullValue());
2220 int v = (int)cast<ConstantSDNode>(N)->getSignExtended();
2221 if (v < 32768 && v >= -32768) {
2222 BuildMI(BB, PPC::LI, 1, Result).addSImm(v);
2224 Tmp1 = MakeIntReg();
2225 BuildMI(BB, PPC::LIS, 1, Tmp1).addSImm(v >> 16);
2226 BuildMI(BB, PPC::ORI, 2, Result).addReg(Tmp1).addImm(v & 0xFFFF);
2232 case ISD::ConstantFP: {
2233 ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
2234 Result = getConstDouble(CN->getValue(), Result);
2239 if (!NoExcessFPPrecision &&
2240 ISD::ADD == N.getOperand(0).getOpcode() &&
2241 N.getOperand(0).Val->hasOneUse() &&
2242 ISD::MUL == N.getOperand(0).getOperand(0).getOpcode() &&
2243 N.getOperand(0).getOperand(0).Val->hasOneUse()) {
2244 ++FusedFP; // Statistic
2245 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0).getOperand(0));
2246 Tmp2 = SelectExpr(N.getOperand(0).getOperand(0).getOperand(1));
2247 Tmp3 = SelectExpr(N.getOperand(0).getOperand(1));
2248 Opc = DestType == MVT::f64 ? PPC::FNMADD : PPC::FNMADDS;
2249 BuildMI(BB, Opc, 3, Result).addReg(Tmp1).addReg(Tmp2).addReg(Tmp3);
2250 } else if (!NoExcessFPPrecision &&
2251 ISD::ADD == N.getOperand(0).getOpcode() &&
2252 N.getOperand(0).Val->hasOneUse() &&
2253 ISD::MUL == N.getOperand(0).getOperand(1).getOpcode() &&
2254 N.getOperand(0).getOperand(1).Val->hasOneUse()) {
2255 ++FusedFP; // Statistic
2256 Tmp1 = SelectExpr(N.getOperand(0).getOperand(1).getOperand(0));
2257 Tmp2 = SelectExpr(N.getOperand(0).getOperand(1).getOperand(1));
2258 Tmp3 = SelectExpr(N.getOperand(0).getOperand(0));
2259 Opc = DestType == MVT::f64 ? PPC::FNMADD : PPC::FNMADDS;
2260 BuildMI(BB, Opc, 3, Result).addReg(Tmp1).addReg(Tmp2).addReg(Tmp3);
2261 } else if (ISD::FABS == N.getOperand(0).getOpcode()) {
2262 Tmp1 = SelectExpr(N.getOperand(0).getOperand(0));
2263 BuildMI(BB, PPC::FNABS, 1, Result).addReg(Tmp1);
2265 Tmp1 = SelectExpr(N.getOperand(0));
2266 BuildMI(BB, PPC::FNEG, 1, Result).addReg(Tmp1);
2271 Tmp1 = SelectExpr(N.getOperand(0));
2272 BuildMI(BB, PPC::FABS, 1, Result).addReg(Tmp1);
2276 Tmp1 = SelectExpr(N.getOperand(0));
2277 Opc = DestType == MVT::f64 ? PPC::FSQRT : PPC::FSQRTS;
2278 BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
2282 assert (DestType == MVT::f32 &&
2283 N.getOperand(0).getValueType() == MVT::f64 &&
2284 "only f64 to f32 conversion supported here");
2285 Tmp1 = SelectExpr(N.getOperand(0));
2286 BuildMI(BB, PPC::FRSP, 1, Result).addReg(Tmp1);
2289 case ISD::FP_EXTEND:
2290 assert (DestType == MVT::f64 &&
2291 N.getOperand(0).getValueType() == MVT::f32 &&
2292 "only f32 to f64 conversion supported here");
2293 Tmp1 = SelectExpr(N.getOperand(0));
2294 BuildMI(BB, PPC::FMR, 1, Result).addReg(Tmp1);
2297 case ISD::UINT_TO_FP:
2298 case ISD::SINT_TO_FP: {
2299 assert (N.getOperand(0).getValueType() == MVT::i32
2300 && "int to float must operate on i32");
2301 bool IsUnsigned = (ISD::UINT_TO_FP == opcode);
2302 Tmp1 = SelectExpr(N.getOperand(0)); // Get the operand register
2303 Tmp2 = MakeFPReg(); // temp reg to load the integer value into
2304 Tmp3 = MakeIntReg(); // temp reg to hold the conversion constant
2306 int FrameIdx = BB->getParent()->getFrameInfo()->CreateStackObject(8, 8);
2307 MachineConstantPool *CP = BB->getParent()->getConstantPool();
2310 unsigned ConstF = getConstDouble(0x1.000000p52);
2311 // Store the hi & low halves of the fp value, currently in int regs
2312 BuildMI(BB, PPC::LIS, 1, Tmp3).addSImm(0x4330);
2313 addFrameReference(BuildMI(BB, PPC::STW, 3).addReg(Tmp3), FrameIdx);
2314 addFrameReference(BuildMI(BB, PPC::STW, 3).addReg(Tmp1), FrameIdx, 4);
2315 addFrameReference(BuildMI(BB, PPC::LFD, 2, Tmp2), FrameIdx);
2316 // Generate the return value with a subtract
2317 BuildMI(BB, PPC::FSUB, 2, Result).addReg(Tmp2).addReg(ConstF);
2319 unsigned ConstF = getConstDouble(0x1.000008p52);
2320 unsigned TmpL = MakeIntReg();
2321 // Store the hi & low halves of the fp value, currently in int regs
2322 BuildMI(BB, PPC::LIS, 1, Tmp3).addSImm(0x4330);
2323 addFrameReference(BuildMI(BB, PPC::STW, 3).addReg(Tmp3), FrameIdx);
2324 BuildMI(BB, PPC::XORIS, 2, TmpL).addReg(Tmp1).addImm(0x8000);
2325 addFrameReference(BuildMI(BB, PPC::STW, 3).addReg(TmpL), FrameIdx, 4);
2326 addFrameReference(BuildMI(BB, PPC::LFD, 2, Tmp2), FrameIdx);
2327 // Generate the return value with a subtract
2328 BuildMI(BB, PPC::FSUB, 2, Result).addReg(Tmp2).addReg(ConstF);
/// Select - Emit machine instructions for a node that is selected for its
/// side effects rather than for a result register: token factors, call
/// sequence markers, branches, register copies, returns and stores.
/// Each node is processed at most once; ExprMap records the nodes that have
/// already been selected.
///
/// NOTE(review): the embedded original line numbers in this listing skip, so
/// some source lines (several `case` labels, `break`/`return` statements,
/// `else` lines and closing braces) appear to be missing here. Comments below
/// that depend on a line that is not visible are marked as assumptions.
2336 void ISel::Select(SDOperand N) {
2337 unsigned Tmp1, Tmp2, Tmp3, Opc;
2338 unsigned opcode = N.getOpcode();
// Try to record N in ExprMap; a failed insert means N was seen before.
2340 if (!ExprMap.insert(std::make_pair(N, 1)).second)
2341 return; // Already selected.
2343 SDNode *Node = N.Val;
// Dispatch on the opcode of the node.
2345 switch (Node->getOpcode()) {
// Unhandled opcode: dump the node for diagnosis and assert.
// (presumably under a `default:` label that is not visible here)
2347 Node->dump(); std::cerr << "\n";
2348 assert(0 && "Node not handled yet!");
2349 case ISD::EntryToken: return; // Noop
// TokenFactor emits no instruction of its own; select every operand in turn.
2350 case ISD::TokenFactor:
2351 for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
2352 Select(Node->getOperand(i));
// Call sequence markers: select operand 0 first, then emit the stack
// adjustment pseudo-instruction carrying the constant found in operand 1.
// CALLSEQ_START uses ADJCALLSTACKDOWN, CALLSEQ_END uses ADJCALLSTACKUP.
2354 case ISD::CALLSEQ_START:
2355 case ISD::CALLSEQ_END:
2356 Select(N.getOperand(0));
2357 Tmp1 = cast<ConstantSDNode>(N.getOperand(1))->getValue();
2358 Opc = N.getOpcode() == ISD::CALLSEQ_START ? PPC::ADJCALLSTACKDOWN :
2359 PPC::ADJCALLSTACKUP;
2360 BuildMI(BB, Opc, 1).addImm(Tmp1);
// Branch: operand 1 holds the destination basic block. Operand 0 is selected
// before the PPC::B to that block is emitted.
// (the `case` label for this code is not visible here)
2363 MachineBasicBlock *Dest =
2364 cast<BasicBlockSDNode>(N.getOperand(1))->getBasicBlock();
2365 Select(N.getOperand(0));
2366 BuildMI(BB, PPC::B, 1).addMBB(Dest);
// NOTE(review): the code that handles BRCONDTWOWAY is not visible here.
2370 case ISD::BRCONDTWOWAY:
// CopyToReg: select operand 0, compute operand 1 into a register (Tmp1) and
// copy it into the register recorded on the node (Tmp2).
2373 case ISD::CopyToReg:
2374 Select(N.getOperand(0));
2375 Tmp1 = SelectExpr(N.getOperand(1));
2376 Tmp2 = cast<RegSDNode>(N)->getReg();
// f64/f32 values are copied with FMR; the OR of the source with itself copies
// an integer register. (presumably the OR is the `else` branch of this `if`;
// the `else` line is not visible here)
2379 if (N.getOperand(1).getValueType() == MVT::f64 ||
2380 N.getOperand(1).getValueType() == MVT::f32)
2381 BuildMI(BB, PPC::FMR, 1, Tmp2).addReg(Tmp1);
2383 BuildMI(BB, PPC::OR, 2, Tmp2).addReg(Tmp1).addReg(Tmp1);
// ImplicitDef: select operand 0, then emit IMPLICIT_DEF for the node's register.
2386 case ISD::ImplicitDef:
2387 Select(N.getOperand(0));
2388 BuildMI(BB, PPC::IMPLICIT_DEF, 0, cast<RegSDNode>(N)->getReg());
// Return handling, keyed on the operand count of the node.
// (the enclosing `case` label and the inner `case` labels are not visible here)
2391 switch (N.getNumOperands()) {
2393 assert(0 && "Unknown return instruction!");
// Two i32 values returned together: operand 2 is copied into R3 and
// operand 1 into R4.
2395 assert(N.getOperand(1).getValueType() == MVT::i32 &&
2396 N.getOperand(2).getValueType() == MVT::i32 &&
2397 "Unknown two-register value!");
2398 Select(N.getOperand(0));
2399 Tmp1 = SelectExpr(N.getOperand(1));
2400 Tmp2 = SelectExpr(N.getOperand(2));
2401 BuildMI(BB, PPC::OR, 2, PPC::R3).addReg(Tmp2).addReg(Tmp2);
2402 BuildMI(BB, PPC::OR, 2, PPC::R4).addReg(Tmp1).addReg(Tmp1);
// Single returned value: it is copied into F1 with FMR or into R3 with OR,
// chosen by the value type of operand 1 (the type labels are not visible here).
2405 Select(N.getOperand(0));
2406 Tmp1 = SelectExpr(N.getOperand(1));
2407 switch (N.getOperand(1).getValueType()) {
2409 assert(0 && "Unknown return type!");
2412 BuildMI(BB, PPC::FMR, 1, PPC::F1).addReg(Tmp1);
2415 BuildMI(BB, PPC::OR, 2, PPC::R3).addReg(Tmp1).addReg(Tmp1);
// Only operand 0 is selected here, with no value copied into a return register.
2419 Select(N.getOperand(0));
2422 BuildMI(BB, PPC::BLR, 0); // Just emit a 'ret' instruction
// Stores. This code also handles ISD::STORE (see the opcode test below); its
// `case` label is not visible here. Operand 0 is the chain, operand 1 the
// value to store and operand 2 the address.
2424 case ISD::TRUNCSTORE:
2426 SDOperand Chain = N.getOperand(0);
2427 SDOperand Value = N.getOperand(1);
2428 SDOperand Address = N.getOperand(2);
2431 Tmp1 = SelectExpr(Value); //value
// Choose the store opcode: a plain STORE is keyed on the type of the value,
// a TRUNCSTORE on the value type held in operand 4 of the node.
2433 if (opcode == ISD::STORE) {
2434 switch(Value.getValueType()) {
2435 default: assert(0 && "unknown Type in store");
2436 case MVT::i32: Opc = PPC::STW; break;
2437 case MVT::f64: Opc = PPC::STFD; break;
2438 case MVT::f32: Opc = PPC::STFS; break;
2440 } else { //ISD::TRUNCSTORE
2441 switch(cast<VTSDNode>(Node->getOperand(4))->getVT()) {
2442 default: assert(0 && "unknown Type in store");
2444 case MVT::i8: Opc = PPC::STB; break;
2445 case MVT::i16: Opc = PPC::STH; break;
// Emit the store. A bare FrameIndex address is referenced directly;
// every other address is decomposed by SelectAddr.
2449 if(Address.getOpcode() == ISD::FrameIndex) {
2450 Tmp2 = cast<FrameIndexSDNode>(Address)->getIndex();
2451 addFrameReference(BuildMI(BB, Opc, 3).addReg(Tmp1), (int)Tmp2);
// The return value of SelectAddr selects the addressing form; Tmp2 and
// `offset` are filled in by the call (the declaration of `offset` is not
// visible here).
2454 switch(SelectAddr(Address, Tmp2, offset)) {
2455 default: assert(0 && "Unhandled return value from SelectAddr");
2456 case 0: // imm offset, no frame, no index
2457 BuildMI(BB, Opc, 3).addReg(Tmp1).addSImm(offset).addReg(Tmp2);
2459 case 1: // imm offset + frame index
2460 addFrameReference(BuildMI(BB, Opc, 3).addReg(Tmp1), (int)Tmp2, offset);
2462 case 2: // base+index addressing
// Switch to the indexed form of the opcode; `offset` is added as a register here.
2463 Opc = IndexedOpForOp(Opc);
2464 BuildMI(BB, Opc, 3).addReg(Tmp1).addReg(Tmp2).addReg(offset);
// Global address form: the global is attached to the store as an operand
// together with the register in Tmp2 (the `case` label is not visible here).
2467 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Address);
2468 GlobalValue *GV = GN->getGlobal();
2469 BuildMI(BB, Opc, 3).addReg(Tmp1).addGlobalAddress(GV).addReg(Tmp2);
// NOTE(review): the code for CopyFromReg and DYNAMIC_STACKALLOC (and any labels
// between them) is not visible here.
2479 case ISD::CopyFromReg:
2482 case ISD::DYNAMIC_STACKALLOC:
// Every handled case is expected to leave the function before this point.
2487 assert(0 && "Should not be reached!");
2491 /// createPPC32ISelPattern - This pass converts an LLVM function
2492 /// into a machine code representation using pattern matching and a machine
2493 /// description file.
///
/// Returns a newly allocated ISel constructed for the target machine TM.
/// NOTE(review): the object is created with `new` and nothing here frees it;
/// presumably the caller takes ownership of the returned pass - confirm.
2495 FunctionPass *llvm::createPPC32ISelPattern(TargetMachine &TM) {
2496 return new ISel(TM);