1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements the TargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Target/TargetLowering.h"
15 #include "llvm/Target/TargetData.h"
16 #include "llvm/Target/TargetMachine.h"
17 #include "llvm/Target/MRegisterInfo.h"
18 #include "llvm/DerivedTypes.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/Support/MathExtras.h"
/// TargetLowering constructor - Initialize all per-target lowering state to
/// conservative defaults: every ISD operation is marked "supported"
/// (zero-filled action tables), pointer-sized types are used for shift
/// amounts / setcc results, and the memset/memcpy/memmove store-expansion
/// thresholds are set to 8 stores each.
/// NOTE(review): several original lines are elided from this excerpt
/// (including the closing brace); only the visible statements are documented.
TargetLowering::TargetLowering(TargetMachine &tm)
  : TM(tm), TD(TM.getTargetData()) {
  // OpActions is a fixed-size array indexed by ISD opcode; guard its bound.
  assert(ISD::BUILTIN_OP_END <= 156 &&
         "Fixed size array in TargetLowering is not large enough!");
  // All operations default to being supported.
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadXActions, 0, sizeof(LoadXActions));

  // Derive byte order and the native pointer-sized integer type from the
  // TargetData; shift amounts and setcc results default to that same type.
  IsLittleEndian = TD->isLittleEndian();
  ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType());
  ShiftAmtHandling = Undefined;
  // No register classes registered yet; targets add them later.
  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
  memset(TargetDAGCombineArray, 0,
         sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0]));
  // Expand small memset/memcpy/memmove into at most 8 individual stores.
  maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
  allowUnalignedMemoryAccesses = false;
  UseUnderscoreSetJmpLongJmp = false;
  IntDivIsCheap = false;
  Pow2DivIsCheap = false;
  StackPointerRegisterToSaveRestore = 0;
  SchedPreferenceInfo = SchedulingForLatency;
// Out-of-line empty destructor (anchors the vtable in this translation unit).
TargetLowering::~TargetLowering() {}
/// setValueTypeAction - Set the action for a particular value type.  This
/// assumes an action has not already been set for this value type.
///
/// For Promote: walks upward from VT to the next legal (integer) type and
/// records it in TransformToType[VT].  For Expand: records the next smaller
/// integer type.  NOTE(review): this excerpt elides a parameter line between
/// `Action` and `TransformToType` — the body references `TLI`, so a
/// `TargetLowering &TLI` parameter is presumably declared there; loop
/// increments and closing braces are also elided. TODO confirm against the
/// full file.
static void SetValueTypeAction(MVT::ValueType VT,
                               TargetLowering::LegalizeAction Action,
                               MVT::ValueType *TransformToType,
                               TargetLowering::ValueTypeActionImpl &ValueTypeActions) {
  ValueTypeActions.setTypeAction(VT, Action);
  if (Action == TargetLowering::Promote) {
    MVT::ValueType PromoteTo;
      // Scan upward for the first type the target considers legal.
      // NOTE(review): the loop increment of LargerReg is elided here.
      unsigned LargerReg = VT+1;
      while (!TLI.isTypeLegal((MVT::ValueType)LargerReg)) {
        // Running past the integer types would mean no legal wider type.
        assert(MVT::isInteger((MVT::ValueType)LargerReg) &&
               "Nothing to promote to??");
      PromoteTo = (MVT::ValueType)LargerReg;

    // Promotion must stay within the same type class and grow the type.
    assert(MVT::isInteger(VT) == MVT::isInteger(PromoteTo) &&
           MVT::isFloatingPoint(VT) == MVT::isFloatingPoint(PromoteTo) &&
           "Can only promote from int->int or fp->fp!");
    assert(VT < PromoteTo && "Must promote to a larger type!");
    TransformToType[VT] = PromoteTo;
  } else if (Action == TargetLowering::Expand) {
    // Expansion relies on the target having at least an i8 register class.
    assert((VT == MVT::Vector || MVT::isInteger(VT)) && VT > MVT::i8 &&
           "Cannot expand this type: target must support SOME integer reg!");
    // Expand to the next smaller integer type!
    TransformToType[VT] = (MVT::ValueType)(VT-1);
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
///
/// Fills NumElementsForVT (how many registers each value type needs) and
/// TransformToType (what each illegal type is promoted/expanded to), using
/// SetValueTypeAction for the illegal cases.
/// NOTE(review): several lines are elided in this excerpt — trailing call
/// arguments (e.g. the `ValueTypeActions` argument at two call sites), the
/// `else` arms pairing with the promote calls, and closing braces.  The
/// visible token stream is kept as-is.
void TargetLowering::computeRegisterProperties() {
  // ValueTypeActions packs 2 bits per type into fixed-width storage.
  assert(MVT::LAST_VALUETYPE <= 32 &&
         "Too many value types for ValueTypeActions to hold!");

  // Everything defaults to one.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i)
    NumElementsForVT[i] = 1;

  // Find the largest integer register class.
  unsigned LargestIntReg = MVT::i128;
  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");

  // Every integer value type larger than this largest register takes twice as
  // many registers to represent as the previous ValueType.
  unsigned ExpandedReg = LargestIntReg; ++LargestIntReg;
  for (++ExpandedReg; MVT::isInteger((MVT::ValueType)ExpandedReg);++ExpandedReg)
    NumElementsForVT[ExpandedReg] = 2*NumElementsForVT[ExpandedReg-1];

  // Inspect all of the ValueType's possible, deciding how to process them.
  for (unsigned IntReg = MVT::i1; IntReg <= MVT::i128; ++IntReg)
    // If we are expanding this type, expand it!
    if (getNumElements((MVT::ValueType)IntReg) != 1)
      SetValueTypeAction((MVT::ValueType)IntReg, Expand, *this, TransformToType,
    else if (!isTypeLegal((MVT::ValueType)IntReg))
      // Otherwise, if we don't have native support, we must promote to a
      // larger legal type (next line elided in this excerpt: "larger type.").
      SetValueTypeAction((MVT::ValueType)IntReg, Promote, *this,
                         TransformToType, ValueTypeActions);
      // Legal types map to themselves.
      TransformToType[(MVT::ValueType)IntReg] = (MVT::ValueType)IntReg;

  // If the target does not have native support for F32, promote it to F64.
  if (!isTypeLegal(MVT::f32))
    SetValueTypeAction(MVT::f32, Promote, *this,
                       TransformToType, ValueTypeActions);
    TransformToType[MVT::f32] = MVT::f32;

  // Set MVT::Vector to always be Expanded
  SetValueTypeAction(MVT::Vector, Expand, *this, TransformToType,

  // Loop over all of the legal vector value types, specifying an identity type
  // transformation.
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
    if (isTypeLegal((MVT::ValueType)i))
      TransformToType[i] = (MVT::ValueType)i;

  // f64 support is assumed; it is never promoted or expanded here.
  assert(isTypeLegal(MVT::f64) && "Target does not support FP?");
  TransformToType[MVT::f64] = MVT::f64;
/// getTargetNodeName - Return a human-readable name for a target-specific DAG
/// node opcode.  NOTE(review): the function body is elided from this excerpt;
/// presumably the base-class default returns a null/empty name — confirm
/// against the full file.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
/// getPackedTypeBreakdown - Packed types are broken down into some number of
/// legal first class types. For example, <8 x float> maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
///
/// This method returns the number and type of the resultant breakdown.
/// NOTE(review): this excerpt elides the loop body that halves NumElts /
/// multiplies NumVectorRegs, the declaration of `VT`, the output-parameter
/// store into PTyElementVT, and the expand-vs-promote branch; only the
/// visible statements are kept.
unsigned TargetLowering::getPackedTypeBreakdown(const PackedType *PTy,
                                                MVT::ValueType &PTyElementVT,
                                      MVT::ValueType &PTyLegalElementVT) const {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = PTy->getNumElements();
  MVT::ValueType EltTy = getValueType(PTy->getElementType());

  unsigned NumVectorRegs = 1;

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !isTypeLegal(getVectorType(EltTy, NumElts))) {
  // NOTE(review): `VT` is declared in elided lines above this point.
  VT = getVectorType(EltTy, NumElts);
  // Map the decimated vector type to whatever it must be lowered to.
  MVT::ValueType DestVT = getTypeToTransformTo(VT);
  PTyLegalElementVT = DestVT;
  // Value is expanded, e.g. i64 -> i16.
  return NumVectorRegs*(MVT::getSizeInBits(VT)/MVT::getSizeInBits(DestVT));
  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
193 //===----------------------------------------------------------------------===//
194 // Optimization Methods
195 //===----------------------------------------------------------------------===//
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer.  If so, check to see if there
/// are any bits set in the constant that are not demanded.  If so, shrink the
/// constant and return true.
///
/// NOTE(review): this excerpt elides the second parameter line (the body
/// reads a `Demanded` mask, so presumably `uint64_t Demanded` is declared
/// there), the switch's `case` labels (the shrink applies to bitwise ops
/// whose RHS constant bits can be dropped), the closing of the
/// DAG.getConstant call, and the function's fall-through return.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op,
  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch(Op.getOpcode()) {
    // If operand 1 is a constant with bits set outside Demanded, rebuild the
    // node with the constant masked down to only the demanded bits.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
      if ((~Demanded & C->getValue()) != 0) {
        MVT::ValueType VT = Op.getValueType();
        SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0),
                                    DAG.getConstant(Demanded & C->getValue(),
        // Replace the old node with the shrunken one in the DAG.
        return CombineTo(Op, New);
/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
/// DemandedMask bits of the result of Op are ever used downstream.  If we can
/// use this information to simplify Op, create a new simplified DAG node and
/// return true, returning the original and new nodes in Old and New. Otherwise,
/// analyze the expression and return a mask of KnownOne and KnownZero bits for
/// the expression (used to simplify the caller).  The KnownZero/One bits may
/// only be accurate for those bits in the DemandedMask.
///
/// NOTE(review): this excerpt elides many lines — the `KnownZero`/`KnownOne`
/// reference parameters in the signature, nearly every `case ISD::...:` label
/// and `break;` in the big switch, and assorted closing braces / trailing
/// call arguments.  Each elided case label is flagged below based on the
/// surviving comments; confirm against the full file.  The visible token
/// stream is unchanged.
bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const {
  KnownZero = KnownOne = 0;   // Don't know anything.
  // Other users may use these bits.
  if (!Op.Val->hasOneUse()) {
    // If not at the root, Just compute the KnownZero/KnownOne bits to
    // simplify things downstream.
    ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedMask to all bits.
    DemandedMask = MVT::getIntVTBitMask(Op.getValueType());
  } else if (DemandedMask == 0) {
    // Not demanding any bits from Op.
    if (Op.getOpcode() != ISD::UNDEF)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType()));
  } else if (Depth == 6) {        // Limit search depth.

  uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
  switch (Op.getOpcode()) {
    // NOTE(review): `case ISD::Constant:` label elided here.
    // We know all of the bits for a constant!
    KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask;
    KnownZero = ~KnownOne & DemandedMask;
    return false;   // Don't fall through, will infinitely loop.
    // NOTE(review): `case ISD::AND:` label elided here (see 'and' comments).
    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS.  Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS (elided line completes this sentence).
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      uint64_t LHSZero, LHSOne;
      ComputeMaskedBits(Op.getOperand(0), DemandedMask,
                        LHSZero, LHSOne, Depth+1);
      // If the LHS already has zeros where RHSC does, this and is dead.
      if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask))
        return TLO.CombineTo(Op, Op.getOperand(0));
      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant (closing of this if and its return are elided).
      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask))

    // Simplify RHS first, then use its known-zero bits to narrow what the
    // LHS must supply.
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero,
                             KnownZero2, KnownOne2, TLO, Depth+1))
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask)
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2))

    // Output known-1 bits are only known if set in both the LHS & RHS.
    KnownOne &= KnownOne2;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    KnownZero |= KnownZero2;
    // NOTE(review): `break;` and `case ISD::OR:` label elided here.
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne,
                             KnownZero2, KnownOne2, TLO, Depth+1))
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the potentially set bits on one side are known to be set on
    // the other side, just use the 'other' side.
    if ((DemandedMask & (~KnownZero) & KnownOne2) ==
        (DemandedMask & (~KnownZero)))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & (~KnownZero2) & KnownOne) ==
        (DemandedMask & (~KnownZero2)))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    KnownZero &= KnownZero2;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    KnownOne |= KnownOne2;
    // NOTE(review): `break;` and `case ISD::XOR:` label elided here.
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if ((DemandedMask & KnownZero) == DemandedMask)
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & KnownZero2) == DemandedMask)
      return TLO.CombineTo(Op, Op.getOperand(1));

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (uint64_t UnknownBits = DemandedMask & ~(KnownZeroOut|KnownOneOut))
      if ((UnknownBits & (KnownZero|KnownZero2)) == UnknownBits)
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(),

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known
      if ((KnownOne & KnownOne2) == KnownOne) {
        MVT::ValueType VT = Op.getValueType();
        SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0),

    // If the RHS is a constant, see if we can simplify it.
    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))

    KnownZero = KnownZeroOut;
    KnownOne  = KnownOneOut;
    // NOTE(review): `break;` and `case ISD::SETCC:` label elided here.
    // If we know the result of a setcc has the top bits zero, use this info.
    if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    // NOTE(review): `break;` and `case ISD::SELECT:` label elided here
    // (operands 1 and 2 are the two selected values).
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    // NOTE(review): `break;` and `case ISD::SELECT_CC:` label elided here
    // (operands 2 and 3 are the two selected values).
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    // NOTE(review): `break;` and `case ISD::SHL:` label elided here (the
    // demanded mask is shifted right into the input, known bits shift left).
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> SA->getValue(),
                               KnownZero, KnownOne, TLO, Depth+1))
      KnownZero <<= SA->getValue();
      KnownOne  <<= SA->getValue();
      KnownZero |= (1ULL << SA->getValue())-1;  // low bits known zero.
    // NOTE(review): `break;` and `case ISD::SRL:` label elided here.
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT::ValueType VT = Op.getValueType();
      unsigned ShAmt = SA->getValue();

      // Compute the new bits that are at the top now.
      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
      if (SimplifyDemandedBits(Op.getOperand(0),
                               (DemandedMask << ShAmt) & TypeMask,
                               KnownZero, KnownOne, TLO, Depth+1))
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero &= TypeMask;
      KnownOne  &= TypeMask;
      // NOTE(review): the right-shift of KnownZero/KnownOne by ShAmt is
      // elided here.
      uint64_t HighBits = (1ULL << ShAmt)-1;
      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
      KnownZero |= HighBits;  // High bits known zero.
    // NOTE(review): `break;` and `case ISD::SRA:` label elided here.
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT::ValueType VT = Op.getValueType();
      unsigned ShAmt = SA->getValue();

      // Compute the new bits that are at the top now.
      uint64_t TypeMask = MVT::getIntVTBitMask(VT);

      uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask;

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      uint64_t HighBits = (1ULL << ShAmt)-1;
      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
      if (HighBits & DemandedMask)
        InDemandedMask |= MVT::getIntVTSignBit(VT);

      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
                               KnownZero, KnownOne, TLO, Depth+1))
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero &= TypeMask;
      KnownOne  &= TypeMask;
      // NOTE(review): the right-shift of KnownZero/KnownOne by ShAmt is
      // elided here.

      // Handle the sign bits.
      uint64_t SignBit = MVT::getIntVTSignBit(VT);
      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) {
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0),
      } else if (KnownOne & SignBit) { // New bits are known one.
        KnownOne |= HighBits;
  case ISD::SIGN_EXTEND_INREG: {
    MVT::ValueType VT = Op.getValueType();
    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

    // Sign extension.  Compute the demanded bits in the result that are not
    // present in the input.
    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask;

    // If none of the extended bits are demanded, eliminate the sextinreg.
    // NOTE(review): the guarding `if (NewBits == 0)` line is elided here.
      return TLO.CombineTo(Op, Op.getOperand(0));

    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
    int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded (continuation line elided).
    InputDemandedBits |= InSignBit;

    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
                             KnownZero, KnownOne, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (KnownZero & InSignBit)
      return TLO.CombineTo(Op,
                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT));

    if (KnownOne & InSignBit) {    // Input sign bit known set
      // NOTE(review): `KnownOne |= NewBits;` line elided here.
      KnownZero &= ~NewBits;
    } else {                       // Input sign bit unknown
      KnownZero &= ~NewBits;
      KnownOne &= ~NewBits;
    // NOTE(review): `case ISD::CTTZ/CTLZ/CTPOP:` labels elided here — a count
    // result fits in log2(bitwidth)+1 low bits, so all higher bits are zero.
    MVT::ValueType VT = Op.getValueType();
    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
    // NOTE(review): `break;` and a load case label (extending loads) elided.
    if (ISD::isZEXTLoad(Op.Val)) {
      MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(3))->getVT();
      KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask;
  case ISD::ZERO_EXTEND: {
    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());

    // If none of the top bits are demanded, convert this into an any_extend.
    uint64_t NewBits = (~InMask) & DemandedMask;
    // NOTE(review): the guarding `if (NewBits == 0)` line is elided here.
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,

    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero |= NewBits;
  case ISD::SIGN_EXTEND: {
    MVT::ValueType InVT = Op.getOperand(0).getValueType();
    uint64_t InMask    = MVT::getIntVTBitMask(InVT);
    uint64_t InSignBit = MVT::getIntVTSignBit(InVT);
    uint64_t NewBits   = (~InMask) & DemandedMask;

    // If none of the top bits are demanded, convert this into an any_extend.
    // NOTE(review): the guarding `if (NewBits == 0)` line is elided here.
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(),

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded (continuation line elided).
    uint64_t InDemandedBits = DemandedMask & InMask;
    InDemandedBits |= InSignBit;

    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
                             KnownOne, TLO, Depth+1))

    // If the sign bit is known zero, convert this to a zero extend.
    if (KnownZero & InSignBit)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND,

    // If the sign bit is known one, the top bits match.
    if (KnownOne & InSignBit) {
      // NOTE(review): `KnownOne |= NewBits;` line elided here.
      KnownZero &= ~NewBits;
    } else {   // Otherwise, top bits aren't known.
      KnownOne  &= ~NewBits;
      KnownZero &= ~NewBits;
  case ISD::ANY_EXTEND: {
    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
  case ISD::TRUNCATE: {
    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
                             KnownZero, KnownOne, TLO, Depth+1))

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Op.getOperand(0).Val->hasOneUse()) {
      SDOperand In = Op.getOperand(0);
      switch (In.getOpcode()) {
      // NOTE(review): `default: break;` / `case ISD::SRL:` elided here.
      // Shrink SRL by a constant if none of the high bits shifted in are
      // demanded (continuation line elided).
      if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
        uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType());
        HighBits &= ~MVT::getIntVTBitMask(Op.getValueType());
        HighBits >>= ShAmt->getValue();

        if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) &&
            (DemandedMask & HighBits) == 0) {
          // None of the shifted in bits are needed.  Add a truncate of the
          // shift input, then shift it.
          SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE,
          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(),
                                                   NewTrunc, In.getOperand(1)));

    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
    KnownZero &= OutMask;
  case ISD::AssertZext: {
    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    uint64_t InMask = MVT::getIntVTBitMask(VT);
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    // Bits above the asserted width are guaranteed zero.
    KnownZero |= ~InMask & DemandedMask;
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_VOID:
    // Just use ComputeMaskedBits to compute output bits.
    ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);

  // If we know the value of all of the demanded bits, return this as a
  // constant (continuation line elided).
  if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
/// this predicate to simplify operations downstream.  Mask is known to be zero
/// for bits that V cannot have.
///
/// Implemented as a thin wrapper over ComputeMaskedBits: every bit of Mask
/// must land in the KnownZero set.  Depth is forwarded to bound the recursive
/// analysis.  (Closing brace elided from this excerpt.)
bool TargetLowering::MaskedValueIsZero(SDOperand Op, uint64_t Mask,
                                       unsigned Depth) const {
  uint64_t KnownZero, KnownOne;
  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
  // A bit can never be simultaneously known-zero and known-one.
  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
  return (KnownZero & Mask) == Mask;
703 /// ComputeMaskedBits - Determine which of the bits specified in Mask are
704 /// known to be either zero or one and return them in the KnownZero/KnownOne
705 /// bitsets. This code only analyzes bits in Mask, in order to short-circuit
707 void TargetLowering::ComputeMaskedBits(SDOperand Op, uint64_t Mask,
708 uint64_t &KnownZero, uint64_t &KnownOne,
709 unsigned Depth) const {
710 KnownZero = KnownOne = 0; // Don't know anything.
711 if (Depth == 6 || Mask == 0)
712 return; // Limit search depth.
714 uint64_t KnownZero2, KnownOne2;
716 switch (Op.getOpcode()) {
718 // We know all of the bits for a constant!
719 KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask;
720 KnownZero = ~KnownOne & Mask;
723 // If either the LHS or the RHS are Zero, the result is zero.
724 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
726 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
727 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
728 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
730 // Output known-1 bits are only known if set in both the LHS & RHS.
731 KnownOne &= KnownOne2;
732 // Output known-0 are known to be clear if zero in either the LHS | RHS.
733 KnownZero |= KnownZero2;
736 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
738 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
739 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
740 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
742 // Output known-0 bits are only known if clear in both the LHS & RHS.
743 KnownZero &= KnownZero2;
744 // Output known-1 are known to be set if set in either the LHS | RHS.
745 KnownOne |= KnownOne2;
748 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
749 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
750 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
751 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
753 // Output known-0 bits are known if clear or set in both the LHS & RHS.
754 uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
755 // Output known-1 are known to be set if set in only one of the LHS, RHS.
756 KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
757 KnownZero = KnownZeroOut;
761 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
762 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
763 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
764 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
766 // Only known if known in both the LHS and RHS.
767 KnownOne &= KnownOne2;
768 KnownZero &= KnownZero2;
771 ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
772 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
773 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
774 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
776 // Only known if known in both the LHS and RHS.
777 KnownOne &= KnownOne2;
778 KnownZero &= KnownZero2;
781 // If we know the result of a setcc has the top bits zero, use this info.
782 if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
783 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
786 // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
787 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
788 ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(),
789 KnownZero, KnownOne, Depth+1);
790 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
791 KnownZero <<= SA->getValue();
792 KnownOne <<= SA->getValue();
793 KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero.
797 // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
798 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
799 MVT::ValueType VT = Op.getValueType();
800 unsigned ShAmt = SA->getValue();
802 uint64_t TypeMask = MVT::getIntVTBitMask(VT);
803 ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask,
804 KnownZero, KnownOne, Depth+1);
805 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
806 KnownZero &= TypeMask;
807 KnownOne &= TypeMask;
811 uint64_t HighBits = (1ULL << ShAmt)-1;
812 HighBits <<= MVT::getSizeInBits(VT)-ShAmt;
813 KnownZero |= HighBits; // High bits known zero.
817 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
818 MVT::ValueType VT = Op.getValueType();
819 unsigned ShAmt = SA->getValue();
821 // Compute the new bits that are at the top now.
822 uint64_t TypeMask = MVT::getIntVTBitMask(VT);
824 uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask;
825 // If any of the demanded bits are produced by the sign extension, we also
826 // demand the input sign bit.
827 uint64_t HighBits = (1ULL << ShAmt)-1;
828 HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
830 InDemandedMask |= MVT::getIntVTSignBit(VT);
832 ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
834 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
835 KnownZero &= TypeMask;
836 KnownOne &= TypeMask;
840 // Handle the sign bits.
841 uint64_t SignBit = MVT::getIntVTSignBit(VT);
842 SignBit >>= ShAmt; // Adjust to where it is now in the mask.
844 if (KnownZero & SignBit) {
845 KnownZero |= HighBits; // New bits are known zero.
846 } else if (KnownOne & SignBit) {
847 KnownOne |= HighBits; // New bits are known one.
851 case ISD::SIGN_EXTEND_INREG: {
852 MVT::ValueType VT = Op.getValueType();
853 MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
855 // Sign extension. Compute the demanded bits in the result that are not
856 // present in the input.
857 uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask;
859 uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
860 int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT);
862 // If the sign extended bits are demanded, we know that the sign
865 InputDemandedBits |= InSignBit;
867 ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
868 KnownZero, KnownOne, Depth+1);
869 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
871 // If the sign bit of the input is known set or clear, then we know the
872 // top bits of the result.
873 if (KnownZero & InSignBit) { // Input sign bit known clear
874 KnownZero |= NewBits;
875 KnownOne &= ~NewBits;
876 } else if (KnownOne & InSignBit) { // Input sign bit known set
878 KnownZero &= ~NewBits;
879 } else { // Input sign bit unknown
880 KnownZero &= ~NewBits;
881 KnownOne &= ~NewBits;
888 MVT::ValueType VT = Op.getValueType();
889 unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
890 KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
895 if (ISD::isZEXTLoad(Op.Val)) {
896 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(3))->getVT();
897 KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask;
901 case ISD::ZERO_EXTEND: {
902 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
903 uint64_t NewBits = (~InMask) & Mask;
904 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
906 KnownZero |= NewBits & Mask;
907 KnownOne &= ~NewBits;
910 case ISD::SIGN_EXTEND: {
911 MVT::ValueType InVT = Op.getOperand(0).getValueType();
912 unsigned InBits = MVT::getSizeInBits(InVT);
913 uint64_t InMask = MVT::getIntVTBitMask(InVT);
914 uint64_t InSignBit = 1ULL << (InBits-1);
915 uint64_t NewBits = (~InMask) & Mask;
916 uint64_t InDemandedBits = Mask & InMask;
918 // If any of the sign extended bits are demanded, we know that the sign
921 InDemandedBits |= InSignBit;
923 ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero,
925 // If the sign bit is known zero or one, the top bits match.
926 if (KnownZero & InSignBit) {
927 KnownZero |= NewBits;
928 KnownOne &= ~NewBits;
929 } else if (KnownOne & InSignBit) {
931 KnownZero &= ~NewBits;
932 } else { // Otherwise, top bits aren't known.
933 KnownOne &= ~NewBits;
934 KnownZero &= ~NewBits;
938 case ISD::ANY_EXTEND: {
939 MVT::ValueType VT = Op.getOperand(0).getValueType();
940 ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT),
941 KnownZero, KnownOne, Depth+1);
944 case ISD::TRUNCATE: {
945 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
946 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
947 uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
948 KnownZero &= OutMask;
952 case ISD::AssertZext: {
953 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
954 uint64_t InMask = MVT::getIntVTBitMask(VT);
955 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
957 KnownZero |= (~InMask) & Mask;
961 // If either the LHS or the RHS are Zero, the result is zero.
962 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
963 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
964 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
965 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
967 // Output known-0 bits are known if clear or set in both the low clear bits
968 // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
970 uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero),
971 CountTrailingZeros_64(~KnownZero2));
973 KnownZero = (1ULL << KnownZeroOut) - 1;
978 ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
981 // We know that the top bits of C-X are clear if X contains less bits
982 // than C (i.e. no wrap-around can happen). For example, 20-X is
983 // positive if we can prove that X is >= 0 and < 16.
984 MVT::ValueType VT = CLHS->getValueType(0);
985 if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear
986 unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1);
987 uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit
988 MaskV = ~MaskV & MVT::getIntVTBitMask(VT);
989 ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
991 // If all of the MaskV bits are known to be zero, then we know the output
992 // top bits are zero, because we now know that the output is from [0-C].
993 if ((KnownZero & MaskV) == MaskV) {
994 unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue());
995 KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero.
996 KnownOne = 0; // No one bits known.
998 KnownZero = KnownOne = 0; // Otherwise, nothing known.
1004 // Allow the target to implement this method for its nodes.
1005 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
1006 case ISD::INTRINSIC_WO_CHAIN:
1007 case ISD::INTRINSIC_W_CHAIN:
1008 case ISD::INTRINSIC_VOID:
1009 computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne);
1015 /// computeMaskedBitsForTargetNode - Determine which of the bits specified
1016 /// in Mask are known to be either zero or one and return them in the
1017 /// KnownZero/KnownOne bitsets.
1018 void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
1020 uint64_t &KnownZero,
1022 unsigned Depth) const {
1023 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1024 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1025 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1026 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1027 "Should use MaskedValueIsZero if you don't know whether Op"
1028 " is a target node!");
/// ComputeNumSignBits - Return the number of times the sign bit of the
/// register is replicated into the other bits.  We know that at least 1 bit
/// is always equal to the sign bit (itself), but other cases can give us
/// information.  For example, immediately after an "SRA X, 2", we know that
/// the top 3 bits are all equal to each other, so we return 3.
///
/// NOTE(review): this listing has dropped lines throughout (several case
/// labels, closing braces, the Tmp/Tmp2 declarations, and some returns are
/// absent).  Compare against a complete copy of the file before editing.
unsigned TargetLowering::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{
  MVT::ValueType VT = Op.getValueType();
  assert(MVT::isInteger(VT) && "Invalid VT!");
  unsigned VTBits = MVT::getSizeInBits(VT);
  // (listing gap: the 'unsigned Tmp, Tmp2;' declarations and the depth-limit
  // 'if' guarding the next statement are not present here)
    return 1;  // Limit search depth.
  switch (Op.getOpcode()) {
  case ISD::AssertSext:
    // Asserted sign-extension from a Tmp-bit value: top VTBits-Tmp+1 bits
    // all equal the sign bit.
    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
    return VTBits-Tmp+1;
  case ISD::AssertZext:
    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
    // (listing gap: the 'return VTBits-Tmp;' for the AssertZext case is missing)
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(Op)->getValue();
    // If negative, invert the bits, then look at it.
    if (Val & MVT::getIntVTSignBit(VT))
    // (listing gap: the bit-invert and the left-shift that positions the
    // value's bits at the top of the int64_t are missing here)
    // Shift the bits so they are the leading bits in the int64_t.
    // Return # leading zeros.  We use 'min' here in case Val was zero before
    // shifting.  We don't want to return '64' as for an i32 "0".
    return std::min(VTBits, CountLeadingZeros_64(Val));
  case ISD::SIGN_EXTEND:
    // sext adds exactly (result width - input width) copies of the sign bit.
    Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType());
    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
  case ISD::SIGN_EXTEND_INREG:
    // Max of the input and what this extends.
    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
    return std::max(Tmp, Tmp2);
  // (presumably the ISD::SRA case — its label line is missing from the listing)
    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
    // SRA X, C   -> adds C sign bits.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      Tmp += C->getValue();
      if (Tmp > VTBits) Tmp = VTBits;
  // (presumably the ISD::SHL case — its label line is missing)
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      // shl destroys sign bits.
      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
      if (C->getValue() >= VTBits ||      // Bad shift.
          C->getValue() >= Tmp) break;    // Shifted all sign bits out.
      return Tmp - C->getValue();
  case ISD::XOR:    // NOT is handled here.
    // Logical binary ops preserve the number of sign bits.
    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
    if (Tmp == 1) return 1;  // Early out.
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
    return std::min(Tmp, Tmp2);
  // (another min-of-two-operands case whose label is missing — likely SELECT;
  // NOTE(review): upstream SELECT uses operands 1 and 2, not 0 and 1 — verify)
    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
    if (Tmp == 1) return 1;  // Early out.
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
    return std::min(Tmp, Tmp2);
  // (SETCC handling — the case label and the consequent return are missing)
    // If setcc returns 0/-1, all bits are sign bits.
    if (getSetCCResultContents() == ZeroOrNegativeOneSetCCResult)
  // (rotate handling; the ROTL/ROTR labels are missing — the ROTR test below
  // shows both opcodes funnel here)
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      unsigned RotAmt = C->getValue() & (VTBits-1);
      // Handle rotate right by N like a rotate left by 32-N.
      if (Op.getOpcode() == ISD::ROTR)
        RotAmt = (VTBits-RotAmt) & (VTBits-1);
      // If we aren't rotating out all of the known-in sign bits, return the
      // number that are left.  This handles rotl(sext(x), 1) for example.
      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
      if (Tmp > RotAmt+1) return Tmp-RotAmt;
  // (ISD::ADD case — its label line is missing)
    // Add can have at most one carry bit.  Thus we know that the output
    // is, at worst, one more bit than the inputs.
    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
    if (Tmp == 1) return 1;  // Early out.
    // Special case decrementing a value (ADD X, -1):
    // NOTE(review): the name CRHS and the "(ADD X, -1)" comment say this should
    // inspect the right-hand operand; upstream uses Op.getOperand(1) here, and
    // the ComputeMaskedBits call below should then analyze operand 0 — verify.
    if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
      if (CRHS->isAllOnesValue()) {
        uint64_t KnownZero, KnownOne;
        uint64_t Mask = MVT::getIntVTBitMask(VT);
        ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
        // If the input is known to be 0 or 1, the output is 0/-1, which is all
        if ((KnownZero|1) == Mask)
        // If we are subtracting one from a positive number, there is no carry
        // out of the result.
        if (KnownZero & MVT::getIntVTSignBit(VT))
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
    if (Tmp2 == 1) return 1;
    return std::min(Tmp, Tmp2)-1;
  // (ISD::SUB case — its label line is missing)
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
    if (Tmp2 == 1) return 1;
    // Handle NEG: 0 - X.
    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
      if (CLHS->getValue() == 0) {
        uint64_t KnownZero, KnownOne;
        uint64_t Mask = MVT::getIntVTBitMask(VT);
        ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
        // If the input is known to be 0 or 1, the output is 0/-1, which is all
        if ((KnownZero|1) == Mask)
        // If the input is known to be positive (the sign bit is known clear),
        // the output of the NEG has the same number of sign bits as the input.
        if (KnownZero & MVT::getIntVTSignBit(VT))
        // Otherwise, we treat this like a SUB.
    // Sub can have at most one carry bit.  Thus we know that the output
    // is, at worst, one more bit than the inputs.
    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
    if (Tmp == 1) return 1;  // Early out.
    return std::min(Tmp, Tmp2)-1;
    // FIXME: it's tricky to do anything useful for this, but it is an important
    // case for targets like X86.
  // Handle LOADX separately here. EXTLOAD case will fallthrough.
  if (Op.getOpcode() == ISD::LOADX) {
    unsigned LType = Op.getConstantOperandVal(4);
    // (listing gap: the 'switch (LType)' header is missing; the labels below
    // select on the load-extension type held in operand 4)
    case ISD::SEXTLOAD:    // '17' bits known
      Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(3))->getVT());
      return VTBits-Tmp+1;
    case ISD::ZEXTLOAD:    // '16' bits known
      Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(3))->getVT());
  // Allow the target to implement this method for its nodes.
  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
      Op.getOpcode() == ISD::INTRINSIC_VOID) {
    unsigned NumBits = ComputeNumSignBitsForTargetNode(Op, Depth);
    if (NumBits > 1) return NumBits;
  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  uint64_t KnownZero, KnownOne;
  uint64_t Mask = MVT::getIntVTBitMask(VT);
  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
  uint64_t SignBit = MVT::getIntVTSignBit(VT);
  if (KnownZero & SignBit) {        // SignBit is 0
  } else if (KnownOne & SignBit) {  // SignBit is 1;
  // (listing gap: the bodies/returns for the two known-sign branches above,
  // and the recomputation of Mask they feed, are missing here)
  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
  // the number of identical bits in the top of the input value.
  // Return # leading zeros.  We use 'min' here in case Val was zero before
  // shifting.  We don't want to return '64' as for an i32 "0".
  return std::min(VTBits, CountLeadingZeros_64(Mask));
1248 /// ComputeNumSignBitsForTargetNode - This method can be implemented by
1249 /// targets that want to expose additional information about sign bits to the
1251 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op,
1252 unsigned Depth) const {
1253 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1254 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1255 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1256 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1257 "Should use ComputeNumSignBits if you don't know whether Op"
1258 " is a target node!");
1263 SDOperand TargetLowering::
1264 PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
1265 // Default implementation: no optimization.
1269 //===----------------------------------------------------------------------===//
1270 // Inline Assembler Implementation Methods
1271 //===----------------------------------------------------------------------===//
1273 TargetLowering::ConstraintType
1274 TargetLowering::getConstraintType(char ConstraintLetter) const {
1275 // FIXME: lots more standard ones to handle.
1276 switch (ConstraintLetter) {
1277 default: return C_Unknown;
1278 case 'r': return C_RegisterClass;
1280 case 'o': // offsetable
1281 case 'V': // not offsetable
1283 case 'i': // Simple Integer or Relocatable Constant
1284 case 'n': // Simple Integer
1285 case 's': // Relocatable Constant
1286 case 'I': // Target registers.
1298 bool TargetLowering::isOperandValidForConstraint(SDOperand Op,
1299 char ConstraintLetter) {
1300 switch (ConstraintLetter) {
1301 default: return false;
1302 case 'i': // Simple Integer or Relocatable Constant
1303 case 'n': // Simple Integer
1304 case 's': // Relocatable Constant
1305 return true; // FIXME: not right.
1310 std::vector<unsigned> TargetLowering::
1311 getRegClassForInlineAsmConstraint(const std::string &Constraint,
1312 MVT::ValueType VT) const {
1313 return std::vector<unsigned>();
1317 std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
1318 getRegForInlineAsmConstraint(const std::string &Constraint,
1319 MVT::ValueType VT) const {
1320 if (Constraint[0] != '{')
1321 return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
1322 assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
1324 // Remove the braces from around the name.
1325 std::string RegName(Constraint.begin()+1, Constraint.end()-1);
1327 // Figure out which register class contains this reg.
1328 const MRegisterInfo *RI = TM.getRegisterInfo();
1329 for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
1330 E = RI->regclass_end(); RCI != E; ++RCI) {
1331 const TargetRegisterClass *RC = *RCI;
1333 // If none of the the value types for this register class are valid, we
1334 // can't use it. For example, 64-bit reg classes on 32-bit targets.
1335 bool isLegal = false;
1336 for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
1338 if (isTypeLegal(*I)) {
1344 if (!isLegal) continue;
1346 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
1348 if (StringsEqualNoCase(RegName, RI->get(*I).Name))
1349 return std::make_pair(*I, RC);
1353 return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
1356 //===----------------------------------------------------------------------===//
1357 // Loop Strength Reduction hooks
1358 //===----------------------------------------------------------------------===//
1360 /// isLegalAddressImmediate - Return true if the integer value or
1361 /// GlobalValue can be used as the offset of the target addressing mode.
1362 bool TargetLowering::isLegalAddressImmediate(int64_t V) const {
1365 bool TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
// Magic for divide replacement

/// ms - Result of a signed divide-by-constant magic-number computation.
struct ms {
  int64_t m;  // magic number
  int64_t s;  // shift amount
};

/// mu - Result of an unsigned divide-by-constant magic-number computation.
struct mu {
  uint64_t m; // magic number
  int64_t a;  // add indicator
  int64_t s;  // shift amount
};

/// magic - calculate the magic numbers required to codegen an integer sdiv as
/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
/// or -1.  Algorithm from "Hacker's Delight" (H. S. Warren), chapter 10.
static ms magic32(int32_t d) {
  int32_t p;
  uint32_t ad, anc, delta, q1, r1, q2, r2, t;
  const uint32_t two31 = 0x80000000U;
  ms mag;

  // Unsigned negate so the absolute value is well-defined even for INT32_MIN
  // (signed negation of the most-negative value is undefined behavior).
  ad = d >= 0 ? (uint32_t)d : -(uint32_t)d;
  t = two31 + ((uint32_t)d >> 31);
  anc = t - 1 - t%ad;   // absolute value of nc
  p = 31;               // initialize p
  q1 = two31/anc;       // initialize q1 = 2p/abs(nc)
  r1 = two31 - q1*anc;  // initialize r1 = rem(2p,abs(nc))
  q2 = two31/ad;        // initialize q2 = 2p/abs(d)
  r2 = two31 - q2*ad;   // initialize r2 = rem(2p,abs(d))
  do {
    p = p + 1;
    q1 = 2*q1;        // update q1 = 2p/abs(nc)
    r1 = 2*r1;        // update r1 = rem(2p/abs(nc))
    if (r1 >= anc) {  // must be unsigned comparison
      q1 = q1 + 1;
      r1 = r1 - anc;
    }
    q2 = 2*q2;        // update q2 = 2p/abs(d)
    r2 = 2*r2;        // update r2 = rem(2p/abs(d))
    if (r2 >= ad) {   // must be unsigned comparison
      q2 = q2 + 1;
      r2 = r2 - ad;
    }
    delta = ad - r2;
  } while (q1 < delta || (q1 == delta && r1 == 0));

  mag.m = (int32_t)(q2 + 1); // make sure to sign extend
  if (d < 0) mag.m = -mag.m; // resulting magic number
  mag.s = p - 32;            // resulting shift
  return mag;
}

/// magicu - calculate the magic numbers required to codegen an integer udiv as
/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
static mu magicu32(uint32_t d) {
  int32_t p;
  uint32_t nc, delta, q1, r1, q2, r2;
  mu magu;
  magu.a = 0;               // initialize "add" indicator
  nc = - 1 - (-d)%d;        // unsigned arithmetic wraps mod 2^32 by definition
  p = 31;                   // initialize p
  q1 = 0x80000000/nc;       // initialize q1 = 2p/nc
  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2p,nc)
  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2p-1)/d
  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2p-1),d)
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1; // update q1
      r1 = 2*r1; // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFF) magu.a = 1;
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x80000000) magu.a = 1;
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 32; // resulting shift
  return magu;
}

/// magic - calculate the magic numbers required to codegen an integer sdiv as
/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
/// or -1.  64-bit variant of magic32.
static ms magic64(int64_t d) {
  int64_t p;
  uint64_t ad, anc, delta, q1, r1, q2, r2, t;
  const uint64_t two63 = 9223372036854775808ULL; // 2^63
  ms mag;

  // Unsigned negate so the absolute value is well-defined even for INT64_MIN.
  ad = d >= 0 ? (uint64_t)d : -(uint64_t)d;
  t = two63 + ((uint64_t)d >> 63);
  anc = t - 1 - t%ad;   // absolute value of nc
  p = 63;               // initialize p
  q1 = two63/anc;       // initialize q1 = 2p/abs(nc)
  r1 = two63 - q1*anc;  // initialize r1 = rem(2p,abs(nc))
  q2 = two63/ad;        // initialize q2 = 2p/abs(d)
  r2 = two63 - q2*ad;   // initialize r2 = rem(2p,abs(d))
  do {
    p = p + 1;
    q1 = 2*q1;        // update q1 = 2p/abs(nc)
    r1 = 2*r1;        // update r1 = rem(2p/abs(nc))
    if (r1 >= anc) {  // must be unsigned comparison
      q1 = q1 + 1;
      r1 = r1 - anc;
    }
    q2 = 2*q2;        // update q2 = 2p/abs(d)
    r2 = 2*r2;        // update r2 = rem(2p/abs(d))
    if (r2 >= ad) {   // must be unsigned comparison
      q2 = q2 + 1;
      r2 = r2 - ad;
    }
    delta = ad - r2;
  } while (q1 < delta || (q1 == delta && r1 == 0));

  mag.m = q2 + 1;
  if (d < 0) mag.m = -mag.m; // resulting magic number
  mag.s = p - 64;            // resulting shift
  return mag;
}

/// magicu - calculate the magic numbers required to codegen an integer udiv as
/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
/// 64-bit variant of magicu32.
static mu magicu64(uint64_t d)
{
  int64_t p;
  uint64_t nc, delta, q1, r1, q2, r2;
  mu magu;
  magu.a = 0;               // initialize "add" indicator
  nc = - 1 - (-d)%d;
  p = 63;                   // initialize p
  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2p/nc
  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2p,nc)
  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2p-1)/d
  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2p-1),d)
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1;  // update q1
      r1 = 2*r1;  // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x8000000000000000ull) magu.a = 1;
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 64; // resulting shift
  return magu;
}
1544 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
1545 /// return a DAG expression to select that will generate the same value by
1546 /// multiplying by a magic number. See:
1547 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
1548 SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
1549 std::vector<SDNode*>* Created) const {
1550 MVT::ValueType VT = N->getValueType(0);
1552 // Check to see if we can do this.
1553 if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
1554 return SDOperand(); // BuildSDIV only operates on i32 or i64
1555 if (!isOperationLegal(ISD::MULHS, VT))
1556 return SDOperand(); // Make sure the target supports MULHS.
1558 int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
1559 ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
1561 // Multiply the numerator (operand 0) by the magic value
1562 SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
1563 DAG.getConstant(magics.m, VT));
1564 // If d > 0 and m < 0, add the numerator
1565 if (d > 0 && magics.m < 0) {
1566 Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
1568 Created->push_back(Q.Val);
1570 // If d < 0 and m > 0, subtract the numerator.
1571 if (d < 0 && magics.m > 0) {
1572 Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
1574 Created->push_back(Q.Val);
1576 // Shift right algebraic if shift value is nonzero
1578 Q = DAG.getNode(ISD::SRA, VT, Q,
1579 DAG.getConstant(magics.s, getShiftAmountTy()));
1581 Created->push_back(Q.Val);
1583 // Extract the sign bit and add it to the quotient
1585 DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
1586 getShiftAmountTy()));
1588 Created->push_back(T.Val);
1589 return DAG.getNode(ISD::ADD, VT, Q, T);
1592 /// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
1593 /// return a DAG expression to select that will generate the same value by
1594 /// multiplying by a magic number. See:
1595 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
1596 SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
1597 std::vector<SDNode*>* Created) const {
1598 MVT::ValueType VT = N->getValueType(0);
1600 // Check to see if we can do this.
1601 if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
1602 return SDOperand(); // BuildUDIV only operates on i32 or i64
1603 if (!isOperationLegal(ISD::MULHU, VT))
1604 return SDOperand(); // Make sure the target supports MULHU.
1606 uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
1607 mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
1609 // Multiply the numerator (operand 0) by the magic value
1610 SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
1611 DAG.getConstant(magics.m, VT));
1613 Created->push_back(Q.Val);
1615 if (magics.a == 0) {
1616 return DAG.getNode(ISD::SRL, VT, Q,
1617 DAG.getConstant(magics.s, getShiftAmountTy()));
1619 SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
1621 Created->push_back(NPQ.Val);
1622 NPQ = DAG.getNode(ISD::SRL, VT, NPQ,
1623 DAG.getConstant(1, getShiftAmountTy()));
1625 Created->push_back(NPQ.Val);
1626 NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
1628 Created->push_back(NPQ.Val);
1629 return DAG.getNode(ISD::SRL, VT, NPQ,
1630 DAG.getConstant(magics.s-1, getShiftAmountTy()));