// Control flow operators
void visitReturnInst(ReturnInst &RI);
void visitBranchInst(BranchInst &BI);
+ void visitUnreachableInst(UnreachableInst &UI) {}
struct ValueRecord {
Value *Val;
MachineBasicBlock::iterator IP,
Value *Op, Value *ShiftAmount, bool isLeftShift,
const Type *ResultTy, unsigned DestReg);
+
+ // Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
+ // constant.
+ void doSHLDConst(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
+ unsigned Op1Val);
/// emitSelectOperation - Common code shared between visitSelectInst and the
/// constant expression support.
void X86ISel::copyConstantToRegister(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Constant *C, unsigned R) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (isa<UndefValue>(C)) {
+ switch (getClassB(C->getType())) {
+ case cFP:
+ // FIXME: SHOULD TEACH STACKIFIER ABOUT UNDEF VALUES!
+ BuildMI(*MBB, IP, X86::FLD0, 0, R);
+ return;
+ case cLong:
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R+1);
+ // FALL THROUGH
+ default:
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R);
+ return;
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
unsigned Class = 0;
switch (CE->getOpcode()) {
case Instruction::GetElementPtr:
// The arguments are already supposed to be of the same type.
const Type *CompTy = Op0->getType();
unsigned Class = getClassB(CompTy);
- unsigned Op0r = getReg(Op0, MBB, IP);
// Special case handling of: cmp R, i
if (isa<ConstantPointerNull>(Op1)) {
+ unsigned Op0r = getReg(Op0, MBB, IP);
if (OpNum < 2) // seteq/setne -> test
BuildMI(*MBB, IP, X86::TEST32rr, 2).addReg(Op0r).addReg(Op0r);
else
// can't handle unsigned comparisons against zero unless they are == or
// !=. These should have been strength reduced already anyway.
if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) {
+
+ // If this is a comparison against zero and the LHS is an and of a
+ // register with a constant, use the test to do the and.
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0))
+ if (Op0I->getOpcode() == Instruction::And && Op0->hasOneUse() &&
+ isa<ConstantInt>(Op0I->getOperand(1))) {
+ static const unsigned TESTTab[] = {
+ X86::TEST8ri, X86::TEST16ri, X86::TEST32ri
+ };
+
+ // Emit test X, i
+ unsigned LHS = getReg(Op0I->getOperand(0), MBB, IP);
+ unsigned Imm =
+ cast<ConstantInt>(Op0I->getOperand(1))->getRawValue();
+ BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(LHS).addImm(Imm);
+
+ if (OpNum == 2) return 6; // Map jl -> js
+ if (OpNum == 3) return 7; // Map jg -> jns
+ return OpNum;
+ }
+
+ unsigned Op0r = getReg(Op0, MBB, IP);
static const unsigned TESTTab[] = {
X86::TEST8rr, X86::TEST16rr, X86::TEST32rr
};
X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
};
+ unsigned Op0r = getReg(Op0, MBB, IP);
BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
return OpNum;
} else {
+ unsigned Op0r = getReg(Op0, MBB, IP);
assert(Class == cLong && "Unknown integer class!");
unsigned LowCst = CI->getRawValue();
unsigned HiCst = CI->getRawValue() >> 32;
}
}
+ unsigned Op0r = getReg(Op0, MBB, IP);
+
// Special case handling of comparison against +/- 0.0
if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) {
Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1);
unsigned Class = getClassB(B.getType());
+ // If this is AND X, C, and it is only used by a setcc instruction, it will
+ // be folded. There is no need to emit this instruction.
+ if (B.hasOneUse() && OperatorClass == 2 && isa<ConstantInt>(Op1))
+ if (Class == cByte || Class == cShort || Class == cInt) {
+ Instruction *Use = cast<Instruction>(B.use_back());
+ if (isa<SetCondInst>(Use) &&
+ Use->getOperand(1) == Constant::getNullValue(B.getType())) {
+ switch (getSetCCNumber(Use->getOpcode())) {
+ case 0:
+ case 1:
+ return;
+ default:
+ if (B.getType()->isSigned()) return;
+ }
+ }
+ }
+
// Special case: op Reg, load [mem]
if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1) && Class != cLong &&
Op0->hasOneUse() &&
getReg (I));
}
+/// Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
+/// constant.
+void X86ISel::doSHLDConst(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator IP,
+ unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
+ unsigned Amt) {
+ // SHLD is a very inefficient operation on every processor, try to do
+ // somethign simpler for common values of 'Amt'.
+ if (Amt == 0) {
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
+ } else if (Amt == 1) {
+ unsigned Tmp = makeAnotherReg(Type::UIntTy);
+ BuildMI(*MBB, IP, X86::ADD32rr, 2, Tmp).addReg(Op1Reg).addReg(Op1Reg);
+ BuildMI(*MBB, IP, X86::ADC32rr, 2, DestReg).addReg(Op0Reg).addReg(Op0Reg);
+ } else if (Amt == 2 || Amt == 3) {
+ // On the P4 and Athlon it is cheaper to replace shld ..., 2|3 with a
+ // shift/lea pair. NOTE: This should not be done on the P6 family!
+ unsigned Tmp = makeAnotherReg(Type::UIntTy);
+ BuildMI(*MBB, IP, X86::SHR32ri, 2, Tmp).addReg(Op1Reg).addImm(32-Amt);
+ X86AddressMode AM;
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Tmp;
+ AM.Scale = 1 << Amt;
+ AM.IndexReg = Op0Reg;
+ AM.Disp = 0;
+ addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 4, DestReg), AM);
+ } else {
+ // NOTE: It is always cheaper on the P4 to emit SHLD as two shifts and an OR
+ // than it is to emit a real SHLD.
+
+ BuildMI(*MBB, IP, X86::SHLD32rri8, 3,
+ DestReg).addReg(Op0Reg).addReg(Op1Reg).addImm(Amt);
+ }
+}
+
/// emitShiftOperation - Common code shared between visitShiftInst and
/// constant expression support.
void X86ISel::emitShiftOperation(MachineBasicBlock *MBB,
unsigned SrcReg = getReg (Op, MBB, IP);
bool isSigned = ResultTy->isSigned ();
unsigned Class = getClass (ResultTy);
-
- static const unsigned ConstantOperand[][4] = {
- { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri, X86::SHRD32rri8 }, // SHR
- { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri, X86::SHRD32rri8 }, // SAR
- { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 }, // SHL
- { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 }, // SAL = SHL
+
+ static const unsigned ConstantOperand[][3] = {
+ { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri }, // SHR
+ { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri }, // SAR
+ { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri }, // SHL
+ { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri }, // SAL = SHL
};
- static const unsigned NonConstantOperand[][4] = {
+ static const unsigned NonConstantOperand[][3] = {
{ X86::SHR8rCL, X86::SHR16rCL, X86::SHR32rCL }, // SHR
{ X86::SAR8rCL, X86::SAR16rCL, X86::SAR32rCL }, // SAR
{ X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL }, // SHL
{ X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL }, // SAL = SHL
};
- // Longs, as usual, are handled specially...
+ // Longs, as usual, are handled specially.
if (Class == cLong) {
- // If we have a constant shift, we can generate much more efficient code
- // than otherwise...
- //
if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
unsigned Amount = CUI->getValue();
- if (Amount < 32) {
+ if (Amount == 1 && isLeftShift) { // X << 1 == X+X
+ BuildMI(*MBB, IP, X86::ADD32rr, 2,
+ DestReg).addReg(SrcReg).addReg(SrcReg);
+ BuildMI(*MBB, IP, X86::ADC32rr, 2,
+ DestReg+1).addReg(SrcReg+1).addReg(SrcReg+1);
+ } else if (Amount < 32) {
const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
if (isLeftShift) {
- BuildMI(*MBB, IP, Opc[3], 3,
- DestReg+1).addReg(SrcReg+1).addReg(SrcReg).addImm(Amount);
+ doSHLDConst(MBB, IP, DestReg+1, SrcReg+1, SrcReg, Amount);
BuildMI(*MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addImm(Amount);
} else {
- BuildMI(*MBB, IP, Opc[3], 3,
- DestReg).addReg(SrcReg ).addReg(SrcReg+1).addImm(Amount);
+ BuildMI(*MBB, IP, X86::SHRD32rri8, 3,
+ DestReg).addReg(SrcReg ).addReg(SrcReg+1).addImm(Amount);
BuildMI(*MBB, IP, Opc[2],2,DestReg+1).addReg(SrcReg+1).addImm(Amount);
}
- } else { // Shifting more than 32 bits
- Amount -= 32;
+ } else if (Amount == 32) {
if (isLeftShift) {
- if (Amount != 0) {
- BuildMI(*MBB, IP, X86::SHL32ri, 2,
- DestReg + 1).addReg(SrcReg).addImm(Amount);
- } else {
- BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
- }
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
} else {
- if (Amount != 0) {
- BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
- DestReg).addReg(SrcReg+1).addImm(Amount);
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
+ if (!isSigned) {
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
} else {
- BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
+ BuildMI(*MBB, IP, X86::SAR32ri, 2,
+ DestReg+1).addReg(SrcReg).addImm(31);
}
+ }
+ } else { // Shifting more than 32 bits
+ Amount -= 32;
+ if (isLeftShift) {
+ BuildMI(*MBB, IP, X86::SHL32ri, 2,
+ DestReg + 1).addReg(SrcReg).addImm(Amount);
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
+ } else {
+ BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
+ DestReg).addReg(SrcReg+1).addImm(Amount);
BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
}
}
} else {
unsigned TmpReg = makeAnotherReg(Type::IntTy);
-
if (!isLeftShift && isSigned) {
// If this is a SHR of a Long, then we need to do funny sign extension
// stuff. TmpReg gets the value to use as the high-part if we are
// The shift amount is constant, guaranteed to be a ubyte. Get its value.
assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
- const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
- BuildMI(*MBB, IP, Opc[Class], 2,
- DestReg).addReg(SrcReg).addImm(CUI->getValue());
+ if (CUI->getValue() == 1 && isLeftShift) { // X << 1 -> X+X
+ static const int AddOpC[] = { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
+ BuildMI(*MBB, IP, AddOpC[Class], 2,DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else {
+ const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
+ BuildMI(*MBB, IP, Opc[Class], 2,
+ DestReg).addReg(SrcReg).addImm(CUI->getValue());
+ }
} else { // The shift amount is non-constant.
unsigned ShiftAmountReg = getReg (ShiftAmount, MBB, IP);
BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
PromoteType = Type::IntTy;
PromoteOpcode = X86::MOVZX32rr16;
break;
- case Type::UIntTyID: {
- // Make a 64 bit temporary... and zero out the top of it...
- unsigned TmpReg = makeAnotherReg(Type::LongTy);
- BuildMI(*BB, IP, X86::MOV32rr, 1, TmpReg).addReg(SrcReg);
- BuildMI(*BB, IP, X86::MOV32ri, 1, TmpReg+1).addImm(0);
- SrcTy = Type::LongTy;
- SrcClass = cLong;
- SrcReg = TmpReg;
- break;
- }
case Type::ULongTyID:
+ case Type::UIntTyID:
// Don't fild into the read destination.
DestReg = makeAnotherReg(Type::DoubleTy);
break;
{ 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
- // We need special handling for unsigned 64-bit integer sources. If the
- // input number has the "sign bit" set, then we loaded it incorrectly as a
- // negative 64-bit number. In this case, add an offset value.
- if (SrcTy == Type::ULongTy) {
+ if (SrcTy == Type::UIntTy) {
+ // If this is a cast from uint -> double, we need to be careful about if
+ // the "sign" bit is set. If so, we don't want to make a negative number,
+ // we want to make a positive number. Emit code to add an offset if the
+ // sign bit is set.
+
+ // Compute whether the sign bit is set by shifting the reg right 31 bits.
+ unsigned IsNeg = makeAnotherReg(Type::IntTy);
+ BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
+
+ // Create a CP value that has the offset in one word and 0 in the other.
+ static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
+ 0x4f80000000000000ULL);
+ unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
+ BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
+ .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
+
+ } else if (SrcTy == Type::ULongTy) {
+ // We need special handling for unsigned 64-bit integer sources. If the
+ // input number has the "sign bit" set, then we loaded it incorrectly as a
+ // negative 64-bit number. In this case, add an offset value.
+
// Emit a test instruction to see if the dynamic input value was signed.
BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);