#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/CFG.h"
#include "Support/Statistic.h"
using namespace llvm;
namespace {
Statistic<>
NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
+
+ /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
+ /// Representation.
+ ///
+ enum TypeClass {
+ cByte, cShort, cInt, cFP, cLong
+ };
+}
+
+/// getClass - Turn a primitive type into a "class" number which is based on the
+/// size of the type, and whether or not it is floating point.
+///
+static inline TypeClass getClass(const Type *Ty) {
+ switch (Ty->getPrimitiveID()) {
+ case Type::SByteTyID:
+ case Type::UByteTyID: return cByte; // Byte operands are class #0
+ case Type::ShortTyID:
+ case Type::UShortTyID: return cShort; // Short operands are class #1
+ case Type::IntTyID:
+ case Type::UIntTyID:
+ case Type::PointerTyID: return cInt; // Int's and pointers are class #2
+
+ case Type::FloatTyID:
+ case Type::DoubleTyID: return cFP; // Floating Point is #3
+
+ case Type::LongTyID:
+ case Type::ULongTyID: return cLong; // Longs are class #4
+ default:
+ assert(0 && "Invalid type to getClass!");
+ return cByte; // not reached
+ }
+}
+
+// getClassB - Just like getClass, but treat boolean values as bytes.
+static inline TypeClass getClassB(const Type *Ty) {
+ if (Ty == Type::BoolTy) return cByte;
+ return getClass(Ty);
}
namespace {
void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
- void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
- unsigned DestReg, const Type *DestTy,
- unsigned Op0Reg, unsigned Op1Reg);
- void doMultiplyConst(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, const Type *DestTy,
- unsigned Op0Reg, unsigned Op1Val);
void visitMul(BinaryOperator &B);
void visitDiv(BinaryOperator &B) { visitDivRem(B); }
Value *Op0, Value *Op1,
unsigned OperatorClass, unsigned TargetReg);
+ /// emitBinaryFPOperation - This method handles emission of floating point
+ /// Add (0), Sub (1), Mul (2), and Div (3) operations.
+ void emitBinaryFPOperation(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator IP,
+ Value *Op0, Value *Op1,
+ unsigned OperatorClass, unsigned TargetReg);
+
+ void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
+ Value *Op0, Value *Op1, unsigned TargetReg);
+
+ void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, const Type *DestTy,
+ unsigned Op0Reg, unsigned Op1Reg);
+ void doMultiplyConst(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, const Type *DestTy,
+ unsigned Op0Reg, unsigned Op1Val);
+
void emitDivRemOperation(MachineBasicBlock *BB,
MachineBasicBlock::iterator IP,
- unsigned Op0Reg, unsigned Op1Reg, bool isDiv,
- const Type *Ty, unsigned TargetReg);
+ Value *Op0, Value *Op1, bool isDiv,
+ unsigned TargetReg);
/// emitSetCCOperation - Common code shared between visitSetCondInst and
/// constant expression support.
}
unsigned getReg(Value *V, MachineBasicBlock *MBB,
MachineBasicBlock::iterator IPt) {
- unsigned &Reg = RegMap[V];
- if (Reg == 0) {
- Reg = makeAnotherReg(V->getType());
- RegMap[V] = Reg;
- }
-
// If this operand is a constant, emit the code to copy the constant into
// the register here...
//
if (Constant *C = dyn_cast<Constant>(V)) {
+ unsigned Reg = makeAnotherReg(V->getType());
copyConstantToRegister(MBB, IPt, C, Reg);
- RegMap.erase(V); // Assign a new name to this constant if ref'd again
+ return Reg;
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ unsigned Reg = makeAnotherReg(V->getType());
// Move the address of the global into the register
BuildMI(*MBB, IPt, X86::MOV32ri, 1, Reg).addGlobalAddress(GV);
- RegMap.erase(V); // Assign a new name to this address if ref'd again
+ return Reg;
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ // Do not emit noop casts at all.
+ if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()))
+ return getReg(CI->getOperand(0), MBB, IPt);
+ }
+
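+ // Otherwise this is an ordinary SSA value: hand back its assigned virtual
+ // register, creating one on first use.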
+ unsigned &Reg = RegMap[V];
+ if (Reg == 0) {
+ Reg = makeAnotherReg(V->getType());
+ RegMap[V] = Reg;
}
return Reg;
};
}
-/// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
-/// Representation.
-///
-enum TypeClass {
- cByte, cShort, cInt, cFP, cLong
-};
-
-/// getClass - Turn a primitive type into a "class" number which is based on the
-/// size of the type, and whether or not it is floating point.
-///
-static inline TypeClass getClass(const Type *Ty) {
- switch (Ty->getPrimitiveID()) {
- case Type::SByteTyID:
- case Type::UByteTyID: return cByte; // Byte operands are class #0
- case Type::ShortTyID:
- case Type::UShortTyID: return cShort; // Short operands are class #1
- case Type::IntTyID:
- case Type::UIntTyID:
- case Type::PointerTyID: return cInt; // Int's and pointers are class #2
-
- case Type::FloatTyID:
- case Type::DoubleTyID: return cFP; // Floating Point is #3
-
- case Type::LongTyID:
- case Type::ULongTyID: return cLong; // Longs are class #4
- default:
- assert(0 && "Invalid type to getClass!");
- return cByte; // not reached
- }
-}
-
-// getClassB - Just like getClass, but treat boolean values as bytes.
-static inline TypeClass getClassB(const Type *Ty) {
- if (Ty == Type::BoolTy) return cByte;
- return getClass(Ty);
-}
-
-
/// copyConstantToRegister - Output the instructions required to put the
/// specified constant into the specified register.
///
Class, R);
return;
- case Instruction::Mul: {
- unsigned Op0Reg = getReg(CE->getOperand(0), MBB, IP);
- unsigned Op1Reg = getReg(CE->getOperand(1), MBB, IP);
- doMultiply(MBB, IP, R, CE->getType(), Op0Reg, Op1Reg);
+ case Instruction::Mul:
+ emitMultiply(MBB, IP, CE->getOperand(0), CE->getOperand(1), R);
return;
- }
+
case Instruction::Div:
- case Instruction::Rem: {
- unsigned Op0Reg = getReg(CE->getOperand(0), MBB, IP);
- unsigned Op1Reg = getReg(CE->getOperand(1), MBB, IP);
- emitDivRemOperation(MBB, IP, Op0Reg, Op1Reg,
- CE->getOpcode() == Instruction::Div,
- CE->getType(), R);
+ case Instruction::Rem:
+ emitDivRemOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
+ CE->getOpcode() == Instruction::Div, R);
return;
- }
case Instruction::SetNE:
case Instruction::SetEQ:
MachineFrameInfo *MFI = F->getFrameInfo();
for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
- unsigned Reg = getReg(*I);
-
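+ // Only emit loads for arguments that are actually used; dead arguments still
+ // occupy their incoming stack slot, so longs and doubles must bump ArgOffset
+ // either way.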
+ bool ArgLive = !I->use_empty();
+ unsigned Reg = ArgLive ? getReg(*I) : 0;
int FI; // Frame object index
+
switch (getClassB(I->getType())) {
case cByte:
- FI = MFI->CreateFixedObject(1, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI);
+ if (ArgLive) {
+ FI = MFI->CreateFixedObject(1, ArgOffset);
+ addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI);
+ }
break;
case cShort:
- FI = MFI->CreateFixedObject(2, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI);
+ if (ArgLive) {
+ FI = MFI->CreateFixedObject(2, ArgOffset);
+ addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI);
+ }
break;
case cInt:
- FI = MFI->CreateFixedObject(4, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
+ if (ArgLive) {
+ FI = MFI->CreateFixedObject(4, ArgOffset);
+ addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
+ }
break;
case cLong:
- FI = MFI->CreateFixedObject(8, ArgOffset);
- addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
- addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4);
+ if (ArgLive) {
+ FI = MFI->CreateFixedObject(8, ArgOffset);
+ addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
+ addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4);
+ }
ArgOffset += 4; // longs require 4 additional bytes
break;
case cFP:
- unsigned Opcode;
- if (I->getType() == Type::FloatTy) {
- Opcode = X86::FLD32m;
- FI = MFI->CreateFixedObject(4, ArgOffset);
- } else {
- Opcode = X86::FLD64m;
- FI = MFI->CreateFixedObject(8, ArgOffset);
- ArgOffset += 4; // doubles require 4 additional bytes
+ if (ArgLive) {
+ unsigned Opcode;
+ if (I->getType() == Type::FloatTy) {
+ Opcode = X86::FLD32m;
+ FI = MFI->CreateFixedObject(4, ArgOffset);
+ } else {
+ Opcode = X86::FLD64m;
+ FI = MFI->CreateFixedObject(8, ArgOffset);
+ }
+ addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
}
- addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
+ if (I->getType() == Type::DoubleTy)
+ ArgOffset += 4; // doubles require 4 additional bytes
break;
default:
assert(0 && "Unhandled argument type!");
/// Note that this kill instruction will eventually be eliminated when
/// restrictions in the stackifier are relaxed.
///
-static bool RequiresFPRegKill(const BasicBlock *BB) {
+static bool RequiresFPRegKill(const MachineBasicBlock *MBB) {
#if 0
+ const BasicBlock *BB = MBB->getBasicBlock ();
for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) {
const BasicBlock *Succ = *SI;
pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
// If we haven't found an FP register use or def in this basic block, check
// to see if any of our successors has an FP PHI node, which will cause a
// copy to be inserted into this block.
- for (succ_const_iterator SI = succ_begin(BB->getBasicBlock()),
- E = succ_end(BB->getBasicBlock()); SI != E; ++SI) {
- MachineBasicBlock *SBB = MBBMap[*SI];
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SBB = *SI;
for (MachineBasicBlock::iterator I = SBB->begin();
I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
if (RegMap.getRegClass(I->getOperand(0).getReg())->getSize() == 10)
UsesFPReg:
// Okay, this block uses an FP register. If the block has successors (ie,
// it's not an unwind/return), insert the FP_REG_KILL instruction.
- if (BB->getBasicBlock()->getTerminator()->getNumSuccessors() &&
- RequiresFPRegKill(BB->getBasicBlock())) {
+ if (BB->succ_size () && RequiresFPRegKill(BB)) {
BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
++NumFPKill;
}
Instruction *User = cast<Instruction>(SCI->use_back());
if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
SCI->getParent() == User->getParent() &&
- getClassB(SCI->getOperand(0)->getType()) != cLong)
+ (getClassB(SCI->getOperand(0)->getType()) != cLong ||
+ SCI->getOpcode() == Instruction::SetEQ ||
+ SCI->getOpcode() == Instruction::SetNE))
return SCI;
}
return 0;
unsigned Op0r = getReg(Op0, MBB, IP);
// Special case handling of: cmp R, i
- if (Class == cByte || Class == cShort || Class == cInt)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- uint64_t Op1v = cast<ConstantInt>(CI)->getRawValue();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Class == cByte || Class == cShort || Class == cInt) {
+ unsigned Op1v = CI->getRawValue();
// Mask off any upper bits of the constant, if there are any...
Op1v &= (1ULL << (8 << Class)) - 1;
BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
return OpNum;
+ } else {
+ assert(Class == cLong && "Unknown integer class!");
+ unsigned LowCst = CI->getRawValue();
+ unsigned HiCst = CI->getRawValue() >> 32;
+ if (OpNum < 2) { // seteq, setne
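+ // XOR each half of the operand with the matching half of the constant
+ // (skipping halves that are zero), then OR the two results: the OR leaves
+ // ZF set exactly when both halves matched.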
+ unsigned LoTmp = Op0r;
+ if (LowCst != 0) {
+ LoTmp = makeAnotherReg(Type::IntTy);
+ BuildMI(*MBB, IP, X86::XOR32ri, 2, LoTmp).addReg(Op0r).addImm(LowCst);
+ }
+ unsigned HiTmp = Op0r+1;
+ if (HiCst != 0) {
+ HiTmp = makeAnotherReg(Type::IntTy);
+ BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
+ }
+ unsigned FinalTmp = makeAnotherReg(Type::IntTy);
+ BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
+ return OpNum;
+ } else {
+ // Emit a sequence of code which compares the high and low parts once
+ // each, then uses a conditional move to handle the overflow case. For
+ // example, a setlt for long would generate code like this:
+ //
+ // AL = lo(op1) < lo(op2) // Always unsigned comparison
+ // BL = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? AL : BL;
+ //
+
+ // FIXME: This would be much better if we had hierarchical register
+ // classes! Until then, hardcode registers so that we can deal with
+ // their aliases (because we don't have conditional byte moves).
+ //
+ BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
+ BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
+ BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
+ BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
+ BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
+ .addReg(X86::AX);
+ // NOTE: visitSetCondInst knows that the value is dumped into the BL
+ // register at this point for long values...
+ return OpNum;
+ }
}
+ }
// Special case handling of comparison against +/- 0.0
if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
break;
case cFP:
- BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r);
- BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
- BuildMI(*MBB, IP, X86::SAHF, 1);
+ if (0) { // for processors prior to the P6
+ BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
+ BuildMI(*MBB, IP, X86::SAHF, 1);
+ } else {
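+ // FUCOMI (P6 and later) sets EFLAGS directly, so no FNSTSW/SAHF is needed.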
+ BuildMI(*MBB, IP, X86::FpUCOMI, 2).addReg(Op0r).addReg(Op1r);
+ }
break;
case cLong:
FalseVal = ConstantExpr::getCast(F, Type::ShortTy);
}
-
+ unsigned TrueReg = getReg(TrueVal, MBB, IP);
+ unsigned FalseReg = getReg(FalseVal, MBB, IP);
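+ // If both arms of the select ended up in the same register, the result is
+ // just a copy; no condition evaluation or conditional move is needed.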
+ if (TrueReg == FalseReg) {
+ static const unsigned Opcode[] = {
+ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
+ };
+ BuildMI(*MBB, IP, Opcode[SelectClass], 1, DestReg).addReg(TrueReg);
+ if (SelectClass == cLong)
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(TrueReg+1);
+ return;
+ }
+
unsigned Opcode;
if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) {
// We successfully folded the setcc into the select instruction.
}
}
- unsigned TrueReg = getReg(TrueVal, MBB, IP);
- unsigned FalseReg = getReg(FalseVal, MBB, IP);
unsigned RealDestReg = DestReg;
/// just make a fall-through (but we don't currently).
///
void ISel::visitBranchInst(BranchInst &BI) {
+ // Update machine-CFG edges
+ BB->addSuccessor (MBBMap[BI.getSuccessor(0)]);
+ if (BI.isConditional())
+ BB->addSuccessor (MBBMap[BI.getSuccessor(1)]);
+
BasicBlock *NextBB = getBlockAfter(BI.getParent()); // BB after current one
if (!BI.isConditional()) { // Unconditional branch?
}
break;
case cLong:
- ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
- addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
- X86::ESP, ArgOffset).addReg(ArgReg);
- addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
- X86::ESP, ArgOffset+4).addReg(ArgReg+1);
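+ // If a long argument is a constant, store its two 32-bit halves with
+ // immediate moves instead of materializing it in a register pair first.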
+ if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
+ uint64_t Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
+ X86::ESP, ArgOffset).addImm(Val & ~0U);
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
+ X86::ESP, ArgOffset+4).addImm(Val >> 32ULL);
+ } else {
+ ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
+ addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
+ X86::ESP, ArgOffset+4).addReg(ArgReg+1);
+ }
ArgOffset += 4; // 8 byte entry, not 4.
break;
case Intrinsic::frameaddress:
case Intrinsic::memcpy:
case Intrinsic::memset:
+ case Intrinsic::readport:
+ case Intrinsic::writeport:
// We directly implement these intrinsics
break;
+ case Intrinsic::readio: {
+ // On X86, memory operations are in-order. Lower this intrinsic
+ // into a volatile load.
+ Instruction *Before = CI->getPrev();
+ LoadInst * LI = new LoadInst (CI->getOperand(1), "", true, CI);
+ CI->replaceAllUsesWith (LI);
+ BB->getInstList().erase (CI);
+ break;
+ }
+ case Intrinsic::writeio: {
+ // On X86, memory operations are in-order. Lower this intrinsic
+ // into a volatile store.
+ Instruction *Before = CI->getPrev();
+ StoreInst * SI = new StoreInst (CI->getOperand(1),
+ CI->getOperand(2), true, CI);
+ CI->replaceAllUsesWith (SI);
+ BB->getInstList().erase (CI);
+ break;
+ }
default:
// All other intrinsic calls we must lower.
Instruction *Before = CI->getPrev();
return;
}
+ case Intrinsic::readport: {
+ // First, determine that the size of the operand falls within the acceptable
+ // range for this architecture.
+ //
+ if (getClassB(CI.getOperand(1)->getType()) != cShort) {
+ std::cerr << "llvm.readport: Address size is not 16 bits\n";
+ exit(1);
+ }
+
+ // Now, move the I/O port address into the DX register and use the IN
+ // instruction to get the input data.
+ //
+ unsigned Class = getClass(CI.getCalledFunction()->getReturnType());
+ unsigned DestReg = getReg(CI);
+
+ // If the port is a single-byte constant, use the immediate form.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(1)))
+ if ((C->getRawValue() & 255) == C->getRawValue()) {
+ switch (Class) {
+ case cByte:
+ BuildMI(BB, X86::IN8ri, 1).addImm((unsigned char)C->getRawValue());
+ BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
+ return;
+ case cShort:
+ BuildMI(BB, X86::IN16ri, 1).addImm((unsigned char)C->getRawValue());
+ BuildMI(BB, X86::MOV16rr, 1, DestReg).addReg(X86::AX);
+ return;
+ case cInt:
+ BuildMI(BB, X86::IN32ri, 1).addImm((unsigned char)C->getRawValue());
+ BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);
+ return;
+ }
+ }
+
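+ // Otherwise move the port number into DX and use the register form of IN.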
+ unsigned Reg = getReg(CI.getOperand(1));
+ BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
+ switch (Class) {
+ case cByte:
+ BuildMI(BB, X86::IN8rr, 0);
+ BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
+ break;
+ case cShort:
+ BuildMI(BB, X86::IN16rr, 0);
+ BuildMI(BB, X86::MOV16rr, 1, DestReg).addReg(X86::AX);
+ break;
+ case cInt:
+ BuildMI(BB, X86::IN32rr, 0);
+ BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);
+ break;
+ default:
+ std::cerr << "Cannot do input on this data type";
+ exit (1);
+ }
+ return;
+ }
+
+ case Intrinsic::writeport: {
+ // First, determine that the size of the operand falls within the
+ // acceptable range for this architecture.
+ if (getClass(CI.getOperand(2)->getType()) != cShort) {
+ std::cerr << "llvm.writeport: Address size is not 16 bits\n";
+ exit(1);
+ }
+
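+ // Copy the value to be written into the A register of the appropriate
+ // width; OUT always takes its data from AL/AX/EAX.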
+ unsigned Class = getClassB(CI.getOperand(1)->getType());
+ unsigned ValReg = getReg(CI.getOperand(1));
+ switch (Class) {
+ case cByte:
+ BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
+ break;
+ case cShort:
+ BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(ValReg);
+ break;
+ case cInt:
+ BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(ValReg);
+ break;
+ default:
+ std::cerr << "llvm.writeport: invalid data type for X86 target";
+ exit(1);
+ }
+
+
+ // If the port is a single-byte constant, use the immediate form.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(2)))
+ if ((C->getRawValue() & 255) == C->getRawValue()) {
+ static const unsigned O[] = { X86::OUT8ir, X86::OUT16ir, X86::OUT32ir };
+ BuildMI(BB, O[Class], 1).addImm((unsigned char)C->getRawValue());
+ return;
+ }
+
+ // Otherwise, move the I/O port address into the DX register and the value
+ // to write into the AL/AX/EAX register.
+ static const unsigned Opc[] = { X86::OUT8rr, X86::OUT16rr, X86::OUT32rr };
+ unsigned Reg = getReg(CI.getOperand(2));
+ BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
+ BuildMI(BB, Opc[Class], 0);
+ return;
+ }
+
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
}
}
case Instruction::Call:
case Instruction::Invoke:
return false;
+ case Instruction::Load:
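+ // Folding would move LI across this load; refuse only when both loads are
+ // volatile, since their relative order must be preserved.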
+ if (cast<LoadInst>(It)->isVolatile() && LI.isVolatile())
+ return false;
+ break;
}
}
return true;
}
-
/// visitSimpleBinary - Implement simple binary operators for integral types...
/// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for
/// Xor.
std::swap(Op0, Op1); // Make sure any loads are in the RHS.
unsigned Class = getClassB(B.getType());
- if (isa<LoadInst>(Op1) && Class < cFP &&
+ if (isa<LoadInst>(Op1) && Class != cLong &&
isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op1), B)) {
- static const unsigned OpcodeTab[][3] = {
- // Arithmetic operators
- { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD
- { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB
-
- // Bitwise operators
- { X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND
- { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR
- { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm }, // XOR
- };
-
- assert(Class < cFP && "General code handles 64-bit integer types!");
- unsigned Opcode = OpcodeTab[OperatorClass][Class];
+ unsigned Opcode;
+ if (Class != cFP) {
+ static const unsigned OpcodeTab[][3] = {
+ // Arithmetic operators
+ { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD
+ { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB
+
+ // Bitwise operators
+ { X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND
+ { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR
+ { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm }, // XOR
+ };
+ Opcode = OpcodeTab[OperatorClass][Class];
+ } else {
+ static const unsigned OpcodeTab[][2] = {
+ { X86::FADD32m, X86::FADD64m }, // ADD
+ { X86::FSUB32m, X86::FSUB64m }, // SUB
+ };
+ const Type *Ty = Op0->getType();
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ Opcode = OpcodeTab[OperatorClass][Ty == Type::DoubleTy];
+ }
unsigned BaseReg, Scale, IndexReg, Disp;
getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), BaseReg,
return;
}
+ // If this is a floating point subtract, check to see if we can fold the first
+ // operand in.
+ if (Class == cFP && OperatorClass == 1 &&
+ isa<LoadInst>(Op0) &&
+ isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) {
+ const Type *Ty = Op0->getType();
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FSUBR32m : X86::FSUBR64m;
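+ // The reversed subtract computes mem - reg, which is Op0 - Op1 with the
+ // load (Op0) folded in as the memory operand.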
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(cast<LoadInst>(Op0)->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op1r = getReg(Op1);
+ addFullAddress(BuildMI(BB, Opcode, 2, DestReg).addReg(Op1r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+
emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg);
}
+
+/// emitBinaryFPOperation - This method handles emission of floating point
+/// Add (0), Sub (1), Mul (2), and Div (3) operations.
+void ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator IP,
+ Value *Op0, Value *Op1,
+ unsigned OperatorClass, unsigned DestReg) {
+
+ // Special case: op Reg, <const fp>
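+ // (+0.0 and +1.0 are skipped here; they are cheap to materialize directly,
+ // so a constant pool load would buy nothing.)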
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1))
+ if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) {
+ // Create a constant pool entry for this constant.
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned CPI = CP->getConstantPoolIndex(Op1C);
+ const Type *Ty = Op1->getType();
+
+ static const unsigned OpcodeTab[][4] = {
+ { X86::FADD32m, X86::FSUB32m, X86::FMUL32m, X86::FDIV32m }, // Float
+ { X86::FADD64m, X86::FSUB64m, X86::FMUL64m, X86::FDIV64m }, // Double
+ };
+
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
+ unsigned Op0r = getReg(Op0, BB, IP);
+ addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
+ DestReg).addReg(Op0r), CPI);
+ return;
+ }
+
+ // Special case: R1 = op <const fp>, R2
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
+ if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) {
+ // -0.0 - X === -X
+ unsigned op1Reg = getReg(Op1, BB, IP);
+ BuildMI(*BB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
+ return;
+ } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
+ // R1 = op CST, R2 --> R1 = opr R2, CST
+
+ // Create a constant pool entry for this constant.
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned CPI = CP->getConstantPoolIndex(CFP);
+ const Type *Ty = CFP->getType();
+
+ static const unsigned OpcodeTab[][4] = {
+ { X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float
+ { X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double
+ };
+
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
+ unsigned Op1r = getReg(Op1, BB, IP);
+ addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
+ DestReg).addReg(Op1r), CPI);
+ return;
+ }
+
+ // General case.
+ static const unsigned OpcodeTab[4] = {
+ X86::FpADD, X86::FpSUB, X86::FpMUL, X86::FpDIV
+ };
+
+ unsigned Opcode = OpcodeTab[OperatorClass];
+ unsigned Op0r = getReg(Op0, BB, IP);
+ unsigned Op1r = getReg(Op1, BB, IP);
+ BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
+}
+
/// emitSimpleBinaryOperation - Implement simple binary operators for integral
/// types... OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for
/// Or, 4 for Xor.
unsigned OperatorClass, unsigned DestReg) {
unsigned Class = getClassB(Op0->getType());
+ if (Class == cFP) {
+ assert(OperatorClass < 2 && "No logical ops for FP!");
+ emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg);
+ return;
+ }
+
// sub 0, X -> neg X
- if (OperatorClass == 1)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0)) {
- if (CI->isNullValue()) {
- unsigned op1Reg = getReg(Op1, MBB, IP);
- static unsigned const NEGTab[] = {
- X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r
- };
- BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg);
-
- if (Class == cLong) {
- // We just emitted: Dl = neg Sl
- // Now emit : T = addc Sh, 0
- // : Dh = neg T
- unsigned T = makeAnotherReg(Type::IntTy);
- BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0);
- BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T);
- }
- return;
- }
- } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
- if (CFP->isExactlyValue(-0.0)) {
- // -0.0 - X === -X
- unsigned op1Reg = getReg(Op1, MBB, IP);
- BuildMI(*MBB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
- return;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0))
+ if (OperatorClass == 1 && CI->isNullValue()) {
+ unsigned op1Reg = getReg(Op1, MBB, IP);
+ static unsigned const NEGTab[] = {
+ X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r
+ };
+ BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg);
+
+ if (Class == cLong) {
+ // We just emitted: Dl = neg Sl
+ // Now emit : T = addc Sh, 0
+ // : Dh = neg T
+ unsigned T = makeAnotherReg(Type::IntTy);
+ BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0);
+ BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T);
}
+ return;
+ }
- // Special case: op Reg, <const>
- if (isa<ConstantInt>(Op1)) {
- ConstantInt *Op1C = cast<ConstantInt>(Op1);
+ // Special case: op Reg, <const int>
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
unsigned Op0r = getReg(Op0, MBB, IP);
// xor X, -1 -> not X
}
// add X, -1 -> dec X
- if (OperatorClass == 0 && Op1C->isAllOnesValue()) {
- static unsigned const DECTab[] = {
- X86::DEC8r, X86::DEC16r, X86::DEC32r, 0, X86::DEC32r
- };
+ if (OperatorClass == 0 && Op1C->isAllOnesValue() && Class != cLong) {
+ // Note that we can't use dec for 64-bit decrements, because it does not
+ // set the carry flag!
+ static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r };
BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
- if (Class == cLong) // Dh = sbb Sh, 0
- BuildMI(*MBB, IP, X86::SBB32ri, 2, DestReg+1).addReg(Op0r+1).addImm(0);
return;
}
// add X, 1 -> inc X
- if (OperatorClass == 0 && Op1C->equalsInt(1)) {
- static unsigned const INCTab[] = {
- X86::INC8r, X86::INC16r, X86::INC32r, 0, X86::INC32r
- };
+ if (OperatorClass == 0 && Op1C->equalsInt(1) && Class != cLong) {
+ // Note that we can't use inc for 64-bit increments, because it does not
+ // set the carry flag!
+ static unsigned const INCTab[] = { X86::INC8r, X86::INC16r, X86::INC32r };
BuildMI(*MBB, IP, INCTab[Class], 1, DestReg).addReg(Op0r);
- if (Class == cLong) // Dh = adc Sh, 0
- BuildMI(*MBB, IP, X86::ADC32ri, 2, DestReg+1).addReg(Op0r+1).addImm(0);
return;
}
};
unsigned Opcode = OpcodeTab[OperatorClass][Class];
+ unsigned Op1l = cast<ConstantInt>(Op1C)->getRawValue();
- uint64_t Op1v = cast<ConstantInt>(Op1C)->getRawValue();
- BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1v &0xFFFFFFFF);
-
- if (Class == cLong) {
- static const unsigned TopTab[] = {
- X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
- };
- BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
- .addReg(Op0r+1).addImm(uint64_t(Op1v) >> 32);
+ if (Class != cLong) {
+ BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
+ return;
}
+
+ // If this is a long value and the high or low bits have a special
+ // property, emit some special cases.
+ unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL;
+
+ // If the constant is zero in the low 32-bits, just copy the low part
+ // across and apply the normal 32-bit operation to the high parts. There
+ // will be no carry or borrow into the top.
+ if (Op1l == 0) {
+ if (OperatorClass != 2) // All but and...
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r);
+ else
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
+ BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1)
+ .addReg(Op0r+1).addImm(Op1h);
+ return;
+ }
+
+ // If this is a logical operation and the top 32-bits are zero, just
+ // operate on the lower 32.
+ if (Op1h == 0 && OperatorClass > 1) {
+ BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg)
+ .addReg(Op0r).addImm(Op1l);
+ if (OperatorClass != 2) // All but and
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1);
+ else
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
+ return;
+ }
+
+ // TODO: We could handle lots of other special cases here, such as AND'ing
+ // with 0xFFFFFFFF00000000 -> noop, etc.
+
+ // Otherwise, code generate the full operation with a constant.
+ static const unsigned TopTab[] = {
+ X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
+ };
+
+ BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
+ BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
+ .addReg(Op0r+1).addImm(Op1h);
return;
}
// Finally, handle the general case now.
static const unsigned OpcodeTab[][5] = {
// Arithmetic operators
- { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD, X86::ADD32rr },// ADD
- { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB, X86::SUB32rr },// SUB
+ { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr }, // ADD
+ { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr }, // SUB
// Bitwise operators
{ X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr }, // AND
};
unsigned Opcode = OpcodeTab[OperatorClass][Class];
- assert(Opcode && "Floating point arguments to logical inst?");
unsigned Op0r = getReg(Op0, MBB, IP);
unsigned Op1r = getReg(Op1, MBB, IP);
BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
unsigned op0Reg, unsigned op1Reg) {
unsigned Class = getClass(DestTy);
switch (Class) {
- case cFP: // Floating point multiply
- BuildMI(*MBB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg);
- return;
case cInt:
case cShort:
BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg)
return Count+1;
}
+
+/// doMultiplyConst - This function is specialized to efficiently codegen an 8,
+/// 16, or 32-bit integer multiply by a constant.
void ISel::doMultiplyConst(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
unsigned DestReg, const Type *DestTy,
unsigned op0Reg, unsigned ConstRHS) {
+ static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr};
+ static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri};
+ static const unsigned ADDrrTab[] = {X86::ADD8rr, X86::ADD16rr, X86::ADD32rr};
+
unsigned Class = getClass(DestTy);
+ // Handle special cases here.
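+ // Multiplies by 0, 1, and 2 need no multiplier at all, and 3, 5, and 9 can
+ // be done with a single LEA (base + scale*index with scale 2, 4, or 8).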
+ switch (ConstRHS) {
+ case 0:
+ BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0);
+ return;
+ case 1:
+ BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg);
+ return;
+ case 2:
+ BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(op0Reg).addReg(op0Reg);
+ return;
+ case 3:
+ case 5:
+ case 9:
+ if (Class == cInt) {
+ addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, DestReg),
+ op0Reg, ConstRHS-1, op0Reg, 0);
+ return;
+ }
+ }
+
// If the element size is exactly a power of 2, use a shift to get it.
if (unsigned Shift = ExactLog2(ConstRHS)) {
switch (Class) {
}
// Most general case, emit a normal multiply...
- static const unsigned MOVriTab[] = {
- X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
- };
-
unsigned TmpReg = makeAnotherReg(DestTy);
BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
/// with the EAX register explicitly.
///
void ISel::visitMul(BinaryOperator &I) {
- unsigned Op0Reg = getReg(I.getOperand(0));
- unsigned DestReg = getReg(I);
+ unsigned ResultReg = getReg(I);
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+
+ // Fold loads into floating point multiplies.
+ if (getClass(Op0->getType()) == cFP) {
+ if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
+ if (!I.swapOperands())
+ std::swap(Op0, Op1); // Make sure any loads are in the RHS.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
+ if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+ const Type *Ty = Op0->getType();
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m;
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(LI->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op0r = getReg(Op0);
+ addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+ }
+
+ MachineBasicBlock::iterator IP = BB->end();
+ emitMultiply(BB, IP, Op0, Op1, ResultReg);
+}
+
+void ISel::emitMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+ Value *Op0, Value *Op1, unsigned DestReg) {
+ MachineBasicBlock &BB = *MBB;
+ TypeClass Class = getClass(Op0->getType());
// Simple scalar multiply?
- if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
- unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant
- MachineBasicBlock::iterator MBBI = BB->end();
- doMultiplyConst(BB, MBBI, DestReg, I.getType(), Op0Reg, Val);
+ unsigned Op0Reg = getReg(Op0, &BB, IP);
+ switch (Class) {
+ case cByte:
+ case cShort:
+ case cInt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ unsigned Val = (unsigned)CI->getRawValue(); // Isn't a 64-bit constant
+ doMultiplyConst(&BB, IP, DestReg, Op0->getType(), Op0Reg, Val);
} else {
- unsigned Op1Reg = getReg(I.getOperand(1));
- MachineBasicBlock::iterator MBBI = BB->end();
- doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
+ unsigned Op1Reg = getReg(Op1, &BB, IP);
+ doMultiply(&BB, IP, DestReg, Op1->getType(), Op0Reg, Op1Reg);
}
- } else {
- unsigned Op1Reg = getReg(I.getOperand(1));
+ return;
+ case cFP:
+ emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg);
+ return;
+ case cLong:
+ break;
+ }
- // Long value. We have to do things the hard way...
+ // Long value. We have to do things the hard way...
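+ // A 64-bit product is assembled from 32-bit pieces:
+ //   lo(result) = lo(lo(A)*lo(B))
+ //   hi(result) = hi(lo(A)*lo(B)) + hi(A)*lo(B) + lo(A)*hi(B)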
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ unsigned CLow = CI->getRawValue();
+ unsigned CHi = CI->getRawValue() >> 32;
+
+ if (CLow == 0) {
+ // If the low part of the constant is all zeros, things are simple.
+ BuildMI(BB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
+ doMultiplyConst(&BB, IP, DestReg+1, Type::UIntTy, Op0Reg, CHi);
+ return;
+ }
+
// Multiply the two low parts... capturing carry into EDX
- BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
- BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
-
- unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
- BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
- BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
-
- MachineBasicBlock::iterator MBBI = BB->end();
+ unsigned OverflowReg = 0;
+ if (CLow == 1) {
+ BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
+ } else {
+ unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
+ OverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, IP, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
+ BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
+ BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
+
+ BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
+ BuildMI(BB, IP, X86::MOV32rr, 1,
+ OverflowReg).addReg(X86::EDX); // AL*BL >> 32
+ }
+
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
- BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
-
- unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
- BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
- AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
+ doMultiplyConst(&BB, IP, AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
- MBBI = BB->end();
- unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
- BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
+ unsigned AHBLplusOverflowReg;
+ if (OverflowReg) {
+ AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, IP, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
+ AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
+ } else {
+ AHBLplusOverflowReg = AHBLReg;
+ }
- BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
- DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
+ if (CHi == 0) {
+ BuildMI(BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
+ } else {
+ unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
+ doMultiplyConst(&BB, IP, ALBHReg, Type::UIntTy, Op0Reg, CHi);
+
+ BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
+ DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
+ }
+ return;
}
+
+ // General 64x64 multiply
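+ // Same decomposition as the constant case above, but with both cross
+ // products computed from registers.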
+
+ unsigned Op1Reg = getReg(Op1, &BB, IP);
+ // Multiply the two low parts... capturing carry into EDX
+ BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
+ BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
+
+ unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
+ BuildMI(BB, IP, X86::MOV32rr, 1,
+ OverflowReg).addReg(X86::EDX); // AL*BL >> 32
+
+ unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
+ BuildMI(BB, IP, X86::IMUL32rr, 2,
+ AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
+
+ unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, IP, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
+ AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
+
+ unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
+ BuildMI(BB, IP, X86::IMUL32rr, 2,
+ ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
+
+ BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
+ DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
}
/// instructions work differently for signed and unsigned operands.
///
void ISel::visitDivRem(BinaryOperator &I) {
- unsigned Op0Reg = getReg(I.getOperand(0));
- unsigned Op1Reg = getReg(I.getOperand(1));
unsigned ResultReg = getReg(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Fold loads into floating point divides.
+ if (getClass(Op0->getType()) == cFP) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
+ if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+ const Type *Ty = Op0->getType();
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m;
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(LI->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op0r = getReg(Op0);
+ addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+
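+ // When the load is the dividend (Op0) we cannot swap the operands, but the
+ // reversed divide (mem / reg) still lets us fold it.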
+ if (LoadInst *LI = dyn_cast<LoadInst>(Op0))
+ if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+ const Type *Ty = Op0->getType();
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FDIVR32m : X86::FDIVR64m;
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(LI->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op1r = getReg(Op1);
+ addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op1r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+ }
+
MachineBasicBlock::iterator IP = BB->end();
- emitDivRemOperation(BB, IP, Op0Reg, Op1Reg, I.getOpcode() == Instruction::Div,
- I.getType(), ResultReg);
+ emitDivRemOperation(BB, IP, Op0, Op1,
+ I.getOpcode() == Instruction::Div, ResultReg);
}
void ISel::emitDivRemOperation(MachineBasicBlock *BB,
MachineBasicBlock::iterator IP,
- unsigned Op0Reg, unsigned Op1Reg, bool isDiv,
- const Type *Ty, unsigned ResultReg) {
+ Value *Op0, Value *Op1, bool isDiv,
+ unsigned ResultReg) {
+ const Type *Ty = Op0->getType();
unsigned Class = getClass(Ty);
switch (Class) {
case cFP: // Floating point divide
if (isDiv) {
- BuildMI(*BB, IP, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+ emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg);
+ return;
} else { // Floating point remainder...
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ unsigned Op1Reg = getReg(Op1, BB, IP);
MachineInstr *TheCall =
BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
std::vector<ValueRecord> Args;
case cLong: {
static const char *FnName[] =
{ "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" };
-
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ unsigned Op1Reg = getReg(Op1, BB, IP);
unsigned NameIdx = Ty->isUnsigned()*2 + isDiv;
MachineInstr *TheCall =
BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true);
unsigned ExtReg = ExtRegs[Class];
// Put the first operand into one of the A registers...
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ unsigned Op1Reg = getReg(Op1, BB, IP);
BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
if (isSigned) {
// Emit a sign extension instruction...
- unsigned ShiftResult = makeAnotherReg(Ty);
+ unsigned ShiftResult = makeAnotherReg(Op0->getType());
BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
} else {
} else { // Shifting more than 32 bits
Amount -= 32;
if (isLeftShift) {
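+ // Shifting left by 32 or more: the high word is the low source word
+ // shifted by the remaining amount (just a copy when that amount is zero),
+ // and the low word becomes zero.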
- BuildMI(*MBB, IP, X86::SHL32ri, 2,
- DestReg + 1).addReg(SrcReg).addImm(Amount);
- BuildMI(*MBB, IP, X86::MOV32ri, 1,
- DestReg).addImm(0);
+ if (Amount != 0) {
+ BuildMI(*MBB, IP, X86::SHL32ri, 2,
+ DestReg + 1).addReg(SrcReg).addImm(Amount);
+ } else {
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
+ }
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
} else {
- unsigned Opcode = isSigned ? X86::SAR32ri : X86::SHR32ri;
- BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(SrcReg+1).addImm(Amount);
+ if (Amount != 0) {
+ BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
+ DestReg).addReg(SrcReg+1).addImm(Amount);
+ } else {
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
+ }
BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
}
}
// Check to see if this load instruction is going to be folded into a binary
// instruction, like add. If so, we don't want to emit it. Wouldn't a real
// pattern matching instruction selector be nice?
- if (I.hasOneUse() && getClassB(I.getType()) < cFP) {
+ unsigned Class = getClassB(I.getType());
+ if (I.hasOneUse()) {
Instruction *User = cast<Instruction>(I.use_back());
switch (User->getOpcode()) {
- default: User = 0; break;
+ case Instruction::Cast:
+ // If this is a cast from a signed-integer type to a floating point type,
+ // fold the cast here.
+ if (getClass(User->getType()) == cFP &&
+ (I.getType() == Type::ShortTy || I.getType() == Type::IntTy ||
+ I.getType() == Type::LongTy)) {
+ unsigned DestReg = getReg(User);
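+ // FILD loads a signed 16-, 32-, or 64-bit integer from memory, which is
+ // why byte-sized and unsigned source types are excluded above.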
+ static const unsigned Opcode[] = {
+ 0/*BYTE*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m
+ };
+ unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+ getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
+ addFullAddress(BuildMI(BB, Opcode[Class], 5, DestReg),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ } else {
+ User = 0;
+ }
+ break;
+
case Instruction::Add:
case Instruction::Sub:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ if (Class == cLong) User = 0;
break;
+ case Instruction::Mul:
+ case Instruction::Div:
+ if (Class != cFP) User = 0;
+ break; // Folding only implemented for floating point.
+ default: User = 0; break;
}
if (User) {
if (User->getOperand(1) == &I &&
isSafeToFoldLoadIntoInstruction(I, *User))
return; // Eliminate the load!
+
+ // If this is a floating point sub or div, we won't be able to swap the
+ // operands, but we will still be able to eliminate the load.
+ if (Class == cFP && User->getOperand(0) == &I &&
+ !isa<LoadInst>(User->getOperand(1)) &&
+ (User->getOpcode() == Instruction::Sub ||
+ User->getOpcode() == Instruction::Div) &&
+ isSafeToFoldLoadIntoInstruction(I, *User))
+ return; // Eliminate the load!
}
}
unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
- unsigned Class = getClassB(I.getType());
if (Class == cLong) {
addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg),
BaseReg, Scale, IndexReg, Disp);
///
void ISel::visitCastInst(CastInst &CI) {
Value *Op = CI.getOperand(0);
+
+ unsigned SrcClass = getClassB(Op->getType());
+ unsigned DestClass = getClassB(CI.getType());
+ // Noop casts are not emitted: getReg will return the source operand as the
+ // register to use for any uses of the noop cast.
+ if (DestClass == SrcClass)
+ return;
+
// If this is a cast from a 32-bit integer to a Long type, and the only uses
// of the case are GEP instructions, then the cast does not need to be
// generated explicitly, it will be folded into the GEP.
- if (CI.getType() == Type::LongTy &&
- (Op->getType() == Type::IntTy || Op->getType() == Type::UIntTy)) {
+ if (DestClass == cLong && SrcClass == cInt) {
bool AllUsesAreGEPs = true;
for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I)
if (!isa<GetElementPtrInst>(*I)) {
if (AllUsesAreGEPs) return;
}
+ // If this cast converts a load from a short, int, or long integer to an FP
+ // value, we will have folded this cast away.
+ if (DestClass == cFP && isa<LoadInst>(Op) && Op->hasOneUse() &&
+ (Op->getType() == Type::ShortTy || Op->getType() == Type::IntTy ||
+ Op->getType() == Type::LongTy))
+ return;
+
+
unsigned DestReg = getReg(CI);
MachineBasicBlock::iterator MI = BB->end();
emitCastOperation(BB, MI, Op, CI.getType(), DestReg);
MachineBasicBlock::iterator IP,
Value *Src, const Type *DestTy,
unsigned DestReg) {
- unsigned SrcReg = getReg(Src, BB, IP);
const Type *SrcTy = Src->getType();
unsigned SrcClass = getClassB(SrcTy);
unsigned DestClass = getClassB(DestTy);
+ unsigned SrcReg = getReg(Src, BB, IP);
// Implement casts to bool by using compare on the operand followed by set if
// not zero on the result.
// a larger signed value, then use FLD on the larger value.
//
const Type *PromoteType = 0;
- unsigned PromoteOpcode;
+ unsigned PromoteOpcode = 0;
unsigned RealDestReg = DestReg;
switch (SrcTy->getPrimitiveID()) {
case Type::BoolTyID:
if (PromoteType) {
unsigned TmpReg = makeAnotherReg(PromoteType);
- unsigned Opc = SrcTy->isSigned() ? X86::MOVSX16rr8 : X86::MOVZX16rr8;
- BuildMI(*BB, IP, Opc, 1, TmpReg).addReg(SrcReg);
+ BuildMI(*BB, IP, PromoteOpcode, 1, TmpReg).addReg(SrcReg);
SrcTy = PromoteType;
SrcClass = getClass(PromoteType);
SrcReg = TmpReg;