#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/CFG.h"
#include "Support/Statistic.h"
using namespace llvm;
Value *Op0, Value *Op1,
unsigned OperatorClass, unsigned TargetReg);
+ /// emitBinaryFPOperation - This method handles emission of floating point
+ /// Add (0), Sub (1), Mul (2), and Div (3) operations.
+ void emitBinaryFPOperation(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator IP,
+ Value *Op0, Value *Op1,
+ unsigned OperatorClass, unsigned TargetReg);
+
void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1, unsigned TargetReg);
/// Note that this kill instruction will eventually be eliminated when
/// restrictions in the stackifier are relaxed.
///
-static bool RequiresFPRegKill(const BasicBlock *BB) {
+static bool RequiresFPRegKill(const MachineBasicBlock *MBB) {
#if 0
+ const BasicBlock *BB = MBB->getBasicBlock ();
for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) {
const BasicBlock *Succ = *SI;
pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
// If we haven't found an FP register use or def in this basic block, check
// to see if any of our successors has an FP PHI node, which will cause a
// copy to be inserted into this block.
- for (succ_const_iterator SI = succ_begin(BB->getBasicBlock()),
- E = succ_end(BB->getBasicBlock()); SI != E; ++SI) {
- MachineBasicBlock *SBB = MBBMap[*SI];
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SBB = *SI;
for (MachineBasicBlock::iterator I = SBB->begin();
I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
if (RegMap.getRegClass(I->getOperand(0).getReg())->getSize() == 10)
UsesFPReg:
// Okay, this block uses an FP register. If the block has successors (ie,
// it's not an unwind/return), insert the FP_REG_KILL instruction.
- if (BB->getBasicBlock()->getTerminator()->getNumSuccessors() &&
- RequiresFPRegKill(BB->getBasicBlock())) {
+ if (BB->succ_size () && RequiresFPRegKill(BB)) {
BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
++NumFPKill;
}
BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
break;
case cFP:
- BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r);
- BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
- BuildMI(*MBB, IP, X86::SAHF, 1);
+ if (0) { // for processors prior to the P6
+ BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r);
+ BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
+ BuildMI(*MBB, IP, X86::SAHF, 1);
+ } else {
+ BuildMI(*MBB, IP, X86::FpUCOMI, 2).addReg(Op0r).addReg(Op1r);
+ }
break;
case cLong:
FalseVal = ConstantExpr::getCast(F, Type::ShortTy);
}
-
+ unsigned TrueReg = getReg(TrueVal, MBB, IP);
+ unsigned FalseReg = getReg(FalseVal, MBB, IP);
+ if (TrueReg == FalseReg) {
+ static const unsigned Opcode[] = {
+ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
+ };
+ BuildMI(*MBB, IP, Opcode[SelectClass], 1, DestReg).addReg(TrueReg);
+ if (SelectClass == cLong)
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(TrueReg+1);
+ return;
+ }
+
unsigned Opcode;
if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) {
// We successfully folded the setcc into the select instruction.
}
}
- unsigned TrueReg = getReg(TrueVal, MBB, IP);
- unsigned FalseReg = getReg(FalseVal, MBB, IP);
unsigned RealDestReg = DestReg;
/// just make a fall-through (but we don't currently).
///
void ISel::visitBranchInst(BranchInst &BI) {
+ // Update machine-CFG edges
+ BB->addSuccessor (MBBMap[BI.getSuccessor(0)]);
+ if (BI.isConditional())
+ BB->addSuccessor (MBBMap[BI.getSuccessor(1)]);
+
BasicBlock *NextBB = getBlockAfter(BI.getParent()); // BB after current one
if (!BI.isConditional()) { // Unconditional branch?
case Intrinsic::writeport:
// We directly implement these intrinsics
break;
+ case Intrinsic::readio: {
+ // On X86, memory operations are in-order. Lower this intrinsic
+ // into a volatile load.
+ Instruction *Before = CI->getPrev();
+ LoadInst * LI = new LoadInst (CI->getOperand(1), "", true, CI);
+ CI->replaceAllUsesWith (LI);
+ BB->getInstList().erase (CI);
+ break;
+ }
+ case Intrinsic::writeio: {
+ // On X86, memory operations are in-order. Lower this intrinsic
+ // into a volatile store.
+ Instruction *Before = CI->getPrev();
+ StoreInst * LI = new StoreInst (CI->getOperand(1),
+ CI->getOperand(2), true, CI);
+ CI->replaceAllUsesWith (LI);
+ BB->getInstList().erase (CI);
+ break;
+ }
default:
// All other intrinsic calls we must lower.
Instruction *Before = CI->getPrev();
return;
}
- case Intrinsic::readport:
- //
- // First, determine that the size of the operand falls within the
- // acceptable range for this architecture.
+ case Intrinsic::readport: {
+ // First, determine that the size of the operand falls within the acceptable
+ // range for this architecture.
//
- if ((CI.getOperand(1)->getType()->getPrimitiveSize()) != 2) {
+ if (getClassB(CI.getOperand(1)->getType()) != cShort) {
std::cerr << "llvm.readport: Address size is not 16 bits\n";
- exit (1);
+ exit(1);
}
- //
// Now, move the I/O port address into the DX register and use the IN
// instruction to get the input data.
//
- BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(getReg(CI.getOperand(1)));
- switch (CI.getCalledFunction()->getReturnType()->getPrimitiveSize()) {
- case 1:
- BuildMI(BB, X86::IN8, 0);
- break;
- case 2:
- BuildMI(BB, X86::IN16, 0);
- break;
- case 4:
- BuildMI(BB, X86::IN32, 0);
- break;
- default:
- std::cerr << "Cannot do input on this data type";
- exit (1);
+ unsigned Class = getClass(CI.getCalledFunction()->getReturnType());
+ unsigned DestReg = getReg(CI);
+
+ // The value read by IN lands in AL/AX/EAX; copy it out with the move whose
+ // width matches that register (MOV8rr/MOV16rr/MOV32rr respectively).
+
+ // If the port is a single-byte constant, use the immediate form.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(1)))
+ if ((C->getRawValue() & 255) == C->getRawValue()) {
+ switch (Class) {
+ case cByte:
+ BuildMI(BB, X86::IN8ri, 1).addImm((unsigned char)C->getRawValue());
+ BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
+ return;
+ case cShort:
+ BuildMI(BB, X86::IN16ri, 1).addImm((unsigned char)C->getRawValue());
+ BuildMI(BB, X86::MOV16rr, 1, DestReg).addReg(X86::AX);
+ return;
+ case cInt:
+ BuildMI(BB, X86::IN32ri, 1).addImm((unsigned char)C->getRawValue());
+ BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);
+ return;
+ }
+ }
+
+ unsigned Reg = getReg(CI.getOperand(1));
+ BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
+ switch (Class) {
+ case cByte:
+ BuildMI(BB, X86::IN8rr, 0);
+ BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
+ break;
+ case cShort:
+ BuildMI(BB, X86::IN16rr, 0);
+ BuildMI(BB, X86::MOV16rr, 1, DestReg).addReg(X86::AX);
+ break;
+ case cInt:
+ BuildMI(BB, X86::IN32rr, 0);
+ BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);
+ break;
+ default:
+ std::cerr << "Cannot do input on this data type";
+ exit (1);
}
return;
+ }
- case Intrinsic::writeport:
- //
+ case Intrinsic::writeport: {
// First, determine that the size of the operand falls within the
// acceptable range for this architecture.
- //
- //
- if ((CI.getOperand(2)->getType()->getPrimitiveSize()) != 2) {
+ if (getClass(CI.getOperand(2)->getType()) != cShort) {
std::cerr << "llvm.writeport: Address size is not 16 bits\n";
- exit (1);
+ exit(1);
}
- //
- // Now, move the I/O port address into the DX register and the value to
- // write into the AL/AX/EAX register.
- //
- BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(getReg(CI.getOperand(2)));
- switch (CI.getOperand(1)->getType()->getPrimitiveSize()) {
- case 1:
- BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(getReg(CI.getOperand(1)));
- BuildMI(BB, X86::OUT8, 0);
- break;
- case 2:
- BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(getReg(CI.getOperand(1)));
- BuildMI(BB, X86::OUT16, 0);
- break;
- case 4:
- BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(getReg(CI.getOperand(1)));
- BuildMI(BB, X86::OUT32, 0);
- break;
- default:
- std::cerr << "Cannot do output on this data type";
- exit (1);
+ unsigned Class = getClassB(CI.getOperand(1)->getType());
+ unsigned ValReg = getReg(CI.getOperand(1));
+ // Stage the value to emit into the width-matched AL/AX/EAX register;
+ // OUT implicitly reads it from there.
+ switch (Class) {
+ case cByte:
+ BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
+ break;
+ case cShort:
+ BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(ValReg);
+ break;
+ case cInt:
+ BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(ValReg);
+ break;
+ default:
+ std::cerr << "llvm.writeport: invalid data type for X86 target";
+ exit(1);
}
- return;
+
+ // If the port is a single-byte constant, use the immediate form.
+ // NOTE(review): indexing O[]/Opc[] by Class assumes cByte/cShort/cInt
+ // are 0/1/2 (the default case above has already rejected other classes)
+ // — confirm against the class enum declaration.
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(2)))
+ if ((C->getRawValue() & 255) == C->getRawValue()) {
+ static const unsigned O[] = { X86::OUT8ir, X86::OUT16ir, X86::OUT32ir };
+ BuildMI(BB, O[Class], 1).addImm((unsigned char)C->getRawValue());
+ return;
+ }
+
+ // Otherwise, move the I/O port address into the DX register and the value
+ // to write into the AL/AX/EAX register.
+ static const unsigned Opc[] = { X86::OUT8rr, X86::OUT16rr, X86::OUT32rr };
+ unsigned Reg = getReg(CI.getOperand(2));
+ BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
+ BuildMI(BB, Opc[Class], 0);
+ return;
+ }
+
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
}
}
case Instruction::Call:
case Instruction::Invoke:
return false;
+ case Instruction::Load:
+ if (cast<LoadInst>(It)->isVolatile() && LI.isVolatile())
+ return false;
+ break;
}
}
return true;
}
-
/// visitSimpleBinary - Implement simple binary operators for integral types...
/// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for
/// Xor.
std::swap(Op0, Op1); // Make sure any loads are in the RHS.
unsigned Class = getClassB(B.getType());
- if (isa<LoadInst>(Op1) && Class < cFP &&
+ if (isa<LoadInst>(Op1) && Class != cLong &&
isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op1), B)) {
- static const unsigned OpcodeTab[][3] = {
- // Arithmetic operators
- { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD
- { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB
-
- // Bitwise operators
- { X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND
- { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR
- { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm }, // XOR
- };
-
- assert(Class < cFP && "General code handles 64-bit integer types!");
- unsigned Opcode = OpcodeTab[OperatorClass][Class];
+ unsigned Opcode;
+ if (Class != cFP) {
+ static const unsigned OpcodeTab[][3] = {
+ // Arithmetic operators
+ { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD
+ { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB
+
+ // Bitwise operators
+ { X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND
+ { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR
+ { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm }, // XOR
+ };
+ Opcode = OpcodeTab[OperatorClass][Class];
+ } else {
+ static const unsigned OpcodeTab[][2] = {
+ { X86::FADD32m, X86::FADD64m }, // ADD
+ { X86::FSUB32m, X86::FSUB64m }, // SUB
+ };
+ const Type *Ty = Op0->getType();
+ assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
+ Opcode = OpcodeTab[OperatorClass][Ty == Type::DoubleTy];
+ }
unsigned BaseReg, Scale, IndexReg, Disp;
getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), BaseReg,
return;
}
+ // If this is a floating point subtract, check to see if we can fold the first
+ // operand in.
+ if (Class == cFP && OperatorClass == 1 &&
+ isa<LoadInst>(Op0) &&
+ isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) {
+ const Type *Ty = Op0->getType();
+ assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FSUBR32m : X86::FSUBR64m;
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(cast<LoadInst>(Op0)->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op1r = getReg(Op1);
+ addFullAddress(BuildMI(BB, Opcode, 2, DestReg).addReg(Op1r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+
emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg);
}
+
+/// emitBinaryFPOperation - This method handles emission of floating point
+/// Add (0), Sub (1), Mul (2), and Div (3) operations.
+void ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator IP,
+ Value *Op0, Value *Op1,
+ unsigned OperatorClass, unsigned DestReg) {
+
+ // Special case: op Reg, <const fp>
+ // (Constants +0.0 and +1.0 are deliberately not pooled here; presumably
+ // they are materialized directly elsewhere, e.g. via FLD0/FLD1 — confirm.)
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1))
+ if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) {
+ // Create a constant pool entry for this constant.
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned CPI = CP->getConstantPoolIndex(Op1C);
+ const Type *Ty = Op1->getType();
+
+ static const unsigned OpcodeTab[][4] = {
+ { X86::FADD32m, X86::FSUB32m, X86::FMUL32m, X86::FDIV32m }, // Float
+ { X86::FADD64m, X86::FSUB64m, X86::FMUL64m, X86::FDIV64m }, // Double
+ };
+
+ // Parenthesize so the message string documents the assert instead of
+ // binding to the right-hand operand of ||.
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
+ unsigned Op0r = getReg(Op0, BB, IP);
+ addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
+ DestReg).addReg(Op0r), CPI);
+ return;
+ }
+
+ // Special case: R1 = op <const fp>, R2
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
+ if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) {
+ // -0.0 - X === -X
+ unsigned op1Reg = getReg(Op1, BB, IP);
+ BuildMI(*BB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
+ return;
+ } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
+ // R1 = op CST, R2 --> R1 = opr R2, CST
+ // (uses the reversed sub/div memory forms, FSUBR/FDIVR)
+
+ // Create a constant pool entry for this constant.
+ MachineConstantPool *CP = F->getConstantPool();
+ unsigned CPI = CP->getConstantPoolIndex(CFP);
+ const Type *Ty = CFP->getType();
+
+ static const unsigned OpcodeTab[][4] = {
+ { X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float
+ { X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double
+ };
+
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
+ unsigned Op1r = getReg(Op1, BB, IP);
+ addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
+ DestReg).addReg(Op1r), CPI);
+ return;
+ }
+
+ // General case.
+ static const unsigned OpcodeTab[4] = {
+ X86::FpADD, X86::FpSUB, X86::FpMUL, X86::FpDIV
+ };
+
+ unsigned Opcode = OpcodeTab[OperatorClass];
+ unsigned Op0r = getReg(Op0, BB, IP);
+ unsigned Op1r = getReg(Op1, BB, IP);
+ BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
+}
+
/// emitSimpleBinaryOperation - Implement simple binary operators for integral
/// types... OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for
/// Or, 4 for Xor.
unsigned OperatorClass, unsigned DestReg) {
unsigned Class = getClassB(Op0->getType());
+ if (Class == cFP) {
+ assert(OperatorClass < 2 && "No logical ops for FP!");
+ emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg);
+ return;
+ }
+
// sub 0, X -> neg X
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0))
if (OperatorClass == 1 && CI->isNullValue()) {
if (Class != cLong) {
BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
return;
- } else {
- // If this is a long value and the high or low bits have a special
- // property, emit some special cases.
- unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL;
-
- // If the constant is zero in the low 32-bits, just copy the low part
- // across and apply the normal 32-bit operation to the high parts. There
- // will be no carry or borrow into the top.
- if (Op1l == 0) {
- if (OperatorClass != 2) // All but and...
- BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r);
- else
- BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
- BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1)
- .addReg(Op0r+1).addImm(Op1h);
- return;
- }
-
- // If this is a logical operation and the top 32-bits are zero, just
- // operate on the lower 32.
- if (Op1h == 0 && OperatorClass > 1) {
- BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg)
- .addReg(Op0r).addImm(Op1l);
- if (OperatorClass != 2) // All but and
- BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1);
- else
- BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
- return;
- }
-
- // TODO: We could handle lots of other special cases here, such as AND'ing
- // with 0xFFFFFFFF00000000 -> noop, etc.
-
- // Otherwise, code generate the full operation with a constant.
- static const unsigned TopTab[] = {
- X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
- };
-
- BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
- BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
- .addReg(Op0r+1).addImm(Op1h);
- return;
}
- }
-
- // Special case: op Reg, <const fp>
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1))
- if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) {
- assert(OperatorClass < 2 && "FP operations only support add/sub!");
-
- // Create a constant pool entry for this constant.
- MachineConstantPool *CP = F->getConstantPool();
- unsigned CPI = CP->getConstantPoolIndex(Op1C);
- const Type *Ty = Op1->getType();
-
- static const unsigned OpcodeTab[][2] = {
- { X86::FADD32m, X86::FSUB32m }, // Float
- { X86::FADD64m, X86::FSUB64m }, // Double
- };
-
- assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
- unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
- unsigned Op0r = getReg(Op0, MBB, IP);
- addConstantPoolReference(BuildMI(*MBB, IP, Opcode, 5,
- DestReg).addReg(Op0r), CPI);
+
+ // If this is a long value and the high or low bits have a special
+ // property, emit some special cases.
+ unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL;
+
+ // If the constant is zero in the low 32-bits, just copy the low part
+ // across and apply the normal 32-bit operation to the high parts. There
+ // will be no carry or borrow into the top.
+ if (Op1l == 0) {
+ if (OperatorClass != 2) // All but and...
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r);
+ else
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
+ BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1)
+ .addReg(Op0r+1).addImm(Op1h);
return;
}
-
- // Special case: R1 = sub <const fp>, R2
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
- if (OperatorClass == 1) { // sub only
- if (CFP->isExactlyValue(-0.0)) {
- // -0.0 - X === -X
- unsigned op1Reg = getReg(Op1, MBB, IP);
- BuildMI(*MBB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
- return;
- } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
- // R1 = sub CST, R2 --> R1 = subr R2, CST
-
- // Create a constant pool entry for this constant.
- MachineConstantPool *CP = F->getConstantPool();
- unsigned CPI = CP->getConstantPoolIndex(CFP);
- const Type *Ty = CFP->getType();
-
- static const unsigned OpcodeTab[2] = { X86::FSUBR32m, X86::FSUBR64m };
-
- assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
- unsigned Opcode = OpcodeTab[Ty != Type::FloatTy];
- unsigned Op1r = getReg(Op1, MBB, IP);
- addConstantPoolReference(BuildMI(*MBB, IP, Opcode, 5,
- DestReg).addReg(Op1r), CPI);
- return;
- }
+
+ // If this is a logical operation and the top 32-bits are zero, just
+ // operate on the lower 32.
+ if (Op1h == 0 && OperatorClass > 1) {
+ BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg)
+ .addReg(Op0r).addImm(Op1l);
+ if (OperatorClass != 2) // All but and
+ BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1);
+ else
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
+ return;
}
+
+ // TODO: We could handle lots of other special cases here, such as AND'ing
+ // with 0xFFFFFFFF00000000 -> noop, etc.
+
+ // Otherwise, code generate the full operation with a constant.
+ static const unsigned TopTab[] = {
+ X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
+ };
+
+ BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
+ BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
+ .addReg(Op0r+1).addImm(Op1h);
+ return;
+ }
// Finally, handle the general case now.
static const unsigned OpcodeTab[][5] = {
// Arithmetic operators
- { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD, X86::ADD32rr },// ADD
- { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB, X86::SUB32rr },// SUB
+ { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr }, // ADD
+ { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr }, // SUB
// Bitwise operators
{ X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr }, // AND
};
unsigned Opcode = OpcodeTab[OperatorClass][Class];
- assert(Opcode && "Floating point arguments to logical inst?");
unsigned Op0r = getReg(Op0, MBB, IP);
unsigned Op1r = getReg(Op1, MBB, IP);
BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
unsigned op0Reg, unsigned op1Reg) {
unsigned Class = getClass(DestTy);
switch (Class) {
- case cFP: // Floating point multiply
- BuildMI(*MBB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg);
- return;
case cInt:
case cShort:
BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg)
unsigned op0Reg, unsigned ConstRHS) {
static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr};
static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri};
+ static const unsigned ADDrrTab[] = {X86::ADD8rr, X86::ADD16rr, X86::ADD32rr};
unsigned Class = getClass(DestTy);
- if (ConstRHS == 0) {
+ // Handle special cases here.
+ switch (ConstRHS) {
+ case 0:
BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0);
return;
- } else if (ConstRHS == 1) {
+ case 1:
BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg);
return;
+ case 2:
+ BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(op0Reg).addReg(op0Reg);
+ return;
+ case 3:
+ case 5:
+ case 9:
+ if (Class == cInt) {
+ addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, DestReg),
+ op0Reg, ConstRHS-1, op0Reg, 0);
+ return;
+ }
}
// If the element size is exactly a power of 2, use a shift to get it.
void ISel::visitMul(BinaryOperator &I) {
unsigned ResultReg = getReg(I);
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+
+ // Fold loads into floating point multiplies.
+ if (getClass(Op0->getType()) == cFP) {
+ if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
+ // NOTE(review): assumes swapOperands() returns false on success and
+ // true when the operands cannot be exchanged — confirm against the
+ // Instruction API. On success, keep the local views in sync.
+ if (!I.swapOperands())
+ std::swap(Op0, Op1); // Make sure any loads are in the RHS.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
+ if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+ const Type *Ty = Op0->getType();
+ // Parenthesized so the message string documents the assert instead
+ // of binding to the right-hand operand of ||.
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m;
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(LI->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op0r = getReg(Op0);
+ addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+ }
+
MachineBasicBlock::iterator IP = BB->end();
- emitMultiply(BB, IP, I.getOperand(0), I.getOperand(1), ResultReg);
+ emitMultiply(BB, IP, Op0, Op1, ResultReg);
}
void ISel::emitMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
}
return;
case cFP:
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1))
- if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) {
- // Create a constant pool entry for this constant.
- MachineConstantPool *CP = F->getConstantPool();
- unsigned CPI = CP->getConstantPoolIndex(Op1C);
- const Type *Ty = Op1C->getType();
-
- static const unsigned OpcodeTab[2] = { X86::FMUL32m, X86::FMUL64m };
-
- assert(Ty == Type::FloatTy||Ty == Type::DoubleTy&&"Unknown FP type!");
- unsigned Opcode = OpcodeTab[Ty != Type::FloatTy];
- addConstantPoolReference(BuildMI(*MBB, IP, Opcode, 5,
- DestReg).addReg(Op0Reg), CPI);
- return;
- }
-
- {
- unsigned Op1Reg = getReg(Op1, &BB, IP);
- doMultiply(&BB, IP, DestReg, Op1->getType(), Op0Reg, Op1Reg);
- return;
- }
+ emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg);
+ return;
case cLong:
break;
}
///
void ISel::visitDivRem(BinaryOperator &I) {
unsigned ResultReg = getReg(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Fold loads into floating point divides. Only do this for Div: this
+ // visitor also handles Rem, and an FP Rem is lowered to a library call
+ // in emitDivRemOperation, so folding the load into an FDIV/FDIVR here
+ // would compute a division instead of the remainder. This also matches
+ // visitLoadInst, which only elides loads feeding FP Div users.
+ if (I.getOpcode() == Instruction::Div && getClass(Op0->getType()) == cFP) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
+ if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+ const Type *Ty = Op0->getType();
+ // Parenthesized so the message string documents the assert instead
+ // of binding to the right-hand operand of ||.
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m;
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(LI->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op0r = getReg(Op0);
+ addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+
+ // Load on the left-hand side: use the reversed divide (FDIVR) so the
+ // memory operand becomes the dividend.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Op0))
+ if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+ const Type *Ty = Op0->getType();
+ assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
+ unsigned Opcode = Ty == Type::FloatTy ? X86::FDIVR32m : X86::FDIVR64m;
+
+ unsigned BaseReg, Scale, IndexReg, Disp;
+ getAddressingMode(LI->getOperand(0), BaseReg,
+ Scale, IndexReg, Disp);
+
+ unsigned Op1r = getReg(Op1);
+ addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op1r),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ }
+ }
+
MachineBasicBlock::iterator IP = BB->end();
- emitDivRemOperation(BB, IP, I.getOperand(0), I.getOperand(1),
+ emitDivRemOperation(BB, IP, Op0, Op1,
I.getOpcode() == Instruction::Div, ResultReg);
}
switch (Class) {
case cFP: // Floating point divide
if (isDiv) {
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
- if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
- // Create a constant pool entry for this constant.
- MachineConstantPool *CP = F->getConstantPool();
- unsigned CPI = CP->getConstantPoolIndex(CFP);
- static const unsigned OpcodeTab[2] = { X86::FDIVR32m, X86::FDIVR64m };
-
- assert(Ty == Type::FloatTy||Ty == Type::DoubleTy&&"Unknown FP type!");
- unsigned Opcode = OpcodeTab[Ty != Type::FloatTy];
- unsigned Op1Reg = getReg(Op1, BB, IP);
- addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
- ResultReg).addReg(Op1Reg), CPI);
- return;
- }
-
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
- if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
- // Create a constant pool entry for this constant.
- MachineConstantPool *CP = F->getConstantPool();
- unsigned CPI = CP->getConstantPoolIndex(CFP);
-
- static const unsigned OpcodeTab[2] = { X86::FDIV32m, X86::FDIV64m };
-
- assert(Ty == Type::FloatTy||Ty == Type::DoubleTy&&"Unknown FP type!");
- unsigned Opcode = OpcodeTab[Ty != Type::FloatTy];
- unsigned Op0Reg = getReg(Op0, BB, IP);
- addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
- ResultReg).addReg(Op0Reg), CPI);
- return;
- }
-
- unsigned Op0Reg = getReg(Op0, BB, IP);
- unsigned Op1Reg = getReg(Op1, BB, IP);
- BuildMI(*BB, IP, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+ emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg);
+ return;
} else { // Floating point remainder...
unsigned Op0Reg = getReg(Op0, BB, IP);
unsigned Op1Reg = getReg(Op1, BB, IP);
// Check to see if this load instruction is going to be folded into a binary
// instruction, like add. If so, we don't want to emit it. Wouldn't a real
// pattern matching instruction selector be nice?
- if (I.hasOneUse() && getClassB(I.getType()) < cFP) {
+ unsigned Class = getClassB(I.getType());
+ if (I.hasOneUse()) {
Instruction *User = cast<Instruction>(I.use_back());
switch (User->getOpcode()) {
- default: User = 0; break;
+ case Instruction::Cast:
+ // If this is a cast from a signed-integer type to a floating point type,
+ // fold the cast here.
+ if (getClass(User->getType()) == cFP &&
+ (I.getType() == Type::ShortTy || I.getType() == Type::IntTy ||
+ I.getType() == Type::LongTy)) {
+ unsigned DestReg = getReg(User);
+ static const unsigned Opcode[] = {
+ 0/*BYTE*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m
+ };
+ unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+ getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
+ addFullAddress(BuildMI(BB, Opcode[Class], 5, DestReg),
+ BaseReg, Scale, IndexReg, Disp);
+ return;
+ } else {
+ User = 0;
+ }
+ break;
+
case Instruction::Add:
case Instruction::Sub:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ if (Class == cLong) User = 0;
break;
+ case Instruction::Mul:
+ case Instruction::Div:
+ if (Class != cFP) User = 0;
+ break; // Folding only implemented for floating point.
+ default: User = 0; break;
}
if (User) {
if (User->getOperand(1) == &I &&
isSafeToFoldLoadIntoInstruction(I, *User))
return; // Eliminate the load!
+
+ // If this is a floating point sub or div, we won't be able to swap the
+ // operands, but we will still be able to eliminate the load.
+ if (Class == cFP && User->getOperand(0) == &I &&
+ !isa<LoadInst>(User->getOperand(1)) &&
+ (User->getOpcode() == Instruction::Sub ||
+ User->getOpcode() == Instruction::Div) &&
+ isSafeToFoldLoadIntoInstruction(I, *User))
+ return; // Eliminate the load!
}
}
unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
- unsigned Class = getClassB(I.getType());
if (Class == cLong) {
addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg),
BaseReg, Scale, IndexReg, Disp);
void ISel::visitCastInst(CastInst &CI) {
Value *Op = CI.getOperand(0);
- // Noop casts are not even emitted.
- if (getClassB(CI.getType()) == getClassB(Op->getType()))
+ unsigned SrcClass = getClassB(Op->getType());
+ unsigned DestClass = getClassB(CI.getType());
+ // Noop casts are not emitted: getReg will return the source operand as the
+ // register to use for any uses of the noop cast.
+ if (DestClass == SrcClass)
return;
// If this is a cast from a 32-bit integer to a Long type, and the only uses
// of the case are GEP instructions, then the cast does not need to be
// generated explicitly, it will be folded into the GEP.
- if (CI.getType() == Type::LongTy &&
- (Op->getType() == Type::IntTy || Op->getType() == Type::UIntTy)) {
+ if (DestClass == cLong && SrcClass == cInt) {
bool AllUsesAreGEPs = true;
for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I)
if (!isa<GetElementPtrInst>(*I)) {
if (AllUsesAreGEPs) return;
}
+ // If this cast converts a load from a short,int, or long integer to a FP
+ // value, we will have folded this cast away.
+ if (DestClass == cFP && isa<LoadInst>(Op) && Op->hasOneUse() &&
+ (Op->getType() == Type::ShortTy || Op->getType() == Type::IntTy ||
+ Op->getType() == Type::LongTy))
+ return;
+
+
unsigned DestReg = getReg(CI);
MachineBasicBlock::iterator MI = BB->end();
emitCastOperation(BB, MI, Op, CI.getType(), DestReg);
MachineBasicBlock::iterator IP,
Value *Src, const Type *DestTy,
unsigned DestReg) {
- unsigned SrcReg = getReg(Src, BB, IP);
const Type *SrcTy = Src->getType();
unsigned SrcClass = getClassB(SrcTy);
unsigned DestClass = getClassB(DestTy);
+ unsigned SrcReg = getReg(Src, BB, IP);
// Implement casts to bool by using compare on the operand followed by set if
// not zero on the result.