X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FInstSelectSimple.cpp;h=8dbf61ed7c530ee85ac47723b201d69e50edf0f7;hb=427aeb476f80adb1dbc7ab93210a8ae6e7bf7df8;hp=6ab950459734d45332cb8ec3f3a57516daf1603f;hpb=ab18020cbd52d3fee2b1b05eeb26c420ac09e9d6;p=oota-llvm.git diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index 6ab95045973..8dbf61ed7c5 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -35,6 +35,43 @@ using namespace llvm; namespace { Statistic<> NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added"); + + /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86 + /// Representation. + /// + enum TypeClass { + cByte, cShort, cInt, cFP, cLong + }; +} + +/// getClass - Turn a primitive type into a "class" number which is based on the +/// size of the type, and whether or not it is floating point. +/// +static inline TypeClass getClass(const Type *Ty) { + switch (Ty->getPrimitiveID()) { + case Type::SByteTyID: + case Type::UByteTyID: return cByte; // Byte operands are class #0 + case Type::ShortTyID: + case Type::UShortTyID: return cShort; // Short operands are class #1 + case Type::IntTyID: + case Type::UIntTyID: + case Type::PointerTyID: return cInt; // Int's and pointers are class #2 + + case Type::FloatTyID: + case Type::DoubleTyID: return cFP; // Floating Point is #3 + + case Type::LongTyID: + case Type::ULongTyID: return cLong; // Longs are class #4 + default: + assert(0 && "Invalid type to getClass!"); + return cByte; // not reached + } +} + +// getClassB - Just like getClass, but treat boolean values as bytes. +static inline TypeClass getClassB(const Type *Ty) { + if (Ty == Type::BoolTy) return cByte; + return getClass(Ty); } namespace { @@ -174,6 +211,8 @@ namespace { unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI); + void visitSelectInst(SelectInst &SI); + // Memory Instructions void visitLoadInst(LoadInst &I); @@ -261,6 +300,12 @@ namespace { Value *Op, Value *ShiftAmount, bool isLeftShift, const Type *ResultTy, unsigned DestReg); + /// emitSelectOperation - Common code shared between visitSelectInst and the + /// constant expression support. + void emitSelectOperation(MachineBasicBlock *MBB, + MachineBasicBlock::iterator IP, + Value *Cond, Value *TrueVal, Value *FalseVal, + unsigned DestReg); /// copyConstantToRegister - Output the instructions required to put the /// specified constant into the specified register. @@ -307,22 +352,28 @@ namespace { } unsigned getReg(Value *V, MachineBasicBlock *MBB, MachineBasicBlock::iterator IPt) { - unsigned &Reg = RegMap[V]; - if (Reg == 0) { - Reg = makeAnotherReg(V->getType()); - RegMap[V] = Reg; - } - // If this operand is a constant, emit the code to copy the constant into // the register here... // if (Constant *C = dyn_cast(V)) { + unsigned Reg = makeAnotherReg(V->getType()); copyConstantToRegister(MBB, IPt, C, Reg); - RegMap.erase(V); // Assign a new name to this constant if ref'd again + return Reg; } else if (GlobalValue *GV = dyn_cast(V)) { + unsigned Reg = makeAnotherReg(V->getType()); // Move the address of the global into the register BuildMI(*MBB, IPt, X86::MOV32ri, 1, Reg).addGlobalAddress(GV); - RegMap.erase(V); // Assign a new name to this address if ref'd again + return Reg; + } else if (CastInst *CI = dyn_cast(V)) { + // Do not emit noop casts at all. + if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType())) + return getReg(CI->getOperand(0), MBB, IPt); + } + + unsigned &Reg = RegMap[V]; + if (Reg == 0) { + Reg = makeAnotherReg(V->getType()); + RegMap[V] = Reg; } return Reg; @@ -330,44 +381,6 @@ namespace { }; } -/// TypeClass - Used by the X86 backend to group LLVM types by their basic X86 -/// Representation. -/// -enum TypeClass { - cByte, cShort, cInt, cFP, cLong -}; - -/// getClass - Turn a primitive type into a "class" number which is based on the -/// size of the type, and whether or not it is floating point. -/// -static inline TypeClass getClass(const Type *Ty) { - switch (Ty->getPrimitiveID()) { - case Type::SByteTyID: - case Type::UByteTyID: return cByte; // Byte operands are class #0 - case Type::ShortTyID: - case Type::UShortTyID: return cShort; // Short operands are class #1 - case Type::IntTyID: - case Type::UIntTyID: - case Type::PointerTyID: return cInt; // Int's and pointers are class #2 - - case Type::FloatTyID: - case Type::DoubleTyID: return cFP; // Floating Point is #3 - - case Type::LongTyID: - case Type::ULongTyID: return cLong; // Longs are class #4 - default: - assert(0 && "Invalid type to getClass!"); - return cByte; // not reached - } -} - -// getClassB - Just like getClass, but treat boolean values as bytes. -static inline TypeClass getClassB(const Type *Ty) { - if (Ty == Type::BoolTy) return cByte; - return getClass(Ty); -} - - /// copyConstantToRegister - Output the instructions required to put the /// specified constant into the specified register. /// @@ -426,6 +439,11 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, CE->getOpcode() == Instruction::Shl, CE->getType(), R); return; + case Instruction::Select: + emitSelectOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), + CE->getOperand(2), R); + return; + default: std::cerr << "Offending expr: " << C << "\n"; assert(0 && "Constant expression not yet handled!\n"); @@ -498,39 +516,51 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { MachineFrameInfo *MFI = F->getFrameInfo(); for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) { - unsigned Reg = getReg(*I); - + bool ArgLive = !I->use_empty(); + unsigned Reg = ArgLive ? getReg(*I) : 0; int FI; // Frame object index + switch (getClassB(I->getType())) { case cByte: - FI = MFI->CreateFixedObject(1, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI); + if (ArgLive) { + FI = MFI->CreateFixedObject(1, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI); + } break; case cShort: - FI = MFI->CreateFixedObject(2, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI); + if (ArgLive) { + FI = MFI->CreateFixedObject(2, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI); + } break; case cInt: - FI = MFI->CreateFixedObject(4, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); + if (ArgLive) { + FI = MFI->CreateFixedObject(4, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); + } break; case cLong: - FI = MFI->CreateFixedObject(8, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); - addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4); + if (ArgLive) { + FI = MFI->CreateFixedObject(8, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); + addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4); + } ArgOffset += 4; // longs require 4 additional bytes break; case cFP: - unsigned Opcode; - if (I->getType() == Type::FloatTy) { - Opcode = X86::FLD32m; - FI = MFI->CreateFixedObject(4, ArgOffset); - } else { - Opcode = X86::FLD64m; - FI = MFI->CreateFixedObject(8, ArgOffset); - ArgOffset += 4; // doubles require 4 additional bytes + if (ArgLive) { + unsigned Opcode; + if (I->getType() == Type::FloatTy) { + Opcode = X86::FLD32m; + FI = MFI->CreateFixedObject(4, ArgOffset); + } else { + Opcode = X86::FLD64m; + FI = MFI->CreateFixedObject(8, ArgOffset); + } + addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI); } - addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI); + if (I->getType() == Type::DoubleTy) + ArgOffset += 4; // doubles require 4 additional bytes break; default: assert(0 && "Unhandled argument type!"); @@ -597,17 +627,24 @@ void ISel::SelectPHINodes() { // If this is a constant or GlobalValue, we may have to insert code // into the basic block to compute it into a virtual register. if (isa(Val) || isa(Val)) { - // Because we don't want to clobber any values which might be in - // physical registers with the computation of this constant (which - // might be arbitrarily complex if it is a constant expression), - // just insert the computation at the top of the basic block. - MachineBasicBlock::iterator PI = PredMBB->begin(); - - // Skip over any PHI nodes though! - while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI) - ++PI; - - ValReg = getReg(Val, PredMBB, PI); + if (isa(Val)) { + // Because we don't want to clobber any values which might be in + // physical registers with the computation of this constant (which + // might be arbitrarily complex if it is a constant expression), + // just insert the computation at the top of the basic block. + MachineBasicBlock::iterator PI = PredMBB->begin(); + + // Skip over any PHI nodes though! + while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI) + ++PI; + + ValReg = getReg(Val, PredMBB, PI); + } else { + // Simple constants get emitted at the end of the basic block, + // before any terminator instructions. We "know" that the code to + // move a constant into a register will never clobber any flags. + ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator()); + } } else { ValReg = getReg(Val); } @@ -723,19 +760,22 @@ void ISel::InsertFPRegKills() { } -// canFoldSetCCIntoBranch - Return the setcc instruction if we can fold it into -// the conditional branch instruction which is the only user of the cc -// instruction. This is the case if the conditional branch is the only user of -// the setcc, and if the setcc is in the same basic block as the conditional -// branch. We also don't handle long arguments below, so we reject them here as -// well. +// canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold +// it into the conditional branch or select instruction which is the only user +// of the cc instruction. This is the case if the conditional branch is the +// only user of the setcc, and if the setcc is in the same basic block as the +// conditional branch. We also don't handle long arguments below, so we reject +// them here as well. // -static SetCondInst *canFoldSetCCIntoBranch(Value *V) { +static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) { if (SetCondInst *SCI = dyn_cast(V)) - if (SCI->hasOneUse() && isa(SCI->use_back()) && - SCI->getParent() == cast(SCI->use_back())->getParent()) { - const Type *Ty = SCI->getOperand(0)->getType(); - if (Ty != Type::LongTy && Ty != Type::ULongTy) + if (SCI->hasOneUse()) { + Instruction *User = cast(SCI->use_back()); + if ((isa(User) || isa(User)) && + SCI->getParent() == User->getParent() && + (getClassB(SCI->getOperand(0)->getType()) != cLong || + SCI->getOpcode() == Instruction::SetEQ || + SCI->getOpcode() == Instruction::SetNE)) return SCI; } return 0; @@ -785,9 +825,9 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, unsigned Op0r = getReg(Op0, MBB, IP); // Special case handling of: cmp R, i - if (Class == cByte || Class == cShort || Class == cInt) - if (ConstantInt *CI = dyn_cast(Op1)) { - uint64_t Op1v = cast(CI)->getRawValue(); + if (ConstantInt *CI = dyn_cast(Op1)) { + if (Class == cByte || Class == cShort || Class == cInt) { + unsigned Op1v = CI->getRawValue(); // Mask off any upper bits of the constant, if there are any... Op1v &= (1ULL << (8 << Class)) - 1; @@ -812,7 +852,52 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v); return OpNum; + } else { + assert(Class == cLong && "Unknown integer class!"); + unsigned LowCst = CI->getRawValue(); + unsigned HiCst = CI->getRawValue() >> 32; + if (OpNum < 2) { // seteq, setne + unsigned LoTmp = Op0r; + if (LowCst != 0) { + LoTmp = makeAnotherReg(Type::IntTy); + BuildMI(*MBB, IP, X86::XOR32ri, 2, LoTmp).addReg(Op0r).addImm(LowCst); + } + unsigned HiTmp = Op0r+1; + if (HiCst != 0) { + HiTmp = makeAnotherReg(Type::IntTy); + BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst); + } + unsigned FinalTmp = makeAnotherReg(Type::IntTy); + BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp); + return OpNum; + } else { + // Emit a sequence of code which compares the high and low parts once + // each, then uses a conditional move to handle the overflow case. For + // example, a setlt for long would generate code like this: + // + // AL = lo(op1) < lo(op2) // Signedness depends on operands + // BL = hi(op1) < hi(op2) // Always unsigned comparison + // dest = hi(op1) == hi(op2) ? AL : BL; + // + + // FIXME: This would be much better if we had hierarchical register + // classes! Until then, hardcode registers so that we can deal with + // their aliases (because we don't have conditional byte moves). + // + BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst); + BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL); + BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst); + BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL); + BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH); + BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH); + BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX) + .addReg(X86::AX); + // NOTE: visitSetCondInst knows that the value is dumped into the BL + // register at this point for long values... + return OpNum; + } } + } // Special case handling of comparison against +/- 0.0 if (ConstantFP *CFP = dyn_cast(Op1)) @@ -883,12 +968,12 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, return OpNum; } - /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized /// register, then move it to wherever the result should be. /// void ISel::visitSetCondInst(SetCondInst &I) { - if (canFoldSetCCIntoBranch(&I)) return; // Fold this into a branch... + if (canFoldSetCCIntoBranchOrSelect(&I)) + return; // Fold this into a branch or select. unsigned DestReg = getReg(I); MachineBasicBlock::iterator MII = BB->end(); @@ -920,6 +1005,163 @@ void ISel::emitSetCCOperation(MachineBasicBlock *MBB, } } +void ISel::visitSelectInst(SelectInst &SI) { + unsigned DestReg = getReg(SI); + MachineBasicBlock::iterator MII = BB->end(); + emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(), + SI.getFalseValue(), DestReg); +} + +/// emitSelect - Common code shared between visitSelectInst and the constant +/// expression support. +void ISel::emitSelectOperation(MachineBasicBlock *MBB, + MachineBasicBlock::iterator IP, + Value *Cond, Value *TrueVal, Value *FalseVal, + unsigned DestReg) { + unsigned SelectClass = getClassB(TrueVal->getType()); + + // We don't support 8-bit conditional moves. If we have incoming constants, + // transform them into 16-bit constants to avoid having a run-time conversion. + if (SelectClass == cByte) { + if (Constant *T = dyn_cast(TrueVal)) + TrueVal = ConstantExpr::getCast(T, Type::ShortTy); + if (Constant *F = dyn_cast(FalseVal)) + FalseVal = ConstantExpr::getCast(F, Type::ShortTy); + } + + + unsigned Opcode; + if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) { + // We successfully folded the setcc into the select instruction. + + unsigned OpNum = getSetCCNumber(SCI->getOpcode()); + OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), MBB, + IP); + + const Type *CompTy = SCI->getOperand(0)->getType(); + bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP; + + // LLVM -> X86 signed X86 unsigned + // ----- ---------- ------------ + // seteq -> cmovNE cmovNE + // setne -> cmovE cmovE + // setlt -> cmovGE cmovAE + // setge -> cmovL cmovB + // setgt -> cmovLE cmovBE + // setle -> cmovG cmovA + // ---- + // cmovNS // Used by comparison with 0 optimization + // cmovS + + switch (SelectClass) { + default: assert(0 && "Unknown value class!"); + case cFP: { + // Annoyingly, we don't have a full set of floating point conditional + // moves. :( + static const unsigned OpcodeTab[2][8] = { + { X86::FCMOVNE, X86::FCMOVE, X86::FCMOVAE, X86::FCMOVB, + X86::FCMOVBE, X86::FCMOVA, 0, 0 }, + { X86::FCMOVNE, X86::FCMOVE, 0, 0, 0, 0, 0, 0 }, + }; + Opcode = OpcodeTab[isSigned][OpNum]; + + // If opcode == 0, we hit a case that we don't support. Output a setcc + // and compare the result against zero. + if (Opcode == 0) { + unsigned CompClass = getClassB(CompTy); + unsigned CondReg; + if (CompClass != cLong || OpNum < 2) { + CondReg = makeAnotherReg(Type::BoolTy); + // Handle normal comparisons with a setcc instruction... + BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, CondReg); + } else { + // Long comparisons end up in the BL register. + CondReg = X86::BL; + } + + BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg); + Opcode = X86::FCMOVE; + } + break; + } + case cByte: + case cShort: { + static const unsigned OpcodeTab[2][8] = { + { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVAE16rr, X86::CMOVB16rr, + X86::CMOVBE16rr, X86::CMOVA16rr, 0, 0 }, + { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVGE16rr, X86::CMOVL16rr, + X86::CMOVLE16rr, X86::CMOVG16rr, X86::CMOVNS16rr, X86::CMOVS16rr }, + }; + Opcode = OpcodeTab[isSigned][OpNum]; + break; + } + case cInt: + case cLong: { + static const unsigned OpcodeTab[2][8] = { + { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVAE32rr, X86::CMOVB32rr, + X86::CMOVBE32rr, X86::CMOVA32rr, 0, 0 }, + { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVGE32rr, X86::CMOVL32rr, + X86::CMOVLE32rr, X86::CMOVG32rr, X86::CMOVNS32rr, X86::CMOVS32rr }, + }; + Opcode = OpcodeTab[isSigned][OpNum]; + break; + } + } + } else { + // Get the value being branched on, and use it to set the condition codes. + unsigned CondReg = getReg(Cond, MBB, IP); + BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg); + switch (SelectClass) { + default: assert(0 && "Unknown value class!"); + case cFP: Opcode = X86::FCMOVE; break; + case cByte: + case cShort: Opcode = X86::CMOVE16rr; break; + case cInt: + case cLong: Opcode = X86::CMOVE32rr; break; + } + } + + unsigned TrueReg = getReg(TrueVal, MBB, IP); + unsigned FalseReg = getReg(FalseVal, MBB, IP); + unsigned RealDestReg = DestReg; + + + // Annoyingly enough, X86 doesn't HAVE 8-bit conditional moves. Because of + // this, we have to promote the incoming values to 16 bits, perform a 16-bit + // cmove, then truncate the result. + if (SelectClass == cByte) { + DestReg = makeAnotherReg(Type::ShortTy); + if (getClassB(TrueVal->getType()) == cByte) { + // Promote the true value, by storing it into AL, and reading from AX. + BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::AL).addReg(TrueReg); + BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::AH).addImm(0); + TrueReg = makeAnotherReg(Type::ShortTy); + BuildMI(*MBB, IP, X86::MOV16rr, 1, TrueReg).addReg(X86::AX); + } + if (getClassB(FalseVal->getType()) == cByte) { + // Promote the true value, by storing it into CL, and reading from CX. + BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(FalseReg); + BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::CH).addImm(0); + FalseReg = makeAnotherReg(Type::ShortTy); + BuildMI(*MBB, IP, X86::MOV16rr, 1, FalseReg).addReg(X86::CX); + } + } + + BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(TrueReg).addReg(FalseReg); + + switch (SelectClass) { + case cByte: + // We did the computation with 16-bit registers. Truncate back to our + // result by copying into AX then copying out AL. + BuildMI(*MBB, IP, X86::MOV16rr, 1, X86::AX).addReg(DestReg); + BuildMI(*MBB, IP, X86::MOV8rr, 1, RealDestReg).addReg(X86::AL); + break; + case cLong: + // Move the upper half of the value as well. + BuildMI(*MBB, IP, Opcode, 2,DestReg+1).addReg(TrueReg+1).addReg(FalseReg+1); + break; + } +} @@ -929,10 +1171,27 @@ void ISel::emitSetCCOperation(MachineBasicBlock *MBB, void ISel::promote32(unsigned targetReg, const ValueRecord &VR) { bool isUnsigned = VR.Ty->isUnsigned(); + Value *Val = VR.Val; + const Type *Ty = VR.Ty; + if (Val) { + if (Constant *C = dyn_cast(Val)) { + Val = ConstantExpr::getCast(C, Type::IntTy); + Ty = Type::IntTy; + } + + // If this is a simple constant, just emit a MOVri directly to avoid the + // copy. + if (ConstantInt *CI = dyn_cast(Val)) { + int TheVal = CI->getRawValue() & 0xFFFFFFFF; + BuildMI(BB, X86::MOV32ri, 1, targetReg).addImm(TheVal); + return; + } + } + // Make sure we have the register number for this value... - unsigned Reg = VR.Val ? getReg(VR.Val) : VR.Reg; + unsigned Reg = Val ? getReg(Val) : VR.Reg; - switch (getClassB(VR.Ty)) { + switch (getClassB(Ty)) { case cByte: // Extend value into target register (8->32) if (isUnsigned) @@ -974,27 +1233,30 @@ void ISel::visitReturnInst(ReturnInst &I) { } Value *RetVal = I.getOperand(0); - unsigned RetReg = getReg(RetVal); switch (getClassB(RetVal->getType())) { case cByte: // integral return values: extend or move into EAX and return case cShort: case cInt: - promote32(X86::EAX, ValueRecord(RetReg, RetVal->getType())); + promote32(X86::EAX, ValueRecord(RetVal)); // Declare that EAX is live on exit BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP); break; - case cFP: // Floats & Doubles: Return in ST(0) + case cFP: { // Floats & Doubles: Return in ST(0) + unsigned RetReg = getReg(RetVal); BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg); // Declare that top-of-stack is live on exit BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP); break; - case cLong: + } + case cLong: { + unsigned RetReg = getReg(RetVal); BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(RetReg); BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RetReg+1); // Declare that EAX & EDX are live on exit BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX) .addReg(X86::ESP); break; + } default: visitInstruction(I); } @@ -1024,12 +1286,12 @@ void ISel::visitBranchInst(BranchInst &BI) { } // See if we can fold the setcc into the branch itself... - SetCondInst *SCI = canFoldSetCCIntoBranch(BI.getCondition()); + SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(BI.getCondition()); if (SCI == 0) { // Nope, cannot fold setcc into this branch. Emit a branch on a condition // computed some other way... unsigned condReg = getReg(BI.getCondition()); - BuildMI(BB, X86::CMP8ri, 2).addReg(condReg).addImm(0); + BuildMI(BB, X86::TEST8rr, 2).addReg(condReg).addReg(condReg); if (BI.getSuccessor(1) == NextBB) { if (BI.getSuccessor(0) != NextBB) BuildMI(BB, X86::JNE, 1).addPCDisp(BI.getSuccessor(0)); @@ -1141,11 +1403,19 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, } break; case cLong: - ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; - addRegOffset(BuildMI(BB, X86::MOV32mr, 5), - X86::ESP, ArgOffset).addReg(ArgReg); - addRegOffset(BuildMI(BB, X86::MOV32mr, 5), - X86::ESP, ArgOffset+4).addReg(ArgReg+1); + if (Args[i].Val && isa(Args[i].Val)) { + uint64_t Val = cast(Args[i].Val)->getRawValue(); + addRegOffset(BuildMI(BB, X86::MOV32mi, 5), + X86::ESP, ArgOffset).addImm(Val & ~0U); + addRegOffset(BuildMI(BB, X86::MOV32mi, 5), + X86::ESP, ArgOffset+4).addImm(Val >> 32ULL); + } else { + ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; + addRegOffset(BuildMI(BB, X86::MOV32mr, 5), + X86::ESP, ArgOffset).addReg(ArgReg); + addRegOffset(BuildMI(BB, X86::MOV32mr, 5), + X86::ESP, ArgOffset+4).addReg(ArgReg+1); + } ArgOffset += 4; // 8 byte entry, not 4. break; @@ -1249,6 +1519,8 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) { case Intrinsic::frameaddress: case Intrinsic::memcpy: case Intrinsic::memset: + case Intrinsic::readport: + case Intrinsic::writeport: // We directly implement these intrinsics break; default: @@ -1410,6 +1682,72 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) { return; } + case Intrinsic::readport: + // + // First, determine that the size of the operand falls within the + // acceptable range for this architecture. + // + if ((CI.getOperand(1)->getType()->getPrimitiveSize()) != 2) { + std::cerr << "llvm.readport: Address size is not 16 bits\n"; + exit (1); + } + + // + // Now, move the I/O port address into the DX register and use the IN + // instruction to get the input data. + // + BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(getReg(CI.getOperand(1))); + switch (CI.getCalledFunction()->getReturnType()->getPrimitiveSize()) { + case 1: + BuildMI(BB, X86::IN8, 0); + break; + case 2: + BuildMI(BB, X86::IN16, 0); + break; + case 4: + BuildMI(BB, X86::IN32, 0); + break; + default: + std::cerr << "Cannot do input on this data type"; + exit (1); + } + return; + + case Intrinsic::writeport: + // + // First, determine that the size of the operand falls within the + // acceptable range for this architecture. + // + // + if ((CI.getOperand(2)->getType()->getPrimitiveSize()) != 2) { + std::cerr << "llvm.writeport: Address size is not 16 bits\n"; + exit (1); + } + + // + // Now, move the I/O port address into the DX register and the value to + // write into the AL/AX/EAX register. + // + BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(getReg(CI.getOperand(2))); + switch (CI.getOperand(1)->getType()->getPrimitiveSize()) { + case 1: + BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(getReg(CI.getOperand(1))); + BuildMI(BB, X86::OUT8, 0); + break; + case 2: + BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(getReg(CI.getOperand(1))); + BuildMI(BB, X86::OUT16, 0); + break; + case 4: + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(getReg(CI.getOperand(1))); + BuildMI(BB, X86::OUT32, 0); + break; + default: + std::cerr << "Cannot do output on this data type"; + exit (1); + } + return; + default: assert(0 && "Error: unknown intrinsics should have been lowered!"); } } @@ -1492,22 +1830,24 @@ void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB, unsigned Class = getClassB(Op0->getType()); // sub 0, X -> neg X - if (OperatorClass == 1 && Class != cLong) + if (OperatorClass == 1) if (ConstantInt *CI = dyn_cast(Op0)) { if (CI->isNullValue()) { unsigned op1Reg = getReg(Op1, MBB, IP); - switch (Class) { - default: assert(0 && "Unknown class for this function!"); - case cByte: - BuildMI(*MBB, IP, X86::NEG8r, 1, DestReg).addReg(op1Reg); - return; - case cShort: - BuildMI(*MBB, IP, X86::NEG16r, 1, DestReg).addReg(op1Reg); - return; - case cInt: - BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg).addReg(op1Reg); - return; + static unsigned const NEGTab[] = { + X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r + }; + BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg); + + if (Class == cLong) { + // We just emitted: Dl = neg Sl + // Now emit : T = addc Sh, 0 + // : Dh = neg T + unsigned T = makeAnotherReg(Type::IntTy); + BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0); + BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T); } + return; } } else if (ConstantFP *CFP = dyn_cast(Op0)) if (CFP->isExactlyValue(-0.0)) { @@ -1518,76 +1858,120 @@ void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB, } // Special case: op Reg, - if (Class != cLong && isa(Op1)) { + if (isa(Op1)) { ConstantInt *Op1C = cast(Op1); unsigned Op0r = getReg(Op0, MBB, IP); // xor X, -1 -> not X if (OperatorClass == 4 && Op1C->isAllOnesValue()) { - static unsigned const NOTTab[] = { X86::NOT8r, X86::NOT16r, X86::NOT32r }; + static unsigned const NOTTab[] = { + X86::NOT8r, X86::NOT16r, X86::NOT32r, 0, X86::NOT32r + }; BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r); + if (Class == cLong) // Invert the top part too + BuildMI(*MBB, IP, X86::NOT32r, 1, DestReg+1).addReg(Op0r+1); return; } // add X, -1 -> dec X - if (OperatorClass == 0 && Op1C->isAllOnesValue()) { + if (OperatorClass == 0 && Op1C->isAllOnesValue() && Class != cLong) { + // Note that we can't use dec for 64-bit decrements, because it does not + // set the carry flag! static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r }; BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r); return; } // add X, 1 -> inc X - if (OperatorClass == 0 && Op1C->equalsInt(1)) { - static unsigned const DECTab[] = { X86::INC8r, X86::INC16r, X86::INC32r }; - BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r); + if (OperatorClass == 0 && Op1C->equalsInt(1) && Class != cLong) { + // Note that we can't use inc for 64-bit increments, because it does not + // set the carry flag! + static unsigned const INCTab[] = { X86::INC8r, X86::INC16r, X86::INC32r }; + BuildMI(*MBB, IP, INCTab[Class], 1, DestReg).addReg(Op0r); return; } - static const unsigned OpcodeTab[][3] = { + static const unsigned OpcodeTab[][5] = { // Arithmetic operators - { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri }, // ADD - { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri }, // SUB + { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri }, // ADD + { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, X86::SUB32ri }, // SUB // Bitwise operators - { X86::AND8ri, X86::AND16ri, X86::AND32ri }, // AND - { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri }, // OR - { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri }, // XOR + { X86::AND8ri, X86::AND16ri, X86::AND32ri, 0, X86::AND32ri }, // AND + { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri, 0, X86::OR32ri }, // OR + { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri, 0, X86::XOR32ri }, // XOR }; - assert(Class < cFP && "General code handles 64-bit integer types!"); unsigned Opcode = OpcodeTab[OperatorClass][Class]; + unsigned Op1l = cast(Op1C)->getRawValue(); + if (Class != cLong) { + BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l); + return; + } else { + // If this is a long value and the high or low bits have a special + // property, emit some special cases. + unsigned Op1h = cast(Op1C)->getRawValue() >> 32LL; + + // If the constant is zero in the low 32-bits, just copy the low part + // across and apply the normal 32-bit operation to the high parts. There + // will be no carry or borrow into the top. + if (Op1l == 0) { + if (OperatorClass != 2) // All but and... + BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r); + else + BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0); + BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1) + .addReg(Op0r+1).addImm(Op1h); + return; + } - uint64_t Op1v = cast(Op1C)->getRawValue(); - BuildMI(*MBB, IP, Opcode, 5, DestReg).addReg(Op0r).addImm(Op1v); - return; + // If this is a logical operation and the top 32-bits are zero, just + // operate on the lower 32. + if (Op1h == 0 && OperatorClass > 1) { + BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg) + .addReg(Op0r).addImm(Op1l); + if (OperatorClass != 2) // All but and + BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1); + else + BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); + return; + } + + // TODO: We could handle lots of other special cases here, such as AND'ing + // with 0xFFFFFFFF00000000 -> noop, etc. + + // Otherwise, code generate the full operation with a constant. + static const unsigned TopTab[] = { + X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri + }; + + BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l); + BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1) + .addReg(Op0r+1).addImm(Op1h); + return; + } } // Finally, handle the general case now. - static const unsigned OpcodeTab[][4] = { + static const unsigned OpcodeTab[][5] = { // Arithmetic operators - { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD }, // ADD - { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB }, // SUB + { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD, X86::ADD32rr },// ADD + { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB, X86::SUB32rr },// SUB // Bitwise operators - { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0 }, // AND - { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0 }, // OR - { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0 }, // XOR + { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr }, // AND + { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0, X86:: OR32rr }, // OR + { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0, X86::XOR32rr }, // XOR }; - bool isLong = false; - if (Class == cLong) { - isLong = true; - Class = cInt; // Bottom 32 bits are handled just like ints - } - unsigned Opcode = OpcodeTab[OperatorClass][Class]; assert(Opcode && "Floating point arguments to logical inst?"); unsigned Op0r = getReg(Op0, MBB, IP); unsigned Op1r = getReg(Op1, MBB, IP); BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); - if (isLong) { // Handle the upper 32 bits of long values... + if (Class == cLong) { // Handle the upper 32 bits of long values... static const unsigned TopTab[] = { X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr }; @@ -1641,8 +2025,19 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, unsigned DestReg, const Type *DestTy, unsigned op0Reg, unsigned ConstRHS) { + static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr}; + static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri}; + unsigned Class = getClass(DestTy); + if (ConstRHS == 0) { + BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0); + return; + } else if (ConstRHS == 1) { + BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg); + return; + } + // If the element size is exactly a power of 2, use a shift to get it. if (unsigned Shift = ExactLog2(ConstRHS)) { switch (Class) { @@ -1668,10 +2063,6 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB, } // Most general case, emit a normal multiply... - static const unsigned MOVriTab[] = { - X86::MOV8ri, X86::MOV16ri, X86::MOV32ri - }; - unsigned TmpReg = makeAnotherReg(DestTy); BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS); @@ -1688,7 +2079,7 @@ void ISel::visitMul(BinaryOperator &I) { unsigned DestReg = getReg(I); // Simple scalar multiply? - if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) { + if (getClass(I.getType()) != cLong) { if (ConstantInt *CI = dyn_cast(I.getOperand(1))) { unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant MachineBasicBlock::iterator MBBI = BB->end(); @@ -1699,31 +2090,81 @@ void ISel::visitMul(BinaryOperator &I) { doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg); } } else { - unsigned Op1Reg = getReg(I.getOperand(1)); - // Long value. We have to do things the hard way... - // Multiply the two low parts... capturing carry into EDX - BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); - BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL - - unsigned OverflowReg = makeAnotherReg(Type::UIntTy); - BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL - BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + if (ConstantInt *CI = dyn_cast(I.getOperand(1))) { + unsigned CLow = CI->getRawValue(); + unsigned CHi = CI->getRawValue() >> 32; - MachineBasicBlock::iterator MBBI = BB->end(); - unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL - BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); + if (CLow == 0) { + // If the low part of the constant is all zeros, things are simple. + BuildMI(BB, X86::MOV32ri, 1, DestReg).addImm(0); + doMultiplyConst(BB, BB->end(), DestReg+1, Type::UIntTy, Op0Reg, CHi); + return; + } - unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); - BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) - AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); - - MBBI = BB->end(); - unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH - BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); - - BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) - DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + // Multiply the two low parts... capturing carry into EDX + unsigned OverflowReg = 0; + if (CLow == 1) { + BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(Op0Reg); + } else { + unsigned Op1RegL = makeAnotherReg(Type::UIntTy); + OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow); + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL + + BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, X86::MOV32rr, 1,OverflowReg).addReg(X86::EDX);// AL*BL >> 32 + } + + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + doMultiplyConst(BB, BB->end(), AHBLReg, Type::UIntTy, Op0Reg+1, CLow); + + unsigned AHBLplusOverflowReg; + if (OverflowReg) { + AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + } else { + AHBLplusOverflowReg = AHBLReg; + } + + if (CHi == 0) { + BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg); + } else { + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + doMultiplyConst(BB, BB->end(), ALBHReg, Type::UIntTy, Op0Reg, CHi); + + BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + } + } else { + unsigned Op1Reg = getReg(I.getOperand(1)); + // Multiply the two low parts... capturing carry into EDX + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL + + unsigned OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + + MachineBasicBlock::iterator MBBI = BB->end(); + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + BuildMI(*BB, MBBI, X86::IMUL32rr, 2, + AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); + + unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + + MBBI = BB->end(); + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + BuildMI(*BB, MBBI, X86::IMUL32rr, 2, + ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); + + BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + } } } @@ -1876,13 +2317,20 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB, } else { // Shifting more than 32 bits Amount -= 32; if (isLeftShift) { - BuildMI(*MBB, IP, X86::SHL32ri, 2, - DestReg + 1).addReg(SrcReg).addImm(Amount); - BuildMI(*MBB, IP, X86::MOV32ri, 1, - DestReg).addImm(0); + if (Amount != 0) { + BuildMI(*MBB, IP, X86::SHL32ri, 2, + DestReg + 1).addReg(SrcReg).addImm(Amount); + } else { + BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg); + } + BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0); } else { - unsigned Opcode = isSigned ? X86::SAR32ri : X86::SHR32ri; - BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(SrcReg+1).addImm(Amount); + if (Amount != 0) { + BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2, + DestReg).addReg(SrcReg+1).addImm(Amount); + } else { + BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1); + } BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); } } @@ -2094,6 +2542,11 @@ void ISel::visitStoreInst(StoreInst &I) { /// void ISel::visitCastInst(CastInst &CI) { Value *Op = CI.getOperand(0); + + // Noop casts are not even emitted. + if (getClassB(CI.getType()) == getClassB(Op->getType())) + return; + // If this is a cast from a 32-bit integer to a Long type, and the only uses // of the case are GEP instructions, then the cast does not need to be // generated explicitly, it will be folded into the GEP. @@ -2239,7 +2692,7 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, // a larger signed value, then use FLD on the larger value. // const Type *PromoteType = 0; - unsigned PromoteOpcode; + unsigned PromoteOpcode = 0; unsigned RealDestReg = DestReg; switch (SrcTy->getPrimitiveID()) { case Type::BoolTyID: @@ -2277,8 +2730,7 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, if (PromoteType) { unsigned TmpReg = makeAnotherReg(PromoteType); - unsigned Opc = SrcTy->isSigned() ? X86::MOVSX16rr8 : X86::MOVZX16rr8; - BuildMI(*BB, IP, Opc, 1, TmpReg).addReg(SrcReg); + BuildMI(*BB, IP, PromoteOpcode, 1, TmpReg).addReg(SrcReg); SrcTy = PromoteType; SrcClass = getClass(PromoteType); SrcReg = TmpReg; @@ -2533,12 +2985,13 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, // idx is the index into the array. Unlike with structure // indices, we may not know its actual value at code-generation // time. - assert(idx->getType() == Type::LongTy && "Bad GEP array index!"); // If idx is a constant, fold it into the offset. unsigned TypeSize = TD.getTypeSize(SqTy->getElementType()); if (ConstantSInt *CSI = dyn_cast(idx)) { Disp += TypeSize*CSI->getValue(); + } else if (ConstantUInt *CUI = dyn_cast(idx)) { + Disp += TypeSize*CUI->getValue(); } else { // If the index reg is already taken, we can't handle this index. if (IndexReg) return; @@ -2662,12 +3115,7 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB, GEPOps.pop_back(); // Consume a GEP operand GEPTypes.pop_back(); - // idx is the index into the array. Unlike with structure - // indices, we may not know its actual value at code-generation - // time. - assert(idx->getType() == Type::LongTy && "Bad GEP array index!"); - - // Most GEP instructions use a [cast (int/uint) to LongTy] as their + // Many GEP instructions use a [cast (int/uint) to LongTy] as their // operand on X86. Handle this case directly now... if (CastInst *CI = dyn_cast(idx)) if (CI->getOperand(0)->getType() == Type::IntTy || @@ -2681,9 +3129,9 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB, unsigned elementSize = TD.getTypeSize(ElTy); // If idxReg is a constant, we don't need to perform the multiply! - if (ConstantSInt *CSI = dyn_cast(idx)) { + if (ConstantInt *CSI = dyn_cast(idx)) { if (!CSI->isNullValue()) { - unsigned Offset = elementSize*CSI->getValue(); + unsigned Offset = elementSize*CSI->getRawValue(); unsigned Reg = makeAnotherReg(Type::UIntTy); BuildMI(*MBB, IP, X86::ADD32ri, 2, TargetReg) .addReg(Reg).addImm(Offset);