Fix a major bug in the signed shr code, which apparently only breaks 134.perl!

[oota-llvm.git] / lib / Target / X86 / X86ISelSimple.cpp
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp

index a938bae3fc71e6e931275131861cd381bded7c80..89842a4e9f11e506d87929f0eda637beba65746b 100644 (file)
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -175,6 +175,7 @@ namespace {
      // Control flow operators
      void visitReturnInst(ReturnInst &RI);
      void visitBranchInst(BranchInst &BI);
+    void visitUnreachableInst(UnreachableInst &UI) {}  // emits no code
  
      struct ValueRecord {
        Value *Val;
@@ -312,6 +313,13 @@ namespace {
                              MachineBasicBlock::iterator IP,
                              Value *Op, Value *ShiftAmount, bool isLeftShift,
                              const Type *ResultTy, unsigned DestReg);
+
+    // Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
+    // constant (the Op1Val parameter here; the definition names it Amt).
+    void doSHLDConst(MachineBasicBlock *MBB, 
+                     MachineBasicBlock::iterator MBBI,
+                     unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
+                     unsigned Op1Val);
        
      /// emitSelectOperation - Common code shared between visitSelectInst and the
      /// constant expression support.
@@ -447,7 +455,20 @@ unsigned X86ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
  void X86ISel::copyConstantToRegister(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator IP,
                                       Constant *C, unsigned R) {
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+  if (isa<UndefValue>(C)) {
+    switch (getClassB(C->getType())) {
+    case cFP:
+      // FIXME: SHOULD TEACH STACKIFIER ABOUT UNDEF VALUES!
+      BuildMI(*MBB, IP, X86::FLD0, 0, R);
+      return;
+    case cLong:
+      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R+1);
+      // FALL THROUGH
+    default:
+      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R);
+      return;
+    }
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      unsigned Class = 0;
      switch (CE->getOpcode()) {
      case Instruction::GetElementPtr:
@@ -911,10 +932,10 @@ unsigned X86ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
    // The arguments are already supposed to be of the same type.
    const Type *CompTy = Op0->getType();
    unsigned Class = getClassB(CompTy);
-  unsigned Op0r = getReg(Op0, MBB, IP);
  
    // Special case handling of: cmp R, i
    if (isa<ConstantPointerNull>(Op1)) {
+    unsigned Op0r = getReg(Op0, MBB, IP);
      if (OpNum < 2)    // seteq/setne -> test
        BuildMI(*MBB, IP, X86::TEST32rr, 2).addReg(Op0r).addReg(Op0r);
      else
@@ -932,6 +953,28 @@ unsigned X86ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
        // can't handle unsigned comparisons against zero unless they are == or
        // !=.  These should have been strength reduced already anyway.
        if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) {
+
+        // If this is a comparison against zero and the LHS is an and of a
+        // register with a constant, use the test to do the and.
+        if (Instruction *Op0I = dyn_cast<Instruction>(Op0))
+          if (Op0I->getOpcode() == Instruction::And && Op0->hasOneUse() &&
+              isa<ConstantInt>(Op0I->getOperand(1))) {
+            static const unsigned TESTTab[] = {
+              X86::TEST8ri, X86::TEST16ri, X86::TEST32ri
+            };
+            
+            // Emit test X, i
+            unsigned LHS = getReg(Op0I->getOperand(0), MBB, IP);
+            unsigned Imm =
+              cast<ConstantInt>(Op0I->getOperand(1))->getRawValue();
+            BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(LHS).addImm(Imm);
+            
+            if (OpNum == 2) return 6;   // Map jl -> js
+            if (OpNum == 3) return 7;   // Map jg -> jns
+            return OpNum;
+          }
+
+        unsigned Op0r = getReg(Op0, MBB, IP);
          static const unsigned TESTTab[] = {
            X86::TEST8rr, X86::TEST16rr, X86::TEST32rr
          };
@@ -946,9 +989,11 @@ unsigned X86ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
          X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
        };
  
+      unsigned Op0r = getReg(Op0, MBB, IP);
        BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
        return OpNum;
      } else {
+      unsigned Op0r = getReg(Op0, MBB, IP);
        assert(Class == cLong && "Unknown integer class!");
        unsigned LowCst = CI->getRawValue();
        unsigned HiCst = CI->getRawValue() >> 32;
@@ -995,6 +1040,8 @@ unsigned X86ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
      }
    }
  
+  unsigned Op0r = getReg(Op0, MBB, IP);
+
    // Special case handling of comparison against +/- 0.0
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
      if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) {
@@ -1961,6 +2008,23 @@ void X86ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
    Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1);
    unsigned Class = getClassB(B.getType());
  
+  // If this is AND X, C, and it is only used by a setcc instruction, it will
+  // be folded.  There is no need to emit this instruction.
+  if (B.hasOneUse() && OperatorClass == 2 && isa<ConstantInt>(Op1))
+    if (Class == cByte || Class == cShort || Class == cInt) {
+      Instruction *Use = cast<Instruction>(B.use_back());
+      if (isa<SetCondInst>(Use) &&
+          Use->getOperand(1) == Constant::getNullValue(B.getType())) {
+        switch (getSetCCNumber(Use->getOpcode())) {
+        case 0:
+        case 1:
+          return;
+        default:
+          if (B.getType()->isSigned()) return;
+        }
+      }
+    }
+
    // Special case: op Reg, load [mem]
    if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1) && Class != cLong &&
        Op0->hasOneUse() && 
@@ -2836,6 +2900,41 @@ void X86ISel::visitShiftInst(ShiftInst &I) {
                        getReg (I));
  }
  
+/// Emit code at IP in MBB for 'SHLD DestReg, Op0Reg, Op1Reg, Amt' with constant
+/// Amt.  Amt 0 copies Op0Reg, 1-3 use simpler sequences, others a real SHLD.
+void X86ISel::doSHLDConst(MachineBasicBlock *MBB, 
+                          MachineBasicBlock::iterator IP,
+                          unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
+                          unsigned Amt) {
+  // SHLD is a very inefficient operation on every processor, try to do
+  // something simpler for common values of 'Amt'.
+  if (Amt == 0) {  // Shift by zero: DestReg is just a copy of Op0Reg.
+    BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
+  } else if (Amt == 1) {  // ADD's carry (Op1Reg's top bit) feeds the ADC.
+    unsigned Tmp = makeAnotherReg(Type::UIntTy);  // Sum itself is never read.
+    BuildMI(*MBB, IP, X86::ADD32rr, 2, Tmp).addReg(Op1Reg).addReg(Op1Reg);
+    BuildMI(*MBB, IP, X86::ADC32rr, 2, DestReg).addReg(Op0Reg).addReg(Op0Reg);
+  } else if (Amt == 2 || Amt == 3) {
+    // On the P4 and Athlon it is cheaper to replace shld ..., 2|3 with a
+    // shift/lea pair.  NOTE: This should not be done on the P6 family!
+    unsigned Tmp = makeAnotherReg(Type::UIntTy);
+    BuildMI(*MBB, IP, X86::SHR32ri, 2, Tmp).addReg(Op1Reg).addImm(32-Amt);
+    X86AddressMode AM;
+    AM.BaseType = X86AddressMode::RegBase;
+    AM.Base.Reg = Tmp;        // Op1Reg >> (32-Amt): low bits of the result.
+    AM.Scale = 1 << Amt;      // 4 or 8, applied to the index register below.
+    AM.IndexReg = Op0Reg;
+    AM.Disp = 0;
+    addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 4, DestReg), AM);
+  } else {
+    // NOTE: It is always cheaper on the P4 to emit SHLD as two shifts and an OR
+    // than it is to emit a real SHLD.  That expansion is not implemented here;
+    // every remaining amount gets a real SHLD.
+    BuildMI(*MBB, IP, X86::SHLD32rri8, 3, 
+            DestReg).addReg(Op0Reg).addReg(Op1Reg).addImm(Amt);
+  }
+}
+
  /// emitShiftOperation - Common code shared between visitShiftInst and
  /// constant expression support.
  void X86ISel::emitShiftOperation(MachineBasicBlock *MBB,
@@ -2846,62 +2945,67 @@ void X86ISel::emitShiftOperation(MachineBasicBlock *MBB,
    unsigned SrcReg = getReg (Op, MBB, IP);
    bool isSigned = ResultTy->isSigned ();
    unsigned Class = getClass (ResultTy);
-  
-  static const unsigned ConstantOperand[][4] = {
-    { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri, X86::SHRD32rri8 },  // SHR
-    { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri, X86::SHRD32rri8 },  // SAR
-    { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 },  // SHL
-    { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 },  // SAL = SHL
+
+  static const unsigned ConstantOperand[][3] = {
+    { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri },  // SHR
+    { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri },  // SAR
+    { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri },  // SHL
+    { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri },  // SAL = SHL
    };
  
-  static const unsigned NonConstantOperand[][4] = {
+  static const unsigned NonConstantOperand[][3] = {
      { X86::SHR8rCL, X86::SHR16rCL, X86::SHR32rCL },  // SHR
      { X86::SAR8rCL, X86::SAR16rCL, X86::SAR32rCL },  // SAR
      { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SHL
      { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SAL = SHL
    };
  
-  // Longs, as usual, are handled specially...
+  // Longs, as usual, are handled specially.
    if (Class == cLong) {
-    // If we have a constant shift, we can generate much more efficient code
-    // than otherwise...
-    //
      if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
        unsigned Amount = CUI->getValue();
-      if (Amount < 32) {
+      if (Amount == 1 && isLeftShift) {   // X << 1 == X+X
+        BuildMI(*MBB, IP, X86::ADD32rr, 2,
+                DestReg).addReg(SrcReg).addReg(SrcReg);
+        BuildMI(*MBB, IP, X86::ADC32rr, 2,
+                DestReg+1).addReg(SrcReg+1).addReg(SrcReg+1);
+      } else if (Amount < 32) {
          const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
          if (isLeftShift) {
-          BuildMI(*MBB, IP, Opc[3], 3, 
-              DestReg+1).addReg(SrcReg+1).addReg(SrcReg).addImm(Amount);
+          doSHLDConst(MBB, IP, DestReg+1, SrcReg+1, SrcReg, Amount);
            BuildMI(*MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addImm(Amount);
          } else {
-          BuildMI(*MBB, IP, Opc[3], 3,
-              DestReg).addReg(SrcReg  ).addReg(SrcReg+1).addImm(Amount);
+          BuildMI(*MBB, IP, X86::SHRD32rri8, 3,
+                  DestReg).addReg(SrcReg  ).addReg(SrcReg+1).addImm(Amount);
            BuildMI(*MBB, IP, Opc[2],2,DestReg+1).addReg(SrcReg+1).addImm(Amount);
          }
-      } else {                 // Shifting more than 32 bits
-        Amount -= 32;
+      } else if (Amount == 32) {
          if (isLeftShift) {
-          if (Amount != 0) {
-            BuildMI(*MBB, IP, X86::SHL32ri, 2,
-                    DestReg + 1).addReg(SrcReg).addImm(Amount);
-          } else {
-            BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
-          }
+          BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
            BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
          } else {
-          if (Amount != 0) {
-            BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
-                    DestReg).addReg(SrcReg+1).addImm(Amount);
+          BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
+          if (!isSigned) {
+            BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
            } else {
-            BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
+            BuildMI(*MBB, IP, X86::SAR32ri, 2,
+                    DestReg+1).addReg(SrcReg).addImm(31);
            }
+        }
+      } else {                 // Shifting more than 32 bits
+        Amount -= 32;
+        if (isLeftShift) {
+          BuildMI(*MBB, IP, X86::SHL32ri, 2,
+                  DestReg + 1).addReg(SrcReg).addImm(Amount);
+          BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
+        } else {
+          BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
+                  DestReg).addReg(SrcReg+1).addImm(Amount);
            BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
          }
        }
      } else {
        unsigned TmpReg = makeAnotherReg(Type::IntTy);
-
        if (!isLeftShift && isSigned) {
          // If this is a SHR of a Long, then we need to do funny sign extension
          // stuff.  TmpReg gets the value to use as the high-part if we are
@@ -2962,9 +3066,14 @@ void X86ISel::emitShiftOperation(MachineBasicBlock *MBB,
      // The shift amount is constant, guaranteed to be a ubyte. Get its value.
      assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
  
-    const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
-    BuildMI(*MBB, IP, Opc[Class], 2,
-        DestReg).addReg(SrcReg).addImm(CUI->getValue());
+    if (CUI->getValue() == 1 && isLeftShift) {    // X << 1 -> X+X
+      static const int AddOpC[] = { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
+      BuildMI(*MBB, IP, AddOpC[Class], 2,DestReg).addReg(SrcReg).addReg(SrcReg);
+    } else {
+      const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
+      BuildMI(*MBB, IP, Opc[Class], 2,
+              DestReg).addReg(SrcReg).addImm(CUI->getValue());
+    }
    } else {                  // The shift amount is non-constant.
      unsigned ShiftAmountReg = getReg (ShiftAmount, MBB, IP);
      BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
@@ -3347,17 +3456,8 @@ void X86ISel::emitCastOperation(MachineBasicBlock *BB,
        PromoteType = Type::IntTy;
        PromoteOpcode = X86::MOVZX32rr16;
        break;
-    case Type::UIntTyID: {
-      // Make a 64 bit temporary... and zero out the top of it...
-      unsigned TmpReg = makeAnotherReg(Type::LongTy);
-      BuildMI(*BB, IP, X86::MOV32rr, 1, TmpReg).addReg(SrcReg);
-      BuildMI(*BB, IP, X86::MOV32ri, 1, TmpReg+1).addImm(0);
-      SrcTy = Type::LongTy;
-      SrcClass = cLong;
-      SrcReg = TmpReg;
-      break;
-    }
      case Type::ULongTyID:
+    case Type::UIntTyID:
        // Don't fild into the read destination.
        DestReg = makeAnotherReg(Type::DoubleTy);
        break;
@@ -3392,10 +3492,28 @@ void X86ISel::emitCastOperation(MachineBasicBlock *BB,
        { 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
      addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
  
-    // We need special handling for unsigned 64-bit integer sources.  If the
-    // input number has the "sign bit" set, then we loaded it incorrectly as a
-    // negative 64-bit number.  In this case, add an offset value.
-    if (SrcTy == Type::ULongTy) {
+    if (SrcTy == Type::UIntTy) {
+      // If this is a cast from uint -> double, we need to be careful about if
+      // the "sign" bit is set.  If so, we don't want to make a negative number,
+      // we want to make a positive number.  Emit code to add an offset if the
+      // sign bit is set.
+
+      // Compute whether the sign bit is set by shifting the reg right 31 bits.
+      unsigned IsNeg = makeAnotherReg(Type::IntTy);
+      BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
+
+      // Create a CP value that has the offset in one word and 0 in the other.
+      static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
+                                                        0x4f80000000000000ULL);
+      unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
+      BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
+        .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
+
+    } else if (SrcTy == Type::ULongTy) {
+      // We need special handling for unsigned 64-bit integer sources.  If the
+      // input number has the "sign bit" set, then we loaded it incorrectly as a
+      // negative 64-bit number.  In this case, add an offset value.
+
        // Emit a test instruction to see if the dynamic input value was signed.
        BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);