Significantly generalize our ability to constant fold floating point intrinsics, inclu...

[oota-llvm.git] / lib / Analysis / ConstantFolding.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp

index c0e9020d919574bdc66714c4d15d3271d98ad652..e499c73566c649e21fcab9354914b496887352e9 100644 (file)
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -9,30 +9,30 @@
  //
  // This file defines routines for folding instructions into constants.
  //
-// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// Also, to supplement the basic IR ConstantExpr simplifications,
  // this file defines some additional folding routines that can make use of
-// DataLayout information. These functions cannot go in VMCore due to library
+// DataLayout information. These functions cannot go in IR due to library
  // dependency issues.
  //
  //===----------------------------------------------------------------------===//
  
  #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Operator.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
  #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FEnv.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/FEnv.h"
+#include "llvm/Target/TargetLibraryInfo.h"
  #include <cerrno>
  #include <cmath>
  using namespace llvm;
@@ -68,7 +68,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
        Type *SrcIVTy =
          VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
-      // Ask VMCore to do the conversion now that #elts line up.
+      // Ask IR to do the conversion now that #elts line up.
        C = ConstantExpr::getBitCast(C, SrcIVTy);
        CDV = cast<ConstantDataVector>(C);
      }
@@ -104,7 +104,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
    if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
      return ConstantExpr::getBitCast(C, DestTy);
  
-  // If the element types match, VMCore can fold it.
+  // If the element types match, IR can fold it.
    unsigned NumDstElt = DestVTy->getNumElements();
    unsigned NumSrcElt = C->getType()->getVectorNumElements();
    if (NumDstElt == NumSrcElt)
@@ -131,7 +131,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
      // Recursively handle this integer conversion, if possible.
      C = FoldBitCast(C, DestIVTy, TD);
  
-    // Finally, VMCore can handle this now that #elts line up.
+    // Finally, IR can handle this now that #elts line up.
      return ConstantExpr::getBitCast(C, DestTy);
    }
  
@@ -141,9 +141,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
      unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
      Type *SrcIVTy =
        VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
-    // Ask VMCore to do the conversion now that #elts line up.
+    // Ask IR to do the conversion now that #elts line up.
      C = ConstantExpr::getBitCast(C, SrcIVTy);
-    // If VMCore wasn't able to fold it, bail out.
+    // If IR wasn't able to fold it, bail out.
      if (!isa<ConstantVector>(C) &&  // FIXME: Remove ConstantVector.
          !isa<ConstantDataVector>(C))
        return C;
@@ -170,15 +170,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
          Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
          if (!Src)  // Reject constantexpr elements.
            return ConstantExpr::getBitCast(C, DestTy);
-  
+
          // Zero extend the element to the right size.
          Src = ConstantExpr::getZExt(Src, Elt->getType());
-  
+
          // Shift it to the right place, depending on endianness.
          Src = ConstantExpr::getShl(Src,
                                     ConstantInt::get(Src->getType(), ShiftAmt));
          ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
-  
+
          // Mix it in.
          Elt = ConstantExpr::getOr(Elt, Src);
        }
@@ -218,10 +218,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
  /// from a global, return the global and the constant.  Because of
  /// constantexprs, this function is recursive.
  static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
-                                       int64_t &Offset, const DataLayout &TD) {
+                                       APInt &Offset, const DataLayout &TD) {
    // Trivial case, constant is the global.
    if ((GV = dyn_cast<GlobalValue>(C))) {
-    Offset = 0;
+    Offset.clearAllBits();
      return true;
    }
  
@@ -235,34 +235,13 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
      return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
  
    // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
-  if (CE->getOpcode() == Instruction::GetElementPtr) {
-    // Cannot compute this if the element type of the pointer is missing size
-    // info.
-    if (!cast<PointerType>(CE->getOperand(0)->getType())
-                 ->getElementType()->isSized())
-      return false;
-
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
      // If the base isn't a global+constant, we aren't either.
      if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
        return false;
  
      // Otherwise, add any offset that our operands provide.
-    gep_type_iterator GTI = gep_type_begin(CE);
-    for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
-         i != e; ++i, ++GTI) {
-      ConstantInt *CI = dyn_cast<ConstantInt>(*i);
-      if (!CI) return false;  // Index isn't a simple constant?
-      if (CI->isZero()) continue;  // Not adding anything.
-
-      if (StructType *ST = dyn_cast<StructType>(*GTI)) {
-        // N = N + Offset
-        Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
-      } else {
-        SequentialType *SQT = cast<SequentialType>(*GTI);
-        Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
-      }
-    }
-    return true;
+    return GEP->accumulateConstantOffset(TD, Offset);
    }
  
    return false;
@@ -292,7 +271,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
      unsigned IntBytes = unsigned(CI->getBitWidth()/8);
  
      for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
-      CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8));
+      int n = ByteOffset;
+      if (!TD.isLittleEndian())
+        n = IntBytes - n - 1;
+      CurPtr[i] = (unsigned char)(Val >> (n * 8));
        ++ByteOffset;
      }
      return true;
@@ -307,6 +289,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
        C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
        return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
      }
+    if (CFP->getType()->isHalfTy()){
+      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD);
+      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+    }
      return false;
    }
  
@@ -378,8 +364,8 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
  
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      if (CE->getOpcode() == Instruction::IntToPtr &&
-        CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType()))
-        return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+        CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+      return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
                                  BytesLeft, TD);
    }
  
@@ -399,7 +385,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
      // that address spaces don't matter here since we're not going to result in
      // an actual new load.
      Type *MapTy;
-    if (LoadTy->isFloatTy())
+    if (LoadTy->isHalfTy())
+      MapTy = Type::getInt16PtrTy(C->getContext());
+    else if (LoadTy->isFloatTy())
        MapTy = Type::getInt32PtrTy(C->getContext());
      else if (LoadTy->isDoubleTy())
        MapTy = Type::getInt64PtrTy(C->getContext());
@@ -420,7 +408,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
    if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
  
    GlobalValue *GVal;
-  int64_t Offset;
+  APInt Offset(TD.getPointerSizeInBits(), 0);
    if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
      return 0;
  
@@ -431,21 +419,31 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
  
    // If we're loading off the beginning of the global, some bytes may be valid,
    // but we don't try to handle this.
-  if (Offset < 0) return 0;
+  if (Offset.isNegative()) return 0;
  
    // If we're not accessing anything in this constant, the result is undefined.
-  if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType()))
+  if (Offset.getZExtValue() >=
+      TD.getTypeAllocSize(GV->getInitializer()->getType()))
      return UndefValue::get(IntType);
  
    unsigned char RawBytes[32] = {0};
-  if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes,
+  if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
                            BytesLoaded, TD))
      return 0;
  
-  APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
-  for (unsigned i = 1; i != BytesLoaded; ++i) {
-    ResultVal <<= 8;
-    ResultVal |= RawBytes[BytesLoaded-1-i];
+  APInt ResultVal = APInt(IntType->getBitWidth(), 0);
+  if (TD.isLittleEndian()) {
+    ResultVal = RawBytes[BytesLoaded - 1];
+    for (unsigned i = 1; i != BytesLoaded; ++i) {
+      ResultVal <<= 8;
+      ResultVal |= RawBytes[BytesLoaded-1-i];
+    }
+  } else {
+    ResultVal = RawBytes[0];
+    for (unsigned i = 1; i != BytesLoaded; ++i) {
+      ResultVal <<= 8;
+      ResultVal |= RawBytes[i];
+    }
    }
  
    return ConstantInt::get(IntType->getContext(), ResultVal);
@@ -521,10 +519,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
      }
    }
  
-  // Try hard to fold loads from bitcasted strange and non-type-safe things.  We
-  // currently don't do any of this for big endian systems.  It can be
-  // generalized in the future if someone is interested.
-  if (TD && TD->isLittleEndian())
+  // Try hard to fold loads from bitcasted strange and non-type-safe things.
+  if (TD)
      return FoldReinterpretLoadFromConstPtr(CE, *TD);
    return 0;
  }
@@ -555,13 +551,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
    // constant.  This happens frequently when iterating over a global array.
    if (Opc == Instruction::Sub && TD) {
      GlobalValue *GV1, *GV2;
-    int64_t Offs1, Offs2;
+    unsigned PtrSize = TD->getPointerSizeInBits();
+    unsigned OpSize = TD->getTypeSizeInBits(Op0->getType());
+    APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0);
  
      if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
        if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
            GV1 == GV2) {
          // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
-        return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+        // PtrToInt may change the bitwidth so we have to convert to the right
+        // size first.
+        return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
+                                                Offs2.zextOrTrunc(OpSize));
        }
    }
  
@@ -575,7 +576,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
                                  Type *ResultTy, const DataLayout *TD,
                                  const TargetLibraryInfo *TLI) {
    if (!TD) return 0;
-  Type *IntPtrTy = TD->getIntPtrType(ResultTy);
+  Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
  
    bool Any = false;
    SmallVector<Constant*, 32> NewIdxs;
@@ -629,8 +630,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
        !Ptr->getType()->isPointerTy())
      return 0;
  
-  unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
-  Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext(), AS);
+  Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
  
    // If this is a constant expr gep that is effectively computing an
    // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
@@ -703,8 +703,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
    // Also, this helps GlobalOpt do SROA on GlobalVariables.
    Type *Ty = Ptr->getType();
    assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
-  assert(Ty->getPointerAddressSpace() == AS
-      && "Operand and result of GEP should be in the same address space.");
    SmallVector<Constant*, 32> NewIdxs;
    do {
      if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
@@ -720,7 +718,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
  
        // Determine which element of the array the offset points into.
        APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
-      IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext(), AS);
+      IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
        if (ElemSize == 0)
          // The element size is 0. This may be [0 x Ty]*, so just use a zero
          // index for this level and proceed to the next level to see if it can
@@ -893,7 +891,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
  Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
                                           ArrayRef<Constant *> Ops,
                                           const DataLayout *TD,
-                                         const TargetLibraryInfo *TLI) {                                   
+                                         const TargetLibraryInfo *TLI) {
    // Handle easy binops first.
    if (Instruction::isBinaryOp(Opcode)) {
      if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
@@ -919,11 +917,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
        if (TD && CE->getOpcode() == Instruction::IntToPtr) {
          Constant *Input = CE->getOperand(0);
          unsigned InWidth = Input->getType()->getScalarSizeInBits();
-        unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
-        if (TD->getPointerSizeInBits(AS) < InWidth) {
+        if (TD->getPointerSizeInBits() < InWidth) {
            Constant *Mask =
              ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
-                                                  TD->getPointerSizeInBits(AS)));
+                                                  TD->getPointerSizeInBits()));
            Input = ConstantExpr::getAnd(Input, Mask);
          }
          // Do a zext or trunc to get to the dest size.
@@ -936,10 +933,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
      // the int size is >= the ptr size.  This requires knowing the width of a
      // pointer, so it can't be done in ConstantExpr::getCast.
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
-      if (TD && CE->getOpcode() == Instruction::PtrToInt &&
-          TD->getPointerSizeInBits(
-            cast<PointerType>(CE->getOperand(0)->getType())->getAddressSpace())
-          <= CE->getType()->getScalarSizeInBits())
+      if (TD &&
+          TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
+          CE->getOpcode() == Instruction::PtrToInt)
          return FoldBitCast(CE->getOperand(0), DestTy, *TD);
  
      return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
@@ -991,10 +987,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
    // ConstantExpr::getCompare cannot do this, because it doesn't have TD
    // around to know if bit truncation is happening.
    if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
-    Type *IntPtrTy = NULL;
      if (TD && Ops1->isNullValue()) {
+      Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
        if (CE0->getOpcode() == Instruction::IntToPtr) {
-        IntPtrTy = TD->getIntPtrType(CE0->getType());
          // Convert the integer value to the right size to ensure we get the
          // proper extension or truncation.
          Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1005,21 +1000,19 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
  
        // Only do this transformation if the int is intptrty in size, otherwise
        // there is a truncation or extension that we aren't modeling.
-      if (CE0->getOpcode() == Instruction::PtrToInt) {
-        IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
-        if (CE0->getType() == IntPtrTy) {
-          Constant *C = CE0->getOperand(0);
-          Constant *Null = Constant::getNullValue(C->getType());
-          return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
-        }
+      if (CE0->getOpcode() == Instruction::PtrToInt &&
+          CE0->getType() == IntPtrTy) {
+        Constant *C = CE0->getOperand(0);
+        Constant *Null = Constant::getNullValue(C->getType());
+        return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
        }
      }
  
      if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
        if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+        Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
  
          if (CE0->getOpcode() == Instruction::IntToPtr) {
-          Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
            // Convert the integer value to the right size to ensure we get the
            // proper extension or truncation.
            Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1028,16 +1021,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                        IntPtrTy, false);
            return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
          }
-      }
  
-      // Only do this transformation if the int is intptrty in size, otherwise
-      // there is a truncation or extension that we aren't modeling.
-      if (CE0->getOpcode() == Instruction::PtrToInt) {
-        IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
-        if (CE0->getType() == IntPtrTy &&
-            CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())
+        // Only do this transformation if the int is intptrty in size, otherwise
+        // there is a truncation or extension that we aren't modeling.
+        if ((CE0->getOpcode() == Instruction::PtrToInt &&
+             CE0->getType() == IntPtrTy &&
+             CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
            return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
-              CE1->getOperand(0), TD, TLI);
+                                                 CE1->getOperand(0), TD, TLI);
        }
      }
  
@@ -1104,6 +1095,13 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
  bool
  llvm::canConstantFoldCallTo(const Function *F) {
    switch (F->getIntrinsicID()) {
+  case Intrinsic::fabs:
+  case Intrinsic::log:
+  case Intrinsic::log2:
+  case Intrinsic::log10:
+  case Intrinsic::exp:
+  case Intrinsic::exp2:
+  case Intrinsic::floor:
    case Intrinsic::sqrt:
    case Intrinsic::pow:
    case Intrinsic::powi:
@@ -1171,11 +1169,17 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
      return 0;
    }
  
+  if (Ty->isHalfTy()) {
+    APFloat APF(V);
+    bool unused;
+    APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+    return ConstantFP::get(Ty->getContext(), APF);
+  }
    if (Ty->isFloatTy())
      return ConstantFP::get(Ty->getContext(), APFloat((float)V));
    if (Ty->isDoubleTy())
      return ConstantFP::get(Ty->getContext(), APFloat(V));
-  llvm_unreachable("Can only constant fold float/double");
+  llvm_unreachable("Can only constant fold half/float/double");
  }
  
  static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
@@ -1187,11 +1191,17 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
      return 0;
    }
  
+  if (Ty->isHalfTy()) {
+    APFloat APF(V);
+    bool unused;
+    APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+    return ConstantFP::get(Ty->getContext(), APF);
+  }
    if (Ty->isFloatTy())
      return ConstantFP::get(Ty->getContext(), APFloat((float)V));
    if (Ty->isDoubleTy())
      return ConstantFP::get(Ty->getContext(), APFloat(V));
-  llvm_unreachable("Can only constant fold float/double");
+  llvm_unreachable("Can only constant fold half/float/double");
  }
  
  /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer
@@ -1243,7 +1253,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
        if (!TLI)
          return 0;
  
-      if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
          return 0;
  
        /// We only fold functions with finite arguments. Folding NaN and inf is
@@ -1256,8 +1266,36 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
        /// the host native double versions.  Float versions are not called
        /// directly but for all these it is true (float)(f((double)arg)) ==
        /// f(arg).  Long double not supported yet.
-      double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
-                                     Op->getValueAPF().convertToDouble();
+      double V;
+      if (Ty->isFloatTy())
+        V = Op->getValueAPF().convertToFloat();
+      else if (Ty->isDoubleTy())
+        V = Op->getValueAPF().convertToDouble();
+      else {
+        bool unused;
+        APFloat APF = Op->getValueAPF();
+        APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+        V = APF.convertToDouble();
+      }
+
+      switch (F->getIntrinsicID()) {
+        default: break;
+        case Intrinsic::fabs:
+          return ConstantFoldFP(fabs, V, Ty);
+        case Intrinsic::log2:
+          return ConstantFoldFP(log2, V, Ty);
+        case Intrinsic::log:
+          return ConstantFoldFP(log, V, Ty);
+        case Intrinsic::log10:
+          return ConstantFoldFP(log10, V, Ty);
+        case Intrinsic::exp:
+          return ConstantFoldFP(exp, V, Ty);
+        case Intrinsic::exp2:
+          return ConstantFoldFP(exp2, V, Ty);
+        case Intrinsic::floor:
+          return ConstantFoldFP(floor, V, Ty);
+      }
+
        switch (Name[0]) {
        case 'a':
          if (Name == "acos" && TLI->has(LibFunc::acos))
@@ -1299,7 +1337,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
          else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
            return ConstantFoldFP(log10, V, Ty);
          else if (F->getIntrinsicID() == Intrinsic::sqrt &&
-                 (Ty->isFloatTy() || Ty->isDoubleTy())) {
+                 (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
            if (V >= -0.0)
              return ConstantFoldFP(sqrt, V, Ty);
            else // Undefined
@@ -1337,7 +1375,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
        case Intrinsic::ctpop:
          return ConstantInt::get(Ty, Op->getValue().countPopulation());
        case Intrinsic::convert_from_fp16: {
-        APFloat Val(Op->getValue());
+        APFloat Val(APFloat::IEEEhalf, Op->getValue());
  
          bool lost = false;
          APFloat::opStatus status =
@@ -1391,18 +1429,35 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
  
    if (Operands.size() == 2) {
      if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
-      if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
          return 0;
-      double Op1V = Ty->isFloatTy() ?
-                      (double)Op1->getValueAPF().convertToFloat() :
-                      Op1->getValueAPF().convertToDouble();
+      double Op1V;
+      if (Ty->isFloatTy())
+        Op1V = Op1->getValueAPF().convertToFloat();
+      else if (Ty->isDoubleTy())
+        Op1V = Op1->getValueAPF().convertToDouble();
+      else {
+        bool unused;
+        APFloat APF = Op1->getValueAPF();
+        APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+        Op1V = APF.convertToDouble();
+      }
+
        if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
          if (Op2->getType() != Op1->getType())
            return 0;
  
-        double Op2V = Ty->isFloatTy() ?
-                      (double)Op2->getValueAPF().convertToFloat():
-                      Op2->getValueAPF().convertToDouble();
+        double Op2V;
+        if (Ty->isFloatTy())
+          Op2V = Op2->getValueAPF().convertToFloat();
+        else if (Ty->isDoubleTy())
+          Op2V = Op2->getValueAPF().convertToDouble();
+        else {
+          bool unused;
+          APFloat APF = Op2->getValueAPF();
+          APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+          Op2V = APF.convertToDouble();
+        }
  
          if (F->getIntrinsicID() == Intrinsic::pow) {
            return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
@@ -1416,6 +1471,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
          if (Name == "atan2" && TLI->has(LibFunc::atan2))
            return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
        } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy())
+          return ConstantFP::get(F->getContext(),
+                                 APFloat((float)std::pow((float)Op1V,
+                                                 (int)Op2C->getZExtValue())));
          if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
            return ConstantFP::get(F->getContext(),
                                   APFloat((float)std::pow((float)Op1V,
@@ -1468,12 +1527,12 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
            return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
          }
          case Intrinsic::cttz:
-          // FIXME: This should check for Op2 == 1, and become unreachable if
-          // Op1 == 0.
+          if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
+            return UndefValue::get(Ty);
            return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
          case Intrinsic::ctlz:
-          // FIXME: This should check for Op2 == 1, and become unreachable if
-          // Op1 == 0.
+          if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
+            return UndefValue::get(Ty);
            return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
          }
        }