+
+ if (!TD)
+ return commonPointerCastTransforms(CI);
+
+ Type *Ty = CI.getType();
+ unsigned AS = CI.getPointerAddressSpace();
+
+ if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS))
+ return commonPointerCastTransforms(CI);
+
+ Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS);
+ if (Ty->isVectorTy()) // Handle vectors of pointers.
+ PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
+}
+
+/// OptimizeVectorResize - Rewrite a zero extension or truncation of the vector
+/// value InVal to the vector type DestTy as a shufflevector (preceded by a
+/// vector-to-vector bitcast when the element types differ).
+///
+/// The source and destination element types may differ, but they must have
+/// the same primitive bit width; otherwise no transformation is done and 0 is
+/// returned.  On success the returned ShuffleVectorInst is newly created and
+/// not yet inserted anywhere.
+static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
+                                         InstCombiner &IC) {
+  VectorType *SrcTy = cast<VectorType>(InVal->getType());
+  Type *DestEltTy = DestTy->getElementType();
+
+  // Give the input the same element type as the result.  Only same-width
+  // element types are supported for now; something like
+  // <4 x i16> -> <4 x i32> via a bitcast to <2 x i32> is not handled yet.
+  if (SrcTy->getElementType() != DestEltTy) {
+    Type *SrcEltTy = SrcTy->getElementType();
+    if (SrcEltTy->getPrimitiveSizeInBits() !=
+        DestEltTy->getPrimitiveSizeInBits())
+      return 0;
+
+    SrcTy = VectorType::get(DestEltTy, SrcTy->getNumElements());
+    InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+  }
+
+  // With matching element types, the mask and the second shuffle operand only
+  // depend on whether the element count shrinks or grows.
+  const unsigned NumSrcElts = SrcTy->getNumElements();
+  const unsigned NumDestElts = DestTy->getNumElements();
+  const bool Shrinking = NumSrcElts > NumDestElts;
+
+  // The leading lanes of the result always come straight from InVal: all of
+  // the result lanes when shrinking, all of the input lanes otherwise.
+  const unsigned NumCopied = Shrinking ? NumDestElts : NumSrcElts;
+  SmallVector<uint32_t, 16> ShuffleMask;
+  for (unsigned Lane = 0; Lane != NumCopied; ++Lane)
+    ShuffleMask.push_back(Lane);
+
+  Value *V2;
+  if (Shrinking) {
+    // Nothing is taken from the second operand, so undef will do.
+    V2 = UndefValue::get(SrcTy);
+  } else {
+    // Pad the remaining result lanes with zero by pointing each of them at
+    // lane 0 of an all-zero second operand (mask index NumSrcElts).
+    V2 = Constant::getNullValue(SrcTy);
+    for (unsigned Lane = NumSrcElts; Lane != NumDestElts; ++Lane)
+      ShuffleMask.push_back(NumSrcElts);
+  }
+
+  Constant *Mask = ConstantDataVector::get(V2->getContext(), ShuffleMask);
+  return new ShuffleVectorInst(InVal, V2, Mask);
+}
+
+/// isMultipleOfTypeSize - Return true if the bit count Value is an exact
+/// multiple of the primitive size in bits of Ty.
+static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
+  unsigned TyBits = Ty->getPrimitiveSizeInBits();
+  return (Value % TyBits) == 0;
+}
+
+/// getTypeSizeIndex - Return how many whole values of type Ty (measured by
+/// its primitive size in bits) fit in Value bits, i.e. the index of the
+/// Ty-sized slot that starts at bit offset Value.
+static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
+  unsigned TyBits = Ty->getPrimitiveSizeInBits();
+  return Value / TyBits;
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy.  Look through the value to see if we can decompose it into
+/// insertions into the vector.  See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector, and must itself be a multiple of VecEltTy's size.
+///
+/// Elements must be pre-sized to the number of vector elements with every
+/// slot null; a slot is filled in at most once.  IC is used only to query the
+/// target byte order, so it must have a DataLayout available.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned Shift,
+                                     SmallVectorImpl<Value*> &Elements,
+                                     Type *VecEltTy, InstCombiner &IC) {
+  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+         "Shift should be a multiple of the element type size");
+
+  // Undef values never contribute useful bits to the result.
+  if (isa<UndefValue>(V)) return true;
+
+  // If we got down to a value of the right type, we win, try inserting into the
+  // right element.
+  if (V->getType() == VecEltTy) {
+    // Inserting null doesn't actually insert any elements.
+    if (Constant *C = dyn_cast<Constant>(V))
+      if (C->isNullValue())
+        return true;
+
+    // Shift counts from the least significant bit, which is element 0 on
+    // little-endian targets but the last element on big-endian ones.
+    unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+    if (IC.getDataLayout()->isBigEndian())
+      ElementIndex = Elements.size() - ElementIndex - 1;
+
+    // Fail if multiple elements are inserted into this slot.
+    if (Elements[ElementIndex] != 0)
+      return false;
+
+    Elements[ElementIndex] = V;
+    return true;
+  }
+
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    // Figure out the # elements this provides, and bitcast it or slice it up
+    // as required.
+    unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+                                        VecEltTy);
+    // If the constant is the size of a vector element, we just need to bitcast
+    // it to the right type so it gets properly inserted.
+    if (NumElts == 1)
+      return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+                                      Shift, Elements, VecEltTy, IC);
+
+    // Okay, this is a constant that covers multiple elements.  Slice it up into
+    // pieces and insert each element-sized piece into the vector.
+    if (!isa<IntegerType>(C->getType()))
+      C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+                                       C->getType()->getPrimitiveSizeInBits()));
+    unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+    Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+    for (unsigned i = 0; i != NumElts; ++i) {
+      // Piece i sits at bit i*ElementSize *within the constant*.  Shift only
+      // describes where the constant as a whole lands in the vector, so it
+      // must be added to the destination position but not to the amount the
+      // constant is shifted by; otherwise the wrong bits are extracted (and
+      // the shift amount may exceed the constant's width) when Shift != 0.
+      unsigned PieceOffset = i*ElementSize;
+      Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+                                                                  PieceOffset));
+      Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+      if (!CollectInsertionElements(Piece, Shift+PieceOffset, Elements,
+                                    VecEltTy, IC))
+        return false;
+    }
+    return true;
+  }
+
+  // Anything else is looked through only if this is its sole user.
+  if (!V->hasOneUse()) return false;
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return false;
+  switch (I->getOpcode()) {
+  default: return false; // Unhandled case.
+  case Instruction::BitCast:
+    // A bitcast doesn't move any bits; look straight through it.
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC);
+  case Instruction::ZExt:
+    // The added high bits are zero, so only the narrower operand matters, but
+    // it must still cover a whole number of elements.
+    if (!isMultipleOfTypeSize(
+                          I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+                              VecEltTy))
+      return false;
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC);
+  case Instruction::Or:
+    // Both sides contribute elements at the same base position.
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC) &&
+           CollectInsertionElements(I->getOperand(1), Shift,
+                                    Elements, VecEltTy, IC);
+  case Instruction::Shl: {
+    // Must be shifting by a constant that is a multiple of the element size.
+    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (CI == 0) return false;
+    Shift += CI->getZExtValue();
+    if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC);
+  }
+
+  }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - The integer operand of this
+/// int-to-vector bitcast may have been assembled by hand from the individual
+/// vector elements using shifts and ors.  If so, emit a chain of
+/// insertelements that builds the vector directly.  For example:
+///
+///    %tmp37 = bitcast float %inc to i32
+///    %tmp38 = zext i32 %tmp37 to i64
+///    %tmp31 = bitcast float %inc5 to i32
+///    %tmp32 = zext i32 %tmp31 to i64
+///    %tmp33 = shl i64 %tmp32, 32
+///    %ins35 = or i64 %tmp33, %tmp38
+///    %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// becomes two insertelements that do "buildvector{%inc, %inc5}".
+///
+/// Returns the rebuilt vector value, or 0 if there is no DataLayout or the
+/// operand does not match the pattern.
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+                                                InstCombiner &IC) {
+  // Which element a given bit range lands in depends on the target byte
+  // order, so nothing can be done without a DataLayout.
+  if (IC.getDataLayout() == 0)
+    return 0;
+
+  VectorType *VecTy = cast<VectorType>(CI.getType());
+
+  // One slot per result element, all initially null.
+  SmallVector<Value*, 8> Lanes(VecTy->getNumElements());
+  bool Matched = CollectInsertionElements(CI.getOperand(0), 0, Lanes,
+                                          VecTy->getElementType(), IC);
+  if (!Matched)
+    return 0;
+
+  // Every element is now either recorded in Lanes or, where the slot is still
+  // null, known to be zero.  Start from an all-zero vector and insert only
+  // the elements that were actually found.
+  Value *Vec = Constant::getNullValue(CI.getType());
+  for (unsigned Lane = 0, NumLanes = Lanes.size(); Lane != NumLanes; ++Lane) {
+    Value *Elt = Lanes[Lane];
+    if (Elt != 0)
+      Vec = IC.Builder->CreateInsertElement(Vec, Elt,
+                                            IC.Builder->getInt32(Lane));
+  }
+
+  return Vec;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
+ Value *Src = CI.getOperand(0);
+ Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = 0;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));