Implement InstCombine/cast.ll:test29

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index 82e70073e83845ef7e3f6b6c4d53e6c9aac044cc..2c8f6eda4955a2582e9ff7b05da2bec90eff7475 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -137,7 +137,9 @@ namespace {
      Instruction *visitStoreInst(StoreInst &SI);
      Instruction *visitBranchInst(BranchInst &BI);
      Instruction *visitSwitchInst(SwitchInst &SI);
+    Instruction *visitInsertElementInst(InsertElementInst &IE);
      Instruction *visitExtractElementInst(ExtractElementInst &EI);
+    Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
  
      // visitInstruction - Specify what to return for unhandled instructions...
      Instruction *visitInstruction(Instruction &I) { return 0; }
@@ -165,6 +167,9 @@ namespace {
      Value *InsertCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
        if (V->getType() == Ty) return V;
  
+      if (Constant *CV = dyn_cast<Constant>(V))
+        return ConstantExpr::getCast(CV, Ty);
+      
        Instruction *C = new CastInst(V, Ty, V->getName(), &Pos);
        WorkList.push_back(C);
        return C;
@@ -451,6 +456,8 @@ static void ComputeMaskedBits(Value *V, uint64_t Mask, uint64_t &KnownZero,
    Instruction *I = dyn_cast<Instruction>(V);
    if (!I) return;
  
+  Mask &= V->getType()->getIntegralTypeMask();
+  
    switch (I->getOpcode()) {
    case Instruction::And:
      // If either the LHS or the RHS are Zero, the result is zero.
@@ -708,6 +715,8 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask,
    Instruction *I = dyn_cast<Instruction>(V);
    if (!I) return false;        // Only analyze instructions.
  
+  DemandedMask &= V->getType()->getIntegralTypeMask();
+  
    uint64_t KnownZero2, KnownOne2;
    switch (I->getOpcode()) {
    default: break;
@@ -2631,6 +2640,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
          }
    }
  
+  // fold (and (cast A), (cast B)) -> (cast (and A, B))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+      if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+          Op0C->getOperand(0)->getType()->isIntegral()) {
+        Instruction *NewOp = BinaryOperator::createAnd(Op0C->getOperand(0),
+                                                       Op1C->getOperand(0),
+                                                       I.getName());
+        InsertNewInstBefore(NewOp, I);
+        return new CastInst(NewOp, I.getType());
+      }
+  }
+
    return Changed ? &I : 0;
  }
  
@@ -2856,6 +2878,20 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
            }
          }
    }
+    
+  // fold (or (cast A), (cast B)) -> (cast (or A, B))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+      if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+          Op0C->getOperand(0)->getType()->isIntegral()) {
+        Instruction *NewOp = BinaryOperator::createOr(Op0C->getOperand(0),
+                                                      Op1C->getOperand(0),
+                                                      I.getName());
+        InsertNewInstBefore(NewOp, I);
+        return new CastInst(NewOp, I.getType());
+      }
+  }
+      
  
    return Changed ? &I : 0;
  }
@@ -3021,6 +3057,19 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
      if (Instruction *R = AssociativeOpt(I, FoldSetCCLogical(*this, RHS)))
        return R;
  
+  // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+      if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+          Op0C->getOperand(0)->getType()->isIntegral()) {
+        Instruction *NewOp = BinaryOperator::createXor(Op0C->getOperand(0),
+                                                       Op1C->getOperand(0),
+                                                       I.getName());
+        InsertNewInstBefore(NewOp, I);
+        return new CastInst(NewOp, I.getType());
+      }
+  }
+    
    return Changed ? &I : 0;
  }
  
@@ -4430,7 +4479,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantUInt *Op1,
        // this case, C1 == C2 and C1 is 8, 16, or 32.
        if (ShiftAmt1 == ShiftAmt2) {
          const Type *SExtType = 0;
-        switch (ShiftAmt1) {
+        switch (Op0->getType()->getPrimitiveSizeInBits() - ShiftAmt1) {
          case 8 : SExtType = Type::SByteTy; break;
          case 16: SExtType = Type::ShortTy; break;
          case 32: SExtType = Type::IntTy; break;
@@ -4539,6 +4588,10 @@ static bool isEliminableCastOfCast(const Type *SrcTy, const Type *MidTy,
        SrcTy->getPrimitiveSize() < MidTy->getPrimitiveSize())
      return true;
    
+  // Packed type conversions don't modify bits.
+  if (isa<PackedType>(SrcTy) && isa<PackedType>(MidTy) &&isa<PackedType>(DstTy))
+    return true;
+  
    return false;
  }
  
@@ -4805,7 +4858,30 @@ Instruction *InstCombiner::visitCastInst(CastInst &CI) {
    if (isa<PHINode>(Src))
      if (Instruction *NV = FoldOpIntoPhi(CI))
        return NV;
+  
+  // If the source and destination are pointers, and this cast is equivalent to
+  // a getelementptr X, 0, 0, 0...  turn it into the appropriate getelementptr.
+  // This can enhance SROA and other transforms that want type-safe pointers.
+  if (const PointerType *DstPTy = dyn_cast<PointerType>(CI.getType()))
+    if (const PointerType *SrcPTy = dyn_cast<PointerType>(Src->getType())) {
+      const Type *DstTy = DstPTy->getElementType();
+      const Type *SrcTy = SrcPTy->getElementType();
+      
+      Constant *ZeroUInt = Constant::getNullValue(Type::UIntTy);
+      unsigned NumZeros = 0;
+      while (SrcTy != DstTy && 
+             isa<CompositeType>(SrcTy) && !isa<PointerType>(SrcTy)) {
+        SrcTy = cast<CompositeType>(SrcTy)->getTypeAtIndex(ZeroUInt);
+        ++NumZeros;
+      }
  
+      // If we found a path from the src to dest, create the getelementptr now.
+      if (SrcTy == DstTy) {
+        std::vector<Value*> Idxs(NumZeros+1, ZeroUInt);
+        return new GetElementPtrInst(Src, Idxs);
+      }
+    }
+      
    // If the source value is an instruction with only this use, we can attempt to
    // propagate the cast into the instruction.  Also, only handle integral types
    // for now.
@@ -5439,11 +5515,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      default: break;
      case Intrinsic::ppc_altivec_lvx:
      case Intrinsic::ppc_altivec_lvxl:
-      // Turn lvx -> load if the pointer is known aligned.
+    case Intrinsic::x86_sse_loadu_ps:
+    case Intrinsic::x86_sse2_loadu_pd:
+    case Intrinsic::x86_sse2_loadu_dq:
+      // Turn PPC lvx     -> load if the pointer is known aligned.
+      // Turn X86 loadups -> load if the pointer is known aligned.
        if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
-        Instruction *Ptr = new CastInst(II->getOperand(1), 
-                                        PointerType::get(II->getType()), "tmp");
-        InsertNewInstBefore(Ptr, CI);
+        Value *Ptr = InsertCastBefore(II->getOperand(1),
+                                      PointerType::get(II->getType()), CI);
          return new LoadInst(Ptr);
        }
        break;
@@ -5451,14 +5530,73 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::ppc_altivec_stvxl:
        // Turn stvx -> store if the pointer is known aligned.
        if (GetKnownAlignment(II->getOperand(2), TD) >= 16) {
-        const Type *OpTy = II->getOperand(1)->getType();
-        Instruction *Ptr = new CastInst(II->getOperand(2), 
-                                        PointerType::get(OpTy), "tmp");
-        InsertNewInstBefore(Ptr, CI);
+        const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
+        Value *Ptr = InsertCastBefore(II->getOperand(2), OpPtrTy, CI);
          return new StoreInst(II->getOperand(1), Ptr);
        }
        break;
-      
+    case Intrinsic::x86_sse_storeu_ps:
+    case Intrinsic::x86_sse2_storeu_pd:
+    case Intrinsic::x86_sse2_storeu_dq:
+    case Intrinsic::x86_sse2_storel_dq:
+      // Turn X86 storeu -> store if the pointer is known aligned.
+      if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+        const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
+        Value *Ptr = InsertCastBefore(II->getOperand(1), OpPtrTy, CI);
+        return new StoreInst(II->getOperand(2), Ptr);
+      }
+      break;
+    case Intrinsic::ppc_altivec_vperm:
+      // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+      if (ConstantPacked *Mask = dyn_cast<ConstantPacked>(II->getOperand(3))) {
+        assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+        
+        // Check that all of the elements are integer constants or undefs.
+        bool AllEltsOk = true;
+        for (unsigned i = 0; i != 16; ++i) {
+          if (!isa<ConstantInt>(Mask->getOperand(i)) && 
+              !isa<UndefValue>(Mask->getOperand(i))) {
+            AllEltsOk = false;
+            break;
+          }
+        }
+        
+        if (AllEltsOk) {
+          // Cast the input vectors to byte vectors.
+          Value *Op0 = InsertCastBefore(II->getOperand(1), Mask->getType(), CI);
+          Value *Op1 = InsertCastBefore(II->getOperand(2), Mask->getType(), CI);
+          Value *Result = UndefValue::get(Op0->getType());
+          
+          // Only extract each element once.
+          Value *ExtractedElts[32];
+          memset(ExtractedElts, 0, sizeof(ExtractedElts));
+          
+          for (unsigned i = 0; i != 16; ++i) {
+            if (isa<UndefValue>(Mask->getOperand(i)))
+              continue;
+            unsigned Idx =cast<ConstantInt>(Mask->getOperand(i))->getRawValue();
+            Idx &= 31;  // Match the hardware behavior.
+            
+            if (ExtractedElts[Idx] == 0) {
+              Instruction *Elt = 
+                new ExtractElementInst(Idx < 16 ? Op0 : Op1,
+                                       ConstantUInt::get(Type::UIntTy, Idx&15),
+                                       "tmp");
+              InsertNewInstBefore(Elt, CI);
+              ExtractedElts[Idx] = Elt;
+            }
+          
+            // Insert this value into the result vector.
+            Result = new InsertElementInst(Result, ExtractedElts[Idx],
+                                           ConstantUInt::get(Type::UIntTy, i),
+                                           "tmp");
+            InsertNewInstBefore(cast<Instruction>(Result), CI);
+          }
+          return new CastInst(Result, CI.getType());
+        }
+      }
+      break;
+
      case Intrinsic::stackrestore: {
        // If the save is right next to the restore, remove the restore.  This can
        // happen when variable allocas are DCE'd.
@@ -5587,8 +5725,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    // Check to see if we are changing the return type...
    if (OldRetTy != FT->getReturnType()) {
      if (Callee->isExternal() &&
-        !OldRetTy->isLosslesslyConvertibleTo(FT->getReturnType()) &&
-        !Caller->use_empty())
+        !(OldRetTy->isLosslesslyConvertibleTo(FT->getReturnType()) ||
+          (isa<PointerType>(FT->getReturnType()) && 
+           TD->getIntPtrType()->isLosslesslyConvertibleTo(OldRetTy)))
+        && !Caller->use_empty())
        return false;   // Cannot transform this return value...
  
      // If the callsite is an invoke instruction, and the return value is used by
@@ -6724,7 +6864,8 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
  static Value *FindScalarElement(Value *V, unsigned EltNo) {
    assert(isa<PackedType>(V->getType()) && "Not looking at a vector?");
    const PackedType *PTy = cast<PackedType>(V->getType());
-  if (EltNo >= PTy->getNumElements())  // Out of range access.
+  unsigned Width = PTy->getNumElements();
+  if (EltNo >= Width)  // Out of range access.
      return UndefValue::get(PTy->getElementType());
    
    if (isa<UndefValue>(V))
@@ -6745,6 +6886,19 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
      // Otherwise, the insertelement doesn't modify the value, recurse on its
      // vector input.
      return FindScalarElement(III->getOperand(0), EltNo);
+  } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+    if (isa<ConstantAggregateZero>(SVI->getOperand(2))) {
+      return FindScalarElement(SVI->getOperand(0), 0);
+    } else if (ConstantPacked *CP = 
+                   dyn_cast<ConstantPacked>(SVI->getOperand(2))) {
+      if (isa<UndefValue>(CP->getOperand(EltNo)))
+        return UndefValue::get(PTy->getElementType());
+      unsigned InEl = cast<ConstantUInt>(CP->getOperand(EltNo))->getValue();
+      if (InEl < Width)
+        return FindScalarElement(SVI->getOperand(0), InEl);
+      else
+        return FindScalarElement(SVI->getOperand(1), InEl - Width);
+    }
    }
    
    // Otherwise, we don't know.
@@ -6776,9 +6930,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
    
    // If extracting a specified index from the vector, see if we can recursively
    // find a previously computed scalar that was inserted into the vector.
-  if (ConstantUInt *IdxC = dyn_cast<ConstantUInt>(EI.getOperand(1)))
+  if (ConstantUInt *IdxC = dyn_cast<ConstantUInt>(EI.getOperand(1))) {
      if (Value *Elt = FindScalarElement(EI.getOperand(0), IdxC->getValue()))
        return ReplaceInstUsesWith(EI, Elt);
+  }
    
    if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0)))
      if (I->hasOneUse()) {
@@ -6822,6 +6977,290 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
    return 0;
  }
  
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true. 
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+                                         std::vector<Constant*> &Mask) {
+  assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+         "Invalid CollectSingleShuffleElements");
+  unsigned NumElts = cast<PackedType>(V->getType())->getNumElements();
+
+  if (isa<UndefValue>(V)) {
+    Mask.assign(NumElts, UndefValue::get(Type::UIntTy));
+    return true;
+  } else if (V == LHS) {
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(ConstantUInt::get(Type::UIntTy, i));
+    return true;
+  } else if (V == RHS) {
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(ConstantUInt::get(Type::UIntTy, i+NumElts));
+    return true;
+  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert of an extract from some other vector, include it.
+    Value *VecOp    = IEI->getOperand(0);
+    Value *ScalarOp = IEI->getOperand(1);
+    Value *IdxOp    = IEI->getOperand(2);
+    
+    if (!isa<ConstantInt>(IdxOp))
+      return false;
+    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getRawValue();
+    
+    if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
+      // Okay, we can handle this if the vector we are insertinting into is
+      // transitively ok.
+      if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+        // If so, update the mask to reflect the inserted undef.
+        Mask[InsertedIdx] = UndefValue::get(Type::UIntTy);
+        return true;
+      }      
+    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+      if (isa<ConstantInt>(EI->getOperand(1)) &&
+          EI->getOperand(0)->getType() == V->getType()) {
+        unsigned ExtractedIdx =
+          cast<ConstantInt>(EI->getOperand(1))->getRawValue();
+        
+        // This must be extracting from either LHS or RHS.
+        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+          // Okay, we can handle this if the vector we are insertinting into is
+          // transitively ok.
+          if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+            // If so, update the mask to reflect the inserted value.
+            if (EI->getOperand(0) == LHS) {
+              Mask[InsertedIdx & (NumElts-1)] = 
+                 ConstantUInt::get(Type::UIntTy, ExtractedIdx);
+            } else {
+              assert(EI->getOperand(0) == RHS);
+              Mask[InsertedIdx & (NumElts-1)] = 
+                ConstantUInt::get(Type::UIntTy, ExtractedIdx+NumElts);
+              
+            }
+            return true;
+          }
+        }
+      }
+    }
+  }
+  // TODO: Handle shufflevector here!
+  
+  return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+                                     Value *&RHS) {
+  assert(isa<PackedType>(V->getType()) && 
+         (RHS == 0 || V->getType() == RHS->getType()) &&
+         "Invalid shuffle!");
+  unsigned NumElts = cast<PackedType>(V->getType())->getNumElements();
+
+  if (isa<UndefValue>(V)) {
+    Mask.assign(NumElts, UndefValue::get(Type::UIntTy));
+    return V;
+  } else if (isa<ConstantAggregateZero>(V)) {
+    Mask.assign(NumElts, ConstantUInt::get(Type::UIntTy, 0));
+    return V;
+  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert of an extract from some other vector, include it.
+    Value *VecOp    = IEI->getOperand(0);
+    Value *ScalarOp = IEI->getOperand(1);
+    Value *IdxOp    = IEI->getOperand(2);
+    
+    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+          EI->getOperand(0)->getType() == V->getType()) {
+        unsigned ExtractedIdx =
+          cast<ConstantInt>(EI->getOperand(1))->getRawValue();
+        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getRawValue();
+        
+        // Either the extracted from or inserted into vector must be RHSVec,
+        // otherwise we'd end up with a shuffle of three inputs.
+        if (EI->getOperand(0) == RHS || RHS == 0) {
+          RHS = EI->getOperand(0);
+          Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+          Mask[InsertedIdx & (NumElts-1)] = 
+            ConstantUInt::get(Type::UIntTy, NumElts+ExtractedIdx);
+          return V;
+        }
+        
+        if (VecOp == RHS) {
+          Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+          // Everything but the extracted element is replaced with the RHS.
+          for (unsigned i = 0; i != NumElts; ++i) {
+            if (i != InsertedIdx)
+              Mask[i] = ConstantUInt::get(Type::UIntTy, NumElts+i);
+          }
+          return V;
+        }
+        
+        // If this insertelement is a chain that comes from exactly these two
+        // vectors, return the vector and the effective shuffle.
+        if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+          return EI->getOperand(0);
+        
+      }
+    }
+  }
+  // TODO: Handle shufflevector here!
+  
+  // Otherwise, can't do anything fancy.  Return an identity vector.
+  for (unsigned i = 0; i != NumElts; ++i)
+    Mask.push_back(ConstantUInt::get(Type::UIntTy, i));
+  return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+  Value *VecOp    = IE.getOperand(0);
+  Value *ScalarOp = IE.getOperand(1);
+  Value *IdxOp    = IE.getOperand(2);
+  
+  // If the inserted element was extracted from some other vector, and if the 
+  // indexes are constant, try to turn this into a shufflevector operation.
+  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+    if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+        EI->getOperand(0)->getType() == IE.getType()) {
+      unsigned NumVectorElts = IE.getType()->getNumElements();
+      unsigned ExtractedIdx=cast<ConstantInt>(EI->getOperand(1))->getRawValue();
+      unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getRawValue();
+      
+      if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+        return ReplaceInstUsesWith(IE, VecOp);
+      
+      if (InsertedIdx >= NumVectorElts)  // Out of range insert.
+        return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+      
+      // If we are extracting a value from a vector, then inserting it right
+      // back into the same place, just use the input vector.
+      if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+        return ReplaceInstUsesWith(IE, VecOp);      
+      
+      // We could theoretically do this for ANY input.  However, doing so could
+      // turn chains of insertelement instructions into a chain of shufflevector
+      // instructions, and right now we do not merge shufflevectors.  As such,
+      // only do this in a situation where it is clear that there is benefit.
+      if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) {
+        // Turn this into shuffle(EIOp0, VecOp, Mask).  The result has all of
+        // the values of VecOp, except then one read from EIOp0.
+        // Build a new shuffle mask.
+        std::vector<Constant*> Mask;
+        if (isa<UndefValue>(VecOp))
+          Mask.assign(NumVectorElts, UndefValue::get(Type::UIntTy));
+        else {
+          assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing");
+          Mask.assign(NumVectorElts, ConstantUInt::get(Type::UIntTy,
+                                                       NumVectorElts));
+        } 
+        Mask[InsertedIdx] = ConstantUInt::get(Type::UIntTy, ExtractedIdx);
+        return new ShuffleVectorInst(EI->getOperand(0), VecOp,
+                                     ConstantPacked::get(Mask));
+      }
+      
+      // If this insertelement isn't used by some other insertelement, turn it
+      // (and any insertelements it points to), into one big shuffle.
+      if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+        std::vector<Constant*> Mask;
+        Value *RHS = 0;
+        Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+        if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+        // We now have a shuffle of LHS, RHS, Mask.
+        return new ShuffleVectorInst(LHS, RHS, ConstantPacked::get(Mask));
+      }
+    }
+  }
+
+  return 0;
+}
+
+
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+  Value *LHS = SVI.getOperand(0);
+  Value *RHS = SVI.getOperand(1);
+  Constant *Mask = cast<Constant>(SVI.getOperand(2));
+
+  bool MadeChange = false;
+  
+  if (isa<UndefValue>(Mask))
+    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+  
+  // TODO: If we have shuffle(x, undef, mask) and any elements of mask refer to
+  // the undef, change them to undefs.
+  
+  // Canonicalize shuffle(x,x) -> shuffle(x,undef)
+  if (LHS == RHS) {
+    if (isa<UndefValue>(LHS)) {
+      // shuffle(undef,undef,mask) -> undef.
+      return ReplaceInstUsesWith(SVI, LHS);
+    }
+    
+    if (!isa<ConstantAggregateZero>(Mask)) {
+      // Remap any references to RHS to use LHS.
+      ConstantPacked *CP = cast<ConstantPacked>(Mask);
+      std::vector<Constant*> Elts;
+      for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) {
+        Elts.push_back(CP->getOperand(i));
+        if (isa<UndefValue>(CP->getOperand(i)))
+          continue;
+        unsigned MV = cast<ConstantInt>(CP->getOperand(i))->getRawValue();
+        if (MV >= e)
+          Elts.back() = ConstantUInt::get(Type::UIntTy, MV & (e-1));
+      }
+      Mask = ConstantPacked::get(Elts);
+    }
+    SVI.setOperand(1, UndefValue::get(RHS->getType()));
+    SVI.setOperand(2, Mask);
+    MadeChange = true;
+  }
+  
+  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
+  if (isa<UndefValue>(LHS)) {
+    // shuffle(undef,x,<0,0,0,0>) -> undef.
+    if (isa<ConstantAggregateZero>(Mask))
+      return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+    
+    ConstantPacked *CPM = cast<ConstantPacked>(Mask);
+    std::vector<Constant*> Elts;
+    for (unsigned i = 0, e = CPM->getNumOperands(); i != e; ++i) {
+      if (isa<UndefValue>(CPM->getOperand(i)))
+        Elts.push_back(CPM->getOperand(i));
+      else {
+        unsigned EltNo = cast<ConstantUInt>(CPM->getOperand(i))->getRawValue();
+        if (EltNo >= e)
+          Elts.push_back(ConstantUInt::get(Type::UIntTy, EltNo-e));
+        else               // Referring to the undef.
+          Elts.push_back(UndefValue::get(Type::UIntTy));
+      }
+    }
+    return new ShuffleVectorInst(RHS, LHS, ConstantPacked::get(Elts));
+  }
+  
+  if (ConstantPacked *CP = dyn_cast<ConstantPacked>(Mask)) {
+    bool isLHSID = true, isRHSID = true;
+    
+    // Analyze the shuffle.
+    for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) {
+      if (isa<UndefValue>(CP->getOperand(i)))
+        continue;
+      unsigned MV = cast<ConstantInt>(CP->getOperand(i))->getRawValue();
+      
+      // Is this an identity shuffle of the LHS value?
+      isLHSID &= (MV == i);
+      
+      // Is this an identity shuffle of the RHS value?
+      isRHSID &= (MV-e == i);
+    }
+
+    // Eliminate identity shuffles.
+    if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+    if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+  }
+  
+  return MadeChange ? &SVI : 0;
+}
+
+
  
  void InstCombiner::removeFromWorkList(Instruction *I) {
    WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),