break;
}
+ case Intrinsic::x86_sse41_pblendvb:
+ case Intrinsic::x86_sse41_blendvps:
+ case Intrinsic::x86_sse41_blendvpd:
+ case Intrinsic::x86_avx_blendv_ps_256:
+ case Intrinsic::x86_avx_blendv_pd_256:
+ case Intrinsic::x86_avx2_pblendvb: {
+ // Convert blendv* to vector selects if the mask is constant.
+ // This optimization is convoluted because the intrinsic is defined as
+ // getting a vector of floats or doubles for the ps and pd versions.
+ // FIXME: That should be changed.
+ Value *Mask = II->getArgOperand(2);
+ if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
+ auto Tyi1 = Builder->getInt1Ty();
+ auto SelectorType = cast<VectorType>(Mask->getType());
+ auto EltTy = SelectorType->getElementType();
+ unsigned Size = SelectorType->getNumElements();
+ unsigned BitWidth = EltTy->isFloatTy() ? 32 : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
+ assert(BitWidth == 64 || BitWidth == 32 || BitWidth == 8 && "Wrong arguments for variable blend intrinsic");
+ SmallVector<Constant*, 32> Selectors;
+ for (unsigned I = 0; I < Size; ++I) {
+ // The intrinsics only read the top bit
+ uint64_t Selector;
+ if (BitWidth == 8)
+ Selector = C->getElementAsInteger(I);
+ else
+ Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
+ Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
+ }
+ auto NewSelector = ConstantVector::get(Selectors);
+ return SelectInst::Create(NewSelector, II->getArgOperand(0), II->getArgOperand(1), "blendv");
+ } else {
+ break;
+ }
+ }
+
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx_vpermilvar_pd: