// Vector insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
}
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty],
// Vector dot product
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, Commutative]>;
}
// Vector sum of absolute differences
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
[IntrNoMem, Commutative]>;
}
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector compare
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty], [IntrNoMem]>;
}
// Vector load with broadcast
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i32_ty], [IntrNoMem, Commutative]>;
+ llvm_i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
return true;
}
+// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
+// arguments have changed their type from i32 to i8.
+static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
+ Function *&NewFn) {
+ // Check that the last argument is an i32.
+ Type *LastArgType = F->getFunctionType()->getParamType(
+ F->getFunctionType()->getNumParams() - 1);
+ if (!LastArgType->isIntegerTy(32))
+ return false;
+
+ // Move this function aside and map down.
+ F->setName(F->getName() + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ return true;
+}
+
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
if (Name == "x86.sse41.ptestnzc")
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
+ // Several blend and other instructions with maskes used the wrong number of
+ // bits.
+ if (Name == "x86.sse41.pblendw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
+ NewFn);
+ if (Name == "x86.sse41.blendpd")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
+ NewFn);
+ if (Name == "x86.sse41.blendps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
+ NewFn);
+ if (Name == "x86.sse41.insertps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
+ NewFn);
+ if (Name == "x86.sse41.dppd")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
+ NewFn);
+ if (Name == "x86.sse41.dpps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
+ NewFn);
+ if (Name == "x86.sse41.mpsadbw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
+ NewFn);
+ if (Name == "x86.avx.blend.pd.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx_blend_pd_256, NewFn);
+ if (Name == "x86.avx.blend.ps.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx_blend_ps_256, NewFn);
+ if (Name == "x86.avx.dp.ps.256")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
+ NewFn);
+ if (Name == "x86.avx2.pblendw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
+ NewFn);
+ if (Name == "x86.avx2.pblendd.128")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx2_pblendd_128, NewFn);
+ if (Name == "x86.avx2.pblendd.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx2_pblendd_256, NewFn);
+ if (Name == "x86.avx2.mpsadbw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
+ NewFn);
+
// frcz.ss/sd may need to have an argument dropped
if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
F->setName(Name + ".old");
CI->eraseFromParent();
return;
}
+
+ case Intrinsic::x86_sse41_pblendw:
+ case Intrinsic::x86_sse41_blendpd:
+ case Intrinsic::x86_sse41_blendps:
+ case Intrinsic::x86_sse41_insertps:
+ case Intrinsic::x86_sse41_dppd:
+ case Intrinsic::x86_sse41_dpps:
+ case Intrinsic::x86_sse41_mpsadbw:
+ case Intrinsic::x86_avx_blend_pd_256:
+ case Intrinsic::x86_avx_blend_ps_256:
+ case Intrinsic::x86_avx_dp_ps_256:
+ case Intrinsic::x86_avx2_pblendw:
+ case Intrinsic::x86_avx2_pblendd_128:
+ case Intrinsic::x86_avx2_pblendd_256:
+ case Intrinsic::x86_avx2_mpsadbw: {
+ // Need to truncate the last argument from i32 to i8 -- this argument models
+ // an inherently 8-bit immediate operand to these x86 instructions.
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+
+ // Replace the last argument with a trunc.
+ Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
+
+ CallInst *NewCall = Builder.CreateCall(NewFn, Args);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ return;
+ }
}
}
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
-inline bool isImmZExtu32u8Value(uint64_t Value) {
- return (Value <= 0x00000000000000FFULL);
-}
-
inline bool isImmSExti64i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// extension.
return isImmSExti32i8Value(CE->getValue());
}
- bool isImmZExtu32u8() const {
- if (!isImm())
- return false;
-
- // If this isn't a constant expr, just assume it fits and let relaxation
- // handle it.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE)
- return true;
-
- // Otherwise, check the value is in a range that makes sense for this
- // extension.
- return isImmZExtu32u8Value(CE->getValue());
- }
bool isImmSExti64i8() const {
if (!isImm())
return false;
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
- (ins VR128X:$src1, u32u8imm:$src2),
+ (ins VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX;
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
+ (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
def X86insertps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
let RenderMethod = "addImmOperands";
}
-class ImmZExtAsmOperandClass : AsmOperandClass {
- let SuperClasses = [ImmAsmOperand];
- let RenderMethod = "addImmOperands";
-}
-
def X86GR32orGR64AsmOperand : AsmOperandClass {
let Name = "GR32orGR64";
}
let PrintMethod = "printRoundingControl";
let OperandType = "OPERAND_IMMEDIATE";
}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
let Name = "ImmSExti32i8";
}
-// [0, 0x000000FF]
-def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
- let Name = "ImmZExtu32u8";
-}
-
-
// [0, 0x0000007F] |
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
let ParserMatchClass = ImmSExti32i8AsmOperand;
let OperandType = "OPERAND_IMMEDIATE";
}
-// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
-def u32u8imm : Operand<i32> {
- let ParserMatchClass = ImmZExtu32u8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
// the corresponding elements in the second input vector.
def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
- (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
+ (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i8 170))),
(VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
// Constant 10 corresponds to the binary mask '1010'.
// - the 2nd and 4th element from the second input vector (the 'fadd' node).
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
// - the 2nd and 4th element from the second input vector (the fadd node).
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
Sched<[WriteFShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, f32mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
Sched<[itins.Sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
+
+define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vblendpd
+ %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vblendps
+ %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vdpps
+ %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vblendpd
- %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vblendps
- %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vdppd
- %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vdpps
- %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vinsertps
- %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vmpsadbw
- %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpblendw
- %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK: vblendpd
- %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+ %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
-declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vblendps
- %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vdpps
- %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
; X32: movl 8(%esp), %ecx
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: vinsertps $192, 12(%{{...}},%{{...}}), %
+; CHECK: vinsertps $-64, 12(%{{...}},%{{...}}), %
; CHECK-NEXT: ret
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
%2 = load <4 x float>* %1, align 16
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+
+define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpblendw
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpblendd
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpblendd
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vmpsadbw
+ %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK: vmpsadbw
- %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
-declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpblendw
- %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
-declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpblendd
- %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpblendd
- %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
--- /dev/null
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
+; This test works just like the non-upgrade one except that it only checks
+; forms which require auto-upgrading.
+
+define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: blendpd
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: blendps
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: dppd
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: dpps
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: insertps
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+
+define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: mpsadbw
+ %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: pblendw
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+
+
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: blendpd
- %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: blendps
- %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: dppd
- %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: dpps
- %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: insertps
- %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: mpsadbw
- %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: pblendw
- %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; X32: movl 8(%esp), %ecx
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: insertps $192, 12(%{{...}},%{{...}}), %
+; CHECK: insertps $-64, 12(%{{...}},%{{...}}), %
; CHECK-NEXT: ret
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
%2 = load <4 x float>* %1, align 16
// CHECK: blendvps %xmm2, %xmm1 # encoding: [0x66,0x0f,0x38,0x14,0xca]
blendvps %xmm2, %xmm1
-// rdar://9795008
-// These instructions take a mask not an 8-bit sign extended value.
+// These instructions can take an unsigned 8-bit mask as well as a signed 8-bit
+// immediate. Check both forms here.
// CHECK: blendps $129, %xmm2, %xmm1
blendps $0x81, %xmm2, %xmm1
+// CHECK: blendps $-64, %xmm2, %xmm1
+ blendps $-64, %xmm2, %xmm1
// CHECK: blendpd $129, %xmm2, %xmm1
blendpd $0x81, %xmm2, %xmm1
+// CHECK: blendpd $-64, %xmm2, %xmm1
+ blendpd $-64, %xmm2, %xmm1
// CHECK: pblendw $129, %xmm2, %xmm1
pblendw $0x81, %xmm2, %xmm1
+// CHECK: pblendw $-64, %xmm2, %xmm1
+ pblendw $-64, %xmm2, %xmm1
// CHECK: mpsadbw $129, %xmm2, %xmm1
mpsadbw $0x81, %xmm2, %xmm1
+// CHECK: mpsadbw $-64, %xmm2, %xmm1
+ mpsadbw $-64, %xmm2, %xmm1
// CHECK: dpps $129, %xmm2, %xmm1
dpps $0x81, %xmm2, %xmm1
+// CHECK: dpps $-64, %xmm2, %xmm1
+ dpps $-64, %xmm2, %xmm1
// CHECK: dppd $129, %xmm2, %xmm1
dppd $0x81, %xmm2, %xmm1
+// CHECK: dppd $-64, %xmm2, %xmm1
+ dppd $-64, %xmm2, %xmm1
// CHECK: insertps $129, %xmm2, %xmm1
insertps $0x81, %xmm2, %xmm1
+// CHECK: insertps $-64, %xmm2, %xmm1
+ insertps $-64, %xmm2, %xmm1
// PR13253 handle implicit optional third argument that must always be xmm0
// CHECK: pblendvb %xmm2, %xmm1
TYPE("i32mem", TYPE_Mv)
TYPE("i32imm", TYPE_IMMv)
TYPE("i32i8imm", TYPE_IMM32)
- TYPE("u32u8imm", TYPE_IMM32)
TYPE("GR32", TYPE_R32)
TYPE("GR32orGR64", TYPE_R32)
TYPE("i64mem", TYPE_Mv)
ENCODING("i16imm", ENCODING_IW)
}
ENCODING("i32i8imm", ENCODING_IB)
- ENCODING("u32u8imm", ENCODING_IB)
ENCODING("SSECC", ENCODING_IB)
ENCODING("AVXCC", ENCODING_IB)
ENCODING("AVX512RC", ENCODING_IB)