Name == "x86.sse2.psrl.dq.bs" ||
Name == "x86.avx2.psll.dq.bs" ||
Name == "x86.avx2.psrl.dq.bs" ||
+ Name == "x86.sse41.pblendw" ||
+ Name == "x86.sse41.blendpd" ||
+ Name == "x86.sse41.blendps" ||
+ Name == "x86.avx.blend.pd.256" ||
+ Name == "x86.avx.blend.ps.256" ||
+ Name == "x86.avx2.pblendw" ||
+ Name == "x86.avx2.pblendd.128" ||
+ Name == "x86.avx2.pblendd.256" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
}
// Several blend and other instructions with masks used the wrong number of
// bits.
- if (Name == "x86.sse41.pblendw")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
- NewFn);
- if (Name == "x86.sse41.blendpd")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
- NewFn);
- if (Name == "x86.sse41.blendps")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
- NewFn);
if (Name == "x86.sse41.insertps")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
if (Name == "x86.sse41.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
- if (Name == "x86.avx.blend.pd.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx_blend_pd_256, NewFn);
- if (Name == "x86.avx.blend.ps.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx_blend_ps_256, NewFn);
if (Name == "x86.avx.dp.ps.256")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
- if (Name == "x86.avx2.pblendw")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
- NewFn);
- if (Name == "x86.avx2.pblendd.128")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx2_pblendd_128, NewFn);
- if (Name == "x86.avx2.pblendd.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx2_pblendd_256, NewFn);
if (Name == "x86.avx2.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
Shift);
+ } else if (Name == "llvm.x86.sse41.pblendw" ||
+ Name == "llvm.x86.sse41.blendpd" ||
+ Name == "llvm.x86.sse41.blendps" ||
+ Name == "llvm.x86.avx.blend.pd.256" ||
+ Name == "llvm.x86.avx.blend.ps.256" ||
+ Name == "llvm.x86.avx2.pblendw" ||
+ Name == "llvm.x86.avx2.pblendd.128" ||
+ Name == "llvm.x86.avx2.pblendd.256") {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ SmallVector<Constant*, 16> Idxs;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
+ Idxs.push_back(Builder.getInt32(Idx));
+ }
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")
return;
}
- case Intrinsic::x86_sse41_pblendw:
- case Intrinsic::x86_sse41_blendpd:
- case Intrinsic::x86_sse41_blendps:
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
- case Intrinsic::x86_avx_blend_pd_256:
- case Intrinsic::x86_avx_blend_ps_256:
case Intrinsic::x86_avx_dp_ps_256:
- case Intrinsic::x86_avx2_pblendw:
- case Intrinsic::x86_avx2_pblendd_128:
- case Intrinsic::x86_avx2_pblendd_256:
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate, matched on the SDNode OpNode at result type OpVT; defines a register-source form (rri) and a memory-source form (rmi).
+multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
+ let isCommutable = 1 in // brace-less let: scopes over the rri def that follows. NOTE(review): confirm the commute logic also adjusts $src3 when src1/src2 are swapped.
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u8imm:$src3),
+ !if(Is2Addr, // Is2Addr selects the asm string that omits $src1; otherwise $src1 is listed explicitly
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3),
+ !if(Is2Addr, // same asm-string selection as the rri form
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))], itins.rm>, // second operand: memop_frag load, bitconverted before being passed to OpNode
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
}
let ExeDomain = SSEPackedSingle in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, loadv8f32,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32,
+ VR128, loadv4f32, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32,
+ VR256, loadv8f32, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, loadv4f64,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
+ VR128, loadv2f64, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
+ VR256, loadv4f64, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
+ VR128, loadv2i64, i128mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR256, loadv4i64, i256mem, 0,
DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
+ defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
+ VR256, loadv4i64, i256mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
1, SSE_MPSADBW_ITINS>;
}
let ExeDomain = SSEPackedSingle in
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm BLENDPS : SS41I_binop_rmi<0x0C, "blendps", X86Blendi, v4f32,
+ VR128, memopv4f32, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
let ExeDomain = SSEPackedDouble in
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem,
- 1, SSE_INTALU_ITINS_BLEND_P>;
+ defm BLENDPD : SS41I_binop_rmi<0x0D, "blendpd", X86Blendi, v2f64,
+ VR128, memopv2f64, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm PBLENDW : SS41I_binop_rmi<0x0E, "pblendw", X86Blendi, v8i16,
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTALU_ITINS_BLEND_P>;
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-
- def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
- (imm:$mask))),
- (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
- def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
- (imm:$mask))),
- (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
- (imm:$mask))),
- (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
// Patterns
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-
}
let SchedRW = [WriteLoad] in {
// AVX2 Instructions
//===----------------------------------------------------------------------===//
-/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
-multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop> {
+/// AVX2_binop_rmi - AVX2 binary operator with 8-bit immediate, matched on the SDNode OpNode at result type OpVT; defines a register-source form (rri) and a memory-source form (rmi).
+multiclass AVX2_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, // register form: match OpNode producing OpVT
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>, // memory form: the memop_frag load is bitconverted before being passed to OpNode
Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}
-defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, loadv2i64, i128mem>;
-defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, loadv4i64, i256mem>, VEX_L;
-
-def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
- imm:$mask)),
- (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
- imm:$mask)),
- (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
+ VR128, loadv2i64, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
+ VR256, loadv4i64, i256mem>, VEX_L;
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the