// DSP shift nodes (SHLL, SHRA, SHRL); all three are declared through
// MipsDSPBase with the same SDT_MipsSHIFT_DSP type profile.
def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>;
def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>;
def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>;
+// DSP compare and select-on-compare nodes, declared through MipsDSPBase with
+// the SDTSetCC and SDTSelectCC type profiles. The patterns in this file match
+// them on v2i16 and v4i8 operands.
+def MipsSETCC_DSP : MipsDSPBase<"SETCC_DSP", SDTSetCC>;
+def MipsSELECT_CC_DSP : MipsDSPBase<"SELECT_CC_DSP", SDTSelectCC>;
// Flags.
class UseAC {
// Pseudo with one ACRegsDSP input ($src) and one ACRegsDSP output ($dst).
// The pattern list is empty, so no selection pattern is attached to the def
// itself. NOTE(review): presumably expanded elsewhere into an accumulator
// copy - confirm against the C++ side.
def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
+// Pseudo CMP and PICK instructions.
+// Pseudo compare. Reads two DSPRegs operands ($rs, $rt) and defines a DSPCC
+// value ($cmp), with no selection pattern of its own. PseudoInstExpansion
+// rewrites it to RealInst using only $rs and $rt, so the $cmp result has no
+// operand in the expanded instruction. Marked NeverHasSideEffects.
+class PseudoCMP<Instruction RealInst> :
+ PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects;
+
+// Pseudo pick. Takes a DSPCC value ($cmp) plus two DSPRegs operands
+// ($rs, $rt) and defines a DSPRegs result ($rd), with no selection pattern of
+// its own. PseudoInstExpansion rewrites it to RealInst using $rd, $rs and $rt
+// only, so the $cmp input is dropped from the expanded instruction.
+// Marked NeverHasSideEffects.
+class PseudoPICK<Instruction RealInst> :
+ PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>,
+ NeverHasSideEffects;
+
+// One PseudoCMP wrapper per real compare: EQ/LT/LE for the PH compares
+// (CMP_*) and EQ/LT/LE for the QB compares (CMPU_*).
+def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>;
+def PseudoCMP_LT_PH : PseudoCMP<CMP_LT_PH>;
+def PseudoCMP_LE_PH : PseudoCMP<CMP_LE_PH>;
+def PseudoCMPU_EQ_QB : PseudoCMP<CMPU_EQ_QB>;
+def PseudoCMPU_LT_QB : PseudoCMP<CMPU_LT_QB>;
+def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
+
+// PseudoPICK wrappers for the two real pick instructions (PH and QB).
+def PseudoPICK_PH : PseudoPICK<PICK_PH>;
+def PseudoPICK_QB : PseudoPICK<PICK_QB>;
+
// Patterns.
// Pat wrapper that also attaches Requires<[pred]>; pred defaults to HasDSP
// when the instantiation does not pass a predicate.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
 Pat<pattern, result>, Requires<[pred]>;
// v4i8 patterns for the shra_qb / shrl_qb intrinsics using the immZExt3
// operand. The SHRA_QB pattern passes HasDSPR2 explicitly; the SHRL_QB one
// omits the last argument. NOTE(review): DSPShiftPat is defined outside this
// excerpt - presumably the omitted argument falls back to a default
// predicate; confirm there.
def : DSPShiftPat<SHRA_QB, v4i8, int_mips_shra_qb, immZExt3, HasDSPR2>;
def : DSPShiftPat<SHRL_QB, v4i8, int_mips_shrl_qb, immZExt3>;
+// SETCC/SELECT_CC patterns.
+// Selects (MipsSETCC_DSP $a, $b, CC) on ValTy as Cmp($a, $b) feeding Pick.
+// Pick's first data operand is the value produced by (ADDiu ZERO, -1) copied
+// into DSPRegs, and its second data operand is ZERO.
+// NOTE(review): presumably Pick takes the first data operand for lanes where
+// the compare holds, giving an all-ones/zero mask - confirm against the PICK
+// instruction definition.
+class DSPSetCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)),
+ (ValTy ZERO)))>;
+
+// Same source pattern as DSPSetCCPat, but the two Pick data operands are
+// swapped: ZERO comes first and the (ADDiu ZERO, -1) value second. The
+// instantiations use this to select a condition code through the compare
+// for its complement (e.g. SETNE through the EQ compare).
+class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
+ (ValTy ZERO),
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>;
+
+// Selects (MipsSELECT_CC_DSP $a, $b, $c, $d, CC) on ValTy as Cmp($a, $b)
+// feeding Pick, with $c and $d passed to Pick in their original order.
+class DSPSelectCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $c, $d))>;
+
+// Same source pattern as DSPSelectCCPat, but Pick receives $d before $c. The
+// instantiations use this to select a condition code through the compare
+// for its complement (e.g. SETNE through the EQ compare).
+class DSPSelectCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $d, $c))>;
+
+// SETCC instantiations.
+//  v2i16: PH compares with SETEQ/SETLT/SETLE directly; SETNE/SETGE/SETGT go
+//         through the Inv class using the EQ/LT/LE compare respectively.
+//  v4i8:  QB compares with SETEQ/SETULT/SETULE directly; SETNE/SETUGE/SETUGT
+//         go through the Inv class using the EQ/LT/LE compare respectively.
+// No pattern is provided here for unsigned codes on v2i16 or signed ordering
+// codes on v4i8.
+def : DSPSetCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>;
+def : DSPSetCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>;
+def : DSPSetCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>;
+def : DSPSetCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>;
+def : DSPSetCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>;
+def : DSPSetCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>;
+def : DSPSetCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>;
+def : DSPSetCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>;
+def : DSPSetCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>;
+def : DSPSetCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>;
+def : DSPSetCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>;
+def : DSPSetCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
+
+// SELECT_CC instantiations; same condition-code coverage as the SETCC list.
+//  v2i16: SETEQ/SETLT/SETLE directly, SETNE/SETGE/SETGT through the Inv class.
+//  v4i8:  SETEQ/SETULT/SETULE directly, SETNE/SETUGE/SETUGT through the Inv
+//         class.
+def : DSPSelectCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>;
+def : DSPSelectCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>;
+def : DSPSelectCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>;
+def : DSPSelectCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>;
+def : DSPSelectCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>;
+def : DSPSelectCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>;
+def : DSPSelectCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>;
+def : DSPSelectCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>;
+def : DSPSelectCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>;
+def : DSPSelectCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>;
+def : DSPSelectCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>;
+def : DSPSelectCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
+
// Extr patterns.
class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
--- /dev/null
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s
+
+; <2 x i16> select on icmp eq: expects cmp.eq.ph (any registers) followed by
+; pick.ph taking the select operands as $6, $7.
+; CHECK: select_v2q15_eq_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v2q15_eq_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp eq <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp slt: expects cmp.lt.ph on $4, $5 followed by
+; pick.ph taking the select operands as $6, $7.
+; CHECK: select_v2q15_lt_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v2q15_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp slt <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp sle: expects cmp.le.ph on $4, $5 followed by
+; pick.ph taking the select operands as $6, $7.
+; CHECK: select_v2q15_le_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v2q15_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp sle <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp ne: expects cmp.eq.ph (any registers) with the
+; pick.ph data operands swapped ($7, $6).
+; CHECK: select_v2q15_ne_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v2q15_ne_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp ne <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp sgt: expects cmp.le.ph on $4, $5 with the pick.ph
+; data operands swapped ($7, $6).
+; CHECK: select_v2q15_gt_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v2q15_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp sgt <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp sge: expects cmp.lt.ph on $4, $5 with the pick.ph
+; data operands swapped ($7, $6).
+; CHECK: select_v2q15_ge_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v2q15_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp sge <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp eq: expects cmpu.eq.qb (any registers) followed by
+; pick.qb taking the select operands as $6, $7.
+; CHECK: select_v4ui8_eq_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v4ui8_eq_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp eq <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp ult: expects cmpu.lt.qb on $4, $5 followed by
+; pick.qb taking the select operands as $6, $7.
+; CHECK: select_v4ui8_lt_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v4ui8_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp ult <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp ule: expects cmpu.le.qb on $4, $5 followed by
+; pick.qb taking the select operands as $6, $7.
+; CHECK: select_v4ui8_le_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v4ui8_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp ule <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp ne: expects cmpu.eq.qb (any registers) with the
+; pick.qb data operands swapped ($7, $6).
+; CHECK: select_v4ui8_ne_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v4ui8_ne_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp ne <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp ugt: expects cmpu.le.qb on $4, $5 with the pick.qb
+; data operands swapped ($7, $6).
+; CHECK: select_v4ui8_gt_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v4ui8_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp ugt <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp uge: expects cmpu.lt.qb on $4, $5 with the pick.qb
+; data operands swapped ($7, $6).
+; CHECK: select_v4ui8_ge_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v4ui8_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp uge <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp ult (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: select_v2ui16_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp ult <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp ule (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: select_v2ui16_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp ule <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp ugt (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: select_v2ui16_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp ugt <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <2 x i16> select on icmp uge (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: select_v2ui16_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %tval = bitcast i32 %a2.coerce to <2 x i16>
+  %fval = bitcast i32 %a3.coerce to <2 x i16>
+  %cond = icmp uge <2 x i16> %lhs, %rhs
+  %sel = select <2 x i1> %cond, <2 x i16> %tval, <2 x i16> %fval
+  %res = bitcast <2 x i16> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp slt (signed): no cmp or pick may appear after this
+; label, up to the next matched line.
+; CHECK: select_v4i8_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp slt <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp sle (signed): no cmp or pick may appear after this
+; label, up to the next matched line.
+; CHECK: select_v4i8_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp sle <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp sgt (signed): no cmp or pick may appear after this
+; label, up to the next matched line.
+; CHECK: select_v4i8_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp sgt <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; <4 x i8> select on icmp sge (signed): no cmp or pick may appear after this
+; label, up to the next matched line.
+; CHECK: select_v4i8_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %tval = bitcast i32 %a2.coerce to <4 x i8>
+  %fval = bitcast i32 %a3.coerce to <4 x i8>
+  %cond = icmp sge <4 x i8> %lhs, %rhs
+  %sel = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %fval
+  %res = bitcast <4 x i8> %sel to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp eq: expects cmp.eq.ph (any registers) followed by
+; pick.ph; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v2q15_eq_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_eq_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp eq <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp slt: expects cmp.lt.ph on $4, $5 followed by
+; pick.ph; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v2q15_lt_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp slt <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp sle: expects cmp.le.ph on $4, $5 followed by
+; pick.ph; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v2q15_le_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp sle <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp ne: expects cmp.eq.ph (any registers) followed by
+; pick.ph; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v2q15_ne_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_ne_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp ne <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp sgt: expects cmp.le.ph on $4, $5 followed by
+; pick.ph; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v2q15_gt_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp sgt <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp sge: expects cmp.lt.ph on $4, $5 followed by
+; pick.ph; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v2q15_ge_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp sge <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp eq: expects cmpu.eq.qb (any registers) followed by
+; pick.qb; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v4ui8_eq_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_eq_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp eq <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp ult: expects cmpu.lt.qb on $4, $5 followed by
+; pick.qb; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v4ui8_lt_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp ult <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp ule: expects cmpu.le.qb on $4, $5 followed by
+; pick.qb; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v4ui8_le_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp ule <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp ne: expects cmpu.eq.qb (any registers) followed by
+; pick.qb; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v4ui8_ne_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_ne_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp ne <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp ugt: expects cmpu.le.qb on $4, $5 followed by
+; pick.qb; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v4ui8_gt_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp ugt <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp uge: expects cmpu.lt.qb on $4, $5 followed by
+; pick.qb; the pick data operands are not pinned to specific registers.
+; CHECK: compare_v4ui8_ge_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp uge <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp ult (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: compare_v2ui16_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp ult <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp ule (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: compare_v2ui16_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp ule <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp ugt (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: compare_v2ui16_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp ugt <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <2 x i16> icmp uge (unsigned): no cmp or pick may appear after
+; this label, up to the next matched line.
+; CHECK: compare_v2ui16_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <2 x i16>
+  %rhs = bitcast i32 %a1.coerce to <2 x i16>
+  %cond = icmp uge <2 x i16> %lhs, %rhs
+  %mask = sext <2 x i1> %cond to <2 x i16>
+  %res = bitcast <2 x i16> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp slt (signed): no cmp or pick may appear after this
+; label, up to the next matched line.
+; CHECK: compare_v4i8_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp slt <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp sle (signed): no cmp or pick may appear after this
+; label, up to the next matched line.
+; CHECK: compare_v4i8_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp sle <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp sgt (signed): no cmp or pick may appear after this
+; label, up to the next matched line.
+; CHECK: compare_v4i8_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp sgt <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}
+
+; sext of a <4 x i8> icmp sge (signed): no cmp or pick may appear anywhere
+; after this label in the visible part of the test.
+; CHECK: compare_v4i8_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %lhs = bitcast i32 %a0.coerce to <4 x i8>
+  %rhs = bitcast i32 %a1.coerce to <4 x i8>
+  %cond = icmp sge <4 x i8> %lhs, %rhs
+  %mask = sext <4 x i1> %cond to <4 x i8>
+  %res = bitcast <4 x i8> %mask to i32
+  %agg = insertvalue { i32 } undef, i32 %res, 0
+  ret { i32 } %agg
+}