From c8a1fa77a73e7c885035421712ceba951f9024cb Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel.sanders@imgtec.com>
Date: Fri, 27 Sep 2013 11:48:57 +0000
Subject: [PATCH] [mips][msa] Added support for matching splati from normal IR
 (i.e. not intrinsics)

Updated some of the vshf since they (correctly) emit splati's now


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191511 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../Mips/InstPrinter/MipsInstPrinter.cpp      |  9 +++
 lib/Target/Mips/InstPrinter/MipsInstPrinter.h |  1 +
 lib/Target/Mips/MSA.txt                       |  4 +
 lib/Target/Mips/MipsISelDAGToDAG.cpp          | 10 +++
 lib/Target/Mips/MipsISelDAGToDAG.h            |  4 +
 lib/Target/Mips/MipsMSAInstrInfo.td           | 52 +++++++++++--
 lib/Target/Mips/MipsSEISelDAGToDAG.cpp        | 10 +++
 lib/Target/Mips/MipsSEISelDAGToDAG.h          |  4 +
 lib/Target/Mips/MipsSEISelLowering.cpp        |  7 ++
 test/CodeGen/Mips/msa/shuffle.ll              | 76 ++++++++++++++++---
 10 files changed, 157 insertions(+), 20 deletions(-)

diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 2612e3ba1ce..78845898997 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -184,6 +184,15 @@ void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
     printOperand(MI, opNum, O);
 }
 
+void MipsInstPrinter::printUnsignedImm8(const MCInst *MI, int opNum,
+                                        raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << (unsigned short int)(unsigned char)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
 void MipsInstPrinter::
 printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
   // Load/Store memory operands -- imm($reg)
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 11590ad8241..f75ae249c3e 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -93,6 +93,7 @@ public:
 private:
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+  void printUnsignedImm8(const MCInst *MI, int opNum, raw_ostream &O);
   void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
   void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
   void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/Mips/MSA.txt b/lib/Target/Mips/MSA.txt
index 398c7afd188..a4f320ac007 100644
--- a/lib/Target/Mips/MSA.txt
+++ b/lib/Target/Mips/MSA.txt
@@ -28,3 +28,7 @@ ilvl.d, pckev.d:
 ilvr.d, ilvod.d, pckod.d:
         It is not possible to emit ilvr.d, or pckod.d since ilvod.d covers the
         same shuffle. ilvod.d will be emitted instead.
+
+splati.w:
+        It is not possible to emit splati.w since shf.w covers the same cases.
+        shf.w will be emitted instead.
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 7766fea4f5d..0d239fd46db 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -104,6 +104,16 @@ bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
   return false;
 }
 
+bool MipsDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+  llvm_unreachable("Unimplemented function.");
+  return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+  llvm_unreachable("Unimplemented function.");
+  return false;
+}
+
 bool MipsDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const {
   llvm_unreachable("Unimplemented function.");
   return false;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 208701e346c..e5695c41c43 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -78,6 +78,10 @@ private:
 
   /// \brief Select constant vector splats.
   virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+  /// \brief Select constant vector splats whose value fits in a uimm1.
+  virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+  /// \brief Select constant vector splats whose value fits in a uimm2.
+  virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value fits in a uimm3.
   virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value fits in a uimm4.
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 76fcdcc8e6b..7ed2f841692 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -77,6 +77,14 @@ def simm5 : Operand<i32>;
 
 def simm10 : Operand<i32>;
 
+def vsplat_uimm1 : Operand<vAny> {
+  let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm2 : Operand<vAny> {
+  let PrintMethod = "printUnsignedImm8";
+}
+
 def vsplat_uimm3 : Operand<vAny> {
   let PrintMethod = "printUnsignedImm";
 }
@@ -222,6 +230,10 @@ def vsplati8_uimm3 : SplatComplexPattern<vsplat_uimm3, v16i8, 1,
                                          "selectVSplatUimm3",
                                          [build_vector, bitconvert]>;
 
+def vsplati8_uimm4 : SplatComplexPattern<vsplat_uimm4, v16i8, 1,
+                                         "selectVSplatUimm4",
+                                         [build_vector, bitconvert]>;
+
 def vsplati8_uimm5 : SplatComplexPattern<vsplat_uimm5, v16i8, 1,
                                          "selectVSplatUimm5",
                                          [build_vector, bitconvert]>;
@@ -234,6 +246,10 @@ def vsplati8_simm5 : SplatComplexPattern<vsplat_simm5, v16i8, 1,
                                          "selectVSplatSimm5",
                                          [build_vector, bitconvert]>;
 
+def vsplati16_uimm3 : SplatComplexPattern<vsplat_uimm3, v8i16, 1,
+                                          "selectVSplatUimm3",
+                                          [build_vector, bitconvert]>;
+
 def vsplati16_uimm4 : SplatComplexPattern<vsplat_uimm4, v8i16, 1,
                                           "selectVSplatUimm4",
                                           [build_vector, bitconvert]>;
@@ -246,6 +262,10 @@ def vsplati16_simm5 : SplatComplexPattern<vsplat_simm5, v8i16, 1,
                                           "selectVSplatSimm5",
                                           [build_vector, bitconvert]>;
 
+def vsplati32_uimm2 : SplatComplexPattern<vsplat_uimm2, v4i32, 1,
+                                          "selectVSplatUimm2",
+                                          [build_vector, bitconvert]>;
+
 def vsplati32_uimm5 : SplatComplexPattern<vsplat_uimm5, v4i32, 1,
                                           "selectVSplatUimm5",
                                           [build_vector, bitconvert]>;
@@ -254,6 +274,10 @@ def vsplati32_simm5 : SplatComplexPattern<vsplat_simm5, v4i32, 1,
                                           "selectVSplatSimm5",
                                           [build_vector, bitconvert]>;
 
+def vsplati64_uimm1 : SplatComplexPattern<vsplat_uimm1, v2i64, 1,
+                                          "selectVSplatUimm1",
+                                          [build_vector, bitconvert]>;
+
 def vsplati64_uimm5 : SplatComplexPattern<vsplat_uimm5, v2i64, 1,
                                           "selectVSplatUimm5",
                                           [build_vector, bitconvert]>;
@@ -1235,6 +1259,18 @@ class MSA_VEC_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   InstrItinClass Itinerary = itin;
 }
 
+class MSA_ELM_SPLAT_DESC_BASE<string instr_asm, SplatComplexPattern SplatImm,
+                              RegisterClass RCWD,
+                              RegisterClass RCWS = RCWD,
+                              InstrItinClass itin = NoItinerary> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$u3);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$u3]");
+  list<dag> Pattern = [(set RCWD:$wd, (MipsVSHF SplatImm:$u3, RCWS:$ws,
+                                                RCWS:$ws))];
+  InstrItinClass Itinerary = itin;
+}
+
 class MSA_VEC_PSEUDO_BASE<SDPatternOperator OpNode, RegisterClass RCWD,
                           RegisterClass RCWS = RCWD,
                           RegisterClass RCWT = RCWD> :
@@ -2172,14 +2208,14 @@ class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, MSA128WOpnd,
 class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, MSA128DOpnd,
                                       MSA128DOpnd, GPR32Opnd>;
 
-class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b,
-                                          MSA128B>;
-class SPLATI_H_DESC : MSA_BIT_H_DESC_BASE<"splati.h", int_mips_splati_h,
-                                          MSA128H>;
-class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w,
-                                          MSA128W>;
-class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d,
-                                          MSA128D>;
+class SPLATI_B_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.b", vsplati8_uimm4,
+                                              MSA128B>;
+class SPLATI_H_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.h", vsplati16_uimm3,
+                                              MSA128H>;
+class SPLATI_W_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.w", vsplati32_uimm2,
+                                              MSA128W>;
+class SPLATI_D_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.d", vsplati64_uimm1,
+                                              MSA128D>;
 
 class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", sra, MSA128BOpnd>;
 class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128HOpnd>;
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 8a9481c7dee..6277b6bf34e 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -450,6 +450,16 @@ selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
 }
 
 // Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 1);
+}
+
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 2);
+}
+
 bool MipsSEDAGToDAGISel::
 selectVSplatUimm3(SDValue N, SDValue &Imm) const {
   return selectVSplatCommon(N, Imm, false, 3);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index fe0da12aa32..759d3afc373 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -63,6 +63,10 @@ private:
   /// \brief Select constant vector splats whose value fits in a given integer.
   virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
                                   unsigned ImmBitSize) const;
+  /// \brief Select constant vector splats whose value fits in a uimm1.
+  virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+  /// \brief Select constant vector splats whose value fits in a uimm2.
+  virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value fits in a uimm3.
   virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value fits in a uimm4.
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 51682f031fc..84db5ceb273 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1470,6 +1470,13 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_slli_d:
     return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
                        Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+  case Intrinsic::mips_splati_b:
+  case Intrinsic::mips_splati_h:
+  case Intrinsic::mips_splati_w:
+  case Intrinsic::mips_splati_d:
+    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
+                       Op->getOperand(1));
   case Intrinsic::mips_sra_b:
   case Intrinsic::mips_sra_h:
   case Intrinsic::mips_sra_w:
diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll
index 9265d251dda..b5df0e0ad36 100644
--- a/test/CodeGen/Mips/msa/shuffle.ll
+++ b/test/CodeGen/Mips/msa/shuffle.ll
@@ -21,8 +21,7 @@ define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
   %1 = load <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
   store <16 x i8> %2, <16 x i8>* %c
   ; CHECK-DAG: st.b [[R3]], 0($4)
 
@@ -69,8 +68,7 @@ define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
   %1 = load <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
-  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
   store <16 x i8> %2, <16 x i8>* %c
   ; CHECK-DAG: st.b [[R3]], 0($4)
 
@@ -99,8 +97,7 @@ define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
   %1 = load <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
   store <8 x i16> %2, <8 x i16>* %c
   ; CHECK-DAG: st.h [[R3]], 0($4)
 
@@ -147,8 +144,7 @@ define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
   %1 = load <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
-  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
   store <8 x i16> %2, <8 x i16>* %c
   ; CHECK-DAG: st.h [[R3]], 0($4)
 
@@ -254,8 +250,7 @@ define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   %1 = load <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
   store <2 x i64> %2, <2 x i64>* %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
 
@@ -302,8 +297,7 @@ define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   %1 = load <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
-  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
   store <2 x i64> %2, <2 x i64>* %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
 
@@ -748,3 +742,61 @@ define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   ret void
   ; CHECK: .size pckod_v2i64_0
 }
+
+define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: splati_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
+                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v16i8_0
+}
+
+define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: splati_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v8i16_0
+}
+
+define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: splati_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  ; shf.w and splati.w are equivalent
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v4i32_0
+}
+
+define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: splati_v2i64_0:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v2i64_0
+}
-- 
2.34.1