Improve ARM64 vector creation

author Louis Gerbarg <lgg@apple.com>

Thu, 17 Apr 2014 20:51:50 +0000 (20:51 +0000)

committer Louis Gerbarg <lgg@apple.com>

Thu, 17 Apr 2014 20:51:50 +0000 (20:51 +0000)
author Louis Gerbarg <lgg@apple.com>
Thu, 17 Apr 2014 20:51:50 +0000 (20:51 +0000)
committer Louis Gerbarg <lgg@apple.com>
Thu, 17 Apr 2014 20:51:50 +0000 (20:51 +0000)
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp

index 1bb2198c1e39742dc41c908e969669597d3a9845..d8ff81104acb4d6054dbaa2f48948ad8da0b2ac8 100644 (file)
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -4891,7 +4891,7 @@ FailedModImm:
      if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
        isConstant = false;
  
-    if (isa<ConstantSDNode>(V)) {
+    if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
        ++NumConstantLanes;
        if (!ConstantValue.getNode())
          ConstantValue = V;
@@ -4955,7 +4955,7 @@ FailedModImm:
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue V = Op.getOperand(i);
        SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
-      if (!isa<ConstantSDNode>(V)) {
+      if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
          // Note that type legalization likely mucked about with the VT of the
          // source operand, so we may have to convert it here before inserting.
          Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td

index 5bb57c5092b26a571ddabdcd5ae7a99b59d58e74..ce15789b548045ee451283b39dccbf45138197dc 100644 (file)
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -3472,6 +3472,9 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
  def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
  def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
  
+def : Pat<(v2f64 (ARM64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v4f32 (ARM64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>;
+
  // EDIT per word & halfword: 2s, 4h, 4s, & 8h
  defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
  def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
diff --git a/test/CodeGen/ARM64/vector-insertion.ll b/test/CodeGen/ARM64/vector-insertion.ll

new file mode 100644 (file)

index 0000000..c9ca749
--- /dev/null
+++ b/test/CodeGen/ARM64/vector-insertion.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=arm64 -mcpu=generic < %s | FileCheck %s
+
+define void @test0f(float* nocapture %x, float %a) #0 {
+entry:
+  %0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
+  %1 = bitcast float* %x to <4 x float>*
+  store <4 x float> %0, <4 x float>* %1, align 16
+  ret void
+
+  ; CHECK-LABEL: test0f
+  ; CHECK: movi.2d v[[TEMP:[0-9]+]], #0000000000000000
+  ; CHECK: ins.s v[[TEMP]][0], v{{[0-9]+}}[0]
+  ; CHECK: str q[[TEMP]], [x0]
+  ; CHECK: ret
+
+
+}
+
+
+define void @test1f(float* nocapture %x, float %a) #0 {
+entry:
+  %0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
+  %1 = bitcast float* %x to <4 x float>*
+  store <4 x float> %0, <4 x float>* %1, align 16
+  ret void
+
+  ; CHECK-LABEL: test1f
+  ; CHECK: fmov  s[[TEMP:[0-9]+]], #1.000000e+00
+  ; CHECK: dup.4s  v[[TEMP2:[0-9]+]], v[[TEMP]][0]
+  ; CHECK: ins.s v[[TEMP2]][0], v0[0]
+  ; CHECK: str q[[TEMP2]], [x0]
+  ; CHECK: ret
+}
author	Louis Gerbarg <lgg@apple.com>
	Thu, 17 Apr 2014 20:51:50 +0000 (20:51 +0000)
committer	Louis Gerbarg <lgg@apple.com>
	Thu, 17 Apr 2014 20:51:50 +0000 (20:51 +0000)
lib/Target/ARM64/ARM64ISelLowering.cpp		patch | blob | history
lib/Target/ARM64/ARM64InstrInfo.td		patch | blob | history
test/CodeGen/ARM64/vector-insertion.ll	[new file with mode: 0644]	patch | blob