[AArch64] Fix the pattern match failure for v1i8/v1i16/v1i32 types.

author Hao Liu <Hao.Liu@arm.com>

Mon, 16 Dec 2013 02:51:28 +0000 (02:51 +0000)

committer Hao Liu <Hao.Liu@arm.com>

Mon, 16 Dec 2013 02:51:28 +0000 (02:51 +0000)
author Hao Liu <Hao.Liu@arm.com>
Mon, 16 Dec 2013 02:51:28 +0000 (02:51 +0000)
committer Hao Liu <Hao.Liu@arm.com>
Mon, 16 Dec 2013 02:51:28 +0000 (02:51 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 626a912a67b5535a680342bceb85036bec1ae9ab..3266fc2dcc3b5444722695d8681801e8d4a033be 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4053,8 +4053,12 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
        // just use DUPLANE. We can only do this if the lane being extracted
        // is at a constant index, as the DUP from lane instructions only have
        // constant-index forms.
+      // FIXME: v1i8, v1i16 and v1i32 are currently legal vector types. If they
+      // stop being legal, the check that the source vector is at least 64
+      // bits wide is no longer needed.
        if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-          isa<ConstantSDNode>(Value->getOperand(1))) {
+          isa<ConstantSDNode>(Value->getOperand(1)) &&
+          Value->getOperand(0).getValueType().getSizeInBits() >= 64) {
            N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
                          Value->getOperand(0), Value->getOperand(1));
        } else
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td

index e9b1298a24c3bb2f6372e87069b518933469f83d..dd87b92df25e25cb1dde848115654eb1ee4b4cf1 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -3252,6 +3252,21 @@ def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
  def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
            (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
  
+// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
+// FIXME: v1i8, v1i16 and v1i32 are currently legal types. If they become
+// illegal, these patterns are no longer needed.
+def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
+def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
+def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
+
+def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
+          (LSFP8_STR $value, $addr, 0)>;
+def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
+          (LSFP16_STR $value, $addr, 0)>;
+def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
+          (LSFP32_STR $value, $addr, 0)>;
+
+
  // End of vector load/store multiple N-element structure(class SIMD lselem)
  
  // The followings are post-index vector load/store multiple N-element
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll

index 016ccb985758bad2048562c19a80d320c2c3ad25..c783c00c714400c7fa7d7c30b319d33df7ca67bf 100644 (file)
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -662,4 +662,45 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
  ; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}}
    %b = insertelement <2 x i64> undef, i64 %a, i32 0
    ret <2 x i64> %b
+}
+
+define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
+; CHECK-LABEL: testDUP.v1i8:
+; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+  %b = extractelement <1 x i8> %a, i32 0
+  %c = insertelement <8 x i8> undef, i8 %b, i32 0
+  %d = insertelement <8 x i8> %c, i8 %b, i32 1
+  %e = insertelement <8 x i8> %d, i8 %b, i32 2
+  %f = insertelement <8 x i8> %e, i8 %b, i32 3
+  %g = insertelement <8 x i8> %f, i8 %b, i32 4
+  %h = insertelement <8 x i8> %g, i8 %b, i32 5
+  %i = insertelement <8 x i8> %h, i8 %b, i32 6
+  %j = insertelement <8 x i8> %i, i8 %b, i32 7
+  ret <8 x i8> %j
+}
+
+define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
+; CHECK-LABEL: testDUP.v1i16:
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+  %b = extractelement <1 x i16> %a, i32 0
+  %c = insertelement <8 x i16> undef, i16 %b, i32 0
+  %d = insertelement <8 x i16> %c, i16 %b, i32 1
+  %e = insertelement <8 x i16> %d, i16 %b, i32 2
+  %f = insertelement <8 x i16> %e, i16 %b, i32 3
+  %g = insertelement <8 x i16> %f, i16 %b, i32 4
+  %h = insertelement <8 x i16> %g, i16 %b, i32 5
+  %i = insertelement <8 x i16> %h, i16 %b, i32 6
+  %j = insertelement <8 x i16> %i, i16 %b, i32 7
+  ret <8 x i16> %j
+}
+
+define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
+; CHECK-LABEL: testDUP.v1i32:
+; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+  %b = extractelement <1 x i32> %a, i32 0
+  %c = insertelement <4 x i32> undef, i32 %b, i32 0
+  %d = insertelement <4 x i32> %c, i32 %b, i32 1
+  %e = insertelement <4 x i32> %d, i32 %b, i32 2
+  %f = insertelement <4 x i32> %e, i32 %b, i32 3
+  ret <4 x i32> %f
  }
 \ No newline at end of file
author	Hao Liu <Hao.Liu@arm.com>
	Mon, 16 Dec 2013 02:51:28 +0000 (02:51 +0000)
committer	Hao Liu <Hao.Liu@arm.com>
	Mon, 16 Dec 2013 02:51:28 +0000 (02:51 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64InstrNEON.td		patch \| blob \| history
test/CodeGen/AArch64/neon-copy.ll		patch \| blob \| history