[AArch64] Fix the failure to select mul of v1i64/v2i64 types.

author Hao Liu <Hao.Liu@arm.com>

Mon, 30 Dec 2013 01:38:41 +0000 (01:38 +0000)

committer Hao Liu <Hao.Liu@arm.com>

Mon, 30 Dec 2013 01:38:41 +0000 (01:38 +0000)
author Hao Liu <Hao.Liu@arm.com>
Mon, 30 Dec 2013 01:38:41 +0000 (01:38 +0000)
committer Hao Liu <Hao.Liu@arm.com>
Mon, 30 Dec 2013 01:38:41 +0000 (01:38 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 757eb0b248e01dbfb632b9ab979ad5e40311da4c..548e76e52189ff00aa84f9ef7dd4482b41222607 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -385,6 +385,18 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
            setTruncStoreAction(VT, VT1, Expand);
        }
      }
+
+    // There is no v1i64/v2i64 multiply; expand v1i64/v2i64 to GPR i64 multiply.
+    // FIXME: For a v2i64 multiply, we copy VPR to GPR and do 2 i64 multiplies,
+    // and then copy back to VPR. This might be optimized with the following 3
+    // NEON instructions:
+    //        pmull  v2.1q, v0.1d, v1.1d
+    //        pmull2 v3.1q, v0.2d, v1.2d
+    //        ins    v2.d[1], v3.d[0]
+    // As we currently can't verify the correctness of this assumption, such an
+    // optimization is left for the future.
+    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    }
  }
  
diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll

index 6c4f20b0a0cf4d151bdb10adb12b2ab720324d26..09ba072f9f434d0feb200901d0d9bdce4191d51f 100644 (file)
--- a/test/CodeGen/AArch64/neon-mul-div.ll
+++ b/test/CodeGen/AArch64/neon-mul-div.ll
@@ -37,6 +37,21 @@ define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
         ret <4 x i32> %tmp3
  }
  
+define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
+;CHECK-LABEL: mul1xi64:
+;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+  %tmp3 = mul <1 x i64> %A, %B;
+  ret <1 x i64> %tmp3
+}
+
+define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK-LABEL: mul2xi64:
+;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+  %tmp3 = mul <2 x i64> %A, %B;
+  ret <2 x i64> %tmp3
+}
+
   define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
  ;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
         %tmp3 = fmul <2 x float> %A, %B;
author	Hao Liu <Hao.Liu@arm.com>
	Mon, 30 Dec 2013 01:38:41 +0000 (01:38 +0000)
committer	Hao Liu <Hao.Liu@arm.com>
	Mon, 30 Dec 2013 01:38:41 +0000 (01:38 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
test/CodeGen/AArch64/neon-mul-div.ll		patch \| blob \| history