[AArch64] Implement 128-bit register copy with NEON.

author Kevin Qin <Kevin.Qin@arm.com>

Tue, 26 Nov 2013 02:33:42 +0000 (02:33 +0000)

committer Kevin Qin <Kevin.Qin@arm.com>

Tue, 26 Nov 2013 02:33:42 +0000 (02:33 +0000)
author Kevin Qin <Kevin.Qin@arm.com>
Tue, 26 Nov 2013 02:33:42 +0000 (02:33 +0000)
committer Kevin Qin <Kevin.Qin@arm.com>
Tue, 26 Nov 2013 02:33:42 +0000 (02:33 +0000)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp

index 706d0b05e06ca5ae1e9fef92a3b1ffc8c841e00f..180110a84dd663adce0f91f12486f95c33b1ef72 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -114,23 +114,25 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    } else if (AArch64::FPR128RegClass.contains(DestReg)) {
      assert(AArch64::FPR128RegClass.contains(SrcReg));
  
-    // FIXME: there's no good way to do this, at least without NEON:
-    //   + There's no single move instruction for q-registers
-    //   + We can't create a spill slot and use normal STR/LDR because stack
-    //     allocation has already happened
-    //   + We can't go via X-registers with FMOV because register allocation has
-    //     already happened.
-    // This may not be efficient, but at least it works.
-    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
-      .addReg(SrcReg)
-      .addReg(AArch64::XSP)
-      .addImm(0x1ff & -16);
-
-    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
-      .addReg(AArch64::XSP, RegState::Define)
-      .addReg(AArch64::XSP)
-      .addImm(16);
-    return;
+    // If NEON is enabled, we use ORR to implement this copy.
+    // If NEON isn't available, emit STR and LDR via the stack to handle this.
+    if(getSubTarget().hasNEON()) {
+      BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
+        .addReg(SrcReg)
+        .addReg(SrcReg);
+      return;
+    } else {
+      BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+        .addReg(SrcReg)
+        .addReg(AArch64::XSP)
+        .addImm(0x1ff & -16);
+
+      BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+        .addReg(AArch64::XSP, RegState::Define)
+        .addReg(AArch64::XSP)
+        .addImm(16);
+      return;
+    }
    } else {
      llvm_unreachable("Unknown register class in copyPhysReg");
    }
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll

index 4db4771cf13a1cbd0c4ac22d8f4e6a39a8d1ccb1..6ad93d01f9f806b80732762dab243c18d11fa16e 100644 (file)
--- a/test/CodeGen/AArch64/neon-perm.ll
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -238,6 +238,7 @@ entry:
  define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
  ; CHECK: test_vuzp2q_s64:
  ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
  entry:
    %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    ret <2 x i64> %shuffle.i
@@ -294,6 +295,7 @@ entry:
  define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
  ; CHECK: test_vuzp2q_u64:
  ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
  entry:
    %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    ret <2 x i64> %shuffle.i
@@ -318,6 +320,7 @@ entry:
  define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
  ; CHECK: test_vuzp2q_f64:
  ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
  entry:
    %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
    ret <2 x double> %shuffle.i
author	Kevin Qin <Kevin.Qin@arm.com>
	Tue, 26 Nov 2013 02:33:42 +0000 (02:33 +0000)
committer	Kevin Qin <Kevin.Qin@arm.com>
	Tue, 26 Nov 2013 02:33:42 +0000 (02:33 +0000)
lib/Target/AArch64/AArch64InstrInfo.cpp		patch \| blob \| history
test/CodeGen/AArch64/neon-perm.ll		patch \| blob \| history