[ARM64] Fix an issue where we were always assuming a copy was coming from a D subregister.

author Chad Rosier <mcrosier@codeaurora.org>

Mon, 28 Apr 2014 16:21:50 +0000 (16:21 +0000)

committer Chad Rosier <mcrosier@codeaurora.org>

Mon, 28 Apr 2014 16:21:50 +0000 (16:21 +0000)
author Chad Rosier <mcrosier@codeaurora.org>
Mon, 28 Apr 2014 16:21:50 +0000 (16:21 +0000)
committer Chad Rosier <mcrosier@codeaurora.org>
Mon, 28 Apr 2014 16:21:50 +0000 (16:21 +0000)
diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp

index 851572485dd6e44dd83127f5ee4a1d8899b034a6..87eec8f6160b8be9ccf0ce0af87a74c8f0da1bca 100644 (file)
--- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
+++ b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
@@ -90,7 +90,7 @@ public:
    virtual bool runOnMachineFunction(MachineFunction &F);
  
    const char *getPassName() const {
-    return "AdvSIMD scalar operation optimization";
+    return "AdvSIMD Scalar Operation Optimization";
    }
  
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -117,7 +117,7 @@ static bool isFPR64(unsigned Reg, unsigned SubReg,
              SubReg == 0) ||
             (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) &&
              SubReg == ARM64::dsub);
-  // Physical register references just check the regist class directly.
+  // Physical register references just check the register class directly.
    return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
           (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub);
  }
@@ -148,7 +148,7 @@ static unsigned getSrcFromCopy(const MachineInstr *MI,
                  MRI) &&
          isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(),
                  MRI)) {
-      SubReg = ARM64::dsub;
+      SubReg = MI->getOperand(1).getSubReg();
        return MI->getOperand(1).getReg();
      }
    }
diff --git a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll

index 6397ac54d3e614e4f9560ff9398f92e713fb7d8c..3e75eed4cd5db2c58c828ff5a1bf48d9615a1d5e 100644 (file)
--- a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll
+++ b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll
@@ -1,10 +1,15 @@
  ; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s
-;
+; RUN: llc < %s -march=arm64 -arm64-neon-syntax=generic -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC
+
  define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
  ; CHECK-LABEL: bar:
  ; CHECK: add.2d        v[[REG:[0-9]+]], v0, v1
  ; CHECK: add   d[[REG3:[0-9]+]], d[[REG]], d1
  ; CHECK: sub   d[[REG2:[0-9]+]], d[[REG]], d1
+; GENERIC-LABEL: bar:
+; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
+; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
+; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
    %add = add <2 x i64> %a, %b
    %vgetq_lane = extractelement <2 x i64> %add, i32 0
    %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
@@ -19,6 +24,9 @@ define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
  ; CHECK-LABEL: subdd_su64:
  ; CHECK: sub d0, d1, d0
  ; CHECK-NEXT: ret
+; GENERIC-LABEL: subdd_su64:
+; GENERIC: sub d0, d1, d0
+; GENERIC-NEXT: ret
    %vecext = extractelement <2 x i64> %a, i32 0
    %vecext1 = extractelement <2 x i64> %b, i32 0
    %sub.i = sub nsw i64 %vecext1, %vecext
@@ -30,9 +38,30 @@ define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
  ; CHECK-LABEL: vaddd_su64:
  ; CHECK: add d0, d1, d0
  ; CHECK-NEXT: ret
+; GENERIC-LABEL: vaddd_su64:
+; GENERIC: add d0, d1, d0
+; GENERIC-NEXT: ret
    %vecext = extractelement <2 x i64> %a, i32 0
    %vecext1 = extractelement <2 x i64> %b, i32 0
    %add.i = add nsw i64 %vecext1, %vecext
    %retval = bitcast i64 %add.i to double
    ret double %retval
  }
+
+; sub MI doesn't access dsub register.
+define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: add_sub_su64:
+; CHECK: add d0, d1, d0
+; CHECK: sub d0, {{d[0-9]+}}, d0
+; CHECK-NEXT: ret
+; GENERIC-LABEL: add_sub_su64:
+; GENERIC: add d0, d1, d0
+; GENERIC: sub d0, {{d[0-9]+}}, d0
+; GENERIC-NEXT: ret
+  %vecext = extractelement <2 x i64> %a, i32 0
+  %vecext1 = extractelement <2 x i64> %b, i32 0
+  %add.i = add i64 %vecext1, %vecext
+  %sub.i = sub i64 0, %add.i
+  %retval = bitcast i64 %sub.i to double
+  ret double %retval
+}
author	Chad Rosier <mcrosier@codeaurora.org>
	Mon, 28 Apr 2014 16:21:50 +0000 (16:21 +0000)
committer	Chad Rosier <mcrosier@codeaurora.org>
	Mon, 28 Apr 2014 16:21:50 +0000 (16:21 +0000)
lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp		patch | blob | history
test/CodeGen/ARM64/AdvSIMD-Scalar.ll		patch | blob | history