Fix PR5367. QPR_8 is the super regclass of DPR_8 and SPR_8.

author Evan Cheng <evan.cheng@apple.com>

Tue, 3 Nov 2009 05:52:54 +0000 (05:52 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Tue, 3 Nov 2009 05:52:54 +0000 (05:52 +0000)
author Evan Cheng <evan.cheng@apple.com>
Tue, 3 Nov 2009 05:52:54 +0000 (05:52 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Tue, 3 Nov 2009 05:52:54 +0000 (05:52 +0000)
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp

index fa445bf968ec6d50e3241b7cd00d192142bdbe1f..c1c531c9376bb7e56c0f01b8f3b376fffd61af13 100644 (file)
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -257,7 +257,6 @@ const TargetRegisterClass *
  ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                                const TargetRegisterClass *B,
                                                unsigned SubIdx) const {
-#if 0
    switch (SubIdx) {
    default: return 0;
    case 1:
@@ -266,19 +265,27 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
    case 4:
      // S sub-registers.
      if (A->getSize() == 8) {
+      if (B == &ARM::SPR_8RegClass)
+        return &ARM::DPR_8RegClass;
+      assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
        if (A == &ARM::DPR_8RegClass)
          return A;
        return &ARM::DPR_VFP2RegClass;
      }
  
      assert(A->getSize() == 16 && "Expecting a Q register class!");
+    if (B == &ARM::SPR_8RegClass)
+      return &ARM::QPR_8RegClass;
      return &ARM::QPR_VFP2RegClass;
    case 5:
    case 6:
      // D sub-registers.
+    if (B == &ARM::DPR_VFP2RegClass)
+      return &ARM::QPR_VFP2RegClass;
+    if (B == &ARM::DPR_8RegClass)
+      return &ARM::QPR_8RegClass;
      return A;
    }
-#endif
    return 0;
  }
  
diff --git a/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll

new file mode 100644 (file)

index 0000000..465368b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 -enable-unsafe-fp-math < %s
+; PR5367
+
+define arm_aapcs_vfpcc void @_Z27Benchmark_SceDualQuaternionPvm(i8* nocapture %pBuffer, i32 %numItems) nounwind {
+entry:
+  br i1 undef, label %return, label %bb
+
+bb:                                               ; preds = %bb, %entry
+  %0 = load float* undef, align 4                 ; <float> [#uses=1]
+  %1 = load float* null, align 4                  ; <float> [#uses=1]
+  %2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1]
+  %3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2]
+  %4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
+  %5 = insertelement <4 x float> %4, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=4]
+  %6 = fsub <4 x float> zeroinitializer, %3       ; <<4 x float>> [#uses=1]
+  %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=2]
+  %8 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; <<2 x float>> [#uses=1]
+  %9 = shufflevector <2 x float> %8, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=2]
+  %10 = fmul <4 x float> %7, %9                   ; <<4 x float>> [#uses=1]
+  %11 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %12 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=2]
+  %13 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %14 = fmul <4 x float> %11, %13                 ; <<4 x float>> [#uses=1]
+  %15 = fadd <4 x float> %10, %14                 ; <<4 x float>> [#uses=1]
+  %16 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+  %17 = fadd <4 x float> %15, zeroinitializer     ; <<4 x float>> [#uses=1]
+  %18 = shufflevector <4 x float> %17, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+  %19 = fmul <4 x float> %7, %16                  ; <<4 x float>> [#uses=1]
+  %20 = fadd <4 x float> %19, zeroinitializer     ; <<4 x float>> [#uses=1]
+  %21 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+  %22 = shufflevector <4 x float> %21, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %23 = fmul <4 x float> %22, %9                  ; <<4 x float>> [#uses=1]
+  %24 = fadd <4 x float> %20, %23                 ; <<4 x float>> [#uses=1]
+  %25 = shufflevector <4 x float> %18, <4 x float> %24, <4 x i32> <i32 0, i32 1, i32 6, i32 undef> ; <<4 x float>> [#uses=1]
+  %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; <<4 x float>> [#uses=1]
+  %27 = fmul <4 x float> %26, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
+  %28 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %5 ; <<4 x float>> [#uses=1]
+  %29 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
+  %30 = fmul <4 x float> zeroinitializer, %29     ; <<4 x float>> [#uses=1]
+  %31 = fmul <4 x float> %30, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
+  %32 = shufflevector <4 x float> %27, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %33 = shufflevector <4 x float> %28, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
+  %34 = shufflevector <2 x float> %33, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+  %35 = fmul <4 x float> %32, %34                 ; <<4 x float>> [#uses=1]
+  %36 = fadd <4 x float> %35, zeroinitializer     ; <<4 x float>> [#uses=1]
+  %37 = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
+  %38 = shufflevector <4 x float> %37, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+  %39 = fmul <4 x float> zeroinitializer, %38     ; <<4 x float>> [#uses=1]
+  %40 = fadd <4 x float> %36, %39                 ; <<4 x float>> [#uses=1]
+  %41 = fadd <4 x float> %40, zeroinitializer     ; <<4 x float>> [#uses=1]
+  %42 = shufflevector <4 x float> undef, <4 x float> %41, <4 x i32> <i32 0, i32 1, i32 6, i32 3> ; <<4 x float>> [#uses=1]
+  %43 = fmul <4 x float> %42, %31                 ; <<4 x float>> [#uses=1]
+  store float undef, float* undef, align 4
+  store float 0.000000e+00, float* null, align 4
+  %44 = extractelement <4 x float> %43, i32 1     ; <float> [#uses=1]
+  store float %44, float* undef, align 4
+  br i1 undef, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll

index c6967cc2d8c4df309be910bab968ed402c78a8d7..c260b973b5a0b339d1e607fd8bf6a5c2b3f1d3b4 100644 (file)
--- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 2
+; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 1
  
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
  target triple = "armv7-eabi"
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll

index 8f646946f42a1daf99633b953f5082b46d0e186f..76474746ee4870fd4f6263b4b121b8b34c5ba36e 100644 (file)
--- a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -1,7 +1,6 @@
  ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp 
  ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | not grep fcpys
  ; rdar://7117307
-; XFAIL: *
  
         %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
         %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll

index ece53e8cb79f512d5a3c83837810191c165ffda6..4320328e9c102de56ef286b0575292c885218bc6 100644 (file)
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,5 +1,4 @@
  ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4
-; XFAIL: *
  
  define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
  entry:
author	Evan Cheng <evan.cheng@apple.com>
	Tue, 3 Nov 2009 05:52:54 +0000 (05:52 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Tue, 3 Nov 2009 05:52:54 +0000 (05:52 +0000)
lib/Target/ARM/ARMBaseRegisterInfo.cpp		patch \| blob \| history
test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/2009-11-01-NeonMoves.ll		patch \| blob \| history
test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll		patch \| blob \| history
test/CodeGen/Thumb2/cross-rc-coalescing-2.ll		patch \| blob \| history