From: Evan Cheng Date: Tue, 3 Nov 2009 05:52:54 +0000 (+0000) Subject: Fix PR5367. QPR_8 is the super regclass of DPR_8 and SPR_8. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=ba908640b3e0c1218748776e244d4b7234451155;p=oota-llvm.git Fix PR5367. QPR_8 is the super regclass of DPR_8 and SPR_8. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85871 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index fa445bf968e..c1c531c9376 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -257,7 +257,6 @@ const TargetRegisterClass * ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned SubIdx) const { -#if 0 switch (SubIdx) { default: return 0; case 1: @@ -266,19 +265,27 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, case 4: // S sub-registers. if (A->getSize() == 8) { + if (B == &ARM::SPR_8RegClass) + return &ARM::DPR_8RegClass; + assert(B == &ARM::SPRRegClass && "Expecting SPR register class!"); if (A == &ARM::DPR_8RegClass) return A; return &ARM::DPR_VFP2RegClass; } assert(A->getSize() == 16 && "Expecting a Q register class!"); + if (B == &ARM::SPR_8RegClass) + return &ARM::QPR_8RegClass; return &ARM::QPR_VFP2RegClass; case 5: case 6: // D sub-registers. + if (B == &ARM::DPR_VFP2RegClass) + return &ARM::QPR_VFP2RegClass; + if (B == &ARM::DPR_8RegClass) + return &ARM::QPR_8RegClass; return A; } -#endif return 0; } diff --git a/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll new file mode 100644 index 00000000000..465368b0ba8 --- /dev/null +++ b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll @@ -0,0 +1,63 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 -enable-unsafe-fp-math < %s +; PR5367 + +define arm_aapcs_vfpcc void @_Z27Benchmark_SceDualQuaternionPvm(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb + +bb: ; preds = %bb, %entry + %0 = load float* undef, align 4 ; [#uses=1] + %1 = load float* null, align 4 ; [#uses=1] + %2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2] + %4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1] + %5 = insertelement <4 x float> %4, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=4] + %6 = fsub <4 x float> zeroinitializer, %3 ; <<4 x float>> [#uses=1] + %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=2] + %8 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] + %9 = shufflevector <2 x float> %8, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=2] + %10 = fmul <4 x float> %7, %9 ; <<4 x float>> [#uses=1] + %11 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %12 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=2] + %13 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %14 = fmul <4 x float> %11, %13 ; <<4 x float>> [#uses=1] + %15 = fadd <4 x float> %10, %14 ; <<4 x float>> [#uses=1] + %16 = shufflevector <2 x float> %12, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1] + %17 = fadd <4 x float> %15, zeroinitializer ; <<4 x float>> [#uses=1] + %18 = shufflevector <4 x float> %17, <4 x float> zeroinitializer, <4 x i32> ; <<4 x float>> [#uses=1] + %19 = fmul <4 x float> %7, %16 ; <<4 x float>> [#uses=1] + %20 = fadd <4 x float> %19, zeroinitializer ; <<4 x float>> [#uses=1] + %21 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1] + %22 = shufflevector <4 x float> %21, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %23 = fmul <4 x float> %22, %9 ; <<4 x float>> [#uses=1] + %24 = fadd <4 x float> %20, %23 ; <<4 x float>> [#uses=1] + %25 = shufflevector <4 x float> %18, <4 x float> %24, <4 x i32> ; <<4 x float>> [#uses=1] + %26 = shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1] + %27 = fmul <4 x float> %26, ; <<4 x float>> [#uses=1] + %28 = fsub <4 x float> , %5 ; <<4 x float>> [#uses=1] + %29 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %30 = fmul <4 x float> zeroinitializer, %29 ; <<4 x float>> [#uses=1] + %31 = fmul <4 x float> %30, ; <<4 x float>> [#uses=1] + %32 = shufflevector <4 x float> %27, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %33 = shufflevector <4 x float> %28, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] + %34 = shufflevector <2 x float> %33, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1] + %35 = fmul <4 x float> %32, %34 ; <<4 x float>> [#uses=1] + %36 = fadd <4 x float> %35, zeroinitializer ; <<4 x float>> [#uses=1] + %37 = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1] + %38 = shufflevector <4 x float> %37, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %39 = fmul <4 x float> zeroinitializer, %38 ; <<4 x float>> [#uses=1] + %40 = fadd <4 x float> %36, %39 ; <<4 x float>> [#uses=1] + %41 = fadd <4 x float> %40, zeroinitializer ; <<4 x float>> [#uses=1] + %42 = shufflevector <4 x float> undef, <4 x float> %41, <4 x i32> ; <<4 x float>> [#uses=1] + %43 = fmul <4 x float> %42, %31 ; <<4 x float>> [#uses=1] + store float undef, float* undef, align 4 + store float 0.000000e+00, float* null, align 4 + %44 = extractelement <4 x float> %43, i32 1 ; [#uses=1] + store float %44, float* undef, align 4 + br i1 undef, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll index c6967cc2d8c..c260b973b5a 100644 --- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll +++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 2 +; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll index 8f646946f42..76474746ee4 100644 --- a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll +++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | not grep fcpys ; rdar://7117307 -; XFAIL: * %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } %struct.List = type { %struct.List*, %struct.Patient*, %struct.List* } diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index ece53e8cb79..4320328e9c1 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4 -; XFAIL: * define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind { entry: