From 19a3e9aabe2f55bc1badd0c519eed730d449a9b0 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Thu, 20 Mar 2014 05:36:59 +0000 Subject: [PATCH] [ARM]Fix an assertion failure in A15SDOptimizer about DPair reg class by treating DPair as QPR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204304 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/A15SDOptimizer.cpp | 12 +++++-- test/CodeGen/ARM/a15-SD-dep.ll | 59 +++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 8edf00a70a6..37bf9033a05 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -416,7 +416,8 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { if (!MO.isReg() || !MO.isUse()) continue; if (!usesRegClass(MO, &ARM::DPRRegClass) && - !usesRegClass(MO, &ARM::QPRRegClass)) + !usesRegClass(MO, &ARM::QPRRegClass) && + !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR continue; Defs.push_back(MO.getReg()); @@ -536,7 +537,10 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { InsertPt++; unsigned Out; - if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + // DPair has the same length as QPR and also has two DPRs as subreg. + // Treat DPair as QPR. 
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) || + MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) { unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, ARM::dsub_0, &ARM::DPRRegClass); unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, @@ -569,7 +573,9 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { default: llvm_unreachable("Unknown preferred lane!"); } - bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + // Treat DPair as QPR + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) || + usesRegClass(MI->getOperand(0), &ARM::DPairRegClass); Out = createImplicitDef(MBB, InsertPt, DL); Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll index 019ff6129b0..5e5ca4b873f 100644 --- a/test/CodeGen/ARM/a15-SD-dep.ll +++ b/test/CodeGen/ARM/a15-SD-dep.ll @@ -56,3 +56,62 @@ define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) { %i2 = fadd <4 x float> %i1, %i1 ret <4 x float> %i2 } + +; Test that DPair can be successfully passed as QPR. 
+; CHECK-ENABLED-LABEL: test_DPair1: +; CHECK-DISABLED-LABEL: test_DPair1: +define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) { +entry: + %0 = insertelement <4 x float> undef, float %x, i32 1 + %1 = insertelement <4 x float> %0, float %y, i32 0 + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1] + ; CHECK-DISABLED-NOT: vdup + switch i32 %vsout, label %sw.epilog [ + i32 1, label %sw.bb + i32 0, label %sw.bb6 + ] + +sw.bb: ; preds = %entry + %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 0 + br label %sw.bb6 + +sw.bb6: ; preds = %sw.bb, %entry + %sum.0 = phi <4 x float> [ %1, %entry ], [ %2, %sw.bb ] + %3 = extractelement <4 x float> %sum.0, i32 0 + %conv = fptoui float %3 to i8 + store i8 %conv, i8* %out, align 1 + ret void + +sw.epilog: ; preds = %entry + ret void +} + +; CHECK-ENABLED-LABEL: test_DPair2: +; CHECK-DISABLED-LABEL: test_DPair2: +define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) { +entry: + %0 = insertelement <4 x float> undef, float %x, i32 0 + ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-DISABLED-NOT: vdup + switch i32 %vsout, label %sw.epilog [ + i32 1, label %sw.bb + i32 0, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 0 + br label %sw.bb1 + +sw.bb1: ; preds = %entry, %sw.bb + %sum.0 = phi <4 x float> [ %0, %entry ], [ %1, %sw.bb ] + %2 = extractelement <4 x float> %sum.0, i32 0 + %conv = fptoui float %2 to i8 + store i8 %conv, i8* %out, align 1 + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb1 + ret void +} \ No newline at end of file -- 2.34.1