From: Silviu Baranga Date: Wed, 4 Sep 2013 17:05:18 +0000 (+0000) Subject: Fix scheduling for vldm/vstm instructions that load/store more than 32 bytes on Corte... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=87b120690b64f41c5b2367653e542ae2cfaa27ba;p=oota-llvm.git Fix scheduling for vldm/vstm instructions that load/store more than 32 bytes on Cortex-A9. This also makes the existing code more compact. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189958 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 74ee50bbc6a..603e775d351 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1879,6 +1879,10 @@ def CortexA9Itineraries : ProcessorItineraries< // The following definitions describe the simpler per-operand machine model. // This works with MachineScheduler and will eventually replace itineraries. +class A9WriteLMOpsListType<list<WriteSequence> writes> { + list<WriteSequence> Writes = writes; + SchedMachineModel SchedModel = ?; +} // Cortex-A9 machine model for scheduling and other instruction cost heuristics. def CortexA9Model : SchedMachineModel { @@ -2011,7 +2015,7 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; // Define a predicate to select the LDM based on number of memory addresses. def A9LMAdr#NumAddr#Pred : - SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>; } // foreach NumAddr @@ -2054,48 +2058,30 @@ def A9WriteL#NumAddr#Hi : WriteSequence< //===----------------------------------------------------------------------===// // LDM: Load multiple into 32-bit integer registers. 
+def A9WriteLMOpsList : A9WriteLMOpsListType< + [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi, + A9WriteL6, A9WriteL6Hi, + A9WriteL7, A9WriteL7Hi, + A9WriteL8, A9WriteL8Hi]>; + // A9WriteLM variants expand into a pair of writes for each 64-bit // value loaded. When the number of registers is odd, the last // A9WriteLnHi is naturally ignored because the instruction has no // following def operands. These variants take no issue resource, so // they may need to be part of a WriteSequence that includes A9WriteIssue. def A9WriteLM : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, // For unknown LDMs, define the maximum number of writes, but only // make the first two consume resources. SchedVar; // 21-22 + def A9WriteLMfpPostRA : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, // For unknown LDMs, define the maximum number of writes, but only - // make the first two consume resources. 
- SchedVar undef, <16 x i64> , <16 x i32> + store <16 x i64> %vecinit285, <16 x i64>* undef, align 128 + %0 = load i64* undef, align 8 + %vecinit379 = insertelement <16 x i64> undef, i64 %0, i32 9 + %1 = load i64* undef, align 8 + %vecinit419 = insertelement <16 x i64> undef, i64 %1, i32 15 + store <16 x i64> %vecinit419, <16 x i64>* undef, align 128 + %vecinit579 = insertelement <16 x i64> undef, i64 0, i32 4 + %vecinit582 = shufflevector <16 x i64> %vecinit579, <16 x i64> , <16 x i32> + %vecinit584 = insertelement <16 x i64> %vecinit582, i64 undef, i32 9 + %vecinit586 = insertelement <16 x i64> %vecinit584, i64 0, i32 10 + %vecinit589 = shufflevector <16 x i64> %vecinit586, <16 x i64> , <16 x i32> + %2 = load i64* undef, align 8 + %vecinit591 = insertelement <16 x i64> %vecinit589, i64 %2, i32 15 + store <16 x i64> %vecinit591, <16 x i64>* undef, align 128 + %vecinit694 = shufflevector <16 x i64> undef, <16 x i64> , <16 x i32> + store <16 x i64> %vecinit694, <16 x i64>* undef, align 128 + %3 = load i64* undef, align 8 + %vecinit1331 = insertelement <16 x i64> undef, i64 %3, i32 14 + %4 = load i64* undef, align 8 + %vecinit1468 = insertelement <16 x i64> undef, i64 %4, i32 11 + %vecinit1471 = shufflevector <16 x i64> %vecinit1468, <16 x i64> , <16 x i32> + %vecinit1474 = shufflevector <16 x i64> %vecinit1471, <16 x i64> , <16 x i32> + store <16 x i64> %vecinit1474, <16 x i64>* undef, align 128 + %vecinit1552 = shufflevector <16 x i64> undef, <16 x i64> , <16 x i32> + %vecinit1555 = shufflevector <16 x i64> %vecinit1552, <16 x i64> , <16 x i32> + %vecinit1558 = shufflevector <16 x i64> %vecinit1555, <16 x i64> , <16 x i32> + store <16 x i64> %vecinit1558, <16 x i64>* undef, align 128 + %vecinit1591 = shufflevector <16 x i64> undef, <16 x i64> , <16 x i32> + %vecinit1594 = shufflevector <16 x i64> %vecinit1591, <16 x i64> , <16 x i32> + %vecinit1597 = shufflevector <16 x i64> %vecinit1594, <16 x i64> , <16 x i32> + %vecinit1599 = insertelement <16 x i64> 
%vecinit1597, i64 undef, i32 8 + %vecinit1601 = insertelement <16 x i64> %vecinit1599, i64 undef, i32 9 + %vecinit1603 = insertelement <16 x i64> %vecinit1601, i64 undef, i32 10 + %5 = load i64* undef, align 8 + %vecinit1605 = insertelement <16 x i64> %vecinit1603, i64 %5, i32 11 + %vecinit1608 = shufflevector <16 x i64> %vecinit1605, <16 x i64> , <16 x i32> + %6 = load i64* undef, align 8 + %vecinit1610 = insertelement <16 x i64> %vecinit1608, i64 %6, i32 15 + store <16 x i64> %vecinit1610, <16 x i64>* undef, align 128 + %vecinit2226 = shufflevector <16 x i64> undef, <16 x i64> , <16 x i32> + %7 = load i64* undef, align 8 + %vecinit2228 = insertelement <16 x i64> %vecinit2226, i64 %7, i32 8 + %vecinit2230 = insertelement <16 x i64> %vecinit2228, i64 undef, i32 9 + %vecinit2233 = shufflevector <16 x i64> %vecinit2230, <16 x i64> , <16 x i32> + %vecinit2236 = shufflevector <16 x i64> %vecinit2233, <16 x i64> , <16 x i32> + store <16 x i64> %vecinit2236, <16 x i64>* undef, align 128 + %vecinit2246 = shufflevector <16 x i64> undef, <16 x i64> , <16 x i32> + %vecinit2249 = shufflevector <16 x i64> %vecinit2246, <16 x i64> , <16 x i32> + %vecinit2252 = shufflevector <16 x i64> %vecinit2249, <16 x i64> , <16 x i32> + %vecinit2255 = shufflevector <16 x i64> %vecinit2252, <16 x i64> , <16 x i32> + %8 = load i64* %arrayidx39, align 8 + %vecinit2257 = insertelement <16 x i64> %vecinit2255, i64 %8, i32 13 + %vecinit2260 = shufflevector <16 x i64> %vecinit2257, <16 x i64> , <16 x i32> + store <16 x i64> %vecinit2260, <16 x i64>* null, align 128 + ret void +} +attributes #0 = { noredzone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }