[PowerPC/BlockPlacement] Allow target to provide a per-loop alignment preference

author Hal Finkel <hfinkel@anl.gov>

Sat, 3 Jan 2015 17:58:24 +0000 (17:58 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Sat, 3 Jan 2015 17:58:24 +0000 (17:58 +0000)
author Hal Finkel <hfinkel@anl.gov>
Sat, 3 Jan 2015 17:58:24 +0000 (17:58 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Sat, 3 Jan 2015 17:58:24 +0000 (17:58 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index 9a04d3c6c679f50c98dddb296e2551c18492b047..7a62d5d19e335ae4181e036816cec802c2536e23 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -51,6 +51,7 @@ namespace llvm {
    class MachineFunction;
    class MachineInstr;
    class MachineJumpTableInfo;
+  class MachineLoop;
    class Mangler;
    class MCContext;
    class MCExpr;
@@ -929,7 +930,7 @@ public:
    }
  
    /// Return the preferred loop alignment.
-  unsigned getPrefLoopAlignment() const {
+  virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
      return PrefLoopAlignment;
    }
  
@@ -1336,7 +1337,8 @@ protected:
  
    /// Set the target's preferred loop alignment. Default alignment is zero, it
    /// means the target does not care about loop alignment.  The alignment is
-  /// specified in log2(bytes).
+  /// specified in log2(bytes). The target may also override
+  /// getPrefLoopAlignment to provide per-loop values.
    void setPrefLoopAlignment(unsigned Align) {
      PrefLoopAlignment = Align;
    }
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp

index 08fd20036f045ba247b5decce11fd46b036f0c24..aaa7d9156976850ac1382304600ea2e8bd86544b 100644 (file)
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1046,9 +1046,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
    if (F.getFunction()->getAttributes().
          hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
      return;
-  unsigned Align = TLI->getPrefLoopAlignment();
-  if (!Align)
-    return;  // Don't care about loop alignment.
    if (FunctionChain.begin() == FunctionChain.end())
      return;  // Empty chain.
  
@@ -1066,6 +1063,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
      if (!L)
        continue;
  
+    unsigned Align = TLI->getPrefLoopAlignment(L);
+    if (!Align)
+      continue;  // Don't care about loop alignment.
+
      // If the block is cold relative to the function entry don't waste space
      // aligning it.
      BlockFrequency Freq = MBFI->getBlockFreq(*BI);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 8d8c32264dbbed6eefaf0a1401ba3df7515a463f..203a610a6bc6655b02cd6462464982a482336a45 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -24,6 +24,7 @@
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -9049,6 +9050,40 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
    }
  }
  
+unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+  switch (Subtarget.getDarwinDirective()) {
+  default: break;  // Any other directive: use the generic preference below.
+  case PPC::DIR_970:
+  case PPC::DIR_PWR4:
+  case PPC::DIR_PWR5:
+  case PPC::DIR_PWR5X:
+  case PPC::DIR_PWR6:
+  case PPC::DIR_PWR6X:
+  case PPC::DIR_PWR7:
+  case PPC::DIR_PWR8: {
+    if (!ML)
+      break;  // No specific loop was given; nothing to measure.
+
+    const PPCInstrInfo *TII =
+      static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()->
+                                          getInstrInfo());
+
+    // Small loops (17-32 bytes in total; 5 to 8 instructions at 4 bytes apiece)
+    // get 32-byte alignment so the entire loop fits in one i-cache line.
+    uint64_t LoopSize = 0;
+    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
+      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
+        LoopSize += TII->GetInstSizeInBytes(J);
+
+    if (LoopSize > 16 && LoopSize <= 32)
+      return 5;  // Alignment is expressed as log2(bytes): 2^5 = 32.
+
+    break;
+  }
+  }
+
+  return TargetLowering::getPrefLoopAlignment(ML);
+}
  
  /// getConstraintType - Given a constraint, return the type of
  /// constraint it is for this target.
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index b4b11d846e37019f0ad8814bf227c763ba8e96e5..d9142c7e7d03f6f934eea44e4612419c889ae9bd 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -449,6 +449,8 @@ namespace llvm {
                                         const SelectionDAG &DAG,
                                         unsigned Depth = 0) const override;
  
+    unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+
      Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                    bool IsStore, bool IsLoad) const override;
      Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
diff --git a/test/CodeGen/PowerPC/code-align.ll b/test/CodeGen/PowerPC/code-align.ll

index 5550547d010d6ce90435a0b67b31448cc31d51ab..306230be500532dddd4ffb0b7b0a8aae890b78e2 100644 (file)
--- a/test/CodeGen/PowerPC/code-align.ll
+++ b/test/CodeGen/PowerPC/code-align.ll
@@ -1,15 +1,15 @@
  ; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=GENERIC
-; RUN: llc -mcpu=970 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=970 < %s | FileCheck %s -check-prefix=PWR
  ; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=BASIC
  ; RUN: llc -mcpu=e500mc < %s | FileCheck %s -check-prefix=BASIC
  ; RUN: llc -mcpu=e5500 < %s | FileCheck %s -check-prefix=BASIC
-; RUN: llc -mcpu=pwr4 < %s | FileCheck %s -check-prefix=BASIC
-; RUN: llc -mcpu=pwr5 < %s | FileCheck %s -check-prefix=BASIC
-; RUN: llc -mcpu=pwr5x < %s | FileCheck %s -check-prefix=BASIC
-; RUN: llc -mcpu=pwr6 < %s | FileCheck %s -check-prefix=BASIC
-; RUN: llc -mcpu=pwr6x < %s | FileCheck %s -check-prefix=BASIC
-; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -check-prefix=BASIC
-; RUN: llc -mcpu=pwr8 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr4 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr5 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr5x < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr6 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr6x < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR
  target datalayout = "E-m:e-i64:64-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
  
@@ -21,10 +21,13 @@ entry:
  
  ; GENERIC-LABEL: .globl  foo
  ; BASIC-LABEL: .globl  foo
+; PWR-LABEL: .globl  foo
  ; GENERIC: .align  2
  ; BASIC: .align  4
+; PWR: .align  4
  ; GENERIC: @foo
  ; BASIC: @foo
+; PWR: @foo
  }
  
  ; Function Attrs: nounwind
@@ -34,12 +37,16 @@ entry:
  
  ; GENERIC-LABEL: @loop
  ; BASIC-LABEL: @loop
+; PWR-LABEL: @loop
  ; GENERIC: mtctr
  ; BASIC: mtctr
+; PWR: mtctr
  ; GENERIC-NOT: .align
  ; BASIC: .align  4
+; PWR: .align  4
  ; GENERIC: bdnz
  ; BASIC: bdnz
+; PWR: bdnz
  
  vector.body:                                      ; preds = %vector.body, %entry
    %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
@@ -60,6 +67,38 @@ for.end:                                          ; preds = %vector.body
    ret void
  }
  
+; Function Attrs: nounwind
+define void @sloop(i32 signext %x, i32* nocapture %a) #1 {
+entry:
+  br label %for.body
+
+; GENERIC-LABEL: @sloop
+; BASIC-LABEL: @sloop
+; PWR-LABEL: @sloop
+; GENERIC: mtctr
+; BASIC: mtctr
+; PWR: mtctr
+; GENERIC-NOT: .align
+; BASIC: .align  4
+; PWR: .align  5
+; GENERIC: bdnz
+; BASIC: bdnz
+; PWR: bdnz
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
  attributes #0 = { nounwind readnone }
  attributes #1 = { nounwind }
author	Hal Finkel <hfinkel@anl.gov>
	Sat, 3 Jan 2015 17:58:24 +0000 (17:58 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Sat, 3 Jan 2015 17:58:24 +0000 (17:58 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/MachineBlockPlacement.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
test/CodeGen/PowerPC/code-align.ll		patch \| blob \| history