R600/SI: Use VALU instructions for copying i1 values

author Tom Stellard <thomas.stellard@amd.com>

Wed, 30 Apr 2014 15:31:33 +0000 (15:31 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 30 Apr 2014 15:31:33 +0000 (15:31 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 30 Apr 2014 15:31:33 +0000 (15:31 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 30 Apr 2014 15:31:33 +0000 (15:31 +0000)
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h

index 3e1848b5f8ea282ff4f7cfa815c8d841fedd39a8..5d0cf81c4025b8bc6561539168109668d11cd320 100644 (file)
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
  // SI Passes
  FunctionPass *createSITypeRewriter();
  FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSILowerI1CopiesPass();
  FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
  FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
  FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
  FunctionPass *createSIInsertWaits(TargetMachine &tm);
  
+void initializeSILowerI1CopiesPass(PassRegistry &);
+extern char &SILowerI1CopiesID;
+
  // Passes common to R600 and SI
  Pass *createAMDGPUStructurizeCFGPass();
  FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp

index f0935401ecb997a4c5c7d23539fce3c72544bc37..6b68c2abe367644f9b10da539296683daf1b7e25 100644 (file)
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() {
  
  bool AMDGPUPassConfig::addInstSelector() {
    addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+  addPass(createSILowerI1CopiesPass());
    return false;
  }
  
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt

index 93a51179754fa4392b4524e16ceebf219aa7a0ae..3c6fa5a7ae46f079ed4172aed955c147b544322a 100644 (file)
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen
    SIInstrInfo.cpp
    SIISelLowering.cpp
    SILowerControlFlow.cpp
+  SILowerI1Copies.cpp
    SIMachineFunctionInfo.cpp
    SIRegisterInfo.cpp
    SITypeRewriter.cpp
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp

index a9a7c5ce0fe6a6c285b1295c194bb4d9535136b7..f6b8b783d0216c0669088b3d50c2b3260e8079a7 100644 (file)
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -185,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
    const TargetRegisterClass *SrcRC;
  
    if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-      DstRC == &AMDGPU::M0RegRegClass)
+      DstRC == &AMDGPU::M0RegRegClass ||
+      MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
      return false;
  
    SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index 8c686c91502f94585372f2e4085382988d020e32..e68804850785a5f046056b294ea8646380ea7d90 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
  
  SITargetLowering::SITargetLowering(TargetMachine &TM) :
      AMDGPUTargetLowering(TM) {
-  addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
+  addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
    addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
  
    addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 00f9be61e245c7cd6f02c9ea84375ac0eb688762..27e7abe1a388576ca4c909618b40bc53e7c1fb19 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1398,6 +1398,12 @@ def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
  
  let isCodeGenOnly = 1, isPseudo = 1 in {
  
+def V_MOV_I1 : InstSI <
+  (outs VReg_1:$dst),
+  (ins i1imm:$src),
+  "", [(set i1:$dst, (imm:$src))]
+>;
+
  def LOAD_CONST : AMDGPUShaderInst <
    (outs GPRF32:$dst),
    (ins i32imm:$src),
@@ -1980,11 +1986,6 @@ def : Pat <
    (V_MOV_B32_e32 fpimm:$imm)
  >;
  
-def : Pat <
-  (i1 imm:$imm),
-  (S_MOV_B64 imm:$imm)
->;
-
  def : Pat <
    (i64 InlineImm<i64>:$imm),
    (S_MOV_B64 InlineImm<i64>:$imm)
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp

index d1d925dd231b0a301f1fe916c4f925ccf377ea82..6601f2a980650f919a033da2ef06f1e874e95c7c 100644 (file)
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -67,7 +67,7 @@ private:
    static const unsigned SkipThreshold = 12;
  
    static char ID;
-  const TargetRegisterInfo *TRI;
+  const SIRegisterInfo *TRI;
    const SIInstrInfo *TII;
  
    bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
  
  bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
    TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
-  TRI = MF.getTarget().getRegisterInfo();
+  TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  
    bool HaveKill = false;
diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp

new file mode 100644 (file)

index 0000000..766380e
--- /dev/null
+++ b/lib/Target/R600/SILowerI1Copies.cpp
@@ -0,0 +1,130 @@
+//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// i1 values are usually inserted by the CFG Structurize pass and they are
+/// unique in that they can be copied from VALU to SALU registers.
+/// This is not possible for any other value type.  Since there are no
+/// MOV instructions for i1, we must use V_CMP_* and V_CNDMASK to move the i1.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#define DEBUG_TYPE "si-i1-copies"
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Lowers V_MOV_I1 pseudos and VReg_1 <-> SGPR_64 COPYs to V_* instructions.
+class SILowerI1Copies : public MachineFunctionPass {
+public:
+  static char ID;
+  /// Initializes the pass in the PassRegistry returned by getPassRegistry().
+  SILowerI1Copies() : MachineFunctionPass(ID) {
+    initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Pass name; "i1" (digit one) agrees with DEBUG_TYPE "si-i1-copies".
+  const char *getPassName() const override { return "SI Lower i1 Copies"; }
+
+  /// Requires MachineDominatorTree and declares the CFG preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+// Register the pass, define its ID objects, and provide its factory.
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
+                      "SI Lower i1 Copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
+                    "SI Lower i1 Copies", false, false)
+
+char SILowerI1Copies::ID = 0;
+char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
+
+FunctionPass *llvm::createSILowerI1CopiesPass() {
+  return new SILowerI1Copies();
+}
+
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      MF.getTarget().getInstrInfo());
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+                                                  BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    MachineBasicBlock::iterator I, Next;
+    for (I = MBB.begin(); I != MBB.end(); I = Next) {
+      Next = std::next(I);
+      MachineInstr &MI = *I;
+
+      if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
+        MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
+        continue;
+      }
+
+      if (MI.getOpcode() != AMDGPU::COPY ||
+          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
+          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
+        continue;
+
+
+      const TargetRegisterClass *DstRC =
+          MRI.getRegClass(MI.getOperand(0).getReg());
+      const TargetRegisterClass *SrcRC =
+          MRI.getRegClass(MI.getOperand(1).getReg());
+
+      if (DstRC == &AMDGPU::VReg_1RegClass &&
+          TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
+        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
+                .addOperand(MI.getOperand(0))
+                .addImm(0)
+                .addImm(-1)
+                .addOperand(MI.getOperand(1))
+                .addImm(0)
+                .addImm(0)
+                .addImm(0)
+                .addImm(0);
+        MI.eraseFromParent();
+      } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
+                 SrcRC == &AMDGPU::VReg_1RegClass) {
+        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
+                .addOperand(MI.getOperand(0))
+                .addImm(0)
+                .addOperand(MI.getOperand(1))
+                .addImm(0)
+                .addImm(0)
+                .addImm(0)
+                .addImm(0);
+        MI.eraseFromParent();
+      }
+
+    }
+  }
+  return false;
+}
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td

index 6d6d8b9bd847f9a03156d3bc214995621af2348a..f1f01deaf361fe7c55020ce98aef9ccdd1ed19b7 100644 (file)
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -189,6 +189,8 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
  
  def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
  
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
+
  //===----------------------------------------------------------------------===//
  //  [SV]Src_(32|64) register classes, can have either an immediate or an register
  //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll

new file mode 100644 (file)

index 0000000..5d5e3ff
--- /dev/null
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+
+; Make sure the i1 values created by the cfg structurizer pass are
+; moved using VALU instructions
+; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
+; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
+define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
+entry:
+  switch i32 %a, label %default [
+    i32 0, label %case0
+    i32 1, label %case1
+  ]
+
+case0:
+  %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
+  store i32 0, i32 addrspace(1)* %arrayidx1, align 4
+  br label %end
+
+case1:
+  %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
+  store i32 1, i32 addrspace(1)* %arrayidx5, align 4
+  br label %end
+
+default:
+  %cmp8 = icmp eq i32 %a, 2
+  %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
+  br i1 %cmp8, label %if, label %else
+
+if:
+  store i32 2, i32 addrspace(1)* %arrayidx10, align 4
+  br label %end
+
+else:
+  store i32 3, i32 addrspace(1)* %arrayidx10, align 4
+  br label %end
+
+end:
+  ret void
+}
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 30 Apr 2014 15:31:33 +0000 (15:31 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 30 Apr 2014 15:31:33 +0000 (15:31 +0000)
lib/Target/R600/AMDGPU.h		patch \| blob \| history
lib/Target/R600/AMDGPUTargetMachine.cpp		patch \| blob \| history
lib/Target/R600/CMakeLists.txt		patch \| blob \| history
lib/Target/R600/SIFixSGPRCopies.cpp		patch \| blob \| history
lib/Target/R600/SIISelLowering.cpp		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
lib/Target/R600/SILowerControlFlow.cpp		patch \| blob \| history
lib/Target/R600/SILowerI1Copies.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/R600/SIRegisterInfo.td		patch \| blob \| history
test/CodeGen/R600/valu-i1.ll	[new file with mode: 0644]	patch \| blob