R600/SI: Allow f64 inline immediates in i64 operands

[oota-llvm.git] / lib / Target / R600 / SIShrinkInstructions.cpp
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp

index c33514f719fec3851e07ff9f5256b7f44ede5005..97bbd78d621fd2b346704db0ae6eb88b612e4481 100644 (file)
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -10,6 +10,7 @@
  //
  
  #include "AMDGPU.h"
+#include "AMDGPUMCInstLower.h"
  #include "AMDGPUSubtarget.h"
  #include "SIInstrInfo.h"
  #include "llvm/ADT/Statistic.h"
@@ -97,20 +98,19 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
    if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
      return false;
  
-  // We don't need to check src0, all input types are legal, so just make
-  // sure src0 isn't using any modifiers.
-  const MachineOperand *Src0Mod =
-      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
-  if (Src0Mod && Src0Mod->getImm() != 0)
+  // We don't need to check src0, all input types are legal, so just make sure
+  // src0 isn't using any modifiers.
+  if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
      return false;
  
    // Check output modifiers
-  const MachineOperand *Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
-  if (Omod && Omod->getImm() != 0)
+  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
      return false;
  
-  const MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
-  return !Clamp || Clamp->getImm() == 0;
+  if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+    return false;
+
+  return true;
  }
  
  /// \brief This function checks \p MI for operands defined by a move immediate
@@ -127,38 +127,32 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
           TII->isVOPC(MI.getOpcode()));
  
    const SIRegisterInfo &TRI = TII->getRegisterInfo();
-  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  
    // Only one literal constant is allowed per instruction, so if src0 is a
    // literal constant then we can't do any folding.
-  if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+  if (Src0.isImm() &&
+      TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
      return;
  
-
    // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
    // SGPR, we cannot commute the instruction, so we can't fold any literal
    // constants.
-  if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+  if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
      return;
  
    // Try to fold Src0
-  if (Src0->isReg()) {
-    unsigned Reg = Src0->getReg();
+  if (Src0.isReg()) {
+    unsigned Reg = Src0.getReg();
      MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;
  
        if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
-        Src0->ChangeToImmediate(MovSrc.getImm());
+        Src0.ChangeToImmediate(MovSrc.getImm());
          ConstantFolded = true;
-      } else if (MovSrc.isFPImm()) {
-        const APFloat &APF = MovSrc.getFPImm()->getValueAPF();
-        if (&APF.getSemantics() == &APFloat::IEEEsingle) {
-          MRI.removeRegOperandFromUseList(Src0);
-          Src0->ChangeToImmediate(APF.bitcastToAPInt().getZExtValue());
-          ConstantFolded = true;
-        }
        }
        if (ConstantFolded) {
          if (MRI.use_empty(Reg))
@@ -191,24 +185,36 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
        Next = std::next(I);
        MachineInstr &MI = *I;
  
+      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+        const MachineOperand &Src = MI.getOperand(1);
+
+        if (Src.isImm()) {
+          if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
+            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+        }
+
+        continue;
+      }
+
        if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
          continue;
  
        if (!canShrink(MI, TII, TRI, MRI)) {
-        // Try commtuing the instruction and see if that enables us to shrink
+        // Try commuting the instruction and see if that enables us to shrink
          // it.
          if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
              !canShrink(MI, TII, TRI, MRI))
            continue;
        }
  
-      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
-
-      // Op32 could be -1 here if we started with an instruction that had a
+      // getVOPe32 could be -1 here if we started with an instruction that had
        // a 32-bit encoding and then commuted it to an instruction that did not.
-      if (Op32 == -1)
+      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
          continue;
  
+      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+
        if (TII->isVOPC(Op32)) {
          unsigned DstReg = MI.getOperand(0).getReg();
          if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
@@ -221,10 +227,9 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
            // vreg1 = VOPC;
            // S_AND_B64 vreg0, vreg1
            //
-          // So, instead of forcing the instruction to write to VCC, we provide a
-          // hint to the register allocator to use VCC and then we
-          // we will run this pass again after RA and shrink it if it outpus to
-          // VCC.
+          // So, instead of forcing the instruction to write to VCC, we provide
+          // a hint to the register allocator to use VCC and then we will run
+          // this pass again after RA and shrink it if it outputs to VCC.
            MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
            continue;
          }