R600/SI: Remove i1 pseudo VALU ops

author Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 3 Dec 2014 05:22:35 +0000 (05:22 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 3 Dec 2014 05:22:35 +0000 (05:22 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 3 Dec 2014 05:22:35 +0000 (05:22 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 3 Dec 2014 05:22:35 +0000 (05:22 +0000)
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td

index cdbc22e0ead4fce1958e6bc47f7fea95a7ac29fd..4b3be5be578f3e0030c867ed12810a9a58527de3 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -131,6 +131,10 @@ def as_i32imm: SDNodeXForm<imm, [{
    return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
  }]>;
  
+def as_i64imm: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+}]>;
+
  def IMM8bit : PatLeaf <(imm),
    [{return isUInt<8>(N->getZExtValue());}]
  >;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 00ce9bfcc269200e6a589c6776478babaa4ee1cd..cfe6c81ced95ce2acbe396a1254faa9e8a0f70d2 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1686,30 +1686,8 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
  //===----------------------------------------------------------------------===//
  // Pseudo Instructions
  //===----------------------------------------------------------------------===//
-
  let isCodeGenOnly = 1, isPseudo = 1 in {
  
-def V_MOV_I1 : InstSI <
-  (outs VReg_1:$dst),
-  (ins i1imm:$src),
-  "", [(set i1:$dst, (imm:$src))]
->;
-
-def V_AND_I1 : InstSI <
-   (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
-   [(set i1:$dst, (and i1:$src0, i1:$src1))]
->;
-
-def V_OR_I1 : InstSI <
-   (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
-   [(set i1:$dst, (or i1:$src0, i1:$src1))]
->;
-
-def V_XOR_I1 : InstSI <
-  (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
-  [(set i1:$dst, (xor i1:$src0, i1:$src1))]
->;
-
  let hasSideEffects = 1 in {
  def SGPR_USE : InstSI <(outs),(ins), "", []>;
  }
@@ -2495,6 +2473,14 @@ def : Pat <
    (S_MOV_B64 InlineImm<i64>:$imm)
  >;
  
+// XXX - Should this use a s_cmp to set SCC?
+
+// Set to sign-extended 64-bit value (true = -1, false = 0)
+def : Pat <
+  (i1 imm:$imm),
+  (S_MOV_B64 (i64 (as_i64imm $imm)))
+>;
+
  /********** ===================== **********/
  /********** Interpolation Paterns **********/
  /********** ===================== **********/
@@ -3045,6 +3031,27 @@ def : Pat <
      (V_CNDMASK_B32_e64 0, -1, $src), sub1)
  >;
  
+// If we need to perform a logical operation on i1 values, we need to
+// use vector comparisons since there is only one SCC register. Vector
+// comparisons still write to a pair of SGPRs, so treat these as
+// 64-bit comparisons. When legalizing SGPR copies, instructions
+// resulting in the copies from SCC to these instructions will be
+// moved to the VALU.
+def : Pat <
+  (i1 (and i1:$src0, i1:$src1)),
+  (S_AND_B64 $src0, $src1)
+>;
+
+def : Pat <
+  (i1 (or i1:$src0, i1:$src1)),
+  (S_OR_B64 $src0, $src1)
+>;
+
+def : Pat <
+  (i1 (xor i1:$src0, i1:$src1)),
+  (S_XOR_B64 $src0, $src1)
+>;
+
  def : Pat <
    (f32 (sint_to_fp i1:$src)),
    (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
@@ -3057,7 +3064,7 @@ def : Pat <
  
  def : Pat <
    (f64 (sint_to_fp i1:$src)),
-    (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
  >;
  
  def : Pat <
diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp

index 226a672b343ef7ed2c083a4683d202ef659816e2..7767c4c0671bd809c8283a964763b002a52ca798 100644 (file)
--- a/lib/Target/R600/SILowerI1Copies.cpp
+++ b/lib/Target/R600/SILowerI1Copies.cpp
@@ -85,30 +85,6 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
        Next = std::next(I);
        MachineInstr &MI = *I;
  
-      if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
-        continue;
-      }
-
-      if (MI.getOpcode() == AMDGPU::V_AND_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_AND_B32_e64));
-        continue;
-      }
-
-      if (MI.getOpcode() == AMDGPU::V_OR_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_OR_B32_e64));
-        continue;
-      }
-
-      if (MI.getOpcode() == AMDGPU::V_XOR_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_XOR_B32_e64));
-        continue;
-      }
-
        if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
          unsigned Reg = MI.getOperand(0).getReg();
          const TargetRegisterClass *RC = MRI.getRegClass(Reg);
@@ -117,32 +93,52 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
          continue;
        }
  
-      if (MI.getOpcode() != AMDGPU::COPY ||
-          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
-          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
+      if (MI.getOpcode() != AMDGPU::COPY)
          continue;
  
+      const MachineOperand &Dst = MI.getOperand(0);
+      const MachineOperand &Src = MI.getOperand(1);
+
+      if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
+          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+        continue;
  
-      const TargetRegisterClass *DstRC =
-          MRI.getRegClass(MI.getOperand(0).getReg());
-      const TargetRegisterClass *SrcRC =
-          MRI.getRegClass(MI.getOperand(1).getReg());
+      const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
+      const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
  
        if (DstRC == &AMDGPU::VReg_1RegClass &&
            TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
-                .addOperand(MI.getOperand(0))
-                .addImm(0)
-                .addImm(-1)
-                .addOperand(MI.getOperand(1));
+        I1Defs.push_back(Dst.getReg());
+        DebugLoc DL = MI.getDebugLoc();
+
+        MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
+        if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
+          if (DefInst->getOperand(1).isImm()) {
+            I1Defs.push_back(Dst.getReg());
+
+            int64_t Val = DefInst->getOperand(1).getImm();
+            assert(Val == 0 || Val == -1);
+
+            BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
+              .addOperand(Dst)
+              .addImm(Val);
+            MI.eraseFromParent();
+            continue;
+          }
+        }
+
+        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
+          .addOperand(Dst)
+          .addImm(0)
+          .addImm(-1)
+          .addOperand(Src);
          MI.eraseFromParent();
        } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
                   SrcRC == &AMDGPU::VReg_1RegClass) {
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
-                .addOperand(MI.getOperand(0))
-                .addOperand(MI.getOperand(1))
-                .addImm(0);
+          .addOperand(Dst)
+          .addOperand(Src)
+          .addImm(0);
          MI.eraseFromParent();
        }
      }
diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll

index 029f41dc7ed1e6c37871cb9ca9119e9c0f5835ee..c459a6a63eb66ace7847e7cc825e152f5bea978f 100644 (file)
--- a/test/CodeGen/R600/fceil64.ll
+++ b/test/CodeGen/R600/fceil64.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
  
  declare double @llvm.ceil.f64(double) nounwind readnone
  declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
@@ -22,12 +22,15 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
  ; SI: cmp_gt_i32
  ; SI: cndmask_b32
  ; SI: cndmask_b32
-; SI: cmp_gt_f64
-; SI: cndmask_b32
-; SI: cmp_ne_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
+; SI: v_cmp_o_f64
+; SI: v_cmp_neq_f64
+; SI: s_and_b64
+; SI: v_cmp_gt_f64
+; SI: s_and_b64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
  ; SI: v_add_f64
+; SI: s_endpgm
  define void @fceil_f64(double addrspace(1)* %out, double %x) {
    %y = call double @llvm.ceil.f64(double %x) nounwind readnone
    store double %y, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll

index 166f7055fb177a313cacdcbc8caeb9402262295e..77b7997b9096acad1aa149d923480476eaeef99d 100644 (file)
--- a/test/CodeGen/R600/ffloor.ll
+++ b/test/CodeGen/R600/ffloor.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
  
  declare double @llvm.floor.f64(double) nounwind readnone
  declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
@@ -23,12 +23,15 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
  ; SI: cmp_gt_i32
  ; SI: cndmask_b32
  ; SI: cndmask_b32
-; SI: cmp_lt_f64
-; SI: cndmask_b32
-; SI: cmp_ne_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
+; SI: v_cmp_o_f64
+; SI: v_cmp_neq_f64
+; SI: s_and_b64
+; SI: v_cmp_lt_f64
+; SI: s_and_b64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
  ; SI: v_add_f64
+; SI: s_endpgm
  define void @ffloor_f64(double addrspace(1)* %out, double %x) {
    %y = call double @llvm.floor.f64(double %x) nounwind readnone
    store double %y, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll

index 371ebbedf18e2086f30a2fbb42a8c3cba4b23a54..1cca2bc21e2bc3ca37c93b2953bb541169aaf4d5 100644 (file)
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -96,11 +96,12 @@ entry:
  ; R600-DAG: SETNE_DX10
  ; R600-DAG: AND_INT
  ; R600-DAG: SETNE_INT
-; SI: v_cmp_o_f32
-; SI: v_cmp_neq_f32
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_and_b32_e32
+
+; SI-DAG: v_cmp_o_f32_e32 vcc
+; SI-DAG: v_cmp_neq_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP1]], vcc
+; SI: v_cndmask_b32_e64 [[VRESULT:v[0-9]+]], 0, -1, [[AND]]
+; SI: buffer_store_dword [[VRESULT]]
  define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
  entry:
    %0 = fcmp one float %a, %b
@@ -130,11 +131,12 @@ entry:
  ; R600-DAG: SETE_DX10
  ; R600-DAG: OR_INT
  ; R600-DAG: SETNE_INT
-; SI: v_cmp_u_f32
-; SI: v_cmp_eq_f32
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+
+; SI-DAG: v_cmp_u_f32_e32 vcc
+; SI-DAG: v_cmp_eq_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], vcc
+; SI: v_cndmask_b32_e64 [[VRESULT:v[0-9]+]], 0, -1, [[OR]]
+; SI: buffer_store_dword [[VRESULT]]
  define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
  entry:
    %0 = fcmp ueq float %a, %b
@@ -148,9 +150,8 @@ entry:
  ; R600: SETE_DX10
  ; SI: v_cmp_u_f32
  ; SI: v_cmp_gt_f32
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
  entry:
    %0 = fcmp ugt float %a, %b
@@ -164,9 +165,8 @@ entry:
  ; R600: SETE_DX10
  ; SI: v_cmp_u_f32
  ; SI: v_cmp_ge_f32
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
  entry:
    %0 = fcmp uge float %a, %b
@@ -180,9 +180,8 @@ entry:
  ; R600: SETE_DX10
  ; SI: v_cmp_u_f32
  ; SI: v_cmp_lt_f32
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
  entry:
    %0 = fcmp ult float %a, %b
@@ -196,9 +195,8 @@ entry:
  ; R600: SETE_DX10
  ; SI: v_cmp_u_f32
  ; SI: v_cmp_le_f32
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
  entry:
    %0 = fcmp ule float %a, %b
diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll

index 6e43172b1cb9c3ec2ec5c00556f087514f748e57..282a5dea976bd516541620d2db40e3cd51f0bb3e 100644 (file)
--- a/test/CodeGen/R600/setcc64.ll
+++ b/test/CodeGen/R600/setcc64.ll
@@ -57,11 +57,11 @@ entry:
  }
  
  ; FUNC-LABEL: {{^}}f64_one:
-; SI: v_cmp_o_f64
-; SI: v_cmp_neq_f64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_and_b32_e32
+; SI-DAG: v_cmp_o_f64_e32 vcc
+; SI-DAG: v_cmp_neq_f64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP1]], vcc
+; SI: v_cndmask_b32_e64 [[VRESULT:v[0-9]+]], 0, -1, [[AND]]
+; SI: buffer_store_dword [[VRESULT]]
  define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
  entry:
    %0 = fcmp one double %a, %b
@@ -83,9 +83,8 @@ entry:
  ; FUNC-LABEL: {{^}}f64_ueq:
  ; SI: v_cmp_u_f64
  ; SI: v_cmp_eq_f64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
  entry:
    %0 = fcmp ueq double %a, %b
@@ -97,9 +96,8 @@ entry:
  ; FUNC-LABEL: {{^}}f64_ugt:
  ; SI: v_cmp_u_f64
  ; SI: v_cmp_gt_f64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
  entry:
    %0 = fcmp ugt double %a, %b
@@ -111,9 +109,8 @@ entry:
  ; FUNC-LABEL: {{^}}f64_uge:
  ; SI: v_cmp_u_f64
  ; SI: v_cmp_ge_f64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
  entry:
    %0 = fcmp uge double %a, %b
@@ -125,9 +122,8 @@ entry:
  ; FUNC-LABEL: {{^}}f64_ult:
  ; SI: v_cmp_u_f64
  ; SI: v_cmp_lt_f64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
  entry:
    %0 = fcmp ult double %a, %b
@@ -139,9 +135,8 @@ entry:
  ; FUNC-LABEL: {{^}}f64_ule:
  ; SI: v_cmp_u_f64
  ; SI: v_cmp_le_f64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
-; SI: v_or_b32_e32
+; SI: s_or_b64
+; SI: v_cndmask_b32
  define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
  entry:
    %0 = fcmp ule double %a, %b
diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll

index d8b8dffa7fa40400733ca861dd84fba722b99432..667c4ea3a4e0f00980db7114467b7a814a9ecf67 100644 (file)
--- a/test/CodeGen/R600/sgpr-control-flow.ll
+++ b/test/CodeGen/R600/sgpr-control-flow.ll
@@ -59,6 +59,47 @@ endif:
    ret void
  }
  
+; FIXME: Should write to different SGPR pairs instead of copying to
+; VALU for i1 phi.
+
+; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br:
+; SI: buffer_load_dword [[AVAL:v[0-9]+]]
+; SI: v_cmp_lt_i32_e64 [[CMP_IF:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
+
+; SI: BB2_1:
+; SI: buffer_load_dword [[AVAL:v[0-9]+]]
+; SI: v_cmp_eq_i32_e64 [[CMP_ELSE:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
+
+; SI: v_cmp_ne_i32_e64 [[CMP_CMP:s\[[0-9]+:[0-9]+\]]], [[V_CMP]], 0
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
+; SI: buffer_store_dword [[RESULT]]
+define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+entry:
+  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tmp1 = icmp eq i32 %tid, 0
+  br i1 %tmp1, label %if, label %else
+
+if:
+  %gep.if = getelementptr i32 addrspace(1)* %a, i32 %tid
+  %a.val = load i32 addrspace(1)* %gep.if
+  %cmp.if = icmp eq i32 %a.val, 0
+  br label %endif
+
+else:
+  %gep.else = getelementptr i32 addrspace(1)* %b, i32 %tid
+  %b.val = load i32 addrspace(1)* %gep.else
+  %cmp.else = icmp slt i32 %b.val, 0
+  br label %endif
+
+endif:
+  %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else]
+  %ext = sext i1 %tmp4 to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
  declare i32 @llvm.r600.read.tidig.x() #0
  
  attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll

index a193077067eddaa8b14e7d79cce96e603e6d0c30..7b9f3343980ce54ad55e79af60b088558bf1744e 100644 (file)
--- a/test/CodeGen/R600/valu-i1.ll
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -1,10 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
  
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: @test_if
  ; Make sure the i1 values created by the cfg structurizer pass are
  ; moved using VALU instructions
  ; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
  ; SI: v_mov_b32_e32 v{{[0-9]}}, -1
-define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
+define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
  entry:
    switch i32 %a, label %default [
      i32 0, label %case0
@@ -37,3 +40,150 @@ else:
  end:
    ret void
  }
+
+; SI-LABEL: @simple_test_v_if
+; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
+; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+
+; SI: ; BB#1
+; SI: buffer_store_dword
+; SI: s_endpgm
+
+; SI: BB1_2:
+; SI: s_or_b64 exec, exec, [[BR_SREG]]
+; SI: s_endpgm
+define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %is.0 = icmp ne i32 %tid, 0
+  br i1 %is.0, label %store, label %exit
+
+store:
+  %gep = getelementptr i32 addrspace(1)* %dst, i32 %tid
+  store i32 999, i32 addrspace(1)* %gep
+  ret void
+
+exit:
+  ret void
+}
+
+; SI-LABEL: @simple_test_v_loop
+; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
+; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+; SI: s_cbranch_execz BB2_2
+
+; SI: ; BB#1:
+; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
+
+; SI: BB2_3:
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
+; SI: v_add_i32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
+; SI: s_cbranch_execnz BB2_3
+
+define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+entry:
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %is.0 = icmp ne i32 %tid, 0
+  %limit = add i32 %tid, 64
+  br i1 %is.0, label %loop, label %exit
+
+loop:
+  %i = phi i32 [%tid, %entry], [%i.inc, %loop]
+  %gep.src = getelementptr i32 addrspace(1)* %src, i32 %i
+  %gep.dst = getelementptr i32 addrspace(1)* %dst, i32 %i
+  %load = load i32 addrspace(1)* %src
+  store i32 %load, i32 addrspace(1)* %gep.dst
+  %i.inc = add nsw i32 %i, 1
+  %cmp = icmp eq i32 %limit, %i.inc
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; SI-LABEL: @multi_vcond_loop
+
+; Load loop limit from buffer
+; Branch to exit if uniformly not taken
+; SI: ; BB#0:
+; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
+; SI: v_cmp_gt_i32_e64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG]], [[OUTER_CMP_SREG]]
+; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
+; SI: s_cbranch_execz BB3_2
+
+; Initialize inner condition to false
+; SI: ; BB#1:
+; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
+
+; Clear exec bits for workitems that load -1s
+; SI: BB3_3:
+; SI: buffer_load_dword [[A:v[0-9]+]]
+; SI: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1
+; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
+; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
+; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
+; SI: s_cbranch_execz BB3_5
+
+; SI: BB#4:
+; SI: buffer_store_dword
+; SI: v_cmp_ge_i64_e32 vcc
+; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
+
+; SI: BB3_5:
+; SI: s_or_b64 exec, exec, [[ORNEG1]]
+; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
+; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
+; SI: s_cbranch_execnz BB3_3
+
+; SI: BB#6
+; SI: s_or_b64 exec, exec, [[COND_STATE]]
+
+; SI: BB3_2:
+; SI-NOT: [[COND_STATE]]
+; SI: s_endpgm
+
+define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
+bb:
+  %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tmp4 = sext i32 %tmp to i64
+  %tmp5 = getelementptr inbounds i32 addrspace(1)* %arg3, i64 %tmp4
+  %tmp6 = load i32 addrspace(1)* %tmp5, align 4
+  %tmp7 = icmp sgt i32 %tmp6, 0
+  %tmp8 = sext i32 %tmp6 to i64
+  br i1 %tmp7, label %bb10, label %bb26
+
+bb10:                                             ; preds = %bb, %bb20
+  %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
+  %tmp12 = add nsw i64 %tmp11, %tmp4
+  %tmp13 = getelementptr inbounds i32 addrspace(1)* %arg1, i64 %tmp12
+  %tmp14 = load i32 addrspace(1)* %tmp13, align 4
+  %tmp15 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp12
+  %tmp16 = load i32 addrspace(1)* %tmp15, align 4
+  %tmp17 = icmp ne i32 %tmp14, -1
+  %tmp18 = icmp ne i32 %tmp16, -1
+  %tmp19 = and i1 %tmp17, %tmp18
+  br i1 %tmp19, label %bb20, label %bb26
+
+bb20:                                             ; preds = %bb10
+  %tmp21 = add nsw i32 %tmp16, %tmp14
+  %tmp22 = getelementptr inbounds i32 addrspace(1)* %arg, i64 %tmp12
+  store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
+  %tmp23 = add nuw nsw i64 %tmp11, 1
+  %tmp24 = icmp slt i64 %tmp23, %tmp8
+  br i1 %tmp24, label %bb10, label %bb26
+
+bb26:                                             ; preds = %bb10, %bb20, %bb
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll

index be47f8c05982287bf929345f656a335a1ac0f314..bf98e7df86a0a30216dce584b63d872f29085f3c 100644 (file)
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -39,19 +39,37 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
  ; FUNC-LABEL: {{^}}xor_i1:
  ; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
  
-; SI: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-
+; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0.0
+; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
+; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
  define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
    %a = load float addrspace(1) * %in0
    %b = load float addrspace(1) * %in1
    %acmp = fcmp oge float %a, 0.000000e+00
-  %bcmp = fcmp oge float %b, 0.000000e+00
+  %bcmp = fcmp oge float %b, 1.000000e+00
    %xor = xor i1 %acmp, %bcmp
    %result = select i1 %xor, float %a, float %b
    store float %result, float addrspace(1)* %out
    ret void
  }
  
+; FUNC-LABEL: {{^}}v_xor_i1:
+; SI: buffer_load_ubyte [[A:v[0-9]+]]
+; SI: buffer_load_ubyte [[B:v[0-9]+]]
+; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[B]], [[A]]
+; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
+; SI: buffer_store_byte [[RESULT]]
+define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
+  %a = load i1 addrspace(1)* %in0
+  %b = load i1 addrspace(1)* %in1
+  %xor = xor i1 %a, %b
+  store i1 %xor, i1 addrspace(1)* %out
+  ret void
+}
+
  ; FUNC-LABEL: {{^}}vector_xor_i32:
  ; SI: v_xor_b32_e32
  define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 3 Dec 2014 05:22:35 +0000 (05:22 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 3 Dec 2014 05:22:35 +0000 (05:22 +0000)
lib/Target/R600/SIInstrInfo.td		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
lib/Target/R600/SILowerI1Copies.cpp		patch \| blob \| history
test/CodeGen/R600/fceil64.ll		patch \| blob \| history
test/CodeGen/R600/ffloor.ll		patch \| blob \| history
test/CodeGen/R600/setcc.ll		patch \| blob \| history
test/CodeGen/R600/setcc64.ll		patch \| blob \| history
test/CodeGen/R600/sgpr-control-flow.ll		patch \| blob \| history
test/CodeGen/R600/valu-i1.ll		patch \| blob \| history
test/CodeGen/R600/xor.ll		patch \| blob \| history