From bf26b3fcaeceabda4831178509276cb63cd753b4 Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Mon, 27 Jul 2015 11:37:42 +0000 Subject: [PATCH] AMDGPU/SI: Fix the V_FRACT_F64 SI bug workaround This is a candidate for 3.7. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243263 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIInstructions.td | 4 ++-- test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index de8675e57fd..b2edc036049 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -3273,13 +3273,13 @@ def : Pat < (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), (V_CNDMASK_B64_PSEUDO - $x, (V_MIN_F64 SRCMODS.NONE, (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE), SRCMODS.NONE, (V_MOV_B64_PSEUDO 0x3fefffffffffffff), DSTCLAMP.NONE, DSTOMOD.NONE), + $x, (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)) >; @@ -3291,13 +3291,13 @@ def : Pat < $x, SRCMODS.NEG, (V_CNDMASK_B64_PSEUDO - $x, (V_MIN_F64 SRCMODS.NONE, (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE), SRCMODS.NONE, (V_MOV_B64_PSEUDO 0x3fefffffffffffff), DSTCLAMP.NONE, DSTOMOD.NONE), + $x, (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)), DSTCLAMP.NONE, DSTOMOD.NONE) >; diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll index e098dd35d6d..6049dca0401 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.fract.f64.ll @@ -11,8 +11,8 @@ declare double @llvm.AMDGPU.fract.f64(double) nounwind readnone ; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff ; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] ; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 -; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]] -; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]] +; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] +; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]] ; CI: buffer_store_dwordx2 [[FRC]] define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nounwind { @@ -28,8 +28,8 @@ define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nou ; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff ; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] ; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 -; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]] -; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]] +; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] +; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]] ; CI: buffer_store_dwordx2 [[FRC]] define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) nounwind { @@ -46,8 +46,8 @@ define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) ; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff ; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] ; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3 -; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]] -; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]] +; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]] +; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]] ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]] ; CI: buffer_store_dwordx2 [[FRC]] define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) nounwind { -- 2.34.1