From 7f5e43f61d3b28a03537c29156b0bad7dd3476e4 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 23 Nov 2011 04:03:08 +0000 Subject: [PATCH] Fix PR11422. This was a bug in keeping track of the available domains when merging domain values. The wrong domain mask caused ExecutionDepsFix to try to move VANDPSYrr to the integer domain which is only available in AVX2. Also add an assertion to catch future attempts at emitting AVX2 instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145096 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 8 +- lib/Target/X86/X86InstrInfo.cpp | 5 +- test/CodeGen/X86/2011-11-22-AVX2-Domains.ll | 99 +++++++++++++++++++++ 3 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/X86/2011-11-22-AVX2-Domains.ll diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 16a8f921b3d..300f0371211 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -600,6 +600,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { while (!Regs.empty()) { if (!dv) { dv = Regs.pop_back_val().Value; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } @@ -617,9 +620,10 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } // dv is the DomainValue we are going to use for this instruction. - if (!dv) + if (!dv) { dv = alloc(); - dv->AvailableDomains = available; + dv->AvailableDomains = available; + } dv->Instrs.push_back(mi); // Finally set all defs and non-collapsed uses to dv. diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 4f5b757acd2..24c4a53792d 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3606,8 +3606,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); const unsigned *table = lookup(MI->getOpcode(), dom); - if (!table) // try the other table + if (!table) { // try the other table + assert((TM.getSubtarget().hasAVX2() || Domain < 3) && + "256-bit vector operations only available in AVX2"); table = lookupAVX2(MI->getOpcode(), dom); + } assert(table && "Cannot change domain"); MI->setDesc(get(table[Domain-1])); } diff --git a/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll new file mode 100644 index 00000000000..8174109378d --- /dev/null +++ b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin11" + +; This test would create a vpand %ymm instruction that is only legal in AVX2. +; CHECK-NOT: vpand %ymm + +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + +define void @ShadeTile() nounwind { +allocas: + br i1 undef, label %if_then, label %if_else + +if_then: ; preds = %allocas + unreachable + +if_else: ; preds = %allocas + br i1 undef, label %for_loop156.lr.ph, label %if_exit + +for_loop156.lr.ph: ; preds = %if_else + %val_6.i21244 = load i16* undef, align 2 + %0 = insertelement <8 x i16> undef, i16 %val_6.i21244, i32 6 + %val_7.i21248 = load i16* undef, align 2 + %1 = insertelement <8 x i16> %0, i16 %val_7.i21248, i32 7 + %uint2uint32.i20206 = zext <8 x i16> %1 to <8 x i32> + %bitop5.i20208 = and <8 x i32> %uint2uint32.i20206, + %bitop8.i20209 = and <8 x i32> %uint2uint32.i20206, + %bitop12.i20211 = lshr <8 x i32> %bitop5.i20208, + %binop13.i20212 = add <8 x i32> %bitop12.i20211, + %bitop15.i20213 = shl <8 x i32> %binop13.i20212, + %bitop17.i20214 = shl <8 x i32> %bitop8.i20209, + %bitop20.i20215 = or <8 x i32> undef, %bitop15.i20213 + %bitop22.i20216 = or <8 x i32> %bitop20.i20215, %bitop17.i20214 + %int_to_float_bitcast.i.i.i20217 = bitcast <8 x i32> %bitop22.i20216 to <8 x float> + %binop401 = fmul <8 x float> undef, + %binop402 = fadd <8 x float> %binop401, + %binop403 = fmul <8 x float> zeroinitializer, %binop402 + %binop406 = fmul <8 x float> %int_to_float_bitcast.i.i.i20217, + %binop407 = fadd <8 x float> %binop406, + %binop408 = fmul <8 x float> zeroinitializer, %binop407 + %binop411 = fsub <8 x float> , undef + %val_4.i21290 = load i16* undef, align 2 + %2 = insertelement <8 x i16> undef, i16 %val_4.i21290, i32 4 + %val_5.i21294 = load i16* undef, align 2 + %3 = insertelement <8 x i16> %2, i16 %val_5.i21294, i32 5 + %val_6.i21298 = load i16* undef, align 2 + %4 = insertelement <8 x i16> %3, i16 %val_6.i21298, i32 6 + %ptr_7.i21301 = inttoptr i64 undef to i16* + %val_7.i21302 = load i16* %ptr_7.i21301, align 2 + %5 = insertelement <8 x i16> %4, i16 %val_7.i21302, i32 7 + %uint2uint32.i20218 = zext <8 x i16> %5 to <8 x i32> + %structelement561 = load i8** undef, align 8 + %ptr2int563 = ptrtoint i8* %structelement561 to i64 + %smear.ptr_smear7571 = insertelement <8 x i64> undef, i64 %ptr2int563, i32 7 + %new_ptr582 = add <8 x i64> %smear.ptr_smear7571, zeroinitializer + %val_5.i21509 = load i8* null, align 1 + %6 = insertelement <8 x i8> undef, i8 %val_5.i21509, i32 5 + %7 = insertelement <8 x i8> %6, i8 undef, i32 6 + %iptr_7.i21515 = extractelement <8 x i64> %new_ptr582, i32 7 + %ptr_7.i21516 = inttoptr i64 %iptr_7.i21515 to i8* + %val_7.i21517 = load i8* %ptr_7.i21516, align 1 + %8 = insertelement <8 x i8> %7, i8 %val_7.i21517, i32 7 + %uint2float.i20245 = uitofp <8 x i8> %8 to <8 x float> + %binop.i20246 = fmul <8 x float> %uint2float.i20245, + br i1 undef, label %for_loop594.lr.ph, label %for_exit595 + +if_exit: ; preds = %if_else + ret void + +for_loop594.lr.ph: ; preds = %for_loop156.lr.ph + %bitop8.i20221 = and <8 x i32> %uint2uint32.i20218, + br i1 undef, label %cif_test_all730, label %cif_mask_mixed1552 + +for_exit595: ; preds = %for_loop156.lr.ph + unreachable + +cif_test_all730: ; preds = %for_loop594.lr.ph + %binop11.i20545 = fmul <8 x float> %binop408, zeroinitializer + %binop12.i20546 = fadd <8 x float> undef, %binop11.i20545 + %binop15.i20547 = fmul <8 x float> %binop411, undef + %binop16.i20548 = fadd <8 x float> %binop12.i20546, %binop15.i20547 + %bincmp774 = fcmp ogt <8 x float> %binop16.i20548, zeroinitializer + %val_to_boolvec32775 = sext <8 x i1> %bincmp774 to <8 x i32> + %floatmask.i20549 = bitcast <8 x i32> %val_to_boolvec32775 to <8 x float> + %v.i20550 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i20549) nounwind readnone + %cond = icmp eq i32 %v.i20550, 255 + br i1 %cond, label %cif_test_all794, label %cif_test_mixed + +cif_test_all794: ; preds = %cif_test_all730 + %binop.i20572 = fmul <8 x float> %binop403, undef + unreachable + +cif_test_mixed: ; preds = %cif_test_all730 + %binop1207 = fmul <8 x float> %binop.i20246, undef + unreachable + +cif_mask_mixed1552: ; preds = %for_loop594.lr.ph + unreachable +} -- 2.34.1