From: Kalle Raiskila Date: Fri, 4 Mar 2011 13:19:18 +0000 (+0000) Subject: Allow vector shifts (shl,lshr,ashr) on SPU. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=31cbac1cfea8703098e09e7ff5fa8a626eebc920;p=oota-llvm.git Allow vector shifts (shl,lshr,ashr) on SPU. There was a previous implementation with patterns that would have matched e.g. shl <v4i32> <i32>, but this is not valid LLVM IR so they never were selected. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126998 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 02f52b36953..e103c9b6a5a 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -2015,9 +2015,9 @@ class SHLHInst<dag OOL, dag IOL, list<dag> pattern>: RotShiftVec, pattern>; class SHLHVecInst<ValueType vectype>: - SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [(set (vectype VECREG:$rT), - (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>; + (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; multiclass ShiftLeftHalfword { @@ -2065,9 +2065,9 @@ class SHLInst<dag OOL, dag IOL, list<dag> pattern>: multiclass ShiftLeftWord { def v4i32: - SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [(set (v4i32 VECREG:$rT), - (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>; + (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def r32: SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; @@ -2513,19 +2513,11 @@ class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>: RotShiftVec, pattern>; def ROTHMv8i16: - ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R32C:$rB), - (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB), - (ROTHMv8i16 VECREG:$rA, - 
(SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB), - (ROTHMv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; // ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left // Note: This instruction doesn't match a pattern because rB must be negated @@ -2586,19 +2578,11 @@ class ROTMInst<dag OOL, dag IOL, list<dag> pattern>: RotShiftVec, pattern>; def ROTMv4i32: - ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R32C:$rB), - (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R16C:$rB), - (ROTMv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R8C:$rB), - (ROTMv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>; def ROTMr32: ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), @@ -2804,20 +2788,12 @@ defm ROTQMBII: RotateMaskQuadByBitsImm; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def ROTMAHv8i16: - RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "rotmah\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB), - (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R16C:$rB), - (ROTMAHv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB), - (ROTMAHv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 
VECREG:$rB)), + (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; def ROTMAHr16: RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), @@ -2859,20 +2835,12 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)), (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; def ROTMAv4i32: - RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "rotma\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB), - (ROTMAv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R16C:$rB), - (ROTMAv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB), - (ROTMAv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>; def ROTMAr32: RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 92390abf946..c4a5abd2904 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep {shlh } %t1.s | count 9 +; RUN: grep {shlh } %t1.s | count 10 ; RUN: grep {shlhi } %t1.s | count 3 -; RUN: grep {shl } %t1.s | count 9 +; RUN: grep {shl } %t1.s | count 11 ; RUN: grep {shli } %t1.s | count 3 ; RUN: grep {xshw } %t1.s | count 5 ; RUN: grep {and } %t1.s | count 14 @@ -14,15 +14,12 @@ ; RUN: grep {rotqbyi } %t1.s | count 1 ; RUN: grep {rotqbii } %t1.s | count 2 ; RUN: grep {rotqbybi } %t1.s | count 1 -; RUN: grep {sfi } %t1.s | count 4 +; RUN: grep {sfi } %t1.s | count 6 ; RUN: cat %t1.s | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target 
triple = "spu" -; Vector shifts are not currently supported in gcc or llvm assembly. These are -; not tested. - ; Shift left i16 via register, note that the second operand to shl is promoted ; to a 32-bit type: @@ -293,3 +290,55 @@ define i128 @test_lshr_i128( i128 %val ) { %rv = lshr i128 %val, 64 ret i128 %rv } + +;Vector shifts +define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <2 x i32> %val, %sh + ret <2 x i32> %rv +} + +define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: shlh +;CHECK: bi $lr + %rv = shl <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotm +;CHECK: bi $lr + %rv = lshr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rothm +;CHECK: bi $lr + %rv = lshr <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotma +;CHECK: bi $lr + %rv = ashr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rotmah +;CHECK: bi $lr + %rv = ashr <8 x i16> %val, %sh + ret <8 x i16> %rv +}