X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FREADME_ALTIVEC.txt;h=34765de1b2179fffb5ec7ede41a26e9969806fc9;hp=cb3ad5bd3a1971c6603d8fe560ff2c3ba34f41a0;hb=b69d556c370b32dee9f64d8250e51aad33963cc2;hpb=c56226c6d1ee25d17eb9bd0246d864325ecb811e diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index cb3ad5bd3a1..34765de1b21 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -211,6 +211,51 @@ vector float f(vector float a, vector float b) { //===----------------------------------------------------------------------===// +We should do a little better with eliminating dead stores. +The stores to the stack are dead since %a and %b are not needed + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { + entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> , <16 x i8>* %a, align 16 + store <16 x i8> , <16 x i8>* %b, align 16 + %0 = load <16 x i8>* %a, align 16 + %1 = load <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +} + + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + + +Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: +# BB#0: # %entry + addis 3, 2, .LCPI0_0@toc@ha + addis 4, 2, .LCPI0_1@toc@ha + addi 3, 3, .LCPI0_0@toc@l + addi 4, 4, .LCPI0_1@toc@l + lxvw4x 0, 0, 3 + addi 3, 1, -16 + lxvw4x 35, 0, 4 + stxvw4x 0, 0, 3 + ori 2, 2, 0 + lxvw4x 34, 0, 3 + addi 3, 1, -32 + stxvw4x 35, 0, 3 + vpmsumb 2, 2, 3 + blr + .long 0 + .quad 0 + +The two stxvw4x instructions are not needed. +With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes +are present too. +//===----------------------------------------------------------------------===// + The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {