X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Ffmul-combines.ll;h=703651153c1196ac478889eda90264f7d5c16599;hb=3d4542ce3da1cb0782c65d38130556a00ed2586d;hp=a0122356720bc9415881e0233725244ccc608512;hpb=0ac5626b56ece9e73344cdd7551ff4fbaa0d145a;p=oota-llvm.git diff --git a/test/CodeGen/X86/fmul-combines.ll b/test/CodeGen/X86/fmul-combines.ll index a0122356720..703651153c1 100644 --- a/test/CodeGen/X86/fmul-combines.ll +++ b/test/CodeGen/X86/fmul-combines.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -march=x86-64 < %s | FileCheck %s ; CHECK-LABEL: fmul2_f32: ; CHECK: addss %xmm0, %xmm0 @@ -55,6 +55,54 @@ define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 { ret <4 x float> %z } +; We should be able to pre-multiply the two constant vectors. +; CHECK: float 5.000000e+00 +; CHECK: float 1.200000e+01 +; CHECK: float 2.100000e+01 +; CHECK: float 3.200000e+01 +; CHECK-LABEL: fmul_v4f32_two_consts_no_splat: +; CHECK: mulps +; CHECK-NOT: mulps +; CHECK-NEXT: ret +define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 { + %y = fmul <4 x float> %x, + %z = fmul <4 x float> %y, + ret <4 x float> %z +} + +; Same as above, but reverse operands to make sure non-canonical form is also handled. +; CHECK: float 5.000000e+00 +; CHECK: float 1.200000e+01 +; CHECK: float 2.100000e+01 +; CHECK: float 3.200000e+01 +; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical: +; CHECK: mulps +; CHECK-NOT: mulps +; CHECK-NEXT: ret +define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 { + %y = fmul <4 x float> , %x + %z = fmul <4 x float> , %y + ret <4 x float> %z +} + +; More than one use of a constant multiply should not inhibit the optimization. +; Instead of a chain of 2 dependent mults, this test will have 2 independent mults. +; CHECK: float 5.000000e+00 +; CHECK: float 1.200000e+01 +; CHECK: float 2.100000e+01 +; CHECK: float 3.200000e+01 +; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use: +; CHECK: mulps +; CHECK: mulps +; CHECK: addps +; CHECK: ret +define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 { + %y = fmul <4 x float> %x, + %z = fmul <4 x float> %y, + %a = fadd <4 x float> %y, %z + ret <4 x float> %a +} + ; CHECK-LABEL: fmul_c2_c4_f32: ; CHECK-NOT: addss ; CHECK: mulss