; RUN: llc -mcpu=core-avx -debug-only=stackmaps < %s | FileCheck %s ; REQUIRES: asserts target triple = "x86_64-pc-linux-gnu" ; Can we lower a single vector? define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" { entry: ; CHECK-LABEL: @test ; CHECK: subq $24, %rsp ; CHECK: movaps %xmm0, (%rsp) ; CHECK: callq do_safepoint ; CHECK: movaps (%rsp), %xmm0 ; CHECK: addq $24, %rsp %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj) %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj) ret <2 x i8 addrspace(1)*> %obj.relocated } ; Can we lower the base, derived pairs if both are vectors? define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" { entry: ; CHECK-LABEL: @test2 ; CHECK: subq $40, %rsp ; CHECK: movd %rdi, %xmm1 ; CHECK: pshufd $68, %xmm1, %xmm1 # xmm1 = xmm1[0,1,0,1] ; CHECK: paddq %xmm0, %xmm1 ; CHECK: movdqa %xmm0, 16(%rsp) ; CHECK: movdqa %xmm1, (%rsp) ; CHECK: callq do_safepoint ; CHECK: movaps (%rsp), %xmm0 ; CHECK: addq $40, %rsp %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived) %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived) ret <2 x i8 addrspace(1)*> %derived.relocated } ; Originally, this was just a variant of @test2 above, but it ends up ; covering a bunch of interesting missed optimizations. Specifically: ; - We waste a stack slot for a value that a backend transform pass ; CSEd to another spilled one. ; - We don't remove the testb even though it serves no purpose ; - We could in principal reuse the argument memory (%rsi) and do away ; with stack slots entirely. define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" { entry: ; CHECK-LABEL: @test3 ; CHECK: subq $40, %rsp ; CHECK: testb $1, %dil ; CHECK: movaps (%rsi), %xmm0 ; CHECK: movaps %xmm0, 16(%rsp) ; CHECK: movaps %xmm0, (%rsp) ; CHECK: callq do_safepoint ; CHECK: movaps (%rsp), %xmm0 ; CHECK: addq $40, %rsp br i1 %cnd, label %taken, label %untaken taken: ; preds = %entry %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr br label %merge untaken: ; preds = %entry %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr br label %merge merge: ; preds = %untaken, %taken %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base) %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj) %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*> %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base) %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*> ret <2 x i64 addrspace(1)*> %obj.relocated.casted } ; Can we handle vector constants? At the moment, we don't appear to actually ; get selection dag nodes for these. define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" { entry: ; CHECK-LABEL: @test4 ; CHECK: subq $24, %rsp ; CHECK: xorps %xmm0, %xmm0 ; CHECK: movaps %xmm0, (%rsp) ; CHECK: callq do_safepoint ; CHECK: movaps (%rsp), %xmm0 ; CHECK: addq $24, %rsp %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer) %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj) ret <2 x i8 addrspace(1)*> %obj.relocated } ; Check that we can lower a constant typed as i128 correctly. Note that the ; actual value is representable in 64 bits. We don't have a representation ; of larger than 64 bit constant in the StackMap format. define void @test5() gc "statepoint-example" { entry: ; CHECK-LABEL: @test5 ; CHECK: push ; CHECK: callq do_safepoint ; CHECK: pop %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0) ret void } ; CHECK: __LLVM_StackMaps: ; CHECK: .Ltmp1-test ; Check for the two spill slots ; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] ; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 0 ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 0 ; CHECK: .Ltmp3-test2 ; Check for the two spill slots ; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] ; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 16 ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 0 ; CHECK: .Ltmp5-test3 ; Check for the four spill slots ; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] ; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] ; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] ; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 16 ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 16 ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 16 ; CHECK: .byte 3 ; CHECK: .byte 16 ; CHECK: .short 7 ; CHECK: .long 0 declare void @do_safepoint() declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)