[X86][AVX] Added missing stack folding support + test for vptest ymm instruction
authorSimon Pilgrim <llvm-dev@redking.me.uk>
Sat, 7 Feb 2015 21:44:06 +0000 (21:44 +0000)
committerSimon Pilgrim <llvm-dev@redking.me.uk>
Sat, 7 Feb 2015 21:44:06 +0000 (21:44 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228509 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrInfo.cpp
test/CodeGen/X86/stack-folding-int-avx1.ll

index 312e98e218907d00021d8873ca400bec0947afde..4685905db60b02c31a1cc5f39e6a1d13a0e477fa 100644 (file)
@@ -638,6 +638,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMOVUPSYrr,      X86::VMOVUPSYrm,          0 },
     { X86::VPERMILPDYri,    X86::VPERMILPDYmi,        0 },
     { X86::VPERMILPSYri,    X86::VPERMILPSYmi,        0 },
+    { X86::VPTESTYrr,       X86::VPTESTYrm,           0 },
     { X86::VRCPPSYr,        X86::VRCPPSYm,            0 },
     { X86::VRCPPSYr_Int,    X86::VRCPPSYm_Int,        0 },
     { X86::VROUNDYPDr,      X86::VROUNDYPDm,          0 },
index 3880ce0ee6b79ffc87b76ad8a141d140481d6c66..22d86a7e5f785d53c8ea33d673ea1230e0326237 100644 (file)
@@ -1056,6 +1056,15 @@ define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
 }
 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 
+define i32 @stack_fold_ptest_ymm(<4 x i64> %a0, <4 x i64> %a1) {
+  ;CHECK-LABEL: stack_fold_ptest_ymm
+  ;CHECK:       vptest {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
+  ret i32 %2
+}
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
+
 define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
   ;CHECK-LABEL: stack_fold_punpckhbw
   ;CHECK:       vpunpckhbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload