Enable execution dependency fix pass for YMM registers when AVX2 is enabled. Add AVX2 logical operations to list of replaceable instructions.

author Craig Topper <craig.topper@gmail.com>

Wed, 9 Nov 2011 09:37:21 +0000 (09:37 +0000)

committer Craig Topper <craig.topper@gmail.com>

Wed, 9 Nov 2011 09:37:21 +0000 (09:37 +0000)
author Craig Topper <craig.topper@gmail.com>
Wed, 9 Nov 2011 09:37:21 +0000 (09:37 +0000)
committer Craig Topper <craig.topper@gmail.com>
Wed, 9 Nov 2011 09:37:21 +0000 (09:37 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 8dcd637a966fcaa974b23e123b75e0d265a2d53b..102911fa4216b4361581782520249780abc54e18 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3367,6 +3367,14 @@ static const unsigned ReplaceableInstrs[][3] = {
    { X86::VMOVUPSYmr,   X86::VMOVUPDYmr,   X86::VMOVDQUYmr  },
    { X86::VMOVUPSYrm,   X86::VMOVUPDYrm,   X86::VMOVDQUYrm  },
    { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr },
+  { X86::VANDNPSYrm,   X86::VANDNPDYrm,   X86::VPANDNYrm   },
+  { X86::VANDNPSYrr,   X86::VANDNPDYrr,   X86::VPANDNYrr   },
+  { X86::VANDPSYrm,    X86::VANDPDYrm,    X86::VPANDYrm    },
+  { X86::VANDPSYrr,    X86::VANDPDYrr,    X86::VPANDYrr    },
+  { X86::VORPSYrm,     X86::VORPDYrm,     X86::VPORYrm     },
+  { X86::VORPSYrr,     X86::VORPDYrr,     X86::VPORYrr     },
+  { X86::VXORPSYrm,    X86::VXORPDYrm,    X86::VPXORYrm    },
+  { X86::VXORPSYrr,    X86::VXORPDYrr,    X86::VPXORYrr    },
  };
  
  // FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp

index 15c6c4e7a7d212833a59b6eda52c53fea2e340f5..4d4d7c06ab9abe38fda1668284f2742f8e158a6d 100644 (file)
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -135,10 +135,18 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
  bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
                                        CodeGenOpt::Level OptLevel) {
    bool ShouldPrint = false;
-  if (OptLevel != CodeGenOpt::None &&
-      (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
-    PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
-    ShouldPrint = true;
+  if (OptLevel != CodeGenOpt::None) {
+    if (Subtarget.hasXMMInt()) {
+      PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+      ShouldPrint = true;
+    }
+    if (Subtarget.hasAVX2()) {
+      // FIXME this should be turned on for just AVX, but the pass doesn't check
+      // that instructions are valid before replacing them and there are AVX2
+      // integer instructions in the table.
+      PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass));
+      ShouldPrint = true;
+    }
    }
  
    if (Subtarget.hasAVX() && UseVZeroUpper) {
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll

new file mode 100644 (file)

index 0000000..a763bc0
--- /dev/null
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpandn  %ymm
+define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %x = and <4 x i64> %a, %y
+  ret <4 x i64> %x
+}
+
+; CHECK: vpand %ymm
+define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = and <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpor %ymm
+define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = or <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK: vpxor %ymm
+define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = xor <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
author	Craig Topper <craig.topper@gmail.com>
	Wed, 9 Nov 2011 09:37:21 +0000 (09:37 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Wed, 9 Nov 2011 09:37:21 +0000 (09:37 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
lib/Target/X86/X86TargetMachine.cpp		patch | blob | history
test/CodeGen/X86/avx2-logic.ll	[new file with mode: 0644]	patch | blob