Add X86 TZCNT instruction and patterns to select it. Also added core-avx2 processor...
authorCraig Topper <craig.topper@gmail.com>
Fri, 14 Oct 2011 03:21:46 +0000 (03:21 +0000)
committerCraig Topper <craig.topper@gmail.com>
Fri, 14 Oct 2011 03:21:46 +0000 (03:21 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141939 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrInfo.td
lib/Target/X86/X86Subtarget.cpp
lib/Target/X86/X86Subtarget.h
test/CodeGen/X86/bmi.ll [new file with mode: 0644]
test/MC/Disassembler/X86/simple-tests.txt
test/MC/Disassembler/X86/x86-32.txt

index 133ae7066e99fd099688ca5053e8c2d21271f85e..104b91fd353403ff35a07db63a3466ea3ac1b195 100644 (file)
@@ -104,6 +104,8 @@ def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
                        "Support 16-bit floating point conversion instructions">;
 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                                       "Support LZCNT instruction">;
+def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
+                                      "Support BMI instructions">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -158,6 +160,12 @@ def : Proc<"core-avx-i",      [FeatureSSE42, FeatureCMPXCHG16B,
                                FeatureAES, FeatureCLMUL,
                                FeatureRDRAND, FeatureF16C]>;
 
+// Haswell
+def : Proc<"core-avx2",       [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES,
+                               FeatureCLMUL, FeatureRDRAND, FeatureF16C,
+                               FeatureFMA3, FeatureMOVBE, FeatureLZCNT,
+                               FeatureBMI]>;
+
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [Feature3DNow]>;
 def : Proc<"k6-3",            [Feature3DNow]>;
index 251064b3bb0dc7e87db45441bba1891abaf1fe4d..f85c201d01c2079a383d740caff6ea5bddf0d4ca 100644 (file)
@@ -379,11 +379,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
 
-  setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
-  setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
-  setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
-  if (Subtarget->is64Bit())
-    setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
+  if (Subtarget->hasBMI()) {
+    setOperationAction(ISD::CTTZ           , MVT::i8   , Promote);
+  } else {
+    setOperationAction(ISD::CTTZ           , MVT::i8   , Custom);
+    setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
+    setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
+    if (Subtarget->is64Bit())
+      setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
+  }
 
   if (Subtarget->hasLZCNT()) {
     setOperationAction(ISD::CTLZ           , MVT::i8   , Promote);
index c43351a0293bfa0dfad5180e2f612b53ce172c0a..cc358feb97eaeec0dc0e896d2981c0c6ff2b03dd 100644 (file)
@@ -478,6 +478,7 @@ def HasMOVBE     : Predicate<"Subtarget->hasMOVBE()">;
 def HasRDRAND    : Predicate<"Subtarget->hasRDRAND()">;
 def HasF16C      : Predicate<"Subtarget->hasF16C()">;
 def HasLZCNT     : Predicate<"Subtarget->hasLZCNT()">;
+def HasBMI       : Predicate<"Subtarget->hasBMI()">;
 def FPStackf32   : Predicate<"!Subtarget->hasXMM()">;
 def FPStackf64   : Predicate<"!Subtarget->hasXMMInt()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -1372,6 +1373,37 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
                       (implicit EFLAGS)]>, XS;
 }
 
+//===----------------------------------------------------------------------===//
+// TZCNT Instruction
+//
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+  def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                    "tzcnt{w}\t{$src, $dst|$dst, $src}",
+                    [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS,
+                    OpSize;
+  def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                    "tzcnt{w}\t{$src, $dst|$dst, $src}",
+                    [(set GR16:$dst, (cttz (loadi16 addr:$src))),
+                     (implicit EFLAGS)]>, XS, OpSize;
+
+  def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                    "tzcnt{l}\t{$src, $dst|$dst, $src}",
+                    [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS;
+  def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+                    "tzcnt{l}\t{$src, $dst|$dst, $src}",
+                    [(set GR32:$dst, (cttz (loadi32 addr:$src))),
+                     (implicit EFLAGS)]>, XS;
+
+  def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+                     "tzcnt{q}\t{$src, $dst|$dst, $src}",
+                     [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
+                     XS;
+  def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+                     "tzcnt{q}\t{$src, $dst|$dst, $src}",
+                     [(set GR64:$dst, (cttz (loadi64 addr:$src))),
+                      (implicit EFLAGS)]>, XS;
+}
+
 //===----------------------------------------------------------------------===//
 // Subsystems.
 //===----------------------------------------------------------------------===//
index c2f60be3219b3b6ddf0d30fbe470d125f35e442a..7064dd06fa307a4629c4774ebad451352eb8d4d5 100644 (file)
@@ -290,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
   , HasRDRAND(false)
   , HasF16C(false)
   , HasLZCNT(false)
+  , HasBMI(false)
   , IsBTMemSlow(false)
   , IsUAMemFast(false)
   , HasVectorUAMem(false)
index f67575a94dfd30e3ecf63ebb55d40418f070e722..3258d3d0ada398be44d5ac64712c723f4ee0ed8b 100644 (file)
@@ -102,6 +102,9 @@ protected:
   /// HasLZCNT - Processor has LZCNT instruction.
   bool HasLZCNT;
 
+  /// HasBMI - Processor has BMI1 instructions.
+  bool HasBMI;
+
   /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
 
@@ -188,6 +191,7 @@ public:
   bool hasRDRAND() const { return HasRDRAND; }
   bool hasF16C() const { return HasF16C; }
   bool hasLZCNT() const { return HasLZCNT; }
+  bool hasBMI() const { return HasBMI; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool hasVectorUAMem() const { return HasVectorUAMem; }
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
new file mode 100644 (file)
index 0000000..e0d1b58
--- /dev/null
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=x86-64 -mattr=+bmi | FileCheck %s
+
+define i32 @t1(i32 %x) nounwind  {
+       %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
+       ret i32 %tmp
+; CHECK: t1:
+; CHECK: tzcntl
+}
+
+declare i32 @llvm.cttz.i32(i32) nounwind readnone
+
+define i16 @t2(i16 %x) nounwind  {
+       %tmp = tail call i16 @llvm.cttz.i16( i16 %x )
+       ret i16 %tmp
+; CHECK: t2:
+; CHECK: tzcntw
+}
+
+declare i16 @llvm.cttz.i16(i16) nounwind readnone
+
+define i64 @t3(i64 %x) nounwind  {
+       %tmp = tail call i64 @llvm.cttz.i64( i64 %x )
+       ret i64 %tmp
+; CHECK: t3:
+; CHECK: tzcntq
+}
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+define i8 @t4(i8 %x) nounwind  {
+       %tmp = tail call i8 @llvm.cttz.i8( i8 %x )
+       ret i8 %tmp
+; CHECK: t4:
+; CHECK: tzcntw
+}
+
+declare i8 @llvm.cttz.i8(i8) nounwind readnone
+
index 1540d12a280bd41bd81a5421c1cf798e36c8de2f..8ca3015c2cda2d86d27d40d32bbb91948ab1b646 100644 (file)
 
 # CHECK: lzcntq %rax, %rax
 0xf3 0x48 0x0f 0xbd 0xc0
+
+# CHECK: tzcntl %eax, %eax
+0xf3 0x0f 0xbc 0xc0
+
+# CHECK: tzcntw %ax, %ax
+0x66 0xf3 0x0f 0xbc 0xc0
+
+# CHECK: tzcntq %rax, %rax
+0xf3 0x48 0x0f 0xbc 0xc0
index 146b6667e5148ab2f13f0d6a11bc6935fb6a3a9c..e58356bd432aaf3ecc858277705b4b63d987cdd1 100644 (file)
 
 # CHECK: lzcntw %ax, %ax
 0x66 0xf3 0x0f 0xbd 0xc0
+
+# CHECK: tzcntl %eax, %eax
+0xf3 0x0f 0xbc 0xc0
+
+# CHECK: tzcntw %ax, %ax
+0x66 0xf3 0x0f 0xbc 0xc0