- Add "Commutative" property to intrinsics. This allows tblgen to generate the commuted variants of patterns that use them.
author Evan Cheng <evan.cheng@apple.com>
Mon, 16 Jun 2008 20:29:38 +0000 (20:29 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Mon, 16 Jun 2008 20:29:38 +0000 (20:29 +0000)
- Mark lots of X86 intrinsics as "Commutative" to allow load folding.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52353 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Intrinsics.td
include/llvm/IntrinsicsX86.td
test/CodeGen/X86/commute-intrinsic.ll [new file with mode: 0644]
utils/TableGen/CodeGenDAGPatterns.cpp
utils/TableGen/CodeGenDAGPatterns.h
utils/TableGen/CodeGenIntrinsics.h
utils/TableGen/CodeGenTarget.cpp

index 18c42e41f8aae16d0dab67df9821b2ac2ae913ea..866107cbfb31a67ce408c7c1e499eec2c31ff6a2 100644 (file)
@@ -48,6 +48,9 @@ def IntrWriteArgMem : IntrinsicProperty;
 // default if the intrinsic has no other Intr*Mem property.
 def IntrWriteMem : IntrinsicProperty;
 
+// Commutative - First two operands commute: f(X, Y, ...) == f(Y, X, ...).
+def Commutative : IntrinsicProperty;
+
 //===----------------------------------------------------------------------===//
 // Types used by intrinsics.
 //===----------------------------------------------------------------------===//
index 372f7211df6e73e1469efd7b79c3fd0c7ab4e073..dbb8496cb415c2d0f2fe779e892c4092bb9d9a9f 100644 (file)
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">,
               Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_v4f32_ty], [IntrNoMem]>;
+                         llvm_v4f32_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">,
               Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
                          llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">,
               Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_v4f32_ty], [IntrNoMem]>;
+                         llvm_v4f32_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">,
               Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
                          llvm_v4f32_ty], [IntrNoMem]>;
@@ -176,13 +176,13 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">,
               Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
+                         llvm_v2f64_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">,
               Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
                          llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">,
               Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
+                         llvm_v2f64_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">,
               Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
                          llvm_v2f64_ty], [IntrNoMem]>;
@@ -256,16 +256,16 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
               Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_v16i8_ty], [IntrNoMem]>;
+                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
               Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_v16i8_ty], [IntrNoMem]>;
+                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
               Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
                          llvm_v16i8_ty], [IntrNoMem]>;
@@ -280,37 +280,37 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                          llvm_v8i16_ty], [IntrNoMem]>;
   def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty], [IntrNoMem]>;
+                         llvm_v4i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
               Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
               Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_v16i8_ty], [IntrNoMem]>;
+                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">,
               Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_v16i8_ty], [IntrNoMem]>;
+                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">,
               Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_v16i8_ty], [IntrNoMem]>;
+                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty,
-                         llvm_v16i8_ty], [IntrNoMem]>;
+                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;
 }
 
 // Integer shift ops.
@@ -553,24 +553,24 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_ssse3_phadd_w         : GCCBuiltin<"__builtin_ia32_phaddw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_ssse3_phadd_w_128     : GCCBuiltin<"__builtin_ia32_phaddw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
 
   def int_x86_ssse3_phadd_d         : GCCBuiltin<"__builtin_ia32_phaddd">,
               Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v2i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_ssse3_phadd_d_128     : GCCBuiltin<"__builtin_ia32_phaddd128">,
               Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty], [IntrNoMem]>;
+                         llvm_v4i32_ty], [IntrNoMem, Commutative]>;
 
   def int_x86_ssse3_phadd_sw        : GCCBuiltin<"__builtin_ia32_phaddsw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_ssse3_phadd_sw_128    : GCCBuiltin<"__builtin_ia32_phaddsw128">,
               Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_v4i32_ty], [IntrNoMem]>;
+                         llvm_v4i32_ty], [IntrNoMem, Commutative]>;
 
   def int_x86_ssse3_phsub_w         : GCCBuiltin<"__builtin_ia32_phsubw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
@@ -595,17 +595,17 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
   def int_x86_ssse3_pmadd_ub_sw     : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
 
   def int_x86_ssse3_pmul_hr_sw      : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_ssse3_pmul_hr_sw_128  : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
               Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_v8i16_ty], [IntrNoMem]>;
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
 }
 
 // Shuffle ops
@@ -692,128 +692,170 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Vector sign and zero extend
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_pmovsxbd        : GCCBuiltin<"__builtin_ia32_pmovsxbd128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovsxbq        : GCCBuiltin<"__builtin_ia32_pmovsxbq128">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovsxbw        : GCCBuiltin<"__builtin_ia32_pmovsxbw128">,
-              Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovsxdq        : GCCBuiltin<"__builtin_ia32_pmovsxdq128">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovsxwd        : GCCBuiltin<"__builtin_ia32_pmovsxwd128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovsxwq        : GCCBuiltin<"__builtin_ia32_pmovsxwq128">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovzxbd        : GCCBuiltin<"__builtin_ia32_pmovzxbd128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovzxbq        : GCCBuiltin<"__builtin_ia32_pmovzxbq128">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovzxbw        : GCCBuiltin<"__builtin_ia32_pmovzxbw128">,
-              Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v8i16_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovzxdq        : GCCBuiltin<"__builtin_ia32_pmovzxdq128">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovzxwd        : GCCBuiltin<"__builtin_ia32_pmovzxwd128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v8i16_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pmovzxwq        : GCCBuiltin<"__builtin_ia32_pmovzxwq128">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v8i16_ty],
+                        [IntrNoMem]>;
 }
 
 // Vector min element
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_phminposuw     : GCCBuiltin<"__builtin_ia32_phminposuw128">,
-              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty]>;
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty],
+                        [IntrNoMem]>;
 }
 
 // Vector compare, min, max
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_pcmpeqq         : GCCBuiltin<"__builtin_ia32_pcmpeqq">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pmaxsb          : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
-              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pmaxsd          : GCCBuiltin<"__builtin_ia32_pmaxsd128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pmaxud          : GCCBuiltin<"__builtin_ia32_pmaxud128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pmaxuw          : GCCBuiltin<"__builtin_ia32_pmaxuw128">,
-              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty]>;
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pminsb          : GCCBuiltin<"__builtin_ia32_pminsb128">,
-              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>;
+              Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pminsd          : GCCBuiltin<"__builtin_ia32_pminsd128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pminud          : GCCBuiltin<"__builtin_ia32_pminud128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pminuw          : GCCBuiltin<"__builtin_ia32_pminuw128">,
-              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty]>;
+              Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+                        [IntrNoMem, Commutative]>;
 }
 
 // Vector pack
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_packusdw        : GCCBuiltin<"__builtin_ia32_packusdw128">,
-              Intrinsic<[llvm_v8i16_ty, llvm_v4i32_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v8i16_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem]>;
 }
 
 // Vector multiply
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_pmuldq          : GCCBuiltin<"__builtin_ia32_pmuldq128">,
-              Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem, Commutative]>;
   def int_x86_sse41_pmulld          : GCCBuiltin<"__builtin_ia32_pmulld128">,
-              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty]>;
+              Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem, Commutative]>;
 }
 
 // Vector extract
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_pextrb         :
-              Intrinsic<[llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty]>;
+              Intrinsic<[llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pextrd         :
-              Intrinsic<[llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty]>;
+              Intrinsic<[llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_pextrq         :
-              Intrinsic<[llvm_i64_ty, llvm_v2i64_ty, llvm_i32_ty]>;
+              Intrinsic<[llvm_i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+                        [IntrNoMem]>;
   def int_x86_sse41_extractps      : GCCBuiltin<"__builtin_ia32_extractps128">,
-              Intrinsic<[llvm_i32_ty, llvm_v4f32_ty, llvm_i32_ty]>;
+              Intrinsic<[llvm_i32_ty, llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem]>;
 }
 
 // Vector insert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_pinsrb         : GCCBuiltin<"__builtin_ia32_vec_set_v16qi">,
-          Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty]>;
+          Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
   def int_x86_sse41_pinsrd         : GCCBuiltin<"__builtin_ia32_vec_set_v4si">,
-          Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+          Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
   def int_x86_sse41_pinsrq         : GCCBuiltin<"__builtin_ia32_vec_set_v2di">,
-          Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty]>;
+          Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
   def int_x86_sse41_insertps       : GCCBuiltin<"__builtin_ia32_insertps128">,
-          Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>;
+          Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 }
 
 // Vector blend
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_pblendvb         : GCCBuiltin<"__builtin_ia32_pblendvb128">,
-        Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty]>;
+        Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                  [IntrNoMem]>;
   def int_x86_sse41_pblendw          : GCCBuiltin<"__builtin_ia32_pblendw128">,
-        Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty]>;
+        Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
+                  [IntrNoMem]>;
   def int_x86_sse41_blendpd          : GCCBuiltin<"__builtin_ia32_blendpd">,
-        Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty]>;
+        Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
+                  [IntrNoMem]>;
   def int_x86_sse41_blendps          : GCCBuiltin<"__builtin_ia32_blendps">,
-        Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>;
+        Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+                  [IntrNoMem]>;
   def int_x86_sse41_blendvpd         : GCCBuiltin<"__builtin_ia32_blendvpd">,
-        Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty]>;
+        Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+                  [IntrNoMem]>;
   def int_x86_sse41_blendvps         : GCCBuiltin<"__builtin_ia32_blendvps">,
-        Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty]>;
+        Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+                  [IntrNoMem]>;
 }
 
 // Vector dot product
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_dppd            : GCCBuiltin<"__builtin_ia32_dppd">,
-          Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty]>;
+          Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
+                    [IntrNoMem, Commutative]>;
   def int_x86_sse41_dpps            : GCCBuiltin<"__builtin_ia32_dpps">,
-          Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty]>;
+          Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+                    [IntrNoMem, Commutative]>;
 }
 
 // Vector sum of absolute differences
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_mpsadbw         : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
-          Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty]>;
+          Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+                    [IntrNoMem, Commutative]>;
 }
 
-// Vector sum of absolute differences
+// Cacheability support ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_movntdqa        : GCCBuiltin<"__builtin_ia32_movntdqa">,
           Intrinsic<[llvm_v2i64_ty, llvm_ptr_ty], [IntrReadMem]>;
@@ -836,17 +878,17 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   // Addition
   def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
               Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
 
   def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">,
               Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
 
   // Subtraction
   def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
@@ -866,45 +908,45 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   // Multiplication
   def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">,
               Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
-                         llvm_v2i32_ty], [IntrNoMem]>;
+                         llvm_v2i32_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">,
               Intrinsic<[llvm_v2i32_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
 
   // Averages
   def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
               Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
 
   // Maximum
   def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">,
               Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
 
   // Minimum
   def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">,
               Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
   def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
-                         llvm_v4i16_ty], [IntrNoMem]>;
+                         llvm_v4i16_ty], [IntrNoMem, Commutative]>;
 
   // Packed sum of absolute differences
   def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">,
               Intrinsic<[llvm_v4i16_ty, llvm_v8i8_ty,
-                         llvm_v8i8_ty], [IntrNoMem]>;
+                         llvm_v8i8_ty], [IntrNoMem, Commutative]>;
 }
 
 // Integer shift ops.
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
new file mode 100644 (file)
index 0000000..12c0e03
--- /dev/null
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
+
+@a = external global <2 x i64>         ; <<2 x i64>*> [#uses=1]
+
+define <2 x i64> @madd(<2 x i64> %b) nounwind  {
+entry:
+       %tmp2 = load <2 x i64>* @a, align 16            ; <<2 x i64>> [#uses=1]
+       %tmp6 = bitcast <2 x i64> %b to <8 x i16>               ; <<8 x i16>> [#uses=1]
+       %tmp9 = bitcast <2 x i64> %tmp2 to <8 x i16>            ; <<8 x i16>> [#uses=1]
+       %tmp11 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd( <8 x i16> %tmp9, <8 x i16> %tmp6 ) nounwind readnone              ; <<4 x i32>> [#uses=1]
+       %tmp14 = bitcast <4 x i32> %tmp11 to <2 x i64>          ; <<2 x i64>> [#uses=1]
+       ret <2 x i64> %tmp14
+}
+
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone 
index 882c7245f6126b5bae863dc9d8aee39060689754..44dbe6c95d1e0352a42209c0ec3ef1f99407a8db 100644 (file)
@@ -742,6 +742,15 @@ getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const {
   return &CDP.getIntrinsicInfo(IID);
 }
 
+/// isCommutativeIntrinsic - Returns true when this node is an intrinsic whose
+/// CodeGenIntrinsic record has isCommutative set, and false otherwise.
+bool
+TreePatternNode::isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const {
+  const CodeGenIntrinsic *Info = getIntrinsicInfo(CDP);
+  // A null Info means this node does not correspond to an intrinsic.
+  return Info && Info->isCommutative;
+}
+
 
 /// ApplyTypeConstraints - Apply all of the type constraints relevent to
 /// this node and its children in the tree.  This returns true if it makes a
@@ -999,11 +1008,13 @@ bool TreePatternNode::canPatternMatch(std::string &Reason,
   // If this node is a commutative operator, check that the LHS isn't an
   // immediate.
   const SDNodeInfo &NodeInfo = CDP.getSDNodeInfo(getOperator());
-  if (NodeInfo.hasProperty(SDNPCommutative)) {
+  bool isCommIntrinsic = isCommutativeIntrinsic(CDP);
+  if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) {
     // Scan all of the operands of the node and make sure that only the last one
     // is a constant node, unless the RHS also is.
     if (!OnlyOnRHSOfCommutative(getChild(getNumChildren()-1))) {
-      for (unsigned i = 0, e = getNumChildren()-1; i != e; ++i)
+      bool Skip = isCommIntrinsic ? 1 : 0; // First operand is intrinsic id.
+      for (unsigned i = Skip, e = getNumChildren()-1; i != e; ++i)
         if (OnlyOnRHSOfCommutative(getChild(i))) {
           Reason="Immediate value must be on the RHS of commutative operators!";
           return false;
@@ -2250,8 +2261,10 @@ static void GenerateVariantsOf(TreePatternNode *N,
   CombineChildVariants(N, ChildVariants, OutVariants, CDP, DepVars);
 
   // If this node is commutative, consider the commuted order.
-  if (NodeInfo.hasProperty(SDNPCommutative)) {
-    assert(N->getNumChildren()==2 &&"Commutative but doesn't have 2 children!");
+  bool isCommIntrinsic = N->isCommutativeIntrinsic(CDP);
+  if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) {
+    assert((N->getNumChildren()==2 || isCommIntrinsic) &&
+           "Commutative but doesn't have 2 children!");
     // Don't count children which are actually register references.
     unsigned NC = 0;
     for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
@@ -2265,7 +2278,20 @@ static void GenerateVariantsOf(TreePatternNode *N,
       NC++;
     }
     // Consider the commuted order.
-    if (NC == 2)
+    if (isCommIntrinsic) {
+      // Commutative intrinsic. First operand is the intrinsic id, 2nd and 3rd
+      // operands are the commutative operands, and there might be more operands
+      // after those.
+      assert(NC >= 3 &&
+             "Commutative intrinsic should have at least 3 childrean!");
+      std::vector<std::vector<TreePatternNode*> > Variants;
+      Variants.push_back(ChildVariants[0]); // Intrinsic id.
+      Variants.push_back(ChildVariants[2]);
+      Variants.push_back(ChildVariants[1]);
+      for (unsigned i = 3; i != NC; ++i)
+        Variants.push_back(ChildVariants[i]);
+      CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
+    } else if (NC == 2)
       CombineChildVariants(N, ChildVariants[1], ChildVariants[0],
                            OutVariants, CDP, DepVars);
   }
index 40cfa88cd2f75b11cb807633fd24389dd8b42df3..50c39bcf1695a01037108fc04c014c1a98dcd57d 100644 (file)
@@ -221,6 +221,10 @@ public:
   /// getIntrinsicInfo - If this node corresponds to an intrinsic, return the
   /// CodeGenIntrinsic information for it, otherwise return a null pointer.
   const CodeGenIntrinsic *getIntrinsicInfo(const CodeGenDAGPatterns &CDP) const;
+
+  /// isCommutativeIntrinsic - Return true if the node is an intrinsic which is
+  /// marked isCommutative.
+  bool isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const;
   
   void print(std::ostream &OS) const;
   void dump() const;
index a66c30b6cb89cc7d3758a18322b5bf3a3e76c790..4de93864a8acf40146c385b9ebfa52971b9ddc55 100644 (file)
@@ -49,6 +49,10 @@ namespace llvm {
     // types.
     bool isOverloaded;
 
+    // isCommutative - True if the intrinsic is commutative.
+    //
+    bool isCommutative;
+
     CodeGenIntrinsic(Record *R);
   };
 
index a76f5cd55f83f796d5e19fc50bccfc791274568f..9b3864780dab85b5d462b553b2e2988ff691c613 100644 (file)
@@ -404,6 +404,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
   std::string DefName = R->getName();
   ModRef = WriteMem;
   isOverloaded = false;
+  isCommutative = false;
   
   if (DefName.size() <= 4 || 
       std::string(DefName.begin(), DefName.begin()+4) != "int_")
@@ -469,6 +470,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       ModRef = WriteArgMem;
     else if (Property->getName() == "IntrWriteMem")
       ModRef = WriteMem;
+    else if (Property->getName() == "Commutative")
+      isCommutative = true;
     else
       assert(0 && "Unknown property!");
   }