R600: Use masked read sel for texture instructions

author Vincent Lejeune <vljn@ovi.com>

Sun, 13 Oct 2013 17:56:10 +0000 (17:56 +0000)

committer Vincent Lejeune <vljn@ovi.com>

Sun, 13 Oct 2013 17:56:10 +0000 (17:56 +0000)
author Vincent Lejeune <vljn@ovi.com>
Sun, 13 Oct 2013 17:56:10 +0000 (17:56 +0000)
committer Vincent Lejeune <vljn@ovi.com>
Sun, 13 Oct 2013 17:56:10 +0000 (17:56 +0000)
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp

index 81a28be104a6fdd4bb9efb17789f6ec42cb6c368..3c2e3888e08569f39d6363db972c3553f0e7761a 100644 (file)
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1379,6 +1379,11 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
    };
  
    for (unsigned i = 0; i < 4; i++) {
+    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
+      // We mask write here to teach later passes that the ith element of this
+      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
+      // break false dependencies and additionally make assembly easier to read.
+      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
      if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
        if (C->isZero()) {
          RemapSwizzle[i] = 4; // SEL_0
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll

index 02fe13a720e79785c6adea83f205d1a6dc01ccbe..9a58f667f0d1ecd4fa5f022b1a77c8b20c97aabb 100644 (file)
--- a/test/CodeGen/R600/swizzle-export.ll
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -93,6 +93,7 @@ main_body:
  }
  
  ; EG-CHECK: @main2
+; EG-CHECK: T{{[0-9]+}}.XY__
  ; EG-CHECK: T{{[0-9]+}}.YXZ0
  
  define void @main2() #0 {
@@ -110,14 +111,12 @@ main_body:
    %10 = extractelement <4 x float> %9, i32 1
    %11 = insertelement <4 x float> undef, float %0, i32 0
    %12 = insertelement <4 x float> %11, float %1, i32 1
-  %13 = insertelement <4 x float> %12, float %2, i32 2
-  %14 = insertelement <4 x float> %13, float %3, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %14, i32 60, i32 1)
-  %15 = insertelement <4 x float> undef, float %6, i32 0
-  %16 = insertelement <4 x float> %15, float %8, i32 1
-  %17 = insertelement <4 x float> %16, float %10, i32 2
-  %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
+  call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
+  %13 = insertelement <4 x float> undef, float %6, i32 0
+  %14 = insertelement <4 x float> %13, float %8, i32 1
+  %15 = insertelement <4 x float> %14, float %10, i32 2
+  %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2)
    ret void
  }
author	Vincent Lejeune <vljn@ovi.com>
	Sun, 13 Oct 2013 17:56:10 +0000 (17:56 +0000)
committer	Vincent Lejeune <vljn@ovi.com>
	Sun, 13 Oct 2013 17:56:10 +0000 (17:56 +0000)
lib/Target/R600/R600ISelLowering.cpp		patch \| blob \| history
test/CodeGen/R600/swizzle-export.ll		patch \| blob \| history