AMDGPU: Switch barrier intrinsics to using convergent

author Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 19 Dec 2015 01:46:41 +0000 (01:46 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 19 Dec 2015 01:46:41 +0000 (01:46 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td

index ab489cd2a4abe4f21460d345063f08360b08f0bd..1de3546485b1ea710693180ed4b5b1b77ca87587 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -69,8 +69,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
    def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
    def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
    def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
    def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-  def int_AMDGPU_barrier_local  : Intrinsic<[], [], []>;
-  def int_AMDGPU_barrier_global  : Intrinsic<[], [], []>;
+  def int_AMDGPU_barrier_local  : Intrinsic<[], [], [IntrConvergent]>;
+  def int_AMDGPU_barrier_global  : Intrinsic<[], [], [IntrConvergent]>;
  }
  
  // Legacy names for compatibility.
  }
  
  // Legacy names for compatibility.
diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll

index 9be212feef000423c9496d90c69aeee3fec55d1b..61bcd4b3c093d9fc86385f5c93431eb66e124861 100644 (file)
--- a/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -62,5 +62,5 @@ declare void @llvm.AMDGPU.barrier.local() #1
  declare i32 @llvm.r600.read.tidig.x() #3
  
  attributes #0 = { nounwind }
  declare i32 @llvm.r600.read.tidig.x() #3
  
  attributes #0 = { nounwind }
-attributes #1 = { nounwind noduplicate }
+attributes #1 = { nounwind convergent }
  attributes #3 = { nounwind readnone }
  attributes #3 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll

index 67738f7daff768ea9b8fab91647199d3026c9c74..f8a74222d5669d7d4e6f42020017e11b693851d5 100644 (file)
--- a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -2,7 +2,7 @@
  ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
  
  declare i32 @llvm.SI.tid() nounwind readnone
  ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
  
  declare i32 @llvm.SI.tid() nounwind readnone
-declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.local() nounwind convergent
  
  ; The required pointer calculations for the alloca'd actually requires
  ; an add and won't be folded into the addressing, which fails with a
  
  ; The required pointer calculations for the alloca'd actually requires
  ; an add and won't be folded into the addressing, which fails with a
@@ -35,7 +35,7 @@ define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 add
    %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
    store i32 %result, i32* %alloca_ptr, align 4
    ; Dummy call
    %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
    store i32 %result, i32* %alloca_ptr, align 4
    ; Dummy call
-  call void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+  call void @llvm.AMDGPU.barrier.local() nounwind convergent
    %reload = load i32, i32* %alloca_ptr, align 4
    %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
    store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
    %reload = load i32, i32* %alloca_ptr, align 4
    %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
    store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
diff --git a/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll

index 8f63a587a27340bc1cc7bad6f3faca053309ee35..171883e4c74b2166ea661045e4993d91ebdd9d29 100644 (file)
--- a/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
+++ b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
@@ -49,4 +49,4 @@ declare i32 @llvm.r600.read.tgid.x() #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll

index ac17df119708b1ace2d156f335a67754c881f715..e657991557e3a4b3259ba856cf8b1652d6cb29b5 100644 (file)
--- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -66,5 +66,5 @@ for.end:                                          ; preds = %for.body
  }
  
  attributes #0 = { nounwind readnone }
  }
  
  attributes #0 = { nounwind readnone }
-attributes #1 = { noduplicate nounwind }
+attributes #1 = { convergent nounwind }
  attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/AMDGPU/ds-sub-offset.ll b/test/CodeGen/AMDGPU/ds-sub-offset.ll

index ffa45f6398fd80c1abab3de31f75c97069e8de67..7d6eddb01993c6e37785793b2eaf91bc451140d9 100644 (file)
--- a/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -122,4 +122,4 @@ define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
  
  attributes #0 = { nounwind readnone }
  attributes #1 = { nounwind }
  
  attributes #0 = { nounwind readnone }
  attributes #1 = { nounwind }
-attributes #2 = { nounwind noduplicate convergent }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/AMDGPU/ds_read2.ll b/test/CodeGen/AMDGPU/ds_read2.ll

index 579f989faeb5e3aa1e0806077c53afa236a48e23..5170d9c82712f75b28e025ad018d52c5883bc7e7 100644 (file)
--- a/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/test/CodeGen/AMDGPU/ds_read2.ll
@@ -505,9 +505,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/test/CodeGen/AMDGPU/ds_read2_superreg.ll

index 8073426e9d8bada49e7da5a9f6cbba9aaf54a327..0061aaf2cdbd14d08c5ebe892f00cb494be68707 100644 (file)
--- a/test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ b/test/CodeGen/AMDGPU/ds_read2_superreg.ll
@@ -229,9 +229,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_read2st64.ll b/test/CodeGen/AMDGPU/ds_read2st64.ll

index 00de55be852761b9fbb6735efd0dc7cf0042d0ff..4a0571ea16f2bbab84373973fd90f7e5a761f334 100644 (file)
--- a/test/CodeGen/AMDGPU/ds_read2st64.ll
+++ b/test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -264,9 +264,5 @@ declare i32 @llvm.r600.read.tidig.x() #1
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
-; Function Attrs: noduplicate nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
-
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll

index b408459e82c728eadc442dedac985fd84832ca8a..9d3a293f3b8987e6cd7122cf9f0d5a4408c8e676 100644 (file)
--- a/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/test/CodeGen/AMDGPU/ds_write2.ll
@@ -431,9 +431,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_write2st64.ll b/test/CodeGen/AMDGPU/ds_write2st64.ll

index 358aa6a9e3636aa6d3c9883c70687f79a26c7466..5a1024ccf6d72710d35134952c4af9886a7e7df9 100644 (file)
--- a/test/CodeGen/AMDGPU/ds_write2st64.ll
+++ b/test/CodeGen/AMDGPU/ds_write2st64.ll
@@ -109,9 +109,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
  ; Function Attrs: nounwind readnone
  declare i32 @llvm.r600.read.tidig.y() #1
  
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
  declare void @llvm.AMDGPU.barrier.local() #2
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll

index d65b757a4c4723347e06cdb56c6fc6a4f53eebba..86e0c07323bb203f6af5b7d11294047532e8e6d1 100644 (file)
--- a/test/CodeGen/AMDGPU/flat-address-space.ll
+++ b/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -131,5 +131,5 @@ declare void @llvm.AMDGPU.barrier.local() #1
  declare i32 @llvm.r600.read.tidig.x() #3
  
  attributes #0 = { nounwind }
  declare i32 @llvm.r600.read.tidig.x() #3
  
  attributes #0 = { nounwind }
-attributes #1 = { nounwind noduplicate }
+attributes #1 = { nounwind convergent }
  attributes #3 = { nounwind readnone }
  attributes #3 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/indirect-private-64.ll b/test/CodeGen/AMDGPU/indirect-private-64.ll

index 99e8d62f8e3ddcb0de443456da9da92c0f90a86d..2a3b29f54fa9bc03c66eae1338c42dfdd97c60a5 100644 (file)
--- a/test/CodeGen/AMDGPU/indirect-private-64.ll
+++ b/test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -4,7 +4,7 @@
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
  
  
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
  
  
-declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() convergent nounwind
  
  ; SI-LABEL: {{^}}private_access_f64_alloca:
  
  
  ; SI-LABEL: {{^}}private_access_f64_alloca:
  
@@ -18,7 +18,7 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
    %array = alloca double, i32 16, align 8
    %ptr = getelementptr double, double* %array, i32 %b
    store double %val, double* %ptr, align 8
    %array = alloca double, i32 16, align 8
    %ptr = getelementptr double, double* %array, i32 %b
    store double %val, double* %ptr, align 8
-  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+  call void @llvm.AMDGPU.barrier.local() convergent nounwind
    %result = load double, double* %ptr, align 8
    store double %result, double addrspace(1)* %out, align 8
    ret void
    %result = load double, double* %ptr, align 8
    store double %result, double addrspace(1)* %out, align 8
    ret void
@@ -38,7 +38,7 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
    %array = alloca <2 x double>, i32 16, align 16
    %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
    store <2 x double> %val, <2 x double>* %ptr, align 16
    %array = alloca <2 x double>, i32 16, align 16
    %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
    store <2 x double> %val, <2 x double>* %ptr, align 16
-  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+  call void @llvm.AMDGPU.barrier.local() convergent nounwind
    %result = load <2 x double>, <2 x double>* %ptr, align 16
    store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
    ret void
    %result = load <2 x double>, <2 x double>* %ptr, align 16
    store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
    ret void
@@ -56,7 +56,7 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
    %array = alloca i64, i32 16, align 8
    %ptr = getelementptr i64, i64* %array, i32 %b
    store i64 %val, i64* %ptr, align 8
    %array = alloca i64, i32 16, align 8
    %ptr = getelementptr i64, i64* %array, i32 %b
    store i64 %val, i64* %ptr, align 8
-  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+  call void @llvm.AMDGPU.barrier.local() convergent nounwind
    %result = load i64, i64* %ptr, align 8
    store i64 %result, i64 addrspace(1)* %out, align 8
    ret void
    %result = load i64, i64* %ptr, align 8
    store i64 %result, i64 addrspace(1)* %out, align 8
    ret void
@@ -76,7 +76,7 @@ define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <
    %array = alloca <2 x i64>, i32 16, align 16
    %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
    store <2 x i64> %val, <2 x i64>* %ptr, align 16
    %array = alloca <2 x i64>, i32 16, align 16
    %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
    store <2 x i64> %val, <2 x i64>* %ptr, align 16
-  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+  call void @llvm.AMDGPU.barrier.local() convergent nounwind
    %result = load <2 x i64>, <2 x i64>* %ptr, align 16
    store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
    ret void
    %result = load <2 x i64>, <2 x i64>* %ptr, align 16
    store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
    ret void
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll

index f948c987b0385a4d5a681035704db589dea959b2..7dc094ed1b4b741a2ac4af9283a279aadcd7bf7f 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
@@ -4,7 +4,6 @@
  ; FIXME: Enable for VI.
  
  declare i32 @llvm.r600.read.tidig.x() nounwind readnone
  ; FIXME: Enable for VI.
  
  declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
  declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
  declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
  
  declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
  declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
  
diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll

index 8b01c96fe2c6ad1fe40c1d370bb271c786369f7b..65b454b5d8cbb10192784e9984cc701c5581ad24 100644 (file)
--- a/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/test/CodeGen/AMDGPU/merge-stores.ll
@@ -708,4 +708,4 @@ define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x dou
  declare void @llvm.AMDGPU.barrier.local() #1
  
  attributes #0 = { nounwind }
  declare void @llvm.AMDGPU.barrier.local() #1
  
  attributes #0 = { nounwind }
-attributes #1 = { noduplicate nounwind }
+attributes #1 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll

index 3863afda5dd3a131104f535faef2c12d318e905a..e4b16c0a165f4cbe19947d7dfe6457d301c99f35 100644 (file)
--- a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
@@ -3,7 +3,7 @@
  ; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
  ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
  
  ; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
  ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
  
-declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.local() nounwind convergent
  
  
  ; SI-LABEL: {{^}}main(
  
  
  ; SI-LABEL: {{^}}main(
diff --git a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll

index 35e9ff834631cfb9b4c8c31ce875e38d3b8911c6..bc766dbcac6766ab2a79d65514bc53d6ea9b7c06 100644 (file)
--- a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -234,4 +234,4 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
  attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
  
  attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
  attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind noduplicate }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/AMDGPU/store-barrier.ll b/test/CodeGen/AMDGPU/store-barrier.ll

index 4a72b4d090adf37a145ab95439a3035f92da6db8..ba4049f28a6e57b7fc1a8922137a39ff9813ace7 100644 (file)
--- a/test/CodeGen/AMDGPU/store-barrier.ll
+++ b/test/CodeGen/AMDGPU/store-barrier.ll
@@ -36,7 +36,7 @@ bb:
    ret void
  }
  
    ret void
  }
  
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
  declare void @llvm.AMDGPU.barrier.local() #2
  
  declare void @llvm.AMDGPU.barrier.local() #2
  
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/wait.ll b/test/CodeGen/AMDGPU/wait.ll

index 5aaf00283206d05a1d83188e96cddf2b5da1dfdd..107e84b33be9a3e06a4d8c6ab010303e8b3f52c8 100644 (file)
--- a/test/CodeGen/AMDGPU/wait.ll
+++ b/test/CodeGen/AMDGPU/wait.ll
@@ -70,7 +70,7 @@ main_body:
  }
  
  
  }
  
  
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
  declare void @llvm.AMDGPU.barrier.global() #1
  
  ; Function Attrs: nounwind readnone
  declare void @llvm.AMDGPU.barrier.global() #1
  
  ; Function Attrs: nounwind readnone
@@ -79,7 +79,7 @@ declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
  declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
  
  attributes #0 = { "ShaderType"="1" }
  declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
  
  attributes #0 = { "ShaderType"="1" }
-attributes #1 = { noduplicate nounwind }
+attributes #1 = { convergent nounwind }
  attributes #2 = { nounwind readnone }
  
  !0 = !{!1, !1, i64 0, i32 1}
  attributes #2 = { nounwind readnone }
  
  !0 = !{!1, !1, i64 0, i32 1}
diff --git a/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg b/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg

new file mode 100644 (file)

index 0000000..6baccf0
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll

new file mode 100644 (file)

index 0000000..3cbb702
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test_unroll_convergent_barrier(
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK-NOT: br
+define void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
+  %load = load i32, i32 addrspace(1)* %arrayidx.in
+  call void @llvm.AMDGPU.barrier.global() #1
+  %add = add i32 %load, %sum.02
+  store i32 %add, i32 addrspace(1)* %arrayidx.out
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare void @llvm.AMDGPU.barrier.global() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind convergent }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 19 Dec 2015 01:46:41 +0000 (01:46 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 19 Dec 2015 01:46:41 +0000 (01:46 +0000)
lib/Target/AMDGPU/AMDGPUIntrinsics.td		patch | blob | history
test/CodeGen/AMDGPU/addrspacecast.ll		patch | blob | history
test/CodeGen/AMDGPU/array-ptr-calc-i32.ll		patch | blob | history
test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll		patch | blob | history
test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll		patch | blob | history
test/CodeGen/AMDGPU/ds-sub-offset.ll		patch | blob | history
test/CodeGen/AMDGPU/ds_read2.ll		patch | blob | history
test/CodeGen/AMDGPU/ds_read2_superreg.ll		patch | blob | history
test/CodeGen/AMDGPU/ds_read2st64.ll		patch | blob | history
test/CodeGen/AMDGPU/ds_write2.ll		patch | blob | history
test/CodeGen/AMDGPU/ds_write2st64.ll		patch | blob | history
test/CodeGen/AMDGPU/flat-address-space.ll		patch | blob | history
test/CodeGen/AMDGPU/indirect-private-64.ll		patch | blob | history
test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll		patch | blob | history
test/CodeGen/AMDGPU/merge-stores.ll		patch | blob | history
test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll		patch | blob | history
test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll		patch | blob | history
test/CodeGen/AMDGPU/store-barrier.ll		patch | blob | history
test/CodeGen/AMDGPU/wait.ll		patch | blob | history
test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg	[new file with mode: 0644]	patch | blob
test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll	[new file with mode: 0644]	patch | blob