workgroup_barrier.hpp Source File

workgroup_barrier.hpp Source File#

Composable Kernel: workgroup_barrier.hpp Source File
utility/workgroup_barrier.hpp
Go to the documentation of this file.
1#pragma once
2#include <hip/hip_runtime.h>
3#include <stdint.h>
4
5namespace ck {
7{
8 __device__ workgroup_barrier(uint32_t* ptr) : base_ptr(ptr) {}
9
10 __device__ uint32_t ld(uint32_t offset)
11 {
12#if 0
14 amdgcn_make_buffer_resource(base_ptr),
15 0,
16 offset,
17 AMDGCN_BUFFER_GLC);
18 union cvt {
19 float f32;
20 uint32_t u32;
21 };
22 cvt x;
23 x.f32 = d;
24 return x.u32;
25#endif
26 return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED);
27 }
28
29 __device__ void wait_eq(uint32_t offset, uint32_t value)
30 {
31 if(threadIdx.x == 0)
32 {
33 while(ld(offset) != value) {}
34 }
35 __syncthreads();
36 }
37
38 __device__ void wait_lt(uint32_t offset, uint32_t value)
39 {
40 if(threadIdx.x == 0)
41 {
42 while(ld(offset) < value) {}
43 }
44 __syncthreads();
45 }
46
47 __device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
48 {
49 if(threadIdx.x == 0)
50 {
51 while(atomicCAS(base_ptr + offset, compare, value) != compare) {}
52 }
53 __syncthreads();
54 }
55
56 // enter critical zoon, assume buffer is zero when launch kernel
57 __device__ void aquire(uint32_t offset) { wait_set(offset, 0, 1); }
58
59 // exit critical zoon, assume buffer is zero when launch kernel
60 __device__ void release(uint32_t offset) { wait_set(offset, 1, 0); }
61
62 __device__ void inc(uint32_t offset)
63 {
64 __syncthreads();
65 if(threadIdx.x == 0)
66 {
67 atomicAdd(base_ptr + offset, 1);
68 }
69 }
70
72};
73} // namespace ck
Definition ck.hpp:268
__device__ float llvm_amdgcn_raw_buffer_load_fp32(int32x4_t srsrc, index_t voffset, index_t soffset, index_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load.f32")
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
unsigned int uint32_t
Definition stdint.h:126
__device__ void wait_lt(uint32_t offset, uint32_t value)
Definition utility/workgroup_barrier.hpp:38
__device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
Definition utility/workgroup_barrier.hpp:47
uint32_t * base_ptr
Definition utility/workgroup_barrier.hpp:71
__device__ void release(uint32_t offset)
Definition utility/workgroup_barrier.hpp:60
__device__ workgroup_barrier(uint32_t *ptr)
Definition utility/workgroup_barrier.hpp:8
__device__ void inc(uint32_t offset)
Definition utility/workgroup_barrier.hpp:62
__device__ void aquire(uint32_t offset)
Definition utility/workgroup_barrier.hpp:57
__device__ void wait_eq(uint32_t offset, uint32_t value)
Definition utility/workgroup_barrier.hpp:29
__device__ uint32_t ld(uint32_t offset)
Definition utility/workgroup_barrier.hpp:10