blockwise_gemm_dlops_v3.hpp Source File
blockwise_gemm_dlops_v3.hpp
Go to the documentation of this file.
Line 155: __device__ void MoveABlockSliceWindow(const ABlockSliceMoveStepIdx& a_block_slice_move_step_idx)
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transforms &transforms, LowerDimensionOldTopIdss, UpperDimensionNewTopIdss)
Definition tensor_description/tensor_adaptor.hpp:425
__host__ __device__ constexpr auto make_merge_transform(const LowLengths &low_lengths)
Definition multi_index_transform_helper.hpp:55
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr auto E1
Definition blockwise_gemm_dlops_v3.hpp:33
static constexpr auto b_thread_mtx_
Definition blockwise_gemm_dlops_v3.hpp:47
__device__ void MoveABlockSliceWindow(const ABlockSliceMoveStepIdx &a_block_slice_move_step_idx)
Definition blockwise_gemm_dlops_v3.hpp:155
static constexpr auto I4
Definition blockwise_gemm_dlops_v3.hpp:27
static constexpr auto E2
Definition blockwise_gemm_dlops_v3.hpp:35
static constexpr auto WoPerBlock
Definition blockwise_gemm_dlops_v3.hpp:38
static constexpr auto KPerBlock
Definition blockwise_gemm_dlops_v3.hpp:34
__device__ void Run(const ABlockBuffer &a_block_buf, const BThreadBuffer &b_thread_buf, CThreadBuffer &c_thread_buf) const
Definition blockwise_gemm_dlops_v3.hpp:112
static __device__ constexpr auto GetCThreadDesc_K_N_Ho_WoLengths()
Definition blockwise_gemm_dlops_v3.hpp:86
static constexpr auto I1
Definition blockwise_gemm_dlops_v3.hpp:24
static constexpr auto HoPerBlock
Definition blockwise_gemm_dlops_v3.hpp:37
MultiIndex< 3 > AIndex
Definition blockwise_gemm_dlops_v3.hpp:29
MultiIndex< 3 > BIndex
Definition blockwise_gemm_dlops_v3.hpp:30
static constexpr auto c_thread_mtx_
Definition blockwise_gemm_dlops_v3.hpp:54
static constexpr auto a_thread_mtx_
Definition blockwise_gemm_dlops_v3.hpp:44
static constexpr auto HoPerThread
Definition blockwise_gemm_dlops_v3.hpp:41
static constexpr auto I0
Definition blockwise_gemm_dlops_v3.hpp:23
static __device__ CIndex GetBeginOfCThreadDesc_K_N_Ho_Wo(index_t thread_id)
Definition blockwise_gemm_dlops_v3.hpp:91
__device__ BlockwiseGemmDlops_km_kn_m0m1n0n1_v3()
Definition blockwise_gemm_dlops_v3.hpp:57
static constexpr auto KPerThread
Definition blockwise_gemm_dlops_v3.hpp:40
static constexpr auto I3
Definition blockwise_gemm_dlops_v3.hpp:26
MultiIndex< 4 > CIndex
Definition blockwise_gemm_dlops_v3.hpp:31
static constexpr auto I2
Definition blockwise_gemm_dlops_v3.hpp:25
static constexpr auto WoPerThread
Definition blockwise_gemm_dlops_v3.hpp:42
Definition utility/sequence.hpp:43
Definition static_buffer.hpp:16
Definition threadwise_gemm_dlops_v3.hpp:29
Definition threadwise_tensor_slice_transfer.hpp:1260
Definition type.hpp:177
Definition functional2.hpp:33