BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type > Struct Template Reference

BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type > Struct Template Reference
ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type > Struct Template Reference

#include <blockwise_gemm_dl_v2r3.hpp>

Public Types

using AIndex = MultiIndex<3>
using BIndex = MultiIndex<3>
using CIndex = MultiIndex<4>

Public Member Functions

__device__ BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2 ()
template<typename CThreadDesc_BM0_BM11_BN0_BN11, typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void Run (const CThreadDesc_BM0_BM11_BN0_BN11 &, const ABlockBuffer &a_block_buf, const BBlockBuffer &b_block_buf, CThreadBuffer &c_thread_buf) const

Static Public Member Functions

__host__ static __device__ constexpr auto MakeABlockDescriptor_BK0_BM0_BM1_BK1 (const ABlockDesc_BK0_BM_BK1 &a_block_desc_bk0_bm_bk1)
__host__ static __device__ constexpr auto MakeBBlockDescriptor_BK0_BN0_BN1_BK1 (const BBlockDesc_BK0_BN_BK1 &b_block_desc_bk0_bn_bk1)
__host__ static __device__ constexpr auto MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM_BN ()
__host__ static __device__ constexpr auto MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM0_BM1_BN0_BN1 ()
__host__ static __device__ constexpr auto GetCThreadTensorLengths_BM0_BM1_BN0_BN1 ()
static __device__ CIndex CalculateCThreadOriginOnBlock_BM0_BM1_BN0_BN1 (index_t thread_id)

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr index_t BK0 = ABlockDesc_BK0_BM_BK1{}.GetLength(I0)
static constexpr index_t BK1 = ABlockDesc_BK0_BM_BK1{}.GetLength(I2)
static constexpr index_t BM = ABlockDesc_BK0_BM_BK1{}.GetLength(I1)
static constexpr index_t BN = BBlockDesc_BK0_BN_BK1{}.GetLength(I1)
static constexpr index_t BM100 = BM10BN10ThreadClusterBM10Xs{}[I0]
static constexpr index_t BN100 = BM10BN10ThreadClusterBN10Xs{}[I0]
static constexpr index_t BM101 = BM10BN10ThreadClusterBM10Xs{}[I1]
static constexpr index_t BN101 = BM10BN10ThreadClusterBN10Xs{}[I1]
static constexpr index_t BM11 = BM1PerThreadBM11
static constexpr index_t BN11 = BN1PerThreadBN11
static constexpr index_t BM1 = BM100 * BM101 * BM11
static constexpr index_t BN1 = BN100 * BN101 * BN11
static constexpr index_t BM0 = BM / BM1
static constexpr index_t BN0 = BN / BN1
static constexpr auto a_block_desc_bk0_bm0_bm1_bk1_
static constexpr auto b_block_desc_bk0_bn0_bn1_bk1_

Member Typedef Documentation

◆ AIndex

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
using ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::AIndex = MultiIndex<3>

◆ BIndex

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
using ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BIndex = MultiIndex<3>

◆ CIndex

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
using ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::CIndex = MultiIndex<4>

Constructor & Destructor Documentation

◆ BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
__device__ ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2 ( )
inline

Member Function Documentation

◆ CalculateCThreadOriginOnBlock_BM0_BM1_BN0_BN1()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
__device__ CIndex ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::CalculateCThreadOriginOnBlock_BM0_BM1_BN0_BN1 ( index_t thread_id)
inline static

◆ GetCThreadTensorLengths_BM0_BM1_BN0_BN1()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::GetCThreadTensorLengths_BM0_BM1_BN0_BN1 ( )
inline static constexpr

◆ MakeABlockDescriptor_BK0_BM0_BM1_BK1()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::MakeABlockDescriptor_BK0_BM0_BM1_BK1 ( const ABlockDesc_BK0_BM_BK1 & a_block_desc_bk0_bm_bk1)
inline static constexpr

◆ MakeBBlockDescriptor_BK0_BN0_BN1_BK1()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::MakeBBlockDescriptor_BK0_BN0_BN1_BK1 ( const BBlockDesc_BK0_BN_BK1 & b_block_desc_bk0_bn_bk1)
inline static constexpr

◆ MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM0_BM1_BN0_BN1()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM0_BM1_BN0_BN1 ( )
inline static constexpr

◆ MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM_BN()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
__host__ static __device__ constexpr auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::MakeCBlockAdaptor_BM0_BM100_BM101_BM11_BN0_BN100_BN101_BN11_To_BM_BN ( )
inline static constexpr

◆ Run()

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
template<typename CThreadDesc_BM0_BM11_BN0_BN11, typename ABlockBuffer, typename BBlockBuffer, typename CThreadBuffer>
__device__ void ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::Run ( const CThreadDesc_BM0_BM11_BN0_BN11 & ,
const ABlockBuffer & a_block_buf,
const BBlockBuffer & b_block_buf,
CThreadBuffer & c_thread_buf ) const
inline

Member Data Documentation

◆ a_block_desc_bk0_bm0_bm1_bk1_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::a_block_desc_bk0_bm0_bm1_bk1_
static constexpr
Initial value:
= MakeABlockDescriptor_BK0_BM0_BM1_BK1(ABlockDesc_BK0_BM_BK1{})
__host__ static __device__ constexpr auto MakeABlockDescriptor_BK0_BM0_BM1_BK1(const ABlockDesc_BK0_BM_BK1 &a_block_desc_bk0_bm_bk1)
Definition blockwise_gemm_dl_v2r3.hpp:78

◆ b_block_desc_bk0_bn0_bn1_bk1_

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::b_block_desc_bk0_bn0_bn1_bk1_
static constexpr
Initial value:
= MakeBBlockDescriptor_BK0_BN0_BN1_BK1(BBlockDesc_BK0_BN_BK1{})
__host__ static __device__ constexpr auto MakeBBlockDescriptor_BK0_BN0_BN1_BK1(const BBlockDesc_BK0_BN_BK1 &b_block_desc_bk0_bn_bk1)
Definition blockwise_gemm_dl_v2r3.hpp:92

◆ BK0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BK0 = ABlockDesc_BK0_BM_BK1{}.GetLength(I0)
static constexpr

◆ BK1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BK1 = ABlockDesc_BK0_BM_BK1{}.GetLength(I2)
static constexpr

◆ BM

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BM = ABlockDesc_BK0_BM_BK1{}.GetLength(I1)
static constexpr

◆ BM0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BM0 = BM / BM1
static constexpr

◆ BM1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BM1 = BM100 * BM101 * BM11
static constexpr

◆ BM100

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BM100 = BM10BN10ThreadClusterBM10Xs{}[I0]
static constexpr

◆ BM101

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BM101 = BM10BN10ThreadClusterBM10Xs{}[I1]
static constexpr

◆ BM11

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BM11 = BM1PerThreadBM11
static constexpr

◆ BN

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BN = BBlockDesc_BK0_BN_BK1{}.GetLength(I1)
static constexpr

◆ BN0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BN0 = BN / BN1
static constexpr

◆ BN1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BN1 = BN100 * BN101 * BN11
static constexpr

◆ BN100

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BN100 = BM10BN10ThreadClusterBN10Xs{}[I0]
static constexpr

◆ BN101

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BN101 = BM10BN10ThreadClusterBN10Xs{}[I1]
static constexpr

◆ BN11

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
index_t ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::BN11 = BN1PerThreadBN11
static constexpr

◆ I0

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::I1 = Number<1>{}
static constexpr

◆ I2

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::I2 = Number<2>{}
static constexpr

◆ I3

template<index_t BlockSize, typename FloatA, typename FloatB, typename FloatC, typename ABlockDesc_BK0_BM_BK1, typename BBlockDesc_BK0_BN_BK1, index_t BM1PerThreadBM11, index_t BN1PerThreadBN11, index_t BK0PerThread, typename BM10BN10ThreadClusterBM10Xs, typename BM10BN10ThreadClusterBN10Xs, index_t AThreadCopyScalarPerVector_BM11, index_t BThreadCopyScalarPerVector_BN11, typename enable_if< ABlockDesc_BK0_BM_BK1::IsKnownAtCompileTime() &&BBlockDesc_BK0_BN_BK1::IsKnownAtCompileTime(), bool >::type = false>
auto ck::BlockwiseGemmDl_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_BM0_2_BN0_2< BlockSize, FloatA, FloatB, FloatC, ABlockDesc_BK0_BM_BK1, BBlockDesc_BK0_BN_BK1, BM1PerThreadBM11, BN1PerThreadBN11, BK0PerThread, BM10BN10ThreadClusterBM10Xs, BM10BN10ThreadClusterBN10Xs, AThreadCopyScalarPerVector_BM11, BThreadCopyScalarPerVector_BN11, type >::I3 = Number<3>{}
static constexpr

The documentation for this struct was generated from the following file: