Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference#
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
Protected Types |
Protected Attributes |
Static Protected Attributes |
List of all members
ck::BlockwiseGemmXdlops_pipeline_blockscale_bpreshuffle_v1< BlockGemmPipelineScheduler::Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack > Struct Template Reference
#include <blockwise_gemm_pipeline_xdlops_blockscale_b_preshuffle_v1.hpp>
Inheritance diagram for ck::BlockwiseGemmXdlops_pipeline_blockscale_bpreshuffle_v1< BlockGemmPipelineScheduler::Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >:
Public Member Functions | |
| template<bool HasMainLoop, int NumKBlockPerScale, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CScaleThreadDesc, typename CThreadBuffer, typename AScaleGridBuffer, typename AScaleGridDesc, typename AScaleThreadDesc, typename AScaleThreadTransfer, typename AScaleThreadTransferStep, typename BScaleGridBuffer, typename BScaleGridDesc, typename BScaleThreadDesc, typename BScaleThreadTransfer, typename BScaleThreadTransferStep> | |
| __device__ void | Run (const AGridDesc &a_grid_desc, const ABlockDesc &a_block_desc, ABlockTransfer &a_blockwise_copy, const AGridBuffer &a_grid_buf, ABlockBuffer &a_block_buf, const ABlockTransferStep &a_block_copy_step, const BGridDesc &b_grid_desc, const BBlockDesc &b_block_desc, BBlockTransfer &b_blockwise_copy, const BGridBuffer &b_grid_buf, BBlockBuffer &b_block_buf, const BBlockTransferStep &b_block_copy_step, const CScaleThreadDesc &c_scale_thread_desc, CThreadBuffer &c_thread_buf, const AScaleGridDesc &a_scale_grid_desc, const AScaleThreadDesc &a_scale_thread_desc, AScaleThreadTransfer &a_scale_thread_copy, const AScaleGridBuffer &a_scale_grid_buf, const AScaleThreadTransferStep &a_scale_thread_copy_step, const BScaleGridDesc &b_scale_grid_desc, const BScaleThreadDesc &b_scale_thread_desc, BScaleThreadTransfer &b_scale_thread_copy, const BScaleGridBuffer &b_scale_grid_buf, const BScaleThreadTransferStep &b_scale_thread_copy_step, index_t num_loop) const |
| __host__ __device__ constexpr auto & | GetCThreadBuffer () |
| Public Member Functions inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| __host__ __device__ constexpr auto & | GetCThreadBuffer () |
| __host__ __device__ | BlockwiseGemmXdlops_pipeline_base (Tuple4 a_origin=CalculateAThreadOriginDataIndex(), Tuple4 b_origin=CalculateBThreadOriginDataIndex()) |
| Constructor for BlockwiseGemmXdlops_pipeline_base. | |
Static Public Member Functions | |
| template<typename TileDesc_M0_M1_M2_K> | |
| __host__ static __device__ constexpr auto | MakeAGemmMmaTileDescriptor (const TileDesc_M0_M1_M2_K &) |
| __host__ static __device__ constexpr bool | BlockHasHotloop (index_t num_loop) |
| __host__ static __device__ constexpr TailNumber | BlockLoopTailNum (index_t num_loop) |
| static __device__ constexpr auto | HotLoopScheduler () |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| static __device__ auto | CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| Static Public Member Functions inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| static __device__ auto | GetWaveIdx () |
| static __device__ auto | CalculateAThreadOriginDataIndex () |
| static __device__ auto | CalculateAThreadOriginDataIndex6D () |
| static __device__ auto | CalculateBThreadOriginDataIndex () |
| static __device__ auto | CalculateCThreadOriginDataIndex (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| static __device__ auto | CalculateCThreadOriginDataIndex8D (Number< m0 >, Number< n0 >, Number< xdlops_i >, Number< blk_i >) |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 () |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_M_N &c_grid_desc_m_n) |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2 (const CGridDesc_G_M_N &c_grid_desc_g_m_n) |
| __host__ static __device__ constexpr auto | GetCThreadDesc () |
Static Public Attributes | |
| static constexpr index_t | PrefetchStages = 2 |
| static constexpr index_t | PrefillStages = 1 |
| static constexpr index_t | GlobalBufferNum = 2 |
| static constexpr auto | a_block_desc_m0_m1_m2_k0_k1_k2 |
| static constexpr index_t | A_K1 |
| static constexpr index_t | B_K1 |
| static constexpr auto | I0 |
| static constexpr auto | I1 |
| static constexpr index_t | KGroup |
| static constexpr index_t | KRepeat |
| static constexpr auto | xdlops_gemm |
| static constexpr AMmaTileDesc | a_block_desc_m0_m1_m2_k |
| static constexpr index_t | MWaves |
| static constexpr index_t | NWaves |
| static constexpr index_t | WaveSize |
| Static Public Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| static constexpr auto | I0 |
| static constexpr auto | I1 |
| static constexpr auto | I2 |
| static constexpr auto | I3 |
| static constexpr index_t | MWaves |
| static constexpr index_t | NWaves |
| static constexpr index_t | WaveSize |
| static constexpr index_t | A_K0 |
| static constexpr index_t | B_K0 |
| static constexpr index_t | A_K1 |
| static constexpr index_t | B_K1 |
| static constexpr auto | xdlops_gemm |
| static constexpr index_t | AMmaKStride |
| static constexpr index_t | BMmaKStride |
| static constexpr index_t | KPerThread |
| static constexpr index_t | KRepeat |
| static constexpr index_t | KPerInnerLoop |
| static constexpr index_t | KGroup |
| static constexpr AMmaTileDesc | a_block_desc_m0_m1_m2_k |
| static constexpr BMmaTileDesc | b_block_desc_n0_n1_n2_k |
Static Protected Attributes | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr BTileDesc | b_block_desc_n0_n1_k0_k1 |
| static constexpr auto | c_thread_desc_ |
| Static Protected Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| static constexpr auto | a_thread_desc_ |
| static constexpr auto | b_thread_desc_ |
| static constexpr auto | c_thread_desc_ |
Additional Inherited Members | |
| Public Attributes inherited from ck::BlockwiseGemmXdlops_pipeline_base< BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack, true > | |
| StaticBufferTupleOfVector< AddressSpaceEnum::Vgpr, AccDataType, MRepeat *NRepeat, xdlops_gemm.GetRegSizePerXdlops(), true > | c_thread_buf_ |
Member Typedef Documentation
◆ AThreadCopy
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
protected |
Initial value:
ThreadwiseTensorSliceTransfer_v4<ADataType,
ComputeDataType,
decltype(a_block_desc_m0_m1_m2_k0_k1_k2),
decltype(a_thread_desc_),
5,
A_K1,
A_K1>
static constexpr auto a_thread_desc_
Definition blockwise_gemm_dpp.hpp:312
static constexpr index_t A_K1
Definition blockwise_gemm_dpp.hpp:52
static constexpr auto a_block_desc_m0_m1_m2_k0_k1_k2
Definition blockwise_gemm_pipeline_xdlops_b_preshuffle_dequant_v1.hpp:171
static constexpr index_t KGroup
Definition blockwise_gemm_pipeline_xdlops_base.hpp:67
Definition utility/sequence.hpp:43
Definition threadwise_tensor_slice_transfer.hpp:1260
◆ Base
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
| using ck::BlockwiseGemmXdlops_pipeline_blockscale_bpreshuffle_v1< BlockGemmPipelineScheduler::Intrawave, BlockSize, ADataType, BDataType, ComputeDataType, AccDataType, ATileDesc, BTileDesc, AMmaTileDesc, BMmaTileDesc, ABlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector, MPerBlock, NPerBlock, KPerBlock, MScaleBlock, NScaleBlock, KScaleBlock, MPerXDL, NPerXDL, MRepeat, NRepeat, KPack >::Base |
Initial value:
BlockwiseGemmXdlops_pipeline_base<BlockSize,
ADataType,
BDataType,
ComputeDataType,
AccDataType,
ATileDesc,
BTileDesc,
AMmaTileDesc,
BMmaTileDesc,
ABlockTransferSrcScalarPerVector,
BBlockTransferSrcScalarPerVector,
MPerBlock,
NPerBlock,
KPerBlock,
MPerXDL,
NPerXDL,
MRepeat,
NRepeat,
KPack,
true>
__host__ __device__ BlockwiseGemmXdlops_pipeline_base(Tuple4 a_origin=CalculateAThreadOriginDataIndex(), Tuple4 b_origin=CalculateBThreadOriginDataIndex())
Constructor for BlockwiseGemmXdlops_pipeline_base.
Definition blockwise_gemm_pipeline_xdlops_base.hpp:222
Member Function Documentation
◆ BlockHasHotloop()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ BlockLoopTailNum()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ CalculateCThreadOriginDataIndex()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestatic |
◆ CalculateCThreadOriginDataIndex8D()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestatic |
◆ GetCBlockDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ GetCBlockDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ GetCThreadBuffer()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlineconstexpr |
◆ GetCThreadDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ HotLoopScheduler()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ MakeAGemmMmaTileDescriptor()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<typename TileDesc_M0_M1_M2_K>
|
inlinestaticconstexpr |
◆ MakeCGridDescriptor_G_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
inlinestaticconstexpr |
◆ Run()
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
template<bool HasMainLoop, int NumKBlockPerScale, TailNumber TailNum, typename AGridDesc, typename ABlockDesc, typename ABlockTransfer, typename AGridBuffer, typename ABlockBuffer, typename ABlockTransferStep, typename BGridDesc, typename BBlockDesc, typename BBlockTransfer, typename BGridBuffer, typename BBlockBuffer, typename BBlockTransferStep, typename CScaleThreadDesc, typename CThreadBuffer, typename AScaleGridBuffer, typename AScaleGridDesc, typename AScaleThreadDesc, typename AScaleThreadTransfer, typename AScaleThreadTransferStep, typename BScaleGridBuffer, typename BScaleGridDesc, typename BScaleThreadDesc, typename BScaleThreadTransfer, typename BScaleThreadTransferStep>
|
inline |
Member Data Documentation
◆ a_block_desc_m0_m1_m2_k
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ a_block_desc_m0_m1_m2_k0_k1_k2
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
Initial value:
=
__host__ static __device__ constexpr auto MakeAGemmMmaTileDescriptor(const TileDesc_M0_M1_M2_K &)
Definition blockwise_gemm_pipeline_xdlops_blockscale_b_preshuffle_v1.hpp:163
static constexpr AMmaTileDesc a_block_desc_m0_m1_m2_k
Definition blockwise_gemm_pipeline_xdlops_base.hpp:359
◆ A_K1
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ a_thread_copy_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
protected |
◆ a_thread_desc_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexprprotected |
Initial value:
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
static constexpr auto I1
Definition blockwise_gemm_pipeline_xdlops_base.hpp:37
◆ b_block_desc_n0_n1_k0_k1
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexprprotected |
◆ B_K1
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ b_thread_desc_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexprprotected |
Initial value:
◆ c_thread_desc_
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexprprotected |
◆ GlobalBufferNum
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ I0
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ I1
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ KGroup
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ KRepeat
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ MWaves
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ NWaves
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ PrefetchStages
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ PrefillStages
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ WaveSize
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
◆ xdlops_gemm
template<index_t BlockSize, typename ADataType, typename BDataType, typename ComputeDataType, typename AccDataType, typename ATileDesc, typename BTileDesc, typename AMmaTileDesc, typename BMmaTileDesc, index_t ABlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t MScaleBlock, index_t NScaleBlock, index_t KScaleBlock, index_t MPerXDL, index_t NPerXDL, index_t MRepeat, index_t NRepeat, index_t KPack>
|
staticconstexpr |
The documentation for this struct was generated from the following file: