ComputeBasePtrOfStridedBatch Struct Reference#
ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch Struct Reference
#include <device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp>
Public Member Functions | |
| ComputeBasePtrOfStridedBatch (index_t BatchStrideA0, index_t BatchStrideB0, std::array< index_t, NumD0Tensor > BatchStrideD0s, index_t BatchStrideB1, std::array< index_t, NumD1Tensor > BatchStrideD1s, index_t BatchStrideE1) | |
| __host__ __device__ constexpr long_index_t | GetABasePtr (index_t g_idx) const |
| __host__ __device__ constexpr long_index_t | GetBBasePtr (index_t g_idx) const |
| template<index_t I> | |
| __host__ __device__ constexpr long_index_t | GetD0BasePtr (index_t g_idx, Number< I > d1_idx) const |
| __host__ __device__ constexpr long_index_t | GetB1BasePtr (index_t g_idx) const |
| __host__ __device__ constexpr long_index_t | GetCBasePtr (index_t g_idx) const |
| template<index_t I> | |
| __host__ __device__ constexpr auto | GetD1BasePtr (index_t g_idx, Number< I > d1_idx) const |
Constructor & Destructor Documentation
◆ ComputeBasePtrOfStridedBatch()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
|
inline |
Member Function Documentation
◆ GetABasePtr()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
◆ GetB1BasePtr()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
◆ GetBBasePtr()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
◆ GetCBasePtr()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
|
inlineconstexpr |
◆ GetD0BasePtr()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
template<index_t I>
|
inlineconstexpr |
◆ GetD1BasePtr()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
template<index_t I>
|
inlineconstexpr |
The documentation for this struct was generated from the following file: