ComputeBasePtrOfStridedBatch Struct Reference

ComputeBasePtrOfStridedBatch Struct Reference

Composable Kernel: ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch Struct Reference
ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch Struct Reference

#include <device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp>

Public Member Functions

 ComputeBasePtrOfStridedBatch (index_t BatchStrideA0, index_t BatchStrideB0, std::array< index_t, NumD0Tensor > BatchStrideD0s, index_t BatchStrideB1, std::array< index_t, NumD1Tensor > BatchStrideD1s, index_t BatchStrideE1)
__host__ __device__ constexpr long_index_t GetABasePtr (index_t g_idx) const
__host__ __device__ constexpr long_index_t GetBBasePtr (index_t g_idx) const
template<index_t I>
__host__ __device__ constexpr long_index_t GetD0BasePtr (index_t g_idx, Number< I > d1_idx) const
__host__ __device__ constexpr long_index_t GetB1BasePtr (index_t g_idx) const
__host__ __device__ constexpr long_index_t GetCBasePtr (index_t g_idx) const
template<index_t I>
__host__ __device__ constexpr auto GetD1BasePtr (index_t g_idx, Number< I > d1_idx) const

Constructor & Destructor Documentation

◆ ComputeBasePtrOfStridedBatch()

template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch::ComputeBasePtrOfStridedBatch ( index_t BatchStrideA0,
index_t BatchStrideB0,
std::array< index_t, NumD0Tensor > BatchStrideD0s,
index_t BatchStrideB1,
std::array< index_t, NumD1Tensor > BatchStrideD1s,
index_t BatchStrideE1 )
inline

Member Function Documentation

◆ GetABasePtr()

template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch::GetABasePtr ( index_t g_idx) const
inline constexpr

◆ GetB1BasePtr()

template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch::GetB1BasePtr ( index_t g_idx) const
inline constexpr

◆ GetBBasePtr()

template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch::GetBBasePtr ( index_t g_idx) const
inline constexpr

◆ GetCBasePtr()

template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch::GetCBasePtr ( index_t g_idx) const
inline constexpr

◆ GetD0BasePtr()

template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
template<index_t I>
__host__ __device__ constexpr long_index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch::GetD0BasePtr ( index_t g_idx,
Number< I > d1_idx ) const
inline constexpr

◆ GetD1BasePtr()

template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
template<index_t I>
__host__ __device__ constexpr auto ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::ComputeBasePtrOfStridedBatch::GetD1BasePtr ( index_t g_idx,
Number< I > d1_idx ) const
inline constexpr

The documentation for this struct was generated from the following file: