BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ > Struct Template Reference

BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ > Struct Template Reference

Composable Kernel: ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ > Struct Template Reference
ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ > Struct Template Reference

#include <block_fmha_pipeline_qx_ks_vs_custom_policy.hpp>

Inheritance diagram for ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >:
ck_tile::BlockFmhaPipelineQXCustomPolicy< QLoadOnce_ >

Classes

struct  LdsBufferSequence
struct  LdsBufferSequence< 3, 3, 4, 4 >
struct  LdsBufferSequence< 3, 3, 4, 2 >
struct  LdsBufferSequence< 3, 3, 2, 4 >
struct  LdsBufferSequence< 3, 3, 3, 3 >
struct  LdsBufferSequence< 3, 3, 3, 4 >
struct  LdsBufferSequence< 3, 3, 2, 2 >

Public Types

using QXPolicy = BlockFmhaPipelineQXCustomPolicy<QLoadOnce_>

Static Public Member Functions

template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetLdsBufferSequence ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetSmemKPackK ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentK ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetSmemKPackV ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentV ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentBias ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentO ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetSingleSmemElementSpaceSize ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto MakeKLdsBlockDescriptor ()
template<typename Problem, index_t IBuf = 0>
static CK_TILE_HOST_DEVICE constexpr auto MakeKLdsStoreBlockDescriptor (number< IBuf >=number< 0 >{})
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto MakeKLdsLoadBlockDescriptor ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto MakeVLdsBlockDescriptor ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetSmemSizeKV ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetSmemSize ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr std::enable_if_t< std::is_convertible_v< decltype(Problem::kHasDropout), bool >, ck_tile::index_t > GetSmemSizeDropout (int)
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetSmemSizeDropout (...)
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto MakeKDramTileDistribution ()
template<typename Problem>
static CK_TILE_DEVICE constexpr auto MakeVDramTileDistribution ()
template<typename BlockGemm>
static CK_TILE_HOST_DEVICE constexpr auto MakeBiasDramTileDistribution ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto MakeShuffledVRegBlockDescriptor ()
template<typename Problem>
static CK_TILE_HOST_DEVICE constexpr auto GetKVBlockGemm ()

Static Public Attributes

static constexpr bool AsyncCopy = AsyncCopy_
static constexpr index_t NumPrefetchK = NumPrefetchK_
static constexpr index_t NumPrefetchV = NumPrefetchK_
static constexpr index_t NumKVLdsBuffers = max(NumPrefetchK, NumPrefetchV)

Member Typedef Documentation

◆ QXPolicy

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
using ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::QXPolicy = BlockFmhaPipelineQXCustomPolicy<QLoadOnce_>

Member Function Documentation

◆ GetAlignmentBias()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetAlignmentBias ( )
inline static constexpr

◆ GetAlignmentK()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetAlignmentK ( )
inline static constexpr

◆ GetAlignmentO()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetAlignmentO ( )
inline static constexpr

◆ GetAlignmentV()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetAlignmentV ( )
inline static constexpr

◆ GetKVBlockGemm()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetKVBlockGemm ( )
inline static constexpr

◆ GetLdsBufferSequence()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetLdsBufferSequence ( )
inline static constexpr

◆ GetSingleSmemElementSpaceSize()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetSingleSmemElementSpaceSize ( )
inline static constexpr

◆ GetSmemKPackK()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetSmemKPackK ( )
inline static constexpr

◆ GetSmemKPackV()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetSmemKPackV ( )
inline static constexpr

◆ GetSmemSize()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr ck_tile::index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetSmemSize ( )
inline static constexpr

◆ GetSmemSizeDropout() [1/2]

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr ck_tile::index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetSmemSizeDropout ( ...)
inline static constexpr

◆ GetSmemSizeDropout() [2/2]

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr std::enable_if_t< std::is_convertible_v< decltype(Problem::kHasDropout), bool >, ck_tile::index_t > ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetSmemSizeDropout ( int )
inline static constexpr

◆ GetSmemSizeKV()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr ck_tile::index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::GetSmemSizeKV ( )
inline static constexpr

◆ MakeBiasDramTileDistribution()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename BlockGemm>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeBiasDramTileDistribution ( )
inline static constexpr

◆ MakeKDramTileDistribution()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeKDramTileDistribution ( )
inline static constexpr

◆ MakeKLdsBlockDescriptor()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeKLdsBlockDescriptor ( )
inline static constexpr

◆ MakeKLdsLoadBlockDescriptor()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeKLdsLoadBlockDescriptor ( )
inline static constexpr

◆ MakeKLdsStoreBlockDescriptor()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem, index_t IBuf = 0>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeKLdsStoreBlockDescriptor ( number< IBuf > = number<0>{})
inline static constexpr

◆ MakeShuffledVRegBlockDescriptor()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeShuffledVRegBlockDescriptor ( )
inline static constexpr

◆ MakeVDramTileDistribution()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeVDramTileDistribution ( )
inline static constexpr

◆ MakeVLdsBlockDescriptor()

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
template<typename Problem>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::MakeVLdsBlockDescriptor ( )
inline static constexpr

Member Data Documentation

◆ AsyncCopy

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
bool ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::AsyncCopy = AsyncCopy_
static constexpr

◆ NumKVLdsBuffers

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::NumKVLdsBuffers = max(NumPrefetchK, NumPrefetchV)
static constexpr

◆ NumPrefetchK

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::NumPrefetchK = NumPrefetchK_
static constexpr

◆ NumPrefetchV

template<bool QLoadOnce_, bool AsyncCopy_, index_t NumPrefetchK_, index_t NumPrefetchV_>
index_t ck_tile::BlockFmhaPipelineQXKSVSCustomPolicy< QLoadOnce_, AsyncCopy_, NumPrefetchK_, NumPrefetchV_ >::NumPrefetchV = NumPrefetchK_
static constexpr

The documentation for this struct was generated from the following file: