RAII wrapper owning cuBLASLt descriptors for a Linear matmul. More...

Public Types
using	ActivationType = typename TensorDataTypeMap<TComputePrecision>::device_type
using	ParameterType = typename TensorDataTypeMap<TParameterPrecision>::device_type
using	TAccumPrecision = float

Public Member Functions
	CublasLtLinearPlan ()=default
	CublasLtLinearPlan (const CublasLtLinearPlan &)=delete
	CublasLtLinearPlan (CublasLtLinearPlan &&other) noexcept
	~CublasLtLinearPlan ()
bool	isValid () const
CublasLtLinearPlan &	operator= (const CublasLtLinearPlan &)=delete
CublasLtLinearPlan &	operator= (CublasLtLinearPlan &&other) noexcept

Public Attributes
cublasLtMatmulAlgo_t	algorithm {}
bool	has_algorithm { false }
bool	has_bias_epilogue { false }
bool	has_weight_scale { kIsQuantized }
	true when a weight scale pointer is needed (FP8 path)
cublasLtMatrixLayout_t	layoutA { nullptr }
cublasLtMatrixLayout_t	layoutB { nullptr }
cublasLtMatrixLayout_t	layoutC { nullptr }
cublasLtMatmulDesc_t	matmul_desc { nullptr }
cublasLtMatmulPreference_t	preference { nullptr }

Static Public Attributes
static constexpr bool	kIsQuantized = (TParameterPrecision != TComputePrecision)

Detailed Description

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
struct Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >

RAII wrapper owning cuBLASLt descriptors for a Linear matmul.

Owns:
- matmul_desc: operation descriptor (transpose flags, epilogue, scale pointers)
- layoutA, layoutB, layoutC: matrix memory layouts
- preference: algorithm preference used during heuristic search
- algorithm: selected heuristic algorithm
- has_algorithm: true when the heuristic returned a valid algorithm
- has_bias_epilogue: true when CUBLASLT_EPILOGUE_BIAS is active
- has_weight_scale: true when TParameterPrecision != TComputePrecision (FP8 path)

Layout convention (compile-time, driven by kIsQuantized):

Non-quantized (NT row-major):
- A = activations [outer_size × in_features], opA = N
- B = weights [out_features × in_features], opB = T
- C = output [outer_size × out_features]

Quantized (TN column-major, Ada SM 8.9+):
- A = weights (FP8) [in_features × out_features], opA = T → op(A) = W[out_features, in_features]
- B = activations [in_features × outer_size], opB = N → op(B) = X^T[in_features, outer_size]
- C = output [out_features × outer_size] (col-major ≡ row-major Y[outer_size, out_features])
- A_SCALE_POINTER = per-tensor weight scale

Non-copyable; move-only.

Member Typedef Documentation

◆ ActivationType

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

using Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::ActivationType = typename TensorDataTypeMap<TComputePrecision>::device_type

◆ ParameterType

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

using Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::ParameterType = typename TensorDataTypeMap<TParameterPrecision>::device_type

◆ TAccumPrecision

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

using Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::TAccumPrecision = float

Constructor & Destructor Documentation

◆ CublasLtLinearPlan() [1/3]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::CublasLtLinearPlan ( )

default

Here is the caller graph for this function:

◆ ~CublasLtLinearPlan()

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::~CublasLtLinearPlan ( )

inline

◆ CublasLtLinearPlan() [2/3]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::CublasLtLinearPlan ( const CublasLtLinearPlan< TComputePrecision, TParameterPrecision > & )

delete

Here is the call graph for this function:

◆ CublasLtLinearPlan() [3/3]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::CublasLtLinearPlan ( CublasLtLinearPlan< TComputePrecision, TParameterPrecision > && other )

inline noexcept

Here is the call graph for this function:

Member Function Documentation

◆ isValid()

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::isValid ( ) const

inline

Here is the caller graph for this function:

◆ operator=() [1/2]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

CublasLtLinearPlan & Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::operator= ( const CublasLtLinearPlan< TComputePrecision, TParameterPrecision > & )

delete

Here is the call graph for this function:

◆ operator=() [2/2]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

CublasLtLinearPlan & Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::operator= ( CublasLtLinearPlan< TComputePrecision, TParameterPrecision > && other )

inline noexcept

Here is the call graph for this function:

Member Data Documentation

◆ algorithm

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

cublasLtMatmulAlgo_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::algorithm {}

◆ has_algorithm

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::has_algorithm { false }

◆ has_bias_epilogue

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::has_bias_epilogue { false }

◆ has_weight_scale

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::has_weight_scale { kIsQuantized }

true when a weight scale pointer is needed (FP8 path)

◆ kIsQuantized

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::kIsQuantized = (TParameterPrecision != TComputePrecision)

static constexpr

◆ layoutA

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

cublasLtMatrixLayout_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::layoutA { nullptr }

◆ layoutB

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

cublasLtMatrixLayout_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::layoutB { nullptr }

◆ layoutC

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

cublasLtMatrixLayout_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::layoutC { nullptr }

◆ matmul_desc

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

cublasLtMatmulDesc_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::matmul_desc { nullptr }

◆ preference

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>

cublasLtMatmulPreference_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::preference { nullptr }

The documentation for this struct was generated from the following file:

/__w/Mila/Mila/Mila/Src/Dnn/Compute/Devices/Cuda/Operations/Common/CublasLtLinearPlan.ixx

Public Types

Public Member Functions

Public Attributes

Static Public Attributes

Detailed Description

Member Typedef Documentation

◆ ActivationType

◆ ParameterType

◆ TAccumPrecision

Constructor & Destructor Documentation

◆ CublasLtLinearPlan() [1/3]

◆ ~CublasLtLinearPlan()

◆ CublasLtLinearPlan() [2/3]

◆ CublasLtLinearPlan() [3/3]

Member Function Documentation

◆ isValid()

◆ operator=() [1/2]

◆ operator=() [2/2]

Member Data Documentation

◆ algorithm

◆ has_algorithm

◆ has_bias_epilogue

◆ has_weight_scale

◆ kIsQuantized

◆ layoutA

◆ layoutB

◆ layoutC

◆ matmul_desc

◆ preference