Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision > Struct Template Reference [export]

RAII wrapper owning cuBLASLt descriptors for a Linear matmul. More...

Public Types

using ActivationType = typename TensorDataTypeMap<TComputePrecision>::device_type
using ParameterType = typename TensorDataTypeMap<TParameterPrecision>::device_type
using TAccumPrecision = float

Public Member Functions

 CublasLtLinearPlan ()=default
 CublasLtLinearPlan (const CublasLtLinearPlan &)=delete
 CublasLtLinearPlan (CublasLtLinearPlan &&other) noexcept
 ~CublasLtLinearPlan ()
bool isValid () const
CublasLtLinearPlan & operator= (const CublasLtLinearPlan &)=delete
CublasLtLinearPlan & operator= (CublasLtLinearPlan &&other) noexcept

Public Attributes

cublasLtMatmulAlgo_t algorithm {}
bool has_algorithm { false }
bool has_bias_epilogue { false }
bool has_weight_scale { kIsQuantized }
 true when a weight scale pointer is needed (FP8 path)
cublasLtMatrixLayout_t layoutA { nullptr }
cublasLtMatrixLayout_t layoutB { nullptr }
cublasLtMatrixLayout_t layoutC { nullptr }
cublasLtMatmulDesc_t matmul_desc { nullptr }
cublasLtMatmulPreference_t preference { nullptr }

Static Public Attributes

static constexpr bool kIsQuantized = (TParameterPrecision != TComputePrecision)

Detailed Description

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
struct Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >

RAII wrapper owning cuBLASLt descriptors for a Linear matmul.

Owns:
  matmul_desc - operation descriptor (transpose flags, epilogue, scale pointers)
  layoutA, layoutB, layoutC - matrix memory layouts
  preference - algorithm preference used during heuristic search
  algorithm - selected heuristic algorithm
  has_algorithm - true when the heuristic returned a valid algorithm
  has_bias_epilogue - true when CUBLASLT_EPILOGUE_BIAS is active
  has_weight_scale - true when TParameterPrecision != TComputePrecision (FP8 path)

Layout convention (compile-time, driven by kIsQuantized):

Non-quantized (NT row-major):
  A = activations [outer_size × in_features], opA = N
  B = weights [out_features × in_features], opB = T
  C = output [outer_size × out_features]

Quantized (TN column-major, Ada SM 8.9+):
  A = weights (FP8) [in_features × out_features], opA = T → op(A) = W[out_features, in_features]
  B = activations [in_features × outer_size], opB = N → op(B) = X^T[in_features, outer_size]
  C = output [out_features × outer_size] (col-major ≡ row-major Y[outer_size, out_features])
  A_SCALE_POINTER = per-tensor weight scale

Non-copyable; move-only.

Member Typedef Documentation

◆ ActivationType

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
using Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::ActivationType = typename TensorDataTypeMap<TComputePrecision>::device_type

◆ ParameterType

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
using Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::ParameterType = typename TensorDataTypeMap<TParameterPrecision>::device_type

◆ TAccumPrecision

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
using Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::TAccumPrecision = float

Constructor & Destructor Documentation

◆ CublasLtLinearPlan() [1/3]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::CublasLtLinearPlan ( )
default
Here is the caller graph for this function:

◆ ~CublasLtLinearPlan()

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::~CublasLtLinearPlan ( )
inline

◆ CublasLtLinearPlan() [2/3]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::CublasLtLinearPlan ( const CublasLtLinearPlan< TComputePrecision, TParameterPrecision > & )
delete
Here is the call graph for this function:

◆ CublasLtLinearPlan() [3/3]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::CublasLtLinearPlan ( CublasLtLinearPlan< TComputePrecision, TParameterPrecision > && other)
inline noexcept
Here is the call graph for this function:

Member Function Documentation

◆ isValid()

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::isValid ( ) const
inline
Here is the caller graph for this function:

◆ operator=() [1/2]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
CublasLtLinearPlan & Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::operator= ( const CublasLtLinearPlan< TComputePrecision, TParameterPrecision > & )
delete
Here is the call graph for this function:

◆ operator=() [2/2]

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
CublasLtLinearPlan & Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::operator= ( CublasLtLinearPlan< TComputePrecision, TParameterPrecision > && other)
inline noexcept
Here is the call graph for this function:

Member Data Documentation

◆ algorithm

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
cublasLtMatmulAlgo_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::algorithm {}

◆ has_algorithm

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::has_algorithm { false }

◆ has_bias_epilogue

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::has_bias_epilogue { false }

◆ has_weight_scale

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::has_weight_scale { kIsQuantized }

true when a weight scale pointer is needed (FP8 path)

◆ kIsQuantized

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
bool Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::kIsQuantized = (TParameterPrecision != TComputePrecision)
static constexpr

◆ layoutA

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
cublasLtMatrixLayout_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::layoutA { nullptr }

◆ layoutB

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
cublasLtMatrixLayout_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::layoutB { nullptr }

◆ layoutC

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
cublasLtMatrixLayout_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::layoutC { nullptr }

◆ matmul_desc

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
cublasLtMatmulDesc_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::matmul_desc { nullptr }

◆ preference

template<TensorDataType TComputePrecision, TensorDataType TParameterPrecision = TComputePrecision>
cublasLtMatmulPreference_t Mila::Dnn::Compute::Cuda::CublasLtLinearPlan< TComputePrecision, TParameterPrecision >::preference { nullptr }

The documentation for this struct was generated from the following file: