Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Compute.CublasLtPlan Module Reference

Classes

struct  Mila::Dnn::Compute::Cuda::CublasLtMatMulPlan< TComputePrecision >
 RAII wrapper owning cuBLASLt descriptors and the selected heuristic algorithm. More...

Functions

 CublasLtMatMulPlan ()=default
 CublasLtMatMulPlan (const CublasLtMatMulPlan &)=delete
 CublasLtMatMulPlan (CublasLtMatMulPlan &&other) noexcept
 ~CublasLtMatMulPlan ()
template<typename TNative>
CublasLtMatMulPlan< TNative > Mila::Dnn::Compute::Cuda::build_plan (cublasLtHandle_t handle, int outer_size, int in_features, int out_features, bool has_bias, cudaDataType_t data_type, cublasComputeType_t compute_type, cudaDataType_t scale_type)
 Build a cuBLASLt plan for a standard (non-strided) matmul.
template<typename TComputePrecision>
CublasLtMatMulPlan< TComputePrecision > Mila::Dnn::Compute::Cuda::build_strided_plan (cublasLtHandle_t handle, int A_rows, int A_cols, int ldA, long long strideA_elems, int B_rows, int B_cols, int ldB, long long strideB_elems, int C_rows, int C_cols, int ldC, long long strideC_elems, cublasOperation_t opA, cublasOperation_t opB, int strided_batch_count, bool has_bias=false, cublasComputeType_t compute_type=CUBLAS_COMPUTE_32F, cudaDataType_t cuda_data_type=CUDA_R_32F, cudaDataType_t scale_type=CUDA_R_32F, cublasLtOrder_t order=CUBLASLT_ORDER_ROW)
 Build a cuBLASLt matmul plan for strided-batched matmuls.
template<typename TComputePrecision>
void Mila::Dnn::Compute::Cuda::execute_plan (cublasLtHandle_t handle, const CublasLtMatMulPlan< TComputePrecision > &plan, const void *alpha, const TComputePrecision *A, const TComputePrecision *B, const void *beta, TComputePrecision *C, const TComputePrecision *bias, cudaStream_t stream, void *workspace=nullptr, size_t workspaceSize=0)
 Execute a previously-built cuBLASLt plan.
bool isValid () const
CublasLtMatMulPlan & operator= (const CublasLtMatMulPlan &)=delete
CublasLtMatMulPlan & operator= (CublasLtMatMulPlan &&other) noexcept

Variables

cublasLtMatmulAlgo_t algorithm {}
bool has_algorithm { false }
bool has_bias_epilogue { false }
cublasLtMatrixLayout_t layoutA { nullptr }
cublasLtMatrixLayout_t layoutB { nullptr }
cublasLtMatrixLayout_t layoutC { nullptr }
cublasLtMatmulDesc_t matmul_desc { nullptr }
cublasLtMatmulPreference_t preference { nullptr }

Files

file  /__w/Mila/Mila/Mila/Src/Dnn/Compute/Devices/Cuda/Operations/Common/CublasLtPlan.ixx
 Shared cuBLASLt helpers for building and executing matmul plans (RAII wrapper + builders).