|
| | CublasLtMatMulPlan ()=default |
| | CublasLtMatMulPlan (const CublasLtMatMulPlan &)=delete |
| | CublasLtMatMulPlan (CublasLtMatMulPlan &&other) noexcept |
| | ~CublasLtMatMulPlan () |
| template<typename TNative> |
| CublasLtMatMulPlan< TNative > | Mila::Dnn::Compute::Cuda::build_plan (cublasLtHandle_t handle, int outer_size, int in_features, int out_features, bool has_bias, cudaDataType_t data_type, cublasComputeType_t compute_type, cudaDataType_t scale_type) |
| | Build a cuBLASLt plan for a standard (non-strided) matmul.
|
| template<typename TComputePrecision> |
| CublasLtMatMulPlan< TComputePrecision > | Mila::Dnn::Compute::Cuda::build_strided_plan (cublasLtHandle_t handle, int A_rows, int A_cols, int ldA, long long strideA_elems, int B_rows, int B_cols, int ldB, long long strideB_elems, int C_rows, int C_cols, int ldC, long long strideC_elems, cublasOperation_t opA, cublasOperation_t opB, int strided_batch_count, bool has_bias=false, cublasComputeType_t compute_type=CUBLAS_COMPUTE_32F, cudaDataType_t cuda_data_type=CUDA_R_32F, cudaDataType_t scale_type=CUDA_R_32F, cublasLtOrder_t order=CUBLASLT_ORDER_ROW) |
| | Build a cuBLASLt matmul plan for strided-batched matmuls.
|
| template<typename TComputePrecision> |
| void | Mila::Dnn::Compute::Cuda::execute_plan (cublasLtHandle_t handle, const CublasLtMatMulPlan< TComputePrecision > &plan, const void *alpha, const TComputePrecision *A, const TComputePrecision *B, const void *beta, TComputePrecision *C, const TComputePrecision *bias, cudaStream_t stream, void *workspace=nullptr, size_t workspaceSize=0) |
| | Execute a previously built cuBLASLt plan.
|
| bool | isValid () const |
| CublasLtMatMulPlan & | operator= (const CublasLtMatMulPlan &)=delete |
| CublasLtMatMulPlan & | operator= (CublasLtMatMulPlan &&other) noexcept |