Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
CublasLtPlan.ixx File Reference

Shared cuBLASLt utilities for building and executing matmul plans (RAII plan wrapper + builder functions). More...

#include <cublasLt.h>
#include <cuda_runtime.h>
#include <cstdint>
#include <stdexcept>
#include <utility>
#include <string>
#include <vector>
#include <iostream>
#include <sstream>
#include <iomanip>
import Logging.Logger;
import CublasLt.Error;
import Dnn.TensorTypes;

Classes

struct  Mila::Dnn::Compute::Cuda::CublasLtMatMulPlan< TComputePrecision >
 RAII wrapper owning cuBLASLt descriptors and the selected heuristic algorithm. More...

Namespaces

namespace  Mila
 Mila main API namespace.
namespace  Mila::Dnn
namespace  Mila::Dnn::Compute
namespace  Mila::Dnn::Compute::Cuda

Functions

template<typename TNative>
CublasLtMatMulPlan< TNative > Mila::Dnn::Compute::Cuda::build_plan (cublasLtHandle_t handle, int outer_size, int in_features, int out_features, bool has_bias, cudaDataType_t data_type, cublasComputeType_t compute_type, cudaDataType_t scale_type)
 Build a cuBLASLt plan for a standard (non-strided) matmul.
template<typename TComputePrecision>
CublasLtMatMulPlan< TComputePrecision > Mila::Dnn::Compute::Cuda::build_strided_plan (cublasLtHandle_t handle, int A_rows, int A_cols, int ldA, long long strideA_elems, int B_rows, int B_cols, int ldB, long long strideB_elems, int C_rows, int C_cols, int ldC, long long strideC_elems, cublasOperation_t opA, cublasOperation_t opB, int strided_batch_count, bool has_bias=false, cublasComputeType_t compute_type=CUBLAS_COMPUTE_32F, cudaDataType_t cuda_data_type=CUDA_R_32F, cudaDataType_t scale_type=CUDA_R_32F, cublasLtOrder_t order=CUBLASLT_ORDER_ROW)
 Build a cuBLASLt matmul plan for strided-batched matmuls.
template<typename TComputePrecision>
void Mila::Dnn::Compute::Cuda::execute_plan (cublasLtHandle_t handle, const CublasLtMatMulPlan< TComputePrecision > &plan, const void *alpha, const TComputePrecision *A, const TComputePrecision *B, const void *beta, TComputePrecision *C, const TComputePrecision *bias, cudaStream_t stream, void *workspace=nullptr, size_t workspaceSize=0)
 Execute a previously-built cuBLASLt plan.

Detailed Description

Shared cuBLASLt utilities for building and executing matmul plans (RAII plan wrapper + builder functions).

Provides templated utilities to build cuBLASLt matmul plans (including strided-batched) and execute them. Designed to be reused by CUDA Linear and Attention operations.