|
Mila 0.13.48
Deep Neural Network Library
|
cuBLASLt plan builders for CudaLinearOp forward and backward passes. More...
#include <cublasLt.h>
#include <cuda_fp16.h>
#include <cuda_fp8.h>
#include <format>
#include <stdexcept>
#include "Kernels/Linear.cuh"
import Logging.Logger;
import Compute.CublasLtPlan;
Namespaces | |
| namespace | Mila |
| Mila main API namespace. | |
| namespace | Mila::Dnn |
| namespace | Mila::Dnn::Compute |
| namespace | Mila::Dnn::Compute::Cuda |
| namespace | Mila::Dnn::Compute::Cuda::Linear |
| namespace | Mila::Dnn::Compute::Cuda::Linear::Detail |
Typedefs | |
| template<typename TComputePrecision> | |
| using | Mila::Dnn::Compute::Cuda::Linear::Detail::CublasLtMatMulPlan = Mila::Dnn::Compute::Cuda::CublasLtMatMulPlan<TComputePrecision> |
Functions | |
| template<typename TComputePrecision> | |
| CublasLtMatMulPlan< TComputePrecision > | Mila::Dnn::Compute::Cuda::Linear::Detail::build_backward_input_plan (cublasLtHandle_t handle, int batch_size, int in_features, int out_features, cudaDataType_t cuda_data_type, cublasComputeType_t compute_type, cudaDataType_t scale_type) |
| Build cuBLASLt plan for backward input gradient computation. | |
| template<typename TComputePrecision> | |
| CublasLtMatMulPlan< TComputePrecision > | Mila::Dnn::Compute::Cuda::Linear::Detail::build_backward_weight_plan (cublasLtHandle_t handle, int batch_size, int in_features, int out_features, cudaDataType_t cuda_data_type, cublasComputeType_t compute_type, cudaDataType_t scale_type) |
| Build cuBLASLt plan for backward weight gradient computation. | |
| template<typename TComputePrecision> | |
| CublasLtMatMulPlan< TComputePrecision > | Mila::Dnn::Compute::Cuda::Linear::Detail::build_forward_plan (cublasLtHandle_t handle, int batch_size, int in_features, int out_features, bool has_bias, cudaDataType_t cuda_data_type, cublasComputeType_t compute_type, cudaDataType_t scale_type) |
| Build cuBLASLt plan for the forward pass. | |
| template<typename TComputePrecision> | |
| void | Mila::Dnn::Compute::Cuda::Linear::Detail::compute_bias_gradient (TComputePrecision *bias_grad, const TComputePrecision *output_grad, int batch_size, int out_features, cudaStream_t stream) |
| Compute bias gradient via reduction sum across batch dimension. | |
cuBLASLt plan builders for CudaLinearOp forward and backward passes.