|
template<typename TDataType , typename TCompute = float>
requires std::is_same_v<TDataType, float> || std::is_same_v<TDataType, half> || std::is_same_v<TDataType, __nv_bfloat16> || std::is_same_v<TDataType, __nv_fp8_e4m3> |
void | Mila::Dnn::Compute::cublaslt_matmul_forward (TDataType *Y, const TDataType *X, const TDataType *weight, const TDataType *bias, int outer_size, int C, int OC, cudaStream_t stream, cublasLtHandle_t cublasLtHandle) |
| Performs matrix multiplication with bias addition using cuBLASLt.
|
|