Mila
Deep Neural Network Library
CUDA kernel function declarations for neural network operations.
#include <cublasLt.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
Namespaces
namespace Mila
namespace Mila::Dnn
namespace Mila::Dnn::Compute
Functions
void Mila::Dnn::Compute::cuda_encoder_forward_fp16(half *Y, const int *X, const half *wte, const half *wpe, int B, int T, int C, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_encoder_forward_fp32(float *Y, const int *X, const float *wte, const float *wpe, int B, int T, int C, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_gelu_backward_fp16(half *dX, const half *X, const half *dY, const int N, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_gelu_backward_fp32(float *dX, const float *X, const float *dY, const int N, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_gelu_forward_fp16(half *Y, const half *X, int N, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_gelu_forward_fp32(float *Y, const float *X, int N, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_layernorm_forward_fp16(half *Y, half *mean, half *rstd, const half *X, const half *weight, const half *bias, int B, int T, int C, float epsilon, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_layernorm_forward_fp32(float *Y, float *mean, float *rstd, const float *X, const float *weight, const float *bias, int B, int T, int C, float epsilon, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_matmul_forward_fp16(half *Y, const half *X, const half *weight, const half *bias, int outer_size, int C, int OC, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_matmul_forward_fp32(float *Y, const float *X, const float *weight, const float *bias, int outer_size, int C, int OC, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_mha_forward_fp16(half *Y, half *qkvr, half *att, const half *X, int B, int T, int C, int NH, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_mha_forward_fp32(float *Y, float *qkvr, float *att, const float *X, int B, int T, int C, int NH, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_residual_forward_fp16(half *Y, const half *X1, const half *X2, int N, cudaStream_t stream)
void Mila::Dnn::Compute::cuda_residual_forward_fp32(float *Y, const float *X1, const float *X2, int N, cudaStream_t stream)
template<typename TPrecision>
void Mila::Dnn::Compute::cuda_softmax_crossentropy_backward(TPrecision *dlogits, const TPrecision *dlosses, const TPrecision *probs, const int *targets, int batch_size, int seq_len, int vocab_size, cudaStream_t stream)
template<typename TPrecision>
void Mila::Dnn::Compute::cuda_softmax_crossentropy_forward(TPrecision *losses, TPrecision *probs, const TPrecision *logits, const int *targets, int batch_size, int seq_len, int vocab_size, cudaStream_t stream)
template<typename TPrecision>
void Mila::Dnn::Compute::cuda_softmax_forward(TPrecision *Y, const TPrecision *X, int N, int C, cudaStream_t stream)
template<typename TPrecision>
void Mila::Dnn::Compute::cuda_softmax_forward_general(TPrecision *Y, const TPrecision *X, int outer_size, int dim_size, int inner_size, cudaStream_t stream)
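The launchers above share one calling convention: raw device pointers, integer extents, and a cudaStream_t on which the work is presumably enqueued, with no error code returned. The sketch below calls cuda_gelu_forward_fp32 on a small device buffer; it is an illustration only, error checking is omitted, and the prototype is repeated locally because this page does not show the header's file name (in real code, include the Mila header and link against the library instead).

#include <cuda_runtime.h>
#include <vector>

// Declaration copied from the function list above; normally provided by the Mila header.
namespace Mila { namespace Dnn { namespace Compute {
    void cuda_gelu_forward_fp32(float *Y, const float *X, int N, cudaStream_t stream);
}}}

int main()
{
    const int N = 1024;
    std::vector<float> host_x(N, 0.5f), host_y(N);

    // Allocate device buffers for the input X and output Y.
    float *d_x = nullptr, *d_y = nullptr;
    cudaMalloc(&d_x, N * sizeof(float));
    cudaMalloc(&d_y, N * sizeof(float));

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Upload the input, launch the GELU forward kernel, and download the result.
    cudaMemcpyAsync(d_x, host_x.data(), N * sizeof(float), cudaMemcpyHostToDevice, stream);
    Mila::Dnn::Compute::cuda_gelu_forward_fp32(d_y, d_x, N, stream);
    cudaMemcpyAsync(host_y.data(), d_y, N * sizeof(float), cudaMemcpyDeviceToHost, stream);

    // Work issued on the stream is asynchronous with respect to the host; wait for it.
    cudaStreamSynchronize(stream);

    cudaStreamDestroy(stream);
    cudaFree(d_x);
    cudaFree(d_y);
    return 0;
}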
Detailed Description
CUDA kernel function declarations for neural network operations.
This header file declares CUDA kernel functions that implement neural network operations optimized for execution on NVIDIA GPUs. The operations include:
- Token and positional embedding lookup (encoder forward)
- GELU activation (forward and backward)
- Layer normalization (forward)
- Matrix multiplication with bias (forward)
- Multi-head attention (forward)
- Residual connections (element-wise addition, forward)
- Softmax (forward) and softmax cross-entropy loss (forward and backward)
Each operation provides implementations for both single-precision (fp32) and half-precision (fp16) floating-point data types to support different performance and accuracy requirements. These functions serve as the computational backend for the neural network operations in the Mila deep learning framework.
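Because every _fp32/_fp16 pair has an identical parameter list, a caller can select the precision at compile time, much like the TPrecision-templated softmax entry points above. Below is a minimal sketch of such a wrapper for the layer-normalization forward pass; it is a hypothetical convenience layer, not part of the library, and the declarations are repeated only because this page does not show the header's file name.

#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <type_traits>

// Declarations copied from the function list above; normally provided by the Mila header.
namespace Mila { namespace Dnn { namespace Compute {
    void cuda_layernorm_forward_fp16(half *Y, half *mean, half *rstd, const half *X,
                                     const half *weight, const half *bias,
                                     int B, int T, int C, float epsilon, cudaStream_t stream);
    void cuda_layernorm_forward_fp32(float *Y, float *mean, float *rstd, const float *X,
                                     const float *weight, const float *bias,
                                     int B, int T, int C, float epsilon, cudaStream_t stream);
}}}

// Hypothetical wrapper: routes to the fp32 or fp16 launcher at compile time.
template <typename TPrecision>
void layernorm_forward(TPrecision *Y, TPrecision *mean, TPrecision *rstd,
                       const TPrecision *X, const TPrecision *weight, const TPrecision *bias,
                       int B, int T, int C, float epsilon, cudaStream_t stream)
{
    static_assert(std::is_same_v<TPrecision, float> || std::is_same_v<TPrecision, half>,
                  "Only fp32 and fp16 variants are declared in this header.");
    if constexpr (std::is_same_v<TPrecision, float>) {
        Mila::Dnn::Compute::cuda_layernorm_forward_fp32(Y, mean, rstd, X, weight, bias,
                                                        B, T, C, epsilon, stream);
    } else {
        Mila::Dnn::Compute::cuda_layernorm_forward_fp16(Y, mean, rstd, X, weight, bias,
                                                        B, T, C, epsilon, stream);
    }
}

A similar compile-time dispatch could be written for any of the other paired launchers, since each pair differs only in the element type of its pointer arguments.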