Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
CudaOps.h
Go to the documentation of this file.
1
22#pragma once
23
24#include <cublasLt.h>
25#include <cuda_runtime.h>
26#include <cuda_fp16.h>
27
28namespace Mila::Dnn::Compute
29{
30 // REVIEW: These declarations should no longer be needed.
31
32 // Attention functions
33 void cuda_mha_forward_fp32(
34 float* Y,
35 float* qkvr, float* att,
36 const float* X,
37 int B, int T, int C, int NH,
38 cudaStream_t stream );
39
40 void cuda_mha_forward_fp16(
41 half* Y,
42 half* qkvr, half* att,
43 const half* X,
44 int B, int T, int C, int NH,
45 cudaStream_t stream );
46
47 // SoftmaxCrossEntropy functions
48 template <typename TPrecision>
49 void cuda_softmax_crossentropy_forward(
50 TPrecision* Y_loss,
51 TPrecision* Y,
52 const TPrecision* X,
53 const int* targets,
54 int batch_size,
55 int seq_len,
56 int vocab_size,
57 cudaStream_t stream );
58
59 template <typename TPrecision>
60 void cuda_softmax_crossentropy_backward(
61 TPrecision* dX,
62 const TPrecision* dY_loss,
63 const TPrecision* Y,
64 const int* targets,
65 int batch_size,
66 int seq_len,
67 int vocab_size,
68 cudaStream_t stream );
69}
Definition Device.ixx:15
void cuda_softmax_crossentropy_backward(TPrecision *dX, const TPrecision *dY_loss, const TPrecision *Y, const int *targets, int batch_size, int seq_len, int vocab_size, cudaStream_t stream)
void cuda_mha_forward_fp32(float *Y, float *qkvr, float *att, const float *X, int B, int T, int C, int NH, cudaStream_t stream)
void cuda_mha_forward_fp16(half *Y, half *qkvr, half *att, const half *X, int B, int T, int C, int NH, cudaStream_t stream)
void cuda_softmax_crossentropy_forward(TPrecision *Y_loss, TPrecision *Y, const TPrecision *X, const int *targets, int batch_size, int seq_len, int vocab_size, cudaStream_t stream)