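Namespace for CUDA operation implementation details (encoder, GELU, layer normalization, matrix multiplication, multi-head attention, residual, softmax, and fused softmax cross entropy). More...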

Classes
struct	cuda_encoder_impl
	Primary template for precision-specific CUDA encoder implementations. More...

struct	cuda_encoder_impl< float >
	Single-precision (float) specialization for CUDA encoder operations. More...

struct	cuda_encoder_impl< half >
	Half-precision (half) specialization for CUDA encoder operations. More...

struct	cuda_gelu_impl

struct	cuda_gelu_impl< float >

struct	cuda_gelu_impl< half >

struct	cuda_layernorm_impl

struct	cuda_layernorm_impl< float >

struct	cuda_layernorm_impl< half >

struct	cuda_matmul_impl

struct	cuda_matmul_impl< float >

struct	cuda_matmul_impl< half >

struct	cuda_mha_impl
	Implementation details for CUDA-based Multi-Head Attention operations. More...

struct	cuda_mha_impl< float >

struct	cuda_mha_impl< half >

struct	cuda_residual_impl

struct	cuda_residual_impl< float >

struct	cuda_residual_impl< half >

struct	cuda_softmax_crossentropy_impl

struct	cuda_softmax_crossentropy_impl< float >

struct	cuda_softmax_crossentropy_impl< half >

struct	cuda_softmax_impl

struct	cuda_softmax_impl< float >

struct	cuda_softmax_impl< half >

Typedefs
using	BackwardFp16Func = void (*)(half *, const half *, const half *, int, cudaStream_t)

using	BackwardFp32Func = void (*)(float *, const float *, const float *, int, cudaStream_t)

using	ForwardFp16Func = void (*)(half *, const half *, int, cudaStream_t)

using	ForwardFp32Func = void (*)(float *, const float *, int, cudaStream_t)

Detailed Description

Namespace for CUDA layer normalization implementation details.

Namespace for CUDA fused softmax cross entropy implementation details.

Namespace for CUDA softmax implementation details.

Namespace for CUDA residual implementation details.

Namespace for CUDA matrix multiplication implementation details.

This namespace contains the implementation details for the CUDA layer normalization operation, including specialized templates for different data types (float, half).

This namespace contains the implementation details for the CUDA matrix multiplication operation, including specialized templates for different data types (float, half).

This namespace contains the implementation details for the CUDA residual operation, including specialized templates for different data types (float, half).

This namespace contains the implementation details for the CUDA softmax operation, including specialized templates for different data types (float, half).

This namespace contains the implementation details for the CUDA fused softmax cross entropy operation, including specialized templates for different data types (float, half).

Typedef Documentation

◆ BackwardFp16Func

using Mila::Dnn::Compute::Detail::BackwardFp16Func = void (*)(half*, const half*, const half*, int, cudaStream_t)

◆ BackwardFp32Func

using Mila::Dnn::Compute::Detail::BackwardFp32Func = void (*)(float*, const float*, const float*, int, cudaStream_t)

◆ ForwardFp16Func

using Mila::Dnn::Compute::Detail::ForwardFp16Func = void (*)(half*, const half*, int, cudaStream_t)

◆ ForwardFp32Func

using Mila::Dnn::Compute::Detail::ForwardFp32Func = void (*)(float*, const float*, int, cudaStream_t)