CUDA implementation of the Layer Normalization operation for neural networks.
More...
|
| CudaLayerNormOp (const LayerNormConfig &config) |
| Constructs a new CUDA Layer Normalization operation with the default device context.
|
|
| CudaLayerNormOp (std::shared_ptr< DeviceContext > context, const LayerNormConfig &config) |
| Constructs a new CUDA Layer Normalization operation with a specific device context.
|
|
void | backward (const Tensor< TInput, MR > &input, const Tensor< TOutput, MR > &output, const Tensor< TOutput, MR > &output_gradient, const std::vector< std::shared_ptr< Tensor< TInput, MR > > > &parameters, std::vector< std::shared_ptr< Tensor< TInput, MR > > > &parameter_gradients, Tensor< TInput, MR > &input_gradient, const OperationAttributes &properties, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const
| Performs the backward pass of the Layer Normalization operation.
|
|
void | forward (const Tensor< TInput, MR > &input, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &parameters, const OperationAttributes &properties, Tensor< TOutput, MR > &output, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const override
| Performs the forward pass of the Layer Normalization operation on CUDA.
|
|
std::string | getName () const override |
| Gets the name of this operation.
|
|
| UnaryOperation (OperationType operation_type) |
| Constructs a UnaryOperation with the specified operation type.
|
|
| UnaryOperation (OperationType operation_type, std::shared_ptr< DeviceContext > context) |
| Constructs a UnaryOperation with the specified operation type and device context.
|
|
virtual | ~UnaryOperation ()=default |
| Virtual destructor for proper cleanup of derived classes.
|
|
virtual void | backward (const Tensor< TInput, MR > &grad, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &parameters, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_grads) const
| Executes the backward pass of a unary operation.
|
|
virtual void | backward (const Tensor< TInput, MR > &input, const Tensor< TOutput, MR > &output_grad, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &parameters, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &parameter_grads, Tensor< TInput, MR > &input_grad, const OperationAttributes &properties, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const
| Executes the comprehensive backward pass of a unary operation.
|
|
virtual void | forward (const Tensor< TInput, MR > &input, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &parameters, const OperationAttributes &properties, Tensor< TOutput, MR > &output, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const =0
| Executes the forward pass of a unary operation.
|
|
| OperationBase (OperationType operation_type, std::shared_ptr< DeviceContext > context) |
| Constructs an OperationBase object with a specific device context and compute precision.
|
|
virtual | ~OperationBase ()=default |
| Virtual destructor for the OperationBase class.
|
|
std::shared_ptr< DeviceContext > | getDeviceContext () const |
| Gets the device context associated with this operation.
|
|
DeviceType | getDeviceType () const |
| Gets the device type for this operation.
|
|
OperationType | getOperationType () const |
| Gets the operation type enumeration value.
|
|
template<typename TInput = float, typename TOutput = TInput>
class Mila::Dnn::Compute::CudaLayerNormOp< TInput, TOutput >
CUDA implementation of the Layer Normalization operation for neural networks.
This class provides a CUDA-based implementation of the Layer Normalization operation, which normalizes the activations of a layer for each example in a batch, usually applied before the activation function. Layer normalization helps stabilize the learning process and reduce the training time required to learn the parameters of neural networks.
The normalization is applied across the last dimension (feature dimension) and includes learnable scale (gamma) and shift (beta) parameters. The implementation is optimized for NVIDIA GPUs using CUDA for high-performance computation.
- Template Parameters
-
TInput | The data type of the input tensor elements. |
TOutput | The data type of the output tensor elements (defaults to the input type). |