|
| CudaResidualOp (const ResidualConfig &config) |
| Constructs a new CUDA Residual operation with the default device context.
|
|
| CudaResidualOp (std::shared_ptr< DeviceContext > context, const ResidualConfig &config) |
| Constructs a new CUDA Residual operation with a specific device context.
|
|
void | backward (const Tensor< TInput, MR > &input1, const Tensor< TInput, MR > &input2, const Tensor< TOutput, MR > &output, const Tensor< TOutput, MR > &output_gradient, const std::vector< std::shared_ptr< Tensor< TInput, MR > > > &parameters, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &parameter_gradients, Tensor< TInput, MR > &input1_gradient, Tensor< TInput, MR > &input2_gradient, const OperationAttributes &properties, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const |
| Performs the backward pass of the residual operation.
|
|
void | forward (const Tensor< TInput, MR > &input1, const Tensor< TInput, MR > &input2, const std::vector< std::shared_ptr< Tensor< TInput, MR > > > &parameters, const OperationAttributes &properties, Tensor< TOutput, MR > &output, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const override |
| Performs the forward pass of the residual operation on CUDA.
|
|
std::string | getName () const override |
| Gets the name of this operation.
|
|
| BinaryOperation (OperationType operation_type) |
| Constructs a BinaryOperation with the specified operation type and precision policy.
|
|
| BinaryOperation (OperationType operation_type, std::shared_ptr< DeviceContext > context) |
| Constructs a BinaryOperation with the specified operation type, device context, and precision policy.
|
|
virtual | ~BinaryOperation ()=default |
| Virtual destructor for proper cleanup of derived classes.
|
|
virtual void | backward (const Tensor< TInput1, MR > &input1, const Tensor< TInput2, MR > &input2, const Tensor< TOutput, MR > &output, const Tensor< TOutput, MR > &output_gradient, const std::vector< std::shared_ptr< Tensor< TInput1, MR > > > &parameters, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &parameter_gradients, Tensor< TInput1, MR > &input1_gradient, Tensor< TInput2, MR > &input2_gradient, const OperationAttributes &attributes, const std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const |
| Executes the backward pass of a binary operation.
|
|
virtual void | forward (const Tensor< TInput1, MR > &input1, const Tensor< TInput2, MR > &input2, const std::vector< std::shared_ptr< Tensor< TInput1, MR > > > &parameters, const OperationAttributes &attributes, Tensor< TOutput, MR > &output, std::vector< std::shared_ptr< Tensor< TOutput, MR > > > &output_state) const =0 |
| Executes the forward pass of a binary operation.
|
|
| OperationBase (OperationType operation_type, std::shared_ptr< DeviceContext > context) |
| Constructs an OperationBase object with a specific device context and compute precision.
|
|
virtual | ~OperationBase ()=default |
| Virtual destructor for the OperationBase class.
|
|
std::shared_ptr< DeviceContext > | getDeviceContext () const |
| Gets the device context associated with this operation.
|
|
DeviceType | getDeviceType () const |
| Gets the device type for this operation.
|
|
OperationType | getOperationType () const |
| Gets the operation type enumeration value.
|
|
template<typename TInput, typename TOutput = TInput>
requires ValidFloatTensorTypes<TInput, TOutput>
class Mila::Dnn::Compute::CudaResidualOp< TInput, TOutput >
CUDA implementation of the residual operation for neural networks.
This class provides a CUDA-based implementation of the residual operation, which performs element-wise addition of two input tensors. It is commonly used in residual connections in neural network architectures such as ResNet and Transformers to help with gradient flow and mitigate the vanishing gradient problem. The implementation is optimized for NVIDIA GPUs.
The implementation leverages CUDA for GPU acceleration, providing efficient computation for large neural network models. It also supports different precision modes via the ComputePrecision policy.
- Template Parameters
-
TInput | The data type of both input tensor elements. |
TOutput | The data type of the output tensor elements (defaults to TInput). |