Mila
Deep Neural Network Library
Mila::Dnn::Residual< TDeviceType, TInput, TOutput > Class Template Reference export

A class implementing a residual connection module. More...

Inheritance diagram for Mila::Dnn::Residual< TDeviceType, TInput, TOutput >:
Collaboration diagram for Mila::Dnn::Residual< TDeviceType, TInput, TOutput >:

Public Types

using ModuleBase = Module< TDeviceType, TInput, TOutput >
 Alias for base module type.
 
using MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource >
 Memory resource type used for tensors, selected based on device type.
 
- Public Types inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
using MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource >
 

Public Member Functions

 Residual (const std::string &device_name, const ResidualConfig &config)
 Constructs a new Residual module with a device name.
 
 Residual (std::shared_ptr< DeviceContext > device_context, const ResidualConfig &config)
 Constructs a new Residual module with a provided device context.
 
void backward (const Tensor< TInput, MR > &input, const Tensor< TOutput, MR > &output_grad, Tensor< TInput, MR > &input_grad)
 Performs the backward pass of the Residual connection.
 
void forward (const Tensor< TInput, MR > &input, Tensor< TOutput, MR > &output)
 Performs the forward pass of the Residual connection.
 
std::shared_ptr< Module< TDeviceType, TInput, TOutput > > getInnerModule ()
 Gets the inner module.
 
void load (ModelArchive &archive) override
 Deserializes the module state from a ZIP archive.
 
size_t parameterCount () const override
 Gets the number of trainable parameters in this module.
 
void save (ModelArchive &zip) const override
 Serializes the module state to a ZIP archive.
 
std::string toString () const override
 Converts the module information to a human-readable string.
 
- Public Member Functions inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
 Module (const std::string &device_name, const ComponentConfig &config)
 Constructor with device name.
 
 Module (std::shared_ptr< DeviceContext > context, const ComponentConfig &config)
 Constructor with a specific device context.
 
virtual ~Module ()=default
 Virtual destructor for proper cleanup in derived classes.
 
std::shared_ptr< Compute::DeviceContext > getDeviceContext () const
 Get the device context for this module.
 
Compute::DeviceType getDeviceType () const
 Get the device type of the current device context.
 
std::string getName () const
 Get the name of the module.
 
const auto & getParameterTensors () const
 Get the parameter tensors of this module.
 
const ComputePrecision::Policy getPrecision () const
 
const auto & getStateTensors () const
 Get the state tensors of this module.
 
bool isTraining () const
 Check if the module is in training mode.
 
virtual void setTraining (bool is_training)
 Set the training mode of this module.
 

Private Member Functions

void addTensors (const Tensor< TInput, MR > &a, const Tensor< TInput, MR > &b, Tensor< TInput, MR > &result)
 Adds two tensors element-wise.
 
void createOperation ()
 Creates an appropriate operation based on the connection type.
 
void createProjection (const std::vector< size_t > &input_shape, const std::vector< size_t > &output_shape)
 Creates a projection layer when input and output dimensions don't match.
 
void initializeGateParameters (const std::vector< size_t > &shape)
 Initializes parameters for gated connections.
 
bool tensorShapesMatch (const Tensor< TInput, MR > &a, const Tensor< TOutput, MR > &b)
 Checks if two tensor shapes match for residual connection.
 

Static Private Member Functions

static std::string connectionTypeToString (ResidualConfig::ConnectionType type)
 Converts connection type enum to string for display purposes.
 

Private Attributes

ResidualConfig config_
 Configuration for the Residual module.
 
std::shared_ptr< Tensor< TOutput, MR > > gate_weights_
 Learnable gate weights for gated residual connections.
 
std::shared_ptr< BinaryOperation< TDeviceType, TInput, TOutput, TOutput > > gated_operation_
 Binary operation for gated residual connections.
 
Tensor< TInput, MR > inner_input_grad_ {}
 Temporary tensor to store inner module gradients during backward pass.
 
std::shared_ptr< Module< TDeviceType, TInput, TOutput > > inner_module_
 The inner module implementing the transformation F(x).
 
Tensor< TOutput, MR > inner_output_ {}
 Temporary tensor to store inner module output during forward pass.
 
std::vector< std::shared_ptr< Tensor< TOutput, MR > > > inner_parameter_grads_
 Gradients for inner parameters.
 
std::shared_ptr< BinaryOperation< TDeviceType, TInput, TOutput, TOutput > > operation_
 Binary operation for standard and scaled residual connections.
 
std::vector< std::shared_ptr< Tensor< TOutput, MR > > > output_state_
 Output state tensors for backward pass.
 
std::vector< std::shared_ptr< Tensor< TOutput, MR > > > parameter_grads_
 Gradients for trainable parameters.
 
std::vector< std::shared_ptr< Tensor< TOutput, MR > > > parameters_
 Collection of trainable parameters.
 
std::shared_ptr< Linear< TDeviceType, TInput, TOutput > > projection_
 Optional projection layer for dimension matching.
 
Tensor< TOutput, MR > projection_output_ {}
 Temporary tensor to store projection output during forward pass.
 
OperationAttributes properties_
 Operation-specific attributes.
 
Tensor< TInput, MR > temp_grad_ {}
 Temporary tensor for gradient accumulation.
 

Additional Inherited Members

- Protected Member Functions inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
const std::string parametersToString () const
 Helper method to convert parameters to string representation.
 
const std::string stateToString () const
 Helper method to convert state tensors to string representation.
 
- Protected Attributes inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
std::unordered_map< std::string, std::shared_ptr< Tensor< TOutput, MR > > > parameter_map_ = {}
 Map of parameter names to parameter tensors.
 
std::unordered_map< std::string, std::shared_ptr< Tensor< TOutput, MR > > > state_map_ = {}
 Map of state names to state tensors.
 

Detailed Description

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
requires ValidFloatTensorTypes<TInput, TOutput>
class Mila::Dnn::Residual< TDeviceType, TInput, TOutput >

A class implementing a residual connection module.

Residual connections help deep neural networks avoid vanishing gradients by providing shortcut connections. The basic formula is y = x + F(x), where F is a differentiable function (usually a sequence of neural network layers).

This implementation supports three types of residual connections:

  1. Addition: y = x + F(x)
  2. Scaled Addition: y = x + alpha*F(x), where alpha is a scaling factor
  3. Gated: y = g*x + (1-g)*F(x), where g is a learnable parameter

When input and output dimensions don't match, an optional projection layer can be automatically added to make the dimensions compatible.

Template Parameters
TDeviceType: The device type (CPU or CUDA) on which to perform computations.
TInput: The data type of the input tensor elements.
TOutput: The data type of the output tensor elements; defaults to TInput.

Member Typedef Documentation

◆ ModuleBase

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
using Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::ModuleBase = Module<TDeviceType, TInput, TOutput>
export

Alias for base module type.

◆ MR

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
using Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::MR = std::conditional_t<TDeviceType == DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource>
export

Memory resource type used for tensors, selected based on device type.

Constructor & Destructor Documentation

◆ Residual() [1/2]

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::Residual ( const std::string &  device_name,
const ResidualConfig &  config 
)
inline explicit export

Constructs a new Residual module with a device name.

Creates a new DeviceContext internally using the provided device name. This constructor is useful for creating standalone modules without pre-existing device contexts.

Parameters
device_name: The name of the device to use (e.g., "CPU", "CUDA:0").
config: Configuration parameters for the Residual module.
Exceptions
std::invalid_argument: If the device name or the configuration is invalid.
std::runtime_error: If the device type doesn't match the template parameter TDeviceType, or if the inner module type doesn't match.

◆ Residual() [2/2]

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::Residual ( std::shared_ptr< DeviceContext >  device_context,
const ResidualConfig &  config 
)
inline explicit export

Constructs a new Residual module with a provided device context.

Uses a pre-existing DeviceContext instance. This constructor is useful when integrating the module into a larger network that shares device contexts across modules.

Parameters
device_context: The device context to use for this module.
config: Configuration parameters for the Residual module.
Exceptions
std::invalid_argument: If device_context is null or the configuration is invalid.
std::runtime_error: If the device context type doesn't match the template parameter TDeviceType, or if the inner module type doesn't match.

Member Function Documentation

◆ addTensors()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::addTensors ( const Tensor< TInput, MR > &  a,
const Tensor< TInput, MR > &  b,
Tensor< TInput, MR > &  result 
)
inline export private

Adds two tensors element-wise.

Helper method to add gradients from different paths during backpropagation.

Parameters
a: First input tensor.
b: Second input tensor.
result: Output tensor for the sum.

◆ backward()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::backward ( const Tensor< TInput, MR > &  input,
const Tensor< TOutput, MR > &  output_grad,
Tensor< TInput, MR > &  input_grad 
)
inline export

Performs the backward pass of the Residual connection.

Computes gradients for the input tensor and parameters based on the output gradients. Handles backpropagation through the inner module and projection layer (if present).

Parameters
input: The input tensor from the forward pass.
output_grad: The gradient of the loss with respect to the output.
input_grad: The tensor to store gradients with respect to the input.

◆ connectionTypeToString()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
static std::string Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::connectionTypeToString ( ResidualConfig::ConnectionType  type)
inline static export private

Converts connection type enum to string for display purposes.

Parameters
type: The connection type enum value
Returns
std::string Human-readable representation of the connection type

◆ createOperation()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::createOperation ( )
inline export private

Creates an appropriate operation based on the connection type.

Instantiates the correct operation implementation based on the configured connection type (Addition, ScaledAddition, or Gated) and device type.


◆ createProjection()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::createProjection ( const std::vector< size_t > &  input_shape,
const std::vector< size_t > &  output_shape 
)
inline export private

Creates a projection layer when input and output dimensions don't match.

Instantiates a Linear layer to project the input to the correct dimensions to match the output of the inner module.

Parameters
input_shape: Shape of the input tensor.
output_shape: Shape of the output tensor from the inner module.

◆ forward()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::forward ( const Tensor< TInput, MR > &  input,
Tensor< TOutput, MR > &  output 
)
inline export

Performs the forward pass of the Residual connection.

Applies the residual transformation based on the configured connection type:

  • Addition: y = x + F(x)
  • Scaled Addition: y = x + alpha*F(x)
  • Gated: y = g*x + (1-g)*F(x)

Handles projection when input and inner module dimensions don't match.

Parameters
input: The input tensor to be processed.
output: The output tensor where the results will be stored.
Exceptions
std::runtime_error: If dimensions don't match and projection is disabled.

◆ getInnerModule()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::shared_ptr< Module< TDeviceType, TInput, TOutput > > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::getInnerModule ( )
inline export

Gets the inner module.

Returns the inner module that implements the transformation F(x) in the residual connection formula y = x + F(x).

Returns
std::shared_ptr<Module<TDeviceType, TInput, TOutput>> The inner module

◆ initializeGateParameters()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::initializeGateParameters ( const std::vector< size_t > &  shape)
inline export private

Initializes parameters for gated connections.

Creates and initializes the learnable gate weights for gated residual connections. The gate weights determine how much of the input vs. transformed output to use.

Parameters
shape: Shape of the tensor for gate weights

◆ load()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::load ( ModelArchive &  archive)
inline override export virtual

Deserializes the module state from a ZIP archive.

Loads the state of the inner module, projection layer (if present), and gating parameters (if used) from the provided ZIP archive.

Parameters
archive: The ZIP archive to load the module state from.

Implements Mila::Dnn::Module< TDeviceType, TInput, TOutput >.


◆ parameterCount()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
size_t Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::parameterCount ( ) const
inline override export virtual

Gets the number of trainable parameters in this module.

Counts the total number of trainable parameters in the residual module, including the inner module, projection layer (if present), and gating parameters (if using gated connections).

Returns
size_t The total number of parameters.

Implements Mila::Dnn::Module< TDeviceType, TInput, TOutput >.


◆ save()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::save ( ModelArchive &  zip) const
inline override export virtual

Serializes the module state to a ZIP archive.

Saves the state of the inner module, projection layer (if present), and gating parameters (if used) to the provided ZIP archive.

Parameters
zip: The ZIP archive to save the module state to.

Implements Mila::Dnn::Module< TDeviceType, TInput, TOutput >.

◆ tensorShapesMatch()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
bool Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::tensorShapesMatch ( const Tensor< TInput, MR > &  a,
const Tensor< TOutput, MR > &  b 
)
inline export private

Checks if two tensor shapes match for residual connection.

Parameters
a: First tensor to compare
b: Second tensor to compare
Returns
bool True if shapes match, false otherwise

◆ toString()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::string Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::toString ( ) const
inline override export virtual

Converts the module information to a human-readable string.

Includes detailed information about the module configuration including:

  • Module name
  • Connection type
  • Scaling factor (for scaled addition)
  • Projection status
  • Inner module information
Returns
std::string A string representation of the module information.

Implements Mila::Dnn::Module< TDeviceType, TInput, TOutput >.


Member Data Documentation

◆ config_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
ResidualConfig Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::config_
export private

Configuration for the Residual module.

◆ gate_weights_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::shared_ptr<Tensor<TOutput, MR> > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::gate_weights_
export private

Learnable gate weights for gated residual connections.

◆ gated_operation_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::shared_ptr<BinaryOperation<TDeviceType, TInput, TOutput, TOutput> > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::gated_operation_
export private

Binary operation for gated residual connections.

◆ inner_input_grad_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Tensor<TInput, MR> Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::inner_input_grad_ {}
export private

Temporary tensor to store inner module gradients during backward pass.

◆ inner_module_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::shared_ptr<Module<TDeviceType, TInput, TOutput> > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::inner_module_
export private

The inner module implementing the transformation F(x).

◆ inner_output_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Tensor<TOutput, MR> Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::inner_output_ {}
export private

Temporary tensor to store inner module output during forward pass.

◆ inner_parameter_grads_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::vector<std::shared_ptr<Tensor<TOutput, MR> > > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::inner_parameter_grads_
export private

Gradients for inner parameters.

◆ operation_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::shared_ptr<BinaryOperation<TDeviceType, TInput, TOutput, TOutput> > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::operation_
export private

Binary operation for standard and scaled residual connections.

◆ output_state_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::vector<std::shared_ptr<Tensor<TOutput, MR> > > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::output_state_
export private

Output state tensors for backward pass.

◆ parameter_grads_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::vector<std::shared_ptr<Tensor<TOutput, MR> > > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::parameter_grads_
export private

Gradients for trainable parameters.

◆ parameters_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::vector<std::shared_ptr<Tensor<TOutput, MR> > > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::parameters_
export private

Collection of trainable parameters.

◆ projection_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
std::shared_ptr<Linear<TDeviceType, TInput, TOutput> > Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::projection_
export private

Optional projection layer for dimension matching.

◆ projection_output_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Tensor<TOutput, MR> Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::projection_output_ {}
export private

Temporary tensor to store projection output during forward pass.

◆ properties_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
OperationAttributes Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::properties_
export private

Operation-specific attributes.

◆ temp_grad_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Tensor<TInput, MR> Mila::Dnn::Residual< TDeviceType, TInput, TOutput >::temp_grad_ {}
export private

Temporary tensor for gradient accumulation.


The documentation for this class was generated from the following file: