Mila
Deep Neural Network Library
Mila::Dnn::Model< TDeviceType, TInput, TOutput > Class Template Reference
export module Dnn.Model

A class representing a neural network model. More...

Inherits Mila::Dnn::Module< TDeviceType, TInput, TOutput >.

Public Types

using ModuleBase = Module< TDeviceType, TInput, TOutput >
 Base class type for the module.
 
using MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource >
 Memory resource type.
 
- Public Types inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
using MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource >
 

Public Member Functions

 Model ()
 Constructs a new Model object with the default device context.
 
 Model (const std::string &device_name)
 Constructs a new Model object with a specific device name.
 
 Model (std::shared_ptr< DeviceContext > context)
 Constructs a new Model object with a specific device context.
 
 ~Model ()
 Destroys the Model object.
 
virtual void backward ()
 Performs a backward pass through the model.
 
void build ()
 Builds the model.
 
template<typename TMR >
float calculateLoss (const Tensor< TOutput, TMR > &targets)
 Calculate the loss for the given targets and current model outputs.
 
void captureGraphBegin ()
 Start capturing operations for a CUDA graph.
 
void captureGraphEnd ()
 End capturing operations for a CUDA graph.
 
template<typename TDataLoader >
float evaluate (TDataLoader &data_loader, bool verbose=false)
 Evaluate the model on a dataset.
 
void executeGraph ()
 Execute the captured CUDA graph.
 
float forward (const Tensor< TInput, MR > &inputs, const Tensor< TOutput, MR > &targets)
 Performs a forward pass through the model.
 
Compute::ComputeDevice & getDevice () const
 Gets the compute device for this model.
 
cudaStream_t getStream () const
 Gets the current CUDA stream.
 
void loadCheckpoint (const std::string &filename)
 Loads the model's state from a checkpoint file.
 
size_t parameters () const
 Calculates the total number of parameters in the model.
 
template<typename TMR >
Tensor< TOutput, TMR > predict (const Tensor< TInput, TMR > &inputs)
 Predict outputs for the given inputs.
 
void print () const
 Prints the model's structure and total number of parameters.
 
void saveCheckpoint (const std::string &filename) const
 Saves the model's state to a checkpoint file.
 
void setDevice (const std::string &device_name)
 Sets the device to use for this model by name.
 
void setDevice (int device_id)
 Sets the device to use for this model by CUDA device ID.
 
void setTrainingMode (bool training)
 Sets the training mode for the model.
 
template<typename TDataLoader >
std::unordered_map< std::string, float > train (TDataLoader &train_loader, TDataLoader *val_loader=nullptr, const TrainingConfig &config={}, const std::vector< ModelCallback< TInput, TOutput > * > &callbacks={})
 Train the model using the provided data loader and configuration.
 
virtual void updateParameters (float learning_rate, float beta1=0.9f, float beta2=0.999f, float epsilon=1e-8f, float weight_decay=0.0f, size_t step=1)
 Updates the model parameters using the computed gradients.
 
virtual void zeroGrads ()
 Zeros out all gradients in the model.
 
- Public Member Functions inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
 Module (const std::string &device_name, const ComponentConfig &config)
 Constructor with device name.
 
 Module (std::shared_ptr< DeviceContext > context, const ComponentConfig &config)
 Constructor with a specific device context.
 
virtual ~Module ()=default
 Virtual destructor for proper cleanup in derived classes.
 
std::shared_ptr< Compute::DeviceContext > getDeviceContext () const
 Get the device context for this module.
 
Compute::DeviceType getDeviceType () const
 Get the device type of the current device context.
 
std::string getName () const
 Get the name of the module.
 
const auto & getParameterTensors () const
 Get the parameter tensors of this module.
 
const ComputePrecision::Policy & getPrecision () const
 Get the compute precision policy for this module.
 
const auto & getStateTensors () const
 Get the state tensors of this module.
 
bool isTraining () const
 Check if the module is in training mode.
 
virtual void load (ModelArchive &archive)=0
 Load the module state from a zip archive.
 
virtual size_t parameterCount () const =0
 Get the number of trainable parameters in the module.
 
virtual void save (ModelArchive &archive) const =0
 Save the module state to a zip archive.
 
virtual void setTraining (bool is_training)
 Set the training mode of this module.
 
virtual std::string toString () const =0
 Convert the module to a string representation.
 

Protected Attributes

Tensor< TInput, MR > last_inputs_
 The most recent input tensor provided to forward().
 
Tensor< TOutput, MR > last_targets_
 The most recent target tensor provided to forward().
 
- Protected Attributes inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
std::unordered_map< std::string, std::shared_ptr< Tensor< TOutput, MR > > > parameter_map_ = {}
 Map of parameter names to parameter tensors.
 
std::unordered_map< std::string, std::shared_ptr< Tensor< TOutput, MR > > > state_map_ = {}
 Map of state names to state tensors.
 

Private Member Functions

void initializeDevice ()
 Initializes device-specific resources.
 

Private Attributes

cudaGraph_t cuda_graph_ { nullptr }
 CUDA graph for optimized execution.
 
cudaGraphExec_t cuda_graph_exec_ { nullptr }
 Executable instance of the CUDA graph.
 
bool graph_capture_active_ { false }
 Flag indicating whether CUDA graph capture is active.
 
bool graph_initialized_ { false }
 Flag indicating whether CUDA graph has been initialized.
 
bool is_built_ { false }
 Indicates whether the model has been built.
 
bool is_training_ { false }
 Indicates whether the model is in training mode.
 
Compute::DeviceType old_device_type_ { Compute::DeviceType::Cpu }
 Previous device type for cleanup.
 
cudaStream_t stream_ { nullptr }
 The CUDA stream for device memory resource.
 
bool stream_created_ { false }
 Flag indicating whether we created the stream.
 

Additional Inherited Members

- Protected Member Functions inherited from Mila::Dnn::Module< TDeviceType, TInput, TOutput >
const std::string parametersToString () const
 Helper method to convert parameters to string representation.
 
const std::string stateToString () const
 Helper method to convert state tensors to string representation.
 

Detailed Description

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
requires ValidTensorTypes<TInput, TOutput>
class Mila::Dnn::Model< TDeviceType, TInput, TOutput >

A class representing a neural network model.

Template Parameters
TDeviceType The device type (CPU or CUDA) on which to perform computations.
TInput The input data type for the model.
TOutput The output data type for the model; defaults to TInput.
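
A minimal usage sketch follows. Model inherits pure virtual methods from Module (load, save, parameterCount, toString), so a concrete subclass is required; MyNet, the tensor setup, and the learning rate are illustrative assumptions, not part of the documented API:

    using namespace Mila::Dnn;

    MyNet model( "CUDA:0" );           // hypothetical subclass of Model<DeviceType::Cuda, float>
    model.build();                     // validates the module graph; throws if called twice
    model.setTrainingMode( true );

    Tensor<float, MyNet::MR> inputs;   // batch of inputs (population omitted)
    Tensor<float, MyNet::MR> targets;  // matching labels (population omitted)

    float loss = model.forward( inputs, targets );  // returns the batch loss
    model.backward();                  // requires forward() to have been called with targets
    model.updateParameters( 1e-3f );   // Adam-style step; remaining arguments use defaults
    model.zeroGrads();                 // clear gradients before the next batch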

Member Typedef Documentation

◆ ModuleBase

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
using Mila::Dnn::Model< TDeviceType, TInput, TOutput >::ModuleBase = Module<TDeviceType, TInput, TOutput>

Base class type for the module.

◆ MR

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
using Mila::Dnn::Model< TDeviceType, TInput, TOutput >::MR = std::conditional_t<TDeviceType == DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource>

Memory resource type.
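
That is, tensors owned by a CUDA model allocate through CudaMemoryResource and those owned by a CPU model through CpuMemoryResource. A compile-time sketch of the resolution (assuming these names are visible as written in this reference; needs <type_traits>):

    static_assert( std::is_same_v<Model<DeviceType::Cuda>::MR, CudaMemoryResource> );
    static_assert( std::is_same_v<Model<DeviceType::Cpu>::MR, CpuMemoryResource> );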

Constructor & Destructor Documentation

◆ Model() [1/3]

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Mila::Dnn::Model< TDeviceType, TInput, TOutput >::Model ( )
inline

Constructs a new Model object with the default device context.


◆ Model() [2/3]

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Mila::Dnn::Model< TDeviceType, TInput, TOutput >::Model ( std::shared_ptr< DeviceContext > context )
inline

Constructs a new Model object with a specific device context.

Parameters
context The device context to use for this model.

◆ Model() [3/3]

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Mila::Dnn::Model< TDeviceType, TInput, TOutput >::Model ( const std::string &  device_name)
inline

Constructs a new Model object with a specific device name.

Parameters
device_name The name of the device to use (e.g., "CUDA:0", "CPU").

◆ ~Model()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Mila::Dnn::Model< TDeviceType, TInput, TOutput >::~Model ( )
inline

Destroys the Model object.

Cleans up resources such as CUDA streams.


Member Function Documentation

◆ backward()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
virtual void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::backward ( )
inline virtual

Performs a backward pass through the model.

Exceptions
std::runtime_error if the model has not been built or if forward() was not called with targets.

◆ build()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::build ( )
inline

Builds the model.

Sets the training mode for all modules and performs any necessary graph validation or optimizations.

Exceptions
std::runtime_error if the model has already been built.

◆ calculateLoss()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
template<typename TMR >
float Mila::Dnn::Model< TDeviceType, TInput, TOutput >::calculateLoss ( const Tensor< TOutput, TMR > &  targets)
inline

Calculate the loss for the given targets and current model outputs.

Parameters
targets The target tensor.
Returns
The loss value.

◆ captureGraphBegin()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::captureGraphBegin ( )
inline

Start capturing operations for a CUDA graph.

This begins recording operations to a CUDA graph for later replay. Only applicable for CUDA devices.


◆ captureGraphEnd()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::captureGraphEnd ( )
inline

End capturing operations for a CUDA graph.

This finalizes the CUDA graph and prepares it for execution. Only applicable for CUDA devices.

◆ evaluate()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
template<typename TDataLoader >
float Mila::Dnn::Model< TDeviceType, TInput, TOutput >::evaluate ( TDataLoader & data_loader, bool verbose = false )
inline

Evaluate the model on a dataset.

Parameters
data_loader The data loader for evaluation data.
verbose Whether to print evaluation progress.
Returns
The average loss on the evaluation dataset.

◆ executeGraph()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::executeGraph ( )
inline

Execute the captured CUDA graph.

Replays the previously captured operations for fast execution. Only applicable for CUDA devices with a previously captured graph.
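
Together, captureGraphBegin(), captureGraphEnd(), and executeGraph() support a capture-once, replay-many pattern. A hedged sketch (that forward() is the work being captured, and the replay count, are assumptions):

    model.captureGraphBegin();         // start recording work on the model's stream
    model.forward( inputs, targets );  // recorded into the graph rather than executed eagerly
    model.captureGraphEnd();           // finalize and instantiate the executable graph

    for ( int i = 0; i < num_steps; ++i ) {
        model.executeGraph();          // replay with minimal kernel-launch overhead
    }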

◆ forward()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
float Mila::Dnn::Model< TDeviceType, TInput, TOutput >::forward ( const Tensor< TInput, MR > & inputs, const Tensor< TOutput, MR > & targets )
inline

Performs a forward pass through the model.

Parameters
inputs The input tensor.
targets Optional target tensor for loss calculation.
Exceptions
std::runtime_error if the model has not been built.
Returns
The loss value if targets are provided, otherwise -1.0.

◆ getDevice()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Compute::ComputeDevice & Mila::Dnn::Model< TDeviceType, TInput, TOutput >::getDevice ( ) const
inline

Gets the compute device for this model.

Returns
Reference to the model's compute device.

◆ getStream()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
cudaStream_t Mila::Dnn::Model< TDeviceType, TInput, TOutput >::getStream ( ) const
inline

Gets the current CUDA stream.

Returns
The CUDA stream used by this model.

◆ initializeDevice()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::initializeDevice ( )
inline private

Initializes device-specific resources.


◆ loadCheckpoint()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::loadCheckpoint ( const std::string &  filename)
inline

Loads the model's state from a checkpoint file.

Parameters
filename The path to the checkpoint file to load.

◆ parameters()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
size_t Mila::Dnn::Model< TDeviceType, TInput, TOutput >::parameters ( ) const
inline

Calculates the total number of parameters in the model.

Returns
The total number of parameters.

◆ predict()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
template<typename TMR >
Tensor< TOutput, TMR > Mila::Dnn::Model< TDeviceType, TInput, TOutput >::predict ( const Tensor< TInput, TMR > &  inputs)
inline

Predict outputs for the given inputs.

Parameters
inputs The input tensor.
Returns
The output tensor.
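
Because predict() is templated on TMR, the input tensor's memory resource may differ from the model's own MR (for example, host-resident inputs). An inference sketch; the setTrainingMode call and host-side batch are assumptions:

    model.setTrainingMode( false );          // inference mode
    Tensor<float, CpuMemoryResource> batch;  // host inputs (population omitted)
    Tensor<float, CpuMemoryResource> logits = model.predict( batch );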

◆ print()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::print ( ) const
inline

Prints the model's structure and total number of parameters.


◆ saveCheckpoint()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::saveCheckpoint ( const std::string &  filename) const
inline

Saves the model's state to a checkpoint file.

Parameters
filename The path where the checkpoint will be saved.
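
A checkpoint round-trip sketch; the file name is illustrative:

    model.saveCheckpoint( "gpt2_step1000.ckpt" );  // persist the model's state

    // ... later, on a model with the same architecture:
    model.loadCheckpoint( "gpt2_step1000.ckpt" );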

◆ setDevice() [1/2]

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::setDevice ( const std::string &  device_name)
inline

Sets the device to use for this model by name.

Parameters
device_name The name of the device to use (e.g., "CUDA:0", "CPU").

◆ setDevice() [2/2]

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::setDevice ( int  device_id)
inline

Sets the device to use for this model by CUDA device ID.

Parameters
device_id The ID of the CUDA device to use.
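The two setDevice() overloads are equivalent ways to select a device; the sketch below assumes a second CUDA device exists:

    model.setDevice( "CUDA:1" );  // by device name
    model.setDevice( 1 );         // by CUDA device ID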

◆ setTrainingMode()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::setTrainingMode ( bool  training)
inline

Sets the training mode for the model.

Parameters
training The training mode to set.

◆ train()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
template<typename TDataLoader >
std::unordered_map< std::string, float > Mila::Dnn::Model< TDeviceType, TInput, TOutput >::train ( TDataLoader & train_loader, TDataLoader * val_loader = nullptr, const TrainingConfig & config = {}, const std::vector< ModelCallback< TInput, TOutput > * > & callbacks = {} )
inline

Train the model using the provided data loader and configuration.

Parameters
train_loader The data loader for training data.
val_loader Optional data loader for validation data.
config Training configuration parameters.
callbacks Optional list of callbacks to be invoked during training.
Returns
A map of final training metrics.
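
A training-loop sketch. MyLoader, the default-constructed TrainingConfig, and the "loss" metric key are assumptions; only the train() and evaluate() signatures come from this reference:

    MyLoader train_loader( "train.bin" );  // any type usable as TDataLoader
    MyLoader val_loader( "val.bin" );

    TrainingConfig config{};               // documented as default-constructible
    auto metrics = model.train( train_loader, &val_loader, config );
    float final_loss = metrics.at( "loss" );  // key name is an assumption

    float val_loss = model.evaluate( val_loader, /*verbose=*/true );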

◆ updateParameters()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
virtual void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::updateParameters ( float learning_rate, float beta1 = 0.9f, float beta2 = 0.999f, float epsilon = 1e-8f, float weight_decay = 0.0f, size_t step = 1 )
inline virtual

Updates the model parameters using the computed gradients.

Parameters
learning_rate The learning rate for the update.
beta1 Beta1 parameter for the Adam optimizer.
beta2 Beta2 parameter for the Adam optimizer.
epsilon Epsilon parameter for the Adam optimizer.
weight_decay Weight decay parameter for regularization.
step Current optimization step for Adam.
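
The parameter names mirror the Adam optimizer. For reference, a per-parameter sketch of what such an update computes; whether Mila applies decoupled weight decay exactly as shown is an assumption:

    // m, v: first/second moment buffers; g: gradient; w: parameter (needs <cmath>)
    m = beta1 * m + ( 1.0f - beta1 ) * g;
    v = beta2 * v + ( 1.0f - beta2 ) * g * g;
    float m_hat = m / ( 1.0f - std::pow( beta1, float( step ) ) );  // bias correction
    float v_hat = v / ( 1.0f - std::pow( beta2, float( step ) ) );
    w -= learning_rate * ( m_hat / ( std::sqrt( v_hat ) + epsilon ) + weight_decay * w );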

◆ zeroGrads()

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
virtual void Mila::Dnn::Model< TDeviceType, TInput, TOutput >::zeroGrads ( )
inline virtual

Zeros out all gradients in the model.

Member Data Documentation

◆ cuda_graph_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
cudaGraph_t Mila::Dnn::Model< TDeviceType, TInput, TOutput >::cuda_graph_ { nullptr }
private

CUDA graph for optimized execution.

◆ cuda_graph_exec_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
cudaGraphExec_t Mila::Dnn::Model< TDeviceType, TInput, TOutput >::cuda_graph_exec_ { nullptr }
private

Executable instance of the CUDA graph.

◆ graph_capture_active_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
bool Mila::Dnn::Model< TDeviceType, TInput, TOutput >::graph_capture_active_ { false }
private

Flag indicating whether CUDA graph capture is active.

◆ graph_initialized_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
bool Mila::Dnn::Model< TDeviceType, TInput, TOutput >::graph_initialized_ { false }
private

Flag indicating whether CUDA graph has been initialized.

◆ is_built_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
bool Mila::Dnn::Model< TDeviceType, TInput, TOutput >::is_built_ { false }
private

Indicates whether the model has been built.

◆ is_training_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
bool Mila::Dnn::Model< TDeviceType, TInput, TOutput >::is_training_ { false }
private

Indicates whether the model is in training mode.

◆ last_inputs_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Tensor<TInput, MR> Mila::Dnn::Model< TDeviceType, TInput, TOutput >::last_inputs_
protected

The most recent input tensor provided to forward().

◆ last_targets_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Tensor<TOutput, MR> Mila::Dnn::Model< TDeviceType, TInput, TOutput >::last_targets_
protected

The most recent target tensor provided to forward().

◆ old_device_type_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
Compute::DeviceType Mila::Dnn::Model< TDeviceType, TInput, TOutput >::old_device_type_ { Compute::DeviceType::Cpu }
private

Previous device type for cleanup.

◆ stream_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
cudaStream_t Mila::Dnn::Model< TDeviceType, TInput, TOutput >::stream_ { nullptr }
private

The CUDA stream for device memory resource.

◆ stream_created_

template<DeviceType TDeviceType = DeviceType::Cuda, typename TInput = float, typename TOutput = TInput>
bool Mila::Dnn::Model< TDeviceType, TInput, TOutput >::stream_created_ { false }
private

Flag indicating whether we created the stream.

