Mila
Deep Neural Network Library

A class representing a neural network model. More...
Public Types | |
using | ModuleBase = Module< TDeviceType, TInput, TOutput > |
Base class type for the module. | |
using | MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource > |
Memory resource type. | |
Public Types inherited from Module< TDeviceType, TInput, TOutput > | |
using | MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource > |
Public Member Functions | |
Model () | |
Constructs a new Model object with the default device context. | |
Model (const std::string &device_name) | |
Constructs a new Model object with a specific device name. | |
Model (std::shared_ptr< DeviceContext > context) | |
Constructs a new Model object with a specific device context. | |
~Model () | |
Destroys the Model object. | |
virtual void | backward () |
Performs a backward pass through the model. | |
void | build () |
Builds the model. | |
template<typename TMR > | |
float | calculateLoss (const Tensor< TOutput, TMR > &targets) |
Calculate the loss for the given targets and current model outputs. | |
void | captureGraphBegin () |
Start capturing operations for a CUDA graph. | |
void | captureGraphEnd () |
End capturing operations for a CUDA graph. | |
template<typename TDataLoader > | |
float | evaluate (TDataLoader &data_loader, bool verbose=false) |
Evaluate the model on a dataset. | |
void | executeGraph () |
Execute the captured CUDA graph. | |
float | forward (const Tensor< TInput, MR > &inputs, const Tensor< TOutput, MR > &targets) |
Performs a forward pass through the model. | |
Compute::ComputeDevice & | getDevice () const |
Gets the compute device for this model. | |
cudaStream_t | getStream () const |
Gets the current CUDA stream. | |
void | loadCheckpoint (const std::string &filename) |
Loads the model's state from a checkpoint file. | |
size_t | parameters () const |
Calculates the total number of parameters in the model. | |
template<typename TMR > | |
Tensor< TOutput, TMR > | predict (const Tensor< TInput, TMR > &inputs) |
Predict outputs for the given inputs. | |
void | print () const |
Prints the model's structure and total number of parameters. | |
void | saveCheckpoint (const std::string &filename) const |
Saves the model's state to a checkpoint file. | |
void | setDevice (const std::string &device_name) |
Sets the device to use for this model by name. | |
void | setDevice (int device_id) |
Sets the device to use for this model by CUDA device ID. | |
void | setTrainingMode (bool training) |
Sets the training mode for the model. | |
template<typename TDataLoader > | |
std::unordered_map< std::string, float > | train (TDataLoader &train_loader, TDataLoader *val_loader=nullptr, const TrainingConfig &config={}, const std::vector< ModelCallback< TInput, TOutput > * > &callbacks={}) |
Train the model using the provided data loader and configuration. | |
virtual void | updateParameters (float learning_rate, float beta1=0.9f, float beta2=0.999f, float epsilon=1e-8f, float weight_decay=0.0f, size_t step=1) |
Updates the model parameters using the computed gradients. | |
virtual void | zeroGrads () |
Zeros out all gradients in the model. | |
Public Member Functions inherited from Module< TDeviceType, TInput, TOutput > | |
Module (const std::string &device_name, const ComponentConfig &config) | |
Constructor with device name. | |
Module (std::shared_ptr< DeviceContext > context, const ComponentConfig &config) | |
Constructor with a specific device context. | |
virtual | ~Module ()=default |
Virtual destructor for proper cleanup in derived classes. | |
std::shared_ptr< Compute::DeviceContext > | getDeviceContext () const |
Get the device context for this module. | |
Compute::DeviceType | getDeviceType () const |
Get the device type of the current device context. | |
std::string | getName () const |
Get the name of the module. | |
const auto & | getParameterTensors () const |
Get the parameter tensors of this module. | |
const ComputePrecision::Policy & | getPrecision () const |
Get the compute precision policy of this module. | |
const auto & | getStateTensors () const |
Get the state tensors of this module. | |
bool | isTraining () const |
Check if the module is in training mode. | |
virtual void | load (ModelArchive &archive)=0 |
Load the module state from a zip archive. | |
virtual size_t | parameterCount () const =0 |
Get the number of trainable parameters in the module. | |
virtual void | save (ModelArchive &archive) const =0 |
Save the module state to a zip archive. | |
virtual void | setTraining (bool is_training) |
Set the training mode of this module. | |
virtual std::string | toString () const =0 |
Convert the module to a string representation. | |
Protected Attributes | |
Tensor< TInput, MR > | last_inputs_ |
The most recent input tensor provided to forward(). | |
Tensor< TOutput, MR > | last_targets_ |
The most recent target tensor provided to forward(). | |
Protected Attributes inherited from Module< TDeviceType, TInput, TOutput > | |
std::unordered_map< std::string, std::shared_ptr< Tensor< TOutput, MR > > > | parameter_map_ = {} |
Map of parameter names to parameter tensors. | |
std::unordered_map< std::string, std::shared_ptr< Tensor< TOutput, MR > > > | state_map_ = {} |
Map of state names to state tensors. | |
Private Member Functions | |
void | initializeDevice () |
Initializes device-specific resources. | |
Private Attributes | |
cudaGraph_t | cuda_graph_ { nullptr } |
CUDA graph for optimized execution. | |
cudaGraphExec_t | cuda_graph_exec_ { nullptr } |
Executable instance of the CUDA graph. | |
bool | graph_capture_active_ { false } |
Flag indicating whether CUDA graph capture is active. | |
bool | graph_initialized_ { false } |
Flag indicating whether CUDA graph has been initialized. | |
bool | is_built_ { false } |
Indicates whether the model has been built. | |
bool | is_training_ { false } |
Indicates whether the model is in training mode. | |
Compute::DeviceType | old_device_type_ { Compute::DeviceType::Cpu } |
Previous device type for cleanup. | |
cudaStream_t | stream_ { nullptr } |
The CUDA stream for device memory resource. | |
bool | stream_created_ { false } |
Flag indicating whether we created the stream. | |
Additional Inherited Members | |
Protected Member Functions inherited from Module< TDeviceType, TInput, TOutput > | |
const std::string | parametersToString () const |
Helper method to convert parameters to string representation. | |
const std::string | stateToString () const |
Helper method to convert state tensors to string representation. | |
A class representing a neural network model.
Template Parameters
TDeviceType | The device type (CPU or CUDA) on which to perform computations. |
TInput | The input data type for the model. |
TOutput | The output data type for the model, defaults to TInput. |
using Mila::Dnn::Model< TDeviceType, TInput, TOutput >::ModuleBase = Module<TDeviceType, TInput, TOutput> |
Base class type for the module.
using Mila::Dnn::Model< TDeviceType, TInput, TOutput >::MR = std::conditional_t<TDeviceType == DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource> |
Memory resource type.
|
inline |
Constructs a new Model object with the default device context.
|
inline |
Constructs a new Model object with a specific device context.
context | The device context to use for this model. |
|
inline |
Constructs a new Model object with a specific device name.
device_name | The name of the device to use (e.g., "CUDA:0", "CPU"). |
|
inline |
Destroys the Model object.
Cleans up resources such as CUDA streams.
|
inlinevirtual |
Performs a backward pass through the model.
std::runtime_error | if the model has not been built or if forward was not called with targets. |
|
inline |
Builds the model.
Sets the training mode for all modules and performs any necessary graph validation or optimizations.
std::runtime_error | if the model has already been built. |
|
inline |
Calculate the loss for the given targets and current model outputs.
targets | The target tensor. |
|
inline |
Start capturing operations for a CUDA graph.
This begins recording operations to a CUDA graph for later replay. Only applicable for CUDA devices.
|
inline |
End capturing operations for a CUDA graph.
This finalizes the CUDA graph and prepares it for execution. Only applicable for CUDA devices.
|
inline |
Evaluate the model on a dataset.
data_loader | The data loader for evaluation data. |
verbose | Whether to print evaluation progress. |
|
inline |
Execute the captured CUDA graph.
Replays the previously captured operations for fast execution. Only applicable for CUDA devices with a previously captured graph.
|
inline |
Performs a forward pass through the model.
inputs | The input tensor. |
targets | Optional target tensor for loss calculation. |
std::runtime_error | if the model has not been built. |
|
inline |
Gets the compute device for this model.
|
inline |
Gets the current CUDA stream.
|
inlineprivate |
Initializes device-specific resources.
|
inline |
Loads the model's state from a checkpoint file.
filename | The path to the checkpoint file to load. |
|
inline |
Calculates the total number of parameters in the model.
|
inline |
Predict outputs for the given inputs.
inputs | The input tensor. |
|
inline |
Prints the model's structure and total number of parameters.
|
inline |
Saves the model's state to a checkpoint file.
filename | The path where the checkpoint will be saved. |
|
inline |
Sets the device to use for this model by name.
device_name | The name of the device to use (e.g., "CUDA:0", "CPU"). |
|
inline |
Sets the device to use for this model by CUDA device ID.
device_id | The ID of the CUDA device to use. |
|
inline |
Sets the training mode for the model.
training | The training mode to set. |
|
inline |
Train the model using the provided data loader and configuration.
train_loader | The data loader for training data. |
val_loader | Optional data loader for validation data. |
config | Training configuration parameters. |
callbacks | Optional list of callbacks to be invoked during training. |
|
inlinevirtual |
Updates the model parameters using the computed gradients.
learning_rate | The learning rate for the update. |
beta1 | Beta1 parameter for Adam optimizer. |
beta2 | Beta2 parameter for Adam optimizer. |
epsilon | Epsilon parameter for Adam optimizer. |
weight_decay | Weight decay parameter for regularization. |
step | Current optimization step for Adam. |
|
inlinevirtual |
Zeros out all gradients in the model.
|
private |
CUDA graph for optimized execution.
|
private |
Executable instance of the CUDA graph.
|
private |
Flag indicating whether CUDA graph capture is active.
|
private |
Flag indicating whether CUDA graph has been initialized.
|
private |
Indicates whether the model has been built.
|
private |
Indicates whether the model is in training mode.
|
protected |
The most recent input tensor provided to forward().
|
protected |
The most recent target tensor provided to forward().
|
private |
Previous device type for cleanup.
|
private |
The CUDA stream for device memory resource.
|
private |
Flag indicating whether we created the stream.