|
using | CompositeModuleBase = CompositeModule< TDeviceType, TDataType > |
| Alias for the base module type, CompositeModule instantiated with this module's device type and data type.
|
|
template<typename TDataType = float> |
using | Mila::Dnn::CpuMLP = MLP< DeviceType::Cpu, TDataType > |
| Type alias for a CPU-based MLP module with a customizable tensor data type (defaults to float).
|
|
template<typename TDataType = float> |
using | Mila::Dnn::CudaMLP = MLP< DeviceType::Cuda, TDataType > |
| Type alias for a CUDA-based MLP module with a customizable tensor data type (defaults to float).
|
|
using | MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource > |
| Memory resource type used for tensors: CudaMemoryResource when TDeviceType is DeviceType::Cuda, otherwise CpuMemoryResource.
|
|
|
| MLP (const std::string &device_name, const MLPConfig &config) |
| Constructs a new MLP module with a device name.
|
|
| MLP (std::shared_ptr< DeviceContext > device_context, const MLPConfig &config) |
| Constructs a new MLP module with a provided device context.
|
|
void | backward (const Tensor< TDataType, MR > &input, const Tensor< TDataType, MR > &output_grad, Tensor< TDataType, MR > &input_grad) |
| Performs the backward pass of the MLP block.
|
|
void | forward (const Tensor< TDataType, MR > &input, Tensor< TDataType, MR > &output) |
| Performs the forward pass of the MLP block.
|
|
void | initializeModules () |
| Initializes all submodules for the MLP.
|
|
void | load (ModelArchive &archive) override |
| Deserializes the module state from a ZIP archive.
|
|
size_t | parameterCount () const override |
| Gets the number of trainable parameters in this module.
|
|
void | save (ModelArchive &archive) const override |
| Serializes the module state to a ZIP archive.
|
|
std::string | toString () const override |
| Generates a string representation of this module's configuration.
|
|
|
Tensor< TDataType, MR > | act_output_ |
| Output tensor from activation function.
|
|
std::shared_ptr< Module< TDeviceType, TDataType > > | activation_ { nullptr } |
| Activation function module.
|
|
MLPConfig | config_ |
| Configuration for the MLP module.
|
|
std::shared_ptr< Dropout< TDeviceType, TDataType > > | dropout1_ { nullptr } |
| Optional dropout module.
|
|
Tensor< TDataType, MR > | dropout1_output_ |
| Output tensor from dropout.
|
|
std::shared_ptr< Linear< TDeviceType, TDataType > > | fc1_ { nullptr } |
| First linear layer (input_features -> hidden_size).
|
|
Tensor< TDataType, MR > | fc1_output_ |
| Output tensor from first linear layer.
|
|
std::shared_ptr< Linear< TDeviceType, TDataType > > | fc2_ { nullptr } |
| Second linear layer (hidden_size -> input_features).
|
|
Tensor< TDataType, MR > | fc2_output_ |
| Output tensor from second linear layer.
|
|
std::shared_ptr< LayerNorm< TDeviceType, TDataType > > | norm1_ { nullptr } |
| Optional layer normalization module.
|
|
Tensor< TDataType, MR > | norm1_output_ |
| Output tensor from layer normalization.
|
|
Tensor< TDataType, MR > | residual_input_ |
| Cached input tensor for residual connection.
|
|