|
using | CompositeModuleBase = CompositeModule< TDeviceType, TDataType > |
| Alias for base module type.
|
|
template<typename TDataType = float> |
using | Mila::Dnn::CpuTransformerBlock = TransformerBlock< DeviceType::Cpu, TDataType > |
| Type alias for a CPU-based transformer block with a customizable tensor element data type (TDataType, defaulting to float).
|
|
template<typename TDataType = float> |
using | Mila::Dnn::CudaTransformerBlock = TransformerBlock< DeviceType::Cuda, TDataType > |
| Type alias for a CUDA-based transformer block with a customizable tensor element data type (TDataType, defaulting to float).
|
|
using | MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource > |
| Memory resource type used for tensors, selected based on device type.
|
|
|
| TransformerBlock (const std::string &device_name, const TransformerBlockConfig &config) |
| Constructs a new TransformerBlock module with a device name.
|
|
| TransformerBlock (std::shared_ptr< DeviceContext > device_context, const TransformerBlockConfig &config) |
| Constructs a new TransformerBlock module with a provided device context.
|
|
void | forward (const Tensor< TDataType, MR > &input, Tensor< TDataType, MR > &output) |
| Performs the forward pass of the TransformerBlock.
|
|
void | initializeModules () |
| Initializes the sub-modules and output tensors for the transformer block.
|
|
void | load (ModelArchive &archive) override |
| Deserializes the module state from a ZIP archive.
|
|
size_t | parameterCount () const override |
| Gets the number of trainable parameters in this module.
|
|
void | save (ModelArchive &archive) const override |
| Serializes the module state to a ZIP archive.
|
|
std::string | toString () const override |
| Generates a string representation of this module's configuration.
|
|
|
std::shared_ptr< MultiHeadAttention< TDeviceType, TDataType > > | attn_block_ { nullptr } |
| Multi-head self-attention block including projections.
|
|
Tensor< TDataType, MR > | attn_output_ |
| Output tensor from attention block.
|
|
TransformerBlockConfig | config_ |
| Configuration for the TransformerBlock module.
|
|
std::shared_ptr< Dropout< TDeviceType, TDataType > > | dropout_ { nullptr } |
| Optional dropout module.
|
|
std::shared_ptr< LayerNorm< TDeviceType, TDataType > > | ln_1_ { nullptr } |
| First layer normalization module.
|
|
Tensor< TDataType, MR > | ln_1_output_ |
| Output tensor from first layer normalization.
|
|
std::shared_ptr< LayerNorm< TDeviceType, TDataType > > | ln_2_ { nullptr } |
| Second layer normalization module.
|
|
Tensor< TDataType, MR > | ln_2_output_ |
| Output tensor from second layer normalization.
|
|
std::shared_ptr< MLP< TDeviceType, TDataType > > | mlp_ { nullptr } |
| Feed-forward network (MLP).
|
|
Tensor< TDataType, MR > | mlp_output_ |
| Output tensor from MLP.
|
|
Tensor< TDataType, MR > | res_1_output_ |
| Output tensor from first residual connection.
|
|
Tensor< TDataType, MR > | res_2_output_ |
| Output tensor from second residual connection.
|
|