Mila
Deep Neural Network Library
Loading...
Searching...
No Matches
Dnn.Modules.Gelu Module Reference

Exported Modules

module  Compute.MemoryResource
 
module  Compute.OperationBase
 
module  Compute.CpuMemoryResource
 
module  Dnn.Module
 
module  Serialization.ModelArchive
 
module  Compute.ComputeDevice
 
module  Compute.CudaMemoryResource
 
module  Compute.DeviceContext
 
module  Compute.OperationAttributes
 
module  Compute.DeviceType
 
module  Dnn.TensorTraits
 
module  Compute.Precision
 
module  Compute.OperationRegistry
 
module  Compute.UnaryOperation
 
module  Dnn.Tensor
 

Classes

class  Mila::Dnn::Gelu< TDeviceType, TDataType >
 Gaussian Error Linear Unit (GELU) activation function module. More...
 
class  Mila::Dnn::GeluConfig
 Configuration class for GELU module. More...
 

Typedefs

template<typename TDataType = float>
using Mila::Dnn::CpuGelu = Gelu< DeviceType::Cpu, TDataType >
 Type alias for CPU-specific GELU module.
 
template<typename TDataType = float>
using Mila::Dnn::CudaGelu = Gelu< DeviceType::Cuda, TDataType >
 Type alias for CUDA-specific GELU module.
 
using ModuleBase = Module< TDeviceType, TDataType, TDataType >
 Alias for base module type.
 
using MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource >
 Memory resource type selected by device type: CudaMemoryResource when TDeviceType is DeviceType::Cuda, CpuMemoryResource otherwise.
 

Functions

 Gelu (const std::string &device_name, const GeluConfig &config)
 Constructs a Gelu module using device name and configuration.
 
 Gelu (std::shared_ptr< DeviceContext > device_context, const GeluConfig &config)
 Constructs a Gelu module with an existing device context and configuration.
 
static std::string approximationMethodToString (GeluConfig::ApproximationMethod method)
 Converts approximation method enum to human-readable string.
 
void backward (const Tensor< TDataType, MR > &input, const Tensor< TDataType, MR > &output_grad, Tensor< TDataType, MR > &input_grad)
 Performs backward propagation, computing gradients for GELU activation.
 
void createOperation ()
 Initializes the appropriate GELU operation implementation.
 
void forward (const Tensor< TDataType, MR > &input, Tensor< TDataType, MR > &output)
 Performs forward propagation through the GELU activation function.
 
GeluConfig::ApproximationMethod getApproximationMethod () const
 Returns the current approximation method used by this GELU instance.
 
void load (ModelArchive &archive) override
 Deserializes module state from a ZIP archive.
 
size_t parameterCount () const override
 Returns the number of trainable parameters in this module.
 
void save (ModelArchive &zip) const override
 Serializes module state to a ZIP archive.
 
std::string toString () const override
 Generates a string representation of this module's configuration.
 

Variables

GeluConfig config_
 Configuration for the GELU module.
 
std::shared_ptr< UnaryOperation< TDeviceType, TDataType, TDataType > > operation_ { nullptr }
 The underlying computational operation that implements GELU.
 
std::vector< std::shared_ptr< Tensor< TDataType, MR > > > output_state_
 Output state cache for backward propagation.
 
std::vector< std::shared_ptr< Tensor< TDataType, MR > > > parameters_
 Parameter tensors for the operation.
 
OperationAttributes properties_
 Additional attributes for operation customization.
 

Files

file  /home/runner/work/Mila/Mila/Mila/Src/Dnn/Modules/Activations/Gelu.ixx
 Implementation of the Gaussian Error Linear Unit (GELU) activation function.
 
file  /home/runner/work/Mila/Mila/Mila/Src/Dnn/Modules/Activations/GeluConfig.ixx
 Configuration interface for the GELU activation module in the Mila DNN framework.