|
template<typename TDataType = float> |
using | Mila::Dnn::CpuGelu = Gelu< DeviceType::Cpu, TDataType > |
| Type alias for CPU-specific GELU module.
|
|
template<typename TDataType = float> |
using | Mila::Dnn::CudaGelu = Gelu< DeviceType::Cuda, TDataType > |
| Type alias for CUDA-specific GELU module.
|
|
using | ModuleBase = Module< TDeviceType, TDataType, TDataType > |
| Alias for base module type.
|
|
using | MR = std::conditional_t< TDeviceType==DeviceType::Cuda, CudaMemoryResource, CpuMemoryResource > |
| Memory resource type selected by device type: CudaMemoryResource for CUDA, CpuMemoryResource otherwise.
|
|
|
| Gelu (const std::string &device_name, const GeluConfig &config) |
| Constructs a Gelu module from a device name and a configuration.
|
|
| Gelu (std::shared_ptr< DeviceContext > device_context, const GeluConfig &config) |
| Constructs a Gelu module with an existing device context and configuration.
|
|
static std::string | approximationMethodToString (GeluConfig::ApproximationMethod method) |
| Converts approximation method enum to human-readable string.
|
|
void | backward (const Tensor< TDataType, MR > &input, const Tensor< TDataType, MR > &output_grad, Tensor< TDataType, MR > &input_grad) |
| Performs backward propagation, computing gradients for GELU activation.
|
|
void | createOperation () |
| Initializes the appropriate GELU operation implementation.
|
|
void | forward (const Tensor< TDataType, MR > &input, Tensor< TDataType, MR > &output) |
| Performs forward propagation through the GELU activation function.
|
|
GeluConfig::ApproximationMethod | getApproximationMethod () const |
| Returns the current approximation method used by this GELU instance.
|
|
void | load (ModelArchive &archive) override |
| Deserializes module state from a ZIP archive.
|
|
size_t | parameterCount () const override |
| Returns the number of trainable parameters in this module.
|
|
void | save (ModelArchive &zip) const override |
| Serializes module state to a ZIP archive.
|
|
std::string | toString () const override |
| Generates a string representation of this module's configuration.
|
|
|
GeluConfig | config_ |
| Configuration for the GELU module.
|
|
std::shared_ptr< UnaryOperation< TDeviceType, TDataType, TDataType > > | operation_ { nullptr } |
| The underlying computational operation that implements GELU.
|
|
std::vector< std::shared_ptr< Tensor< TDataType, MR > > > | output_state_ |
| Output state cache for backward propagation.
|
|
std::vector< std::shared_ptr< Tensor< TDataType, MR > > > | parameters_ |
| Parameter tensors for the operation.
|
|
OperationAttributes | properties_ |
| Additional attributes for operation customization.
|
|