Mila/_2__w_2Mila_2Mila_2Mila_2Src_2Dnn_2Components_2Activations_2Gelu_2Gelu_8ixx-example.html

module;

#include <memory>

#include <vector>

#include <string>

#include <sstream>

#include <type_traits>

#include <stdexcept>

#include <format>

#include <utility>

#include <optional>

#include <algorithm>


export module Dnn.Components.Gelu;

export import Dnn.Components.GeluConfig;


import Dnn.Component;

import Dnn.ComponentType;

import Dnn.Tensor;

import Dnn.ITensor;

import Dnn.TensorDataType;

import Dnn.TensorDataTypeTraits;

import Dnn.TensorTypes;

import Dnn.ApproximationMethod;

import Compute.Device;

import Compute.DeviceId;

import Compute.DeviceType;

import Compute.DeviceTypeTraits;

import Compute.IExecutionContext;

import Compute.ExecutionContextFactory;

import Compute.UnaryOperation;

import Compute.OperationTraits;

import Compute.CpuMemoryResource;

import Serialization.ModelArchive;

import Serialization.Tensor;

import Serialization.Mode;

import Serialization.Metadata;

import Logging.Logger;


namespace Mila::Dnn

{

    using namespace Mila::Dnn::Compute;

    using namespace Mila::Dnn::Serialization;


    export template<DeviceType TDeviceType, TensorDataType TPrecision>

        requires PrecisionSupportedOnDevice<TPrecision, TDeviceType>

    class Gelu : public Component<TDeviceType, TPrecision>

    {

    public:

        // Memory resource type selected for TDeviceType through DeviceTypeTraits.
        using MR = typename DeviceTypeTraits<TDeviceType>::memory_resource;

        // Tensor type used by this component for inputs, outputs and gradients.
        using TensorType = Tensor<TPrecision, MR>;

        // Shorthand for the Component base class instantiation this class derives from.
        using ComponentBase = Component<TDeviceType, TPrecision>;


        explicit Gelu( const std::string& name, const GeluConfig& config, std::optional<DeviceId> device_id = std::nullopt )

            : ComponentBase( name ), config_( config )

        {

            config_.validate();


            if ( device_id.has_value() )

            {

                if ( device_id->type != TDeviceType )

                {

                    throw std::invalid_argument( "Gelu: device type mismatch" );

                }


                owned_exec_context_ = createExecutionContext( device_id.value() );


                this->setExecutionContext( owned_exec_context_.get() );

            }

        }


        // Defaulted destructor; the smart-pointer and optional members clean up after themselves.
        ~Gelu() override = default;


        // ====================================================================

        // Computation Dispatch

        // ====================================================================


        /**
         * @brief Run the forward operation on @p input.
         *
         * The result is written into a view over the component-owned output buffer.
         * The view is re-created whenever the input shape differs from the shape of
         * the current view.
         *
         * @param input Input tensor.
         * @return Reference to the component-owned output view (not a copy).
         * @throws std::runtime_error if the component has not been built.
         */
        TensorType& forward( const TensorType& input )
        {
            if ( !this->isBuilt() )
            {
                throw std::runtime_error( "Gelu::forward: component must be built before forward pass" );
            }

            const auto& requested_shape = input.shape();

            // Only re-slice the owned buffer when the incoming shape actually changed.
            if ( output_view_->shape() != requested_shape )
            {
                output_view_.emplace( output_->view( requested_shape ) );
            }

            TensorType& result = *output_view_;
            operation_->forward( input, result );

            return result;
        }


        /**
         * @brief Run the backward operation and return the input gradient.
         *
         * @param input       The tensor that was passed to forward().
         * @param output_grad Gradient with respect to the forward output.
         * @return Reference to the component-owned input-gradient tensor (not a copy).
         *
         * @throws std::runtime_error if the component has not been built.
         * @throws std::runtime_error if the input-gradient buffer was never allocated.
         *         onBuilding() allocates it only when the build context reports training
         *         mode, so a component built otherwise cannot run backward().
         */
        TensorType& backward( const TensorType& input, const TensorType& output_grad )
        {
            if ( !this->isBuilt() )
            {
                throw std::runtime_error( "Gelu::backward: component must be built before backward pass" );
            }

            // Guard against dereferencing a null buffer: input_grad_ only exists when the
            // component was built with a training-mode build context.
            if ( !input_grad_ )
            {
                throw std::runtime_error(
                    "Gelu::backward: input gradient buffer is not allocated; component must be built in training mode" );
            }

            // REVIEW
            /*if ( !this->isTraining() )
            {
                throw std::runtime_error( "Gelu::backward: component must be in training mode to compute gradients" );
            }*/

            // Zero the input gradient buffer before the backward pass. No exceptions.
            // Backend ops use accumulation (atomicAdd/+=), which requires pre-zeroed buffers
            // to prevent gradient buildup across calls. Without this, gradients grow linearly
            // with each call -> explosion.
            zero( *input_grad_ );

            operation_->backward( input, output_grad, *input_grad_ );

            return *input_grad_;
        }


        /**
         * @brief Synchronize the execution context currently attached to this component.
         *
         * Forwards to synchronize() on the context returned by getExecutionContext().
         */
        void synchronize() override
        {
            auto context = this->getExecutionContext();
            context->synchronize();
        }


        /**
         * @brief Get the approximation method stored in this component's configuration.
         * @return The value reported by GeluConfig::getApproximationMethod().
         */
        ApproximationMethod getApproximationMethod() const
        {
            const ApproximationMethod method = config_.getApproximationMethod();
            return method;
        }


        // ====================================================================

        // Serialization

        // ====================================================================


        /**
         * @brief Write this component's metadata and configuration into @p archive.
         *
         * Two entries are written, in this order:
         *  - "meta.json":   type, version, name, template device and template precision.
         *  - "config.json": the approximation method as an integer.
         *
         * @param archive Destination archive.
         * @param mode    Serialization mode; not used by this component.
         */
        void save_( ModelArchive& archive, SerializationMode mode ) const override
        {
            static_cast<void>( mode );

            // Identity / compatibility information checked again by validateMetadata_().
            SerializationMetadata meta;
            meta.set( "type", "Gelu" );
            meta.set( "version", static_cast<int64_t>( 1 ) );
            meta.set( "name", this->getName() );
            meta.set( "template_device", deviceTypeToString( TDeviceType ) );
            meta.set( "template_precision", static_cast<int64_t>( TPrecision ) );
            archive.writeMetadata( "meta.json", meta );

            // GELU has a single configurable value: the approximation method.
            const auto method_code = static_cast<int64_t>( config_.getApproximationMethod() );

            SerializationMetadata cfg;
            cfg.set( "approximation_method", method_code );
            archive.writeMetadata( "config.json", cfg );
        }


        /**
         * @brief Reconstruct a Gelu component from @p archive.
         *
         * Reads "meta.json", validates it against this template instantiation, then
         * reads "config.json" to rebuild the GeluConfig.
         *
         * @param archive        Archive to read from.
         * @param component_name Name given to the reconstructed component.
         * @param exec_context   Currently unused by this function.
         * @return Newly constructed component (constructed without a device id).
         *
         * @throws std::runtime_error wrapping any std::exception raised while reading,
         *         validating or constructing; the message includes @p component_name.
         */
        static std::unique_ptr<Gelu> fromArchive_(
            ModelArchive& archive,
            const std::string& component_name,
            IExecutionContext* exec_context )
        {
            // NOTE(review): exec_context is accepted but never attached to the new
            // component; confirm whether the caller is expected to set it afterwards.
            static_cast<void>( exec_context );

            try
            {
                const SerializationMetadata meta = archive.readMetadata( "meta.json" );
                validateMetadata_( meta, component_name );

                const SerializationMetadata cfg = archive.readMetadata( "config.json" );

                GeluConfig config;
                const int64_t method_code = cfg.getInt( "approximation_method" );
                config.withApproximationMethod( static_cast<ApproximationMethod>( method_code ) );
                config.validate();

                return std::make_unique<Gelu>( component_name, config );
            }
            catch ( const std::exception& e )
            {
                // Re-raise with the component name so the failing component is identifiable.
                const std::string message = std::format(
                    "Gelu::fromArchive: error for '{}': {}", component_name, e.what() );

                throw std::runtime_error( message );
            }
        }


        // ====================================================================

        // Parameters and Gradients

        // ====================================================================


        /**
         * @brief Number of parameters reported by this component.
         * @return Always 0.
         */
        size_t parameterCount() const override
        {
            return size_t{ 0 };
        }


        /**
         * @brief Parameter tensors of this component.
         * @return Always an empty vector.
         */
        std::vector<ITensor*> getParameters() const override
        {
            return std::vector<ITensor*>{};
        }


        /**
         * @brief Parameter-gradient tensors of this component.
         * @return Always an empty vector.
         */
        std::vector<ITensor*> getGradients() const override
        {
            return std::vector<ITensor*>{};
        }


        // ====================================================================

        // Identification and Description

        // ====================================================================


        /**
         * @brief Identify this component's type.
         * @return ComponentType::Gelu.
         */
        const ComponentType getType() const override
        {
            constexpr ComponentType component_kind = ComponentType::Gelu;
            return component_kind;
        }


        // ====================================================================

        // State and Configuration

        // ====================================================================


        /**
         * @brief Device identifier of the attached execution context.
         * @return The value reported by the execution context's getDeviceId().
         */
        DeviceId getDeviceId() const override
        {
            const auto context = this->getExecutionContext();
            return context->getDeviceId();
        }


        /**
         * @brief Report the storage held by this component's owned buffers.
         *
         * The output buffer's storage size is added to device_state_bytes and the
         * input-gradient buffer's storage size to device_gradient_bytes. Buffers that
         * have not been allocated contribute nothing.
         *
         * @return Memory statistics for this component.
         */
        MemoryStats getMemoryStats() const override
        {
            // Value-initialize so the counters accumulated below start from a defined
            // value even if MemoryStats declares no default member initializers.
            MemoryStats stats{};

            if ( output_ )
            {
                stats.device_state_bytes += output_->getStorageSize();
            }

            if ( input_grad_ )
            {
                stats.device_gradient_bytes += input_grad_->getStorageSize();
            }

            return stats;
        }


        /**
         * @brief Build a short, multi-line description of this component.
         *
         * The text contains a separator line, the component name and the device type,
         * each terminated by a newline.
         *
         * @return The formatted description.
         */
        std::string toString() const override
        {
            // FIXME: also report "Approximation Method: " using config_.getApproximationMethod().
            return std::format(
                "--------------------\nGelu: {}\nDevice: {}\n",
                this->getName(),
                deviceTypeToString( this->getDeviceType() ) );
        }


    protected:


        void onExecutionContextSet() override

        {

            createOperation();

        }


        void onBuilding( const BuildContext& build_context ) override

        {

            operation_->build( build_context );

            const auto& input_shape = build_context.inputShape();


            // Allocate owned output and input-gradient tensors with device binding.

            // Buffers are owned by this component and reused across calls.

            DeviceId dev_id = this->getExecutionContext()->getDeviceId();


            output_ = std::make_unique<TensorType>( dev_id, input_shape, this->getName() + ".output" );

            output_view_.emplace( output_->view( input_shape ) );


            if ( build_context.isTrainingMode() )

            {

                input_grad_ = std::make_unique<TensorType>( dev_id, input_shape, this->getName() + ".input_grad" );

                zero( *input_grad_ );

            }

        }


        /**
         * @brief Hook invoked when the training mode is changing.
         *
         * Forwards the new mode to the backend operation.
         *
         * @param training_mode The mode being switched to.
         */
        void onTrainingModeChanging( TrainingMode training_mode ) override
        {
            auto& backend_op = *operation_;
            backend_op.setTrainingMode( training_mode );
        }


    private:


        // Backend operation type selected at compile time for GeluOp on this device/precision.
        using OpType = typename OperationTraits<OperationType::GeluOp, TDeviceType, TPrecision>::type;


        // Copy of the configuration passed to the constructor.
        GeluConfig config_;


        // Execution context created by the constructor when a device_id is supplied; null otherwise.
        std::unique_ptr<IExecutionContext> owned_exec_context_{ nullptr };

        // Backend operation; created in createOperation().
        std::shared_ptr<OpType> operation_{ nullptr };


        // Owned output buffer; allocated in onBuilding().
        std::unique_ptr<TensorType> output_{ nullptr };

        // View over output_; re-created in forward() when the input shape changes.
        std::optional<TensorType> output_view_;

        // Owned input-gradient buffer; allocated in onBuilding() only for training-mode builds.
        std::unique_ptr<TensorType> input_grad_{ nullptr };


        /**
         * @brief Check archive metadata against this Gelu instantiation.
         *
         * Checks are performed in this order: version (must be 1), type (must be
         * "Gelu"), template device (must equal deviceTypeToString( TDeviceType )) and
         * template precision (must equal TPrecision as an integer). Missing keys fall
         * back to 0, "", "" and -1 respectively, which fail the corresponding check.
         *
         * @param meta           Metadata read from the archive.
         * @param component_name Component name used in error messages.
         * @throws std::runtime_error on the first failed check.
         */
        static void validateMetadata_( const SerializationMetadata& meta, const std::string& component_name )
        {
            const int64_t version = meta.tryGetInt( "version" ).value_or( 0 );
            if ( version != 1 )
            {
                throw std::runtime_error( std::format(
                    "Gelu: unsupported version {} for '{}'", version, component_name ) );
            }

            const std::string type = meta.tryGetString( "type" ).value_or( "" );
            if ( type != "Gelu" )
            {
                throw std::runtime_error( std::format(
                    "Gelu: type mismatch for '{}': expected 'Gelu', got '{}'", component_name, type ) );
            }

            // Device the archive was saved for versus the device of this instantiation.
            const std::string file_device = meta.tryGetString( "template_device" ).value_or( "" );
            const std::string expected_device = deviceTypeToString( TDeviceType );
            if ( file_device != expected_device )
            {
                throw std::runtime_error( std::format(
                    "Gelu: device mismatch for '{}': archive='{}', expected='{}'",
                    component_name, file_device, expected_device ) );
            }

            // Precision is stored as the integer value of the TensorDataType enumerator.
            const int64_t file_precision = meta.tryGetInt( "template_precision" ).value_or( -1 );
            const int64_t expected_precision = static_cast<int64_t>( TPrecision );
            if ( file_precision != expected_precision )
            {
                throw std::runtime_error( std::format(
                    "Gelu: precision mismatch for '{}': archive={}, expected={}",
                    component_name, file_precision, expected_precision ) );
            }
        }


        /**
         * @brief Create the backend operation for the current execution context.
         *
         * Constructs an OpType from the component's execution context and configuration
         * and stores it in operation_, replacing any previous operation.
         *
         * std::make_shared never returns a null pointer: allocation failure raises
         * std::bad_alloc and constructor failures propagate as exceptions, so no
         * post-construction null check is needed.
         */
        void createOperation()
        {
            operation_ = std::make_shared<OpType>( this->getExecutionContext(), config_ );
        }

    };

}

Mila::Dnn::BuildContext
Build-time context for Component::build().
Definition Component.BuildContext.ixx:56

Mila::Dnn::BuildContext::isTrainingMode
bool isTrainingMode() const noexcept
True if output buffers should be allocated at full input shape sequence length with gradient buffers.
Definition Component.BuildContext.ixx:202

Mila::Dnn::BuildContext::inputShape
const shape_t & inputShape() const noexcept
The full input shape this component receives.
Definition Component.BuildContext.ixx:113

Mila::Dnn::Component
Abstract base class for neural network components.
Definition Component.ixx:155

Mila::Dnn::Compute::IExecutionContext
Type-erased execution context interface.
Definition IExecutionContext.ixx:24

Mila::Dnn::Compute::IExecutionContext::synchronize
virtual void synchronize()=0
Synchronize all pending operations.

Mila::Dnn::Compute::IExecutionContext::getDeviceId
virtual DeviceId getDeviceId() const noexcept=0
Get the device identifier.

Mila::Dnn::GeluConfig
Configuration class for GELU module.
Definition Gelu.Config.ixx:31

Mila::Dnn::GeluConfig::validate
void validate() const override
Validate configuration parameters.
Definition Gelu.Config.ixx:62

Mila::Dnn::GeluConfig::withApproximationMethod
Self && withApproximationMethod(this Self &&self, ApproximationMethod method)
Configure the approximation method for GELU computation.
Definition Gelu.Config.ixx:44

Mila::Dnn::GeluConfig::getApproximationMethod
ApproximationMethod getApproximationMethod() const
Get the configured approximation method.
Definition Gelu.Config.ixx:55

Mila::Dnn::Serialization::ModelArchive
ModelArchive provides high-level helpers for component serialization.
Definition ModelArchive.ixx:47

Mila::Dnn::Serialization::ModelArchive::readMetadata
SerializationMetadata readMetadata(const std::string &path) const
Definition ModelArchive.ixx:310

Mila::Dnn::Serialization::ModelArchive::writeMetadata
void writeMetadata(const std::string &path, const SerializationMetadata &metadata)
Definition ModelArchive.ixx:274

Mila::Dnn::Serialization::SerializationMetadata
Type-safe metadata container for component serialization.
Definition SerializationMetadata.ixx:52

Mila::Dnn::Serialization::SerializationMetadata::tryGetInt
std::optional< int64_t > tryGetInt(const std::string &key) const noexcept
Get optional integer value.
Definition SerializationMetadata.ixx:454

Mila::Dnn::Serialization::SerializationMetadata::getInt
int64_t getInt(const std::string &key) const
Get integer value.
Definition SerializationMetadata.ixx:287

Mila::Dnn::Serialization::SerializationMetadata::tryGetString
std::optional< std::string > tryGetString(const std::string &key) const noexcept
Get optional string value.
Definition SerializationMetadata.ixx:432

Mila::Dnn::Serialization::SerializationMetadata::set
SerializationMetadata & set(const std::string &key, MetadataValue value)
Set metadata value with automatic type deduction.
Definition SerializationMetadata.ixx:68

Mila::Dnn::Tensor
Device-aware N-dimensional tensor.
Definition Tensor.ixx:138

Compute.CpuMemoryResource

Compute.DeviceId

Compute.DeviceTypeTraits

Compute.DeviceType

Compute.Device

Compute.ExecutionContextFactory

Compute.IExecutionContext

Compute.OperationTraits

Compute.UnaryOperation

Dnn.ApproximationMethod

Dnn.ComponentType

Dnn.Component

Dnn.Components.GeluConfig

Dnn.Components.Gelu

Dnn.ITensor

Dnn.TensorDataTypeTraits

Dnn.TensorDataType

Dnn.TensorTypes

Dnn.Tensor

Logging.Logger

Serialization.Metadata

Serialization.Mode

Serialization.ModelArchive

Serialization.Tensor

Mila::Dnn::Compute::Cuda::Gelu
Definition CudaGeluOp.Dispatch.ixx:39

Mila::Dnn::Compute
Definition Device.ixx:15

Mila::Dnn::Compute::createExecutionContext
std::unique_ptr< IExecutionContext > createExecutionContext(DeviceId device_id)
Create execution context for specified device.
Definition ExecutionContextFactory.ixx:23

Mila::Dnn::Compute::deviceTypeToString
std::string deviceTypeToString(DeviceType device_type)
Converts a DeviceType to its string representation.
Definition DeviceType.ixx:38

Mila::Dnn::Serialization
Definition ArchiveSerializer.ixx:19

Mila::Dnn::Serialization::SerializationMode
SerializationMode
Modes for serialization and deserialization.
Definition SerializationMode.ixx:17

Mila::Dnn
Definition ActivationType.ixx:13

Mila::Dnn::TrainingMode
TrainingMode
Runtime behavioral state for Components built with RuntimeMode::Training.
Definition Comonent.TrainingMode.ixx:39

Mila::Dnn::ApproximationMethod
ApproximationMethod
Approximation methods usable by activation functions.
Definition ApproximationMethod.ixx:18

Mila::Dnn::ComponentType
ComponentType
Canonical list of framework-known component types.
Definition ComponentType.ixx:29

Mila::Dnn::ComponentType::Gelu
@ Gelu
Definition ComponentType.ixx:36

Mila::Dnn::toString
std::string toString(ComponentType t) noexcept
Convert a ComponentType enum value to its canonical name.
Definition ComponentType.ixx:79

Mila::Dnn::zero
void zero(Tensor< TDataType, TMemoryResource > &tensor, IExecutionContext *exec_context=nullptr)
Zero a tensor using the fastest backend implementation.
Definition TensorOps.Zero.ixx:42

Mila::Dnn::Compute::DeviceId
Lightweight identifier for a compute device.
Definition DeviceId.ixx:38

Mila::Dnn::Compute::DeviceTypeTraits
Definition DeviceTypeTraits.ixx:12

Mila::Dnn::Compute::MemoryStats
Global memory statistics for all TrackedMemoryResource instances.
Definition MemoryResourceTracker.ixx:19

Mila::Dnn::Compute::OperationTraits
Primary traits template for unified compile-time operation dispatch.
Definition OperationTraits.Template.ixx:45
name	Component name identifier (mandatory).
config	GELU configuration (approximation method).
device_id	Optional device identifier. If provided, creates owned ExecutionContext for standalone mode. If nullopt, expects shared context from parent.
std::invalid_argument	if config is invalid (via config.validate()).
std::invalid_argument	if device_id.type does not match TDeviceType.
std::runtime_error	if ExecutionContext creation fails (standalone mode).
std::runtime_error	if backend operation creation fails in onExecutionContextSet().