Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Dnn.Components.GptBlock Module Reference

Exported Modules

module  Compute.Device
module  Compute.DeviceId
module  Dnn.Tensor
module  Compute.DeviceTypeTraits
module  Dnn.TensorHelpers
module  Dnn.ComponentType
module  Dnn.TensorInitializers
module  Serialization.Mode
module  Compute.ExecutionContext
module  Compute.DeviceType
module  Dnn.ITensor
module  Logging.Logger
module  Compute.IExecutionContext
module  Dnn.CompositeComponent
module  Dnn.Components.MultiHeadAttention
module  Serialization.ModelArchive
module  Dnn.Components.Linear
module  Dnn.TensorDataTypeTraits
module  Dnn.TensorDataType
module  Compute.MemoryResource
module  Dnn.Components.LayerNorm
module  Dnn.Components.MLP
module  Compute.CpuMemoryResource
module  Dnn.Component
module  Dnn.TensorOps
module  Compute.ExecutionContextFactory
module  Dnn.Components.Residual
module  Dnn.ActivationType
module  Dnn.TensorTypes

Classes

class  Mila::Dnn::GptBlock< TDeviceType, TPrecision >
 Transformer encoder block as a composite component. More...
class  Mila::Dnn::GptBlockConfig
 Configuration class for GPT transformer blocks. More...

Typedefs

using AttentionType = MultiHeadAttention<TDeviceType, TPrecision>
using ComponentPtr = typename CompositeComponentBase::ComponentPtr
using CompositeComponentBase = CompositeComponent<TDeviceType, TPrecision>
using ExecutionContextType = ExecutionContext<TDeviceType>
using LayerNormType = LayerNorm<TDeviceType, TPrecision>
using LinearType = Linear<TDeviceType, TPrecision>
using MLPType = MLP<TDeviceType, TPrecision>
using MR = typename DeviceTypeTraits<TDeviceType>::memory_resource
using ResidualType = Residual<TDeviceType, TPrecision>
using TensorType = Tensor<TPrecision, MR>

Functions

 GptBlock (const std::string &name, const GptBlockConfig &config, std::optional< DeviceId > device_id=std::nullopt)
 Construct GptBlock in shared or standalone mode.
 ~GptBlock () override=default
TensorType backward (const TensorType &input, const TensorType &output_grad)
 Backward pass returning the input-gradient tensor.
void createGraph ()
TensorType decode (const TensorType &input, int position)
 Inference-only single-token decode pass.
TensorType forward (const TensorType &input)
 Forward pass with optional KV cache dispatch.
MemoryStats getMemoryStats () const override
 Return the current memory allocation breakdown for this component.
const ComponentType getType () const override
 Get the component type identifier.
void initializeKVCache (int64_t max_seq_len)
 Allocate KV cache buffers on the contained Attention component.
void load_ (ModelArchive &archive, SerializationMode mode)
void onBuilding (const BuildContext &context) override
 Hook invoked by build() to allocate component buffers.
void onTrainingModeChanging (TrainingMode training_mode) override
 Hook invoked when training mode is about to change.
void resetKVCache ()
 Reset KV cache state on the contained Attention component.
void save_ (ModelArchive &archive, SerializationMode mode) const override
 Save all child components recursively.
bool supportsKVCache () const noexcept
 Returns true when the contained Attention supports KV caching.
std::string toString () const override
 Generate a human-readable description.
void validateBuildContext (const BuildContext &context) const
void validateInputShape (const shape_t &input_shape) const
void zeroGradients () override
 Clear all model-owned gradients for this component.

Variables

std::shared_ptr< AttentionType > attn_ { nullptr }
shape_t cached_input_shape_
GptBlockConfig config_
std::shared_ptr< TensorType > d_input_ { nullptr }
std::shared_ptr< TensorType > d_res1_accum_ { nullptr }
std::shared_ptr< MLPType > ffn_ { nullptr }
bool forward_executed_ { false }
TensorType * last_attn_out_ { nullptr }
TensorType * last_ffn_out_ { nullptr }
TensorType * last_ln1_out_ { nullptr }
TensorType * last_ln2_out_ { nullptr }
TensorType * last_out_proj_out_ { nullptr }
TensorType * last_qkv_out_ { nullptr }
TensorType * last_res1_out_ { nullptr }
TensorType * last_res2_out_ { nullptr }
std::shared_ptr< LayerNormType > ln1_ { nullptr }
std::shared_ptr< LayerNormType > ln2_ { nullptr }
std::shared_ptr< LinearType > out_proj_ { nullptr }
std::unique_ptr< IExecutionContext > owned_exec_context_ { nullptr }
std::shared_ptr< LinearType > qkv_proj_ { nullptr }
std::shared_ptr< ResidualType > res1_ { nullptr }
std::shared_ptr< ResidualType > res2_ { nullptr }

Files

file  /__w/Mila/Mila/Mila/Src/Dnn/Components/Transformers/Gpt/GptBlock.ixx
 Transformer encoder block implementation.
file  /__w/Mila/Mila/Mila/Src/Dnn/Components/Transformers/Gpt/GptBlock.Config.ixx
 Configuration for GPT-style transformer block (block-level).