Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Dnn.Components.MultiHeadAttention Module Reference

Exported Modules

module  Compute.DeviceTypeTraits
module  Dnn.TensorTypes
module  Compute.ExecutionContext
module  Dnn.ITensor
module  Compute.DeviceType
module  Serialization.Mode
module  Dnn.Components.MultiHeadAttentionConfig
module  Compute.ExecutionContextFactory
module  Compute.CpuMemoryResource
module  Dnn.Component
module  Compute.IKvCacheLifecycle
module  Dnn.ComponentType
module  Compute.MemoryResource
module  Compute.OperationTraits
module  Compute.UnaryOperation
module  Dnn.TensorDataType
module  Serialization.ModelArchive
module  Dnn.TensorDataTypeTraits
module  Compute.DeviceId
module  Compute.Device
module  Compute.IPackedKvInference
module  Dnn.Tensor

Classes

class  Mila::Dnn::MultiHeadAttention< TDeviceType, TPrecision >
 Multi-Head Attention module that accepts concatenated QKV input. More...

Typedefs

using ComponentBase = Component<TDeviceType, TPrecision>
using MR = typename DeviceTypeTraits<TDeviceType>::memory_resource
using OpType = typename OperationTraits<OperationType::MultiHeadAttentionOp, TDeviceType, TPrecision>::type
using TensorType = Tensor<TPrecision, MR>

Functions

 MultiHeadAttention (const std::string &name, const MultiHeadAttentionConfig &config, std::optional< DeviceId > device_id=std::nullopt)
 Construct MultiHeadAttention component.
 ~MultiHeadAttention () override=default
TensorType & backward (const TensorType &input, const TensorType &output_grad)
 Run backward pass and return component-owned input-gradient tensor.
void createOperation ()
TensorType & decode (const TensorType &input, int position)
 Inference-only single-token decode pass.
TensorType & forward (const TensorType &input)
 Standard forward pass.
const MultiHeadAttentionConfig & getConfig () const noexcept
DeviceId getDeviceId () const override
 Get the compute device id associated with this component.
std::vector< ITensor * > getGradients () const override
 Return non-owning pointers to parameter gradient tensors.
MemoryStats getMemoryStats () const override
 Return the current memory allocation breakdown for this component.
int64_t getModelDim () const noexcept
int64_t getNumHeads () const noexcept
std::vector< ITensor * > getParameters () const override
 Return non-owning pointers to parameter tensors.
const ComponentType getType () const override
 Get the component type identifier.
void onBuilding (const BuildContext &build_config) override
 Hook invoked by build() to allocate component buffers.
void onExecutionContextSet () override
 Lifecycle hook: Called immediately after ExecutionContext is set.
void onTrainingModeChanging (TrainingMode training_mode) override
 Hook called before TrainingMode transitions.
size_t parameterCount () const override
 Return number of trainable parameters.
TensorType & resolveOutputView (const shape_t &input_shape)
void save_ (ModelArchive &archive, SerializationMode mode) const override
bool supportsKVCache () const noexcept
 Returns true when the underlying operation implements both IPackedKvInference and IKvCacheLifecycle.
void synchronize () override
 Wait for outstanding device work submitted by this component.
std::string toString () const override
 Produce a short, human-readable description of the component.
void validateConcatenatedQKVShape (const shape_t &shape) const

Variables

bool cache_initialized_ { false }
MultiHeadAttentionConfig config_
std::unique_ptr< IExecutionContext > context_ { nullptr }
bool decode_active_ { false }
IKvCacheLifecycle * kv_cache_op_ { nullptr }
shape_t max_input_shape_
std::shared_ptr< OpType > operation_ { nullptr }
std::unique_ptr< TensorType > output_view_ { nullptr }
std::unique_ptr< TensorType > owned_decode_output_ { nullptr }
std::unique_ptr< TensorType > owned_input_grad_ { nullptr }
std::unique_ptr< TensorType > owned_output_ { nullptr }
IPackedKvInference * positional_op_ { nullptr }

Files

file  /__w/Mila/Mila/Mila/Src/Dnn/Components/Attention/MHA/MultiHeadAttention.ixx
 Multi-Head Attention module (concatenated QKV input).