Exported Modules
module	Compute.DeviceTypeTraits
module	Dnn.TensorTypes
module	Compute.ExecutionContext
module	Dnn.ITensor
module	Compute.DeviceType
module	Serialization.Mode
module	Dnn.Components.MultiHeadAttentionConfig
module	Compute.ExecutionContextFactory
module	Compute.CpuMemoryResource
module	Dnn.Component
module	Compute.IKvCacheLifecycle
module	Dnn.ComponentType
module	Compute.MemoryResource
module	Compute.OperationTraits
module	Compute.UnaryOperation
module	Dnn.TensorDataType
module	Serialization.ModelArchive
module	Dnn.TensorDataTypeTraits
module	Compute.DeviceId
module	Compute.Device
module	Compute.IPackedKvInference
module	Dnn.Tensor

Classes
class	Mila::Dnn::MultiHeadAttention< TDeviceType, TPrecision >
	Multi-Head Attention module that accepts concatenated QKV input.

Typedefs
using	ComponentBase = Component<TDeviceType, TPrecision>
using	MR = typename DeviceTypeTraits<TDeviceType>::memory_resource
using	OpType = typename OperationTraits<OperationType::MultiHeadAttentionOp, TDeviceType, TPrecision>::type
using	TensorType = Tensor<TPrecision, MR>

Functions
	MultiHeadAttention (const std::string &name, const MultiHeadAttentionConfig &config, std::optional< DeviceId > device_id=std::nullopt)
	Construct MultiHeadAttention component.
	~MultiHeadAttention () override=default
TensorType &	backward (const TensorType &input, const TensorType &output_grad)
	Run backward pass and return component-owned input-gradient tensor.
void	createOperation ()
TensorType &	decode (const TensorType &input, int position)
	Inference-only single-token decode pass.
TensorType &	forward (const TensorType &input)
	Standard forward pass.
const MultiHeadAttentionConfig &	getConfig () const noexcept
DeviceId	getDeviceId () const override
	Get the compute device id associated with this component.
std::vector< ITensor * >	getGradients () const override
	Return non-owning pointers to parameter gradient tensors.
MemoryStats	getMemoryStats () const override
	Return the current memory allocation breakdown for this component.
int64_t	getModelDim () const noexcept
int64_t	getNumHeads () const noexcept
std::vector< ITensor * >	getParameters () const override
	Return non-owning pointers to parameter tensors.
const ComponentType	getType () const override
	Get the component type identifier.
void	onBuilding (const BuildContext &build_config) override
	Hook invoked by build() to allocate component buffers.
void	onExecutionContextSet () override
	Lifecycle hook called immediately after the ExecutionContext is set.
void	onTrainingModeChanging (TrainingMode training_mode) override
	Hook called before TrainingMode transitions.
size_t	parameterCount () const override
	Return number of trainable parameters.
TensorType &	resolveOutputView (const shape_t &input_shape)
void	save_ (ModelArchive &archive, SerializationMode mode) const override
bool	supportsKVCache () const noexcept
	Returns true when the underlying operation implements both IPackedKvInference and IKvCacheLifecycle.
void	synchronize () override
	Wait for outstanding device work submitted by this component.
std::string	toString () const override
	Produce a short, human-readable description of the component.
void	validateConcatenatedQKVShape (const shape_t &shape) const

Variables
bool	cache_initialized_ { false }
MultiHeadAttentionConfig	config_
std::unique_ptr< IExecutionContext >	context_ { nullptr }
bool	decode_active_ { false }
IKvCacheLifecycle *	kv_cache_op_ { nullptr }
shape_t	max_input_shape_
std::shared_ptr< OpType >	operation_ { nullptr }
std::unique_ptr< TensorType >	output_view_ { nullptr }
std::unique_ptr< TensorType >	owned_decode_output_ { nullptr }
std::unique_ptr< TensorType >	owned_input_grad_ { nullptr }
std::unique_ptr< TensorType >	owned_output_ { nullptr }
IPackedKvInference *	positional_op_ { nullptr }

Files
file	/__w/Mila/Mila/Mila/Src/Dnn/Components/Attention/MHA/MultiHeadAttention.ixx
	Multi-Head Attention module (concatenated QKV input).

Exported Modules

Classes

Typedefs

Functions

Variables

Files