Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > Member List

This is the complete list of members for Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision >, including all inherited members.

active_max_seq_len_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
allocateStateTensors() | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
asInputTensor(const ITensor &t) | Mila::Dnn::Compute::UnaryOperation< DeviceType::Cuda, TPrecision > | inline, protected, static
asOutputTensor(ITensor &t) | Mila::Dnn::Compute::UnaryOperation< DeviceType::Cuda, TPrecision > | inline, protected, static
att_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
att_decode_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
att_decode_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
att_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
att_value_decode_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
att_value_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
B_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
backward(const ITensor &input, const ITensor &output_grad, ITensor &input_grad) const override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
backward_att_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
backward_k_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
backward_q_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
backward_v_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
build(const BuildContext &config) override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
buildCublasLtPlans() | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
cached_seq_len_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
clearGradients() noexcept | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | inline, virtual
config_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
ConfigType typedef | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision >
context_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
cublaslt_handle_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
CudaExecutionContext typedef | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision >
CudaMultiHeadAttentionOp(IExecutionContext *context, const MultiHeadAttentionConfig &config) | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline
data_type | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | static
DataTypeTraits typedef | Mila::Dnn::Compute::Operation< TDeviceType, TInput >
datt_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
datt_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
decode(const ITensor &input, ITensor &output, int position) override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
device_type | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | static
dk_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dk_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dpreatt_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dpreatt_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dq_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dq_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dV_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dV_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dVout_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
dVout_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
embedding_dim_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
ensureKVCacheEnabled() const | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
forward(const ITensor &input, ITensor &output) const override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
getComputeTypes(cublasComputeType_t &compute_type, cudaDataType_t &scale_type) const | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
getConfig() const | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline
getCudaDataType() const | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
getDataType() const | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | inline, virtual
getDeviceType() const | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | inline, virtual
getName() const override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
getOperationType() const override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
getStateMemorySize() const | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | inline, virtual
HS_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
initializeKvCache(int batch_size, int max_seq_length) override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
is_built_ | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | protected
isBuilt() const | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | inline, virtual
isEvalMode() const | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | inline, virtual
k_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
k_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
kv_cache_enabled_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
MR typedef | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision >
NativeType typedef | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision >
NH_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
preatt_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
preatt_decode_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
preatt_decode_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
preatt_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
prefill(const ITensor &input, ITensor &output) override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
q_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
q_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
qk_decode_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
qk_score_plan_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
qkv_dim_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
resetKvCache() override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
setGradients(ITensor *, ITensor *) override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
setParameters(ITensor *, ITensor *) override | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, virtual
setTrainingMode(TrainingMode training_mode) | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | inline, virtual
T_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
TensorInputType typedef | Mila::Dnn::Compute::UnaryOperation< DeviceType::Cuda, TPrecision >
TensorOutputType typedef | Mila::Dnn::Compute::UnaryOperation< DeviceType::Cuda, TPrecision >
TensorType typedef | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision >
training_mode_ | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | protected
UnaryOperationBase typedef | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision >
v_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
v_out_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
v_out_decode_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
v_out_decode_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
v_out_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
v_tensor_ | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | private
validateDecodeInputShape(const shape_t &input_shape) const | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
validateInputShape(const shape_t &input_shape) const | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
validatePrefillInputShape(const shape_t &input_shape) const | Mila::Dnn::Compute::Cuda::MultiHeadAttention::CudaMultiHeadAttentionOp< TPrecision > | inline, private
~IKvCacheLifecycle()=default | Mila::Dnn::Compute::IKvCacheLifecycle | virtual
~IPackedKvInference() override=default | Mila::Dnn::Compute::IPackedKvInference
~Operation()=default | Mila::Dnn::Compute::Operation< TDeviceType, TInput > | virtual
~UnaryOperation()=default | Mila::Dnn::Compute::UnaryOperation< DeviceType::Cuda, TPrecision > | virtual