Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision > Member List

This is the complete list of members for Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >, including all inherited members.

active_max_seq_len_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_decode_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_decode_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_decode_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_tensor_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_value_decode_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_value_decode_plan_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_value_partial_prefill_plan_cache_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_value_partial_prefill_plan_cache_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_value_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_value_prefill_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
att_value_prefill_plan_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
B_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
backward(const ITensor &input, const ITensor &output_grad, ITensor &input_grad) constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inline
backward_att_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
backward_k_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
backward_q_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
backward_v_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
build(const BuildContext &context) overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
buildCublasLtPlans()Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
buildCublasLtPlans_optimized()Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
C_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
cached_seq_len_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
clearGradients() noexceptMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >inlinevirtual
config_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
ConfigType typedefMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >
context_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
cublaslt_handle_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
CudaExecutionContext typedefMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >
CudaGqaOp(IExecutionContext *context, const GqaConfig &config)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inline
data_typeMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >static
DataTypeTraits typedefMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >
datt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
datt_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
decode(const ITensor &q, const ITensor &k, const ITensor &v, ITensor &output, int position) overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
decode_optimized(const ITensor &q, const ITensor &k, const ITensor &v, ITensor &output, int position)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
decodeImpl(const ITensor &q, const ITensor &k, const ITensor &v, ITensor &output, int position)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
device_typeMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >static
dK_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dK_exp_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dK_exp_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dK_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dpreatt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dpreatt_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dq_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dq_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dV_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dV_exp_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dV_exp_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dV_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dVout_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
dVout_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
ensureKVCacheEnabled() constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
forward(const ITensor &input, ITensor &output) constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inline
getComputeTypes(cublasComputeType_t &compute_type, cudaDataType_t &scale_type) constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
getConfig() constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inline
getCudaDataType() constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
getDataType() constMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >inlinevirtual
getDeviceType() constMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >inlinevirtual
getName() const overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
getOperationType() const overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
getOrBuildPartialAVPlan(int chunk_len)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
getOrBuildPartialAVPlan_optimized(int chunk_len)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
getOrBuildPartialQKPlan(int chunk_len)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
getOrBuildPartialQKPlan_optimized(int chunk_len)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
getStateMemorySize() const overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
GS_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
HS_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
initializeKvCache(int batch_size, int max_seq_length) overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
initializeState(const BuildContext &build_context)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
initializeState_optimized(const BuildContext &build_context)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
is_built_Mila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >protected
isBuilt() constMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >inlinevirtual
isEvalMode() constMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >inlinevirtual
k_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
k_exp_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
k_exp_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
k_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
k_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
kv_cache_enabled_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
MR typedefMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >
NativeType typedefMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >
NH_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
NKV_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
preatt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
preatt_decode_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
preatt_decode_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
preatt_decode_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
preatt_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
preatt_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
preatt_tensor_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
prefill(const ITensor &q, const ITensor &k, const ITensor &v, ITensor &output, int position_offset) overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
prefill_chunk_size_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
prefill_optimized(const ITensor &q, const ITensor &k, const ITensor &v, ITensor &output, int position_offset)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
prefillImpl(const ITensor &q, const ITensor &k, const ITensor &v, ITensor &output, int position_offset)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
q_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
q_permute_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
q_permute_tensor_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
q_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
qk_decode_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
qk_decode_plan_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
qk_partial_prefill_plan_cache_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
qk_partial_prefill_plan_cache_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
qk_prefill_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
qk_prefill_plan_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
qk_score_plan_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
raw(const std::shared_ptr< TensorType > &t)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivatestatic
resetKvCache() overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
setGradients(ITensor *, ITensor *) overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
setParameters(ITensor *, ITensor *) overrideMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlinevirtual
setState(const GqaState &state)Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inline
setTrainingMode(TrainingMode training_mode)Mila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >inlinevirtual
state_memory_size_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
T_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
TensorType typedefMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >
training_mode_Mila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >protected
UnaryOperationBase typedefMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >
use_optimized_path_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_exp_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_exp_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_out_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_out_decode_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_out_decode_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_out_decode_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_out_opt_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_out_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_out_tensor_optimized_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
v_tensor_Mila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >private
validateDecodeInputShape(const shape_t &s) constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
validateInputShape(const shape_t &s) constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
validatePrefillInputShape(const shape_t &s) constMila::Dnn::Compute::Cuda::Gqa::CudaGqaOp< TPrecision >inlineprivate
~IKvCacheLifecycle()=defaultMila::Dnn::Compute::IKvCacheLifecyclevirtual
~IKvInference() override=defaultMila::Dnn::Compute::IKvInference
~Operation()=defaultMila::Dnn::Compute::Operation< DeviceType::Cuda, TPrecision >virtual