Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > Member List

This is the complete list of members for Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >, including all inherited members.

addComponent(ComponentPtr component)Mila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
backward(const TokenIndexType &input, const TensorType &output_grad) overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inline
Mila::Dnn::LanguageNetwork::backward(const TokenIndexType &input, const TensorType &output_grad)=0Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision >pure virtual
batch_size_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
block_input_ptrs_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
block_output_ptrs_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
build(const BuildContext &context) finalMila::Dnn::Component< TDeviceType, TPrecision >inlinevirtual
build_context_Mila::Dnn::Component< TDeviceType, TPrecision >protected
built_Mila::Dnn::Component< TDeviceType, TPrecision >private
child_component_map_Mila::Dnn::CompositeComponent< TDeviceType, TPrecision >private
child_components_Mila::Dnn::CompositeComponent< TDeviceType, TPrecision >private
childCount() const noexceptMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
clearComponents()Mila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
Component(const std::string &name)Mila::Dnn::Component< TDeviceType, TPrecision >inlineexplicit
ComponentBase typedefMila::Dnn::CompositeComponent< TDeviceType, TPrecision >
ComponentPtr typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
CompositeBase typedefMila::Dnn::Network< TDeviceType, TPrecision >
CompositeComponent(const std::string &name)Mila::Dnn::CompositeComponent< TDeviceType, TPrecision >inlineexplicit
CompositeComponent(const CompositeComponent &)=deleteMila::Dnn::CompositeComponent< TDeviceType, TPrecision >
CompositeComponent(CompositeComponent &&) noexcept=defaultMila::Dnn::CompositeComponent< TDeviceType, TPrecision >
config_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
createConfigFromMetadata(const PretrainedMetadata &metadata)Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprivatestatic
createGraph()Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprivate
createOptimizer(const TConfig &config)Mila::Dnn::Network< TDeviceType, TPrecision >inline
decode(const TokenIndexType &input, int position) overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inline
Mila::Dnn::LanguageNetwork::decode(const TokenIndexType &input, int position)=0Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision >pure virtual
embedding_shape_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
ensureBuilt(const char *method) constMila::Dnn::Component< TDeviceType, TPrecision >inlineprivate
exec_context_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
final_rmsnorm_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
findComponent(const std::string &path) constMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
forward(const TokenIndexType &input) overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inline
Mila::Dnn::LanguageNetwork::forward(const TokenIndexType &input)=0Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision >pure virtual
getComponent(const std::string &name) constMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
getComponentAs(const std::string &name) constMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inlineprotected
getComponents() constMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
getDeviceId() const noexceptMila::Dnn::Network< TDeviceType, TPrecision >inlinevirtual
getDeviceType()Mila::Dnn::Component< TDeviceType, TPrecision >inlinestatic
getExecutionContext() constMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inline
getGradients() const overrideMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inlinevirtual
getMemoryStats() const overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlinevirtual
getName() constMila::Dnn::Component< TDeviceType, TPrecision >inline
getParameterNames() constMila::Dnn::Component< TDeviceType, TPrecision >inlinevirtual
getParameters() const overrideMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inlinevirtual
getPrecision() noexceptMila::Dnn::Component< TDeviceType, TPrecision >inlinestatic
getRuntimeMode() const noexceptMila::Dnn::Component< TDeviceType, TPrecision >inline
getTrainingMode() const noexceptMila::Dnn::Component< TDeviceType, TPrecision >inline
getType() const overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlinevirtual
gqa_att_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
gqa_att_decode_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
gqa_preatt_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
gqa_preatt_decode_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
gqa_q_permute_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
gqa_v_out_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
gqa_v_out_decode_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
hasChildren() const noexceptMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
hasComponent(const std::string &name) constMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
hasExecutionContext() const noexceptMila::Dnn::Component< TDeviceType, TPrecision >inlineprotected
input_shape_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
isBuilt() const finalMila::Dnn::Component< TDeviceType, TPrecision >inlinevirtual
isIdentifier(const std::string &s) noexceptMila::Dnn::Component< TDeviceType, TPrecision >inlineprivatestatic
isInferenceMode() const noexceptMila::Dnn::Component< TDeviceType, TPrecision >inline
isTrainingMode() const noexceptMila::Dnn::Component< TDeviceType, TPrecision >inline
LanguageNetwork(const std::string &name)Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision >inlineexplicit
LinearType typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
LlamaTransformer(const std::string &name, const LlamaConfig &config, DeviceId device_id)Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineexplicit
lm_head_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
LmHeadLinearType typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
loadParameter(const std::string &name, const Serialization::ITensorBlob &blob)Mila::Dnn::Component< TDeviceType, TPrecision >inlinevirtual
loadParameterFromBlob(const std::string &param_name, const Serialization::ITensorBlob &blob, Tensor< TParameterPrecision, TMemoryResource > &target, const shape_t &expected_shape)Mila::Dnn::Component< TDeviceType, TPrecision >inlineprotected
loadParameters(PretrainedModelReader &reader)Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inline
logits_ptr_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
MR typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
name_Mila::Dnn::Component< TDeviceType, TPrecision >private
Network(const std::string &name)Mila::Dnn::Network< TDeviceType, TPrecision >inlineexplicit
NetworkBase typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
normalized_ptr_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
onBuilding(const BuildContext &context) overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprotectedvirtual
onExecutionContextSet() overrideMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inlineprotectedvirtual
onTrainingModeChanging(TrainingMode training_mode) overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprotectedvirtual
operator=(const CompositeComponent &)=deleteMila::Dnn::CompositeComponent< TDeviceType, TPrecision >
operator=(CompositeComponent &&) noexcept=defaultMila::Dnn::CompositeComponent< TDeviceType, TPrecision >
optimize()Mila::Dnn::CompositeComponent< TDeviceType, TPrecision >inlineprotectedvirtual
output_shape_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
parameterCount() const overrideMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inlinevirtual
parseLayerIndex(const std::string &name)Mila::Dnn::Network< TDeviceType, TPrecision >inlineprivate
parseParameterPath(const std::string &full_name) constMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprivate
parseTensorName(const std::string &tensor_name)Mila::Dnn::Network< TDeviceType, TPrecision >inlineprivate
prefill(const TokenIndexType &input) overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inline
Mila::Dnn::LanguageNetwork::prefill(const TokenIndexType &input)=0Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision >pure virtual
prefill_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
prefill_chunk_size_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
removeComponent(const std::string &name)Mila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
RmsNormType typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
save(ModelArchive &archive, SerializationMode mode) constMila::Dnn::Network< TDeviceType, TPrecision >inline
save_(ModelArchive &archive, SerializationMode) const overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprotectedvirtual
saveComponentGraph(ModelArchive &archive, SerializationMode mode) constMila::Dnn::Network< TDeviceType, TPrecision >inlineprivate
saveNetworkMetadata(ModelArchive &archive, SerializationMode mode) constMila::Dnn::Network< TDeviceType, TPrecision >inlineprivate
seq_length_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
setExecutionContext(IExecutionContext *context)Mila::Dnn::Component< TDeviceType, TPrecision >inlineprotected
setTrainingMode(TrainingMode mode)Mila::Dnn::Component< TDeviceType, TPrecision >inline
synchronize() overrideMila::Dnn::Network< TDeviceType, TPrecision >inlinevirtual
TensorType typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
token_embed_out_ptr_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
token_embedding_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
TokenEmbeddingType typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
TokenIndexType typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
toString() const overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlinevirtual
training_mode_Mila::Dnn::Component< TDeviceType, TPrecision >private
training_mode_mutex_Mila::Dnn::Component< TDeviceType, TPrecision >private
transformer_blocks_Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >private
TransformerBlockType typedefMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
tryFindComponent(const std::string &path) constMila::Dnn::CompositeComponent< TDeviceType, TPrecision >inline
validateBuildContext(const BuildContext &context) constMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprivate
validateLeadingShape(const shape_t &leading_shape) constMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlineprivate
validateName(const std::string &name)Mila::Dnn::Component< TDeviceType, TPrecision >inlineprivatestatic
verifyArchitectureCompatibility(const PretrainedMetadata &metadata)Mila::Dnn::Network< TDeviceType, TPrecision >inlineprotected
zeroGradients() overrideMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >inlinevirtual
~Component()=defaultMila::Dnn::Component< TDeviceType, TPrecision >virtual
~CompositeComponent()=defaultMila::Dnn::CompositeComponent< TDeviceType, TPrecision >virtual
~LanguageNetwork() override=defaultMila::Dnn::LanguageNetwork< TDeviceType, TPrecision >
~LlamaTransformer() override=defaultMila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy >
~Network() override=defaultMila::Dnn::Network< TDeviceType, TPrecision >