| addComponent(ComponentPtr component) | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| backward(const TokenIndexType &input, const TensorType &output_grad) override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inline |
| Mila::Dnn::LanguageNetwork::backward(const TokenIndexType &input, const TensorType &output_grad)=0 | Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision > | pure virtual |
| batch_size_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| block_input_ptrs_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| block_output_ptrs_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| build(const BuildContext &context) final | Mila::Dnn::Component< TDeviceType, TPrecision > | inlinevirtual |
| build_context_ | Mila::Dnn::Component< TDeviceType, TPrecision > | protected |
| built_ | Mila::Dnn::Component< TDeviceType, TPrecision > | private |
| child_component_map_ | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | private |
| child_components_ | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | private |
| childCount() const noexcept | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| clearComponents() | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| Component(const std::string &name) | Mila::Dnn::Component< TDeviceType, TPrecision > | inlineexplicit |
| ComponentBase typedef | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | |
| ComponentPtr typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| CompositeBase typedef | Mila::Dnn::Network< TDeviceType, TPrecision > | |
| CompositeComponent(const std::string &name) | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inlineexplicit |
| CompositeComponent(const CompositeComponent &)=delete | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | |
| CompositeComponent(CompositeComponent &&) noexcept=default | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | |
| config_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| createConfigFromMetadata(const PretrainedMetadata &metadata) | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprivatestatic |
| createGraph() | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprivate |
| createOptimizer(const TConfig &config) | Mila::Dnn::Network< TDeviceType, TPrecision > | inline |
| decode(const TokenIndexType &input, int position) override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inline |
| Mila::Dnn::LanguageNetwork::decode(const TokenIndexType &input, int position)=0 | Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision > | pure virtual |
| embedding_shape_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| ensureBuilt(const char *method) const | Mila::Dnn::Component< TDeviceType, TPrecision > | inlineprivate |
| exec_context_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| final_rmsnorm_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| findComponent(const std::string &path) const | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| forward(const TokenIndexType &input) override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inline |
| Mila::Dnn::LanguageNetwork::forward(const TokenIndexType &input)=0 | Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision > | pure virtual |
| getComponent(const std::string &name) const | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| getComponentAs(const std::string &name) const | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inlineprotected |
| getComponents() const | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| getDeviceId() const noexcept | Mila::Dnn::Network< TDeviceType, TPrecision > | inlinevirtual |
| getDeviceType() | Mila::Dnn::Component< TDeviceType, TPrecision > | inlinestatic |
| getExecutionContext() const | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inline |
| getGradients() const override | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inlinevirtual |
| getMemoryStats() const override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlinevirtual |
| getName() const | Mila::Dnn::Component< TDeviceType, TPrecision > | inline |
| getParameterNames() const | Mila::Dnn::Component< TDeviceType, TPrecision > | inlinevirtual |
| getParameters() const override | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inlinevirtual |
| getPrecision() noexcept | Mila::Dnn::Component< TDeviceType, TPrecision > | inlinestatic |
| getRuntimeMode() const noexcept | Mila::Dnn::Component< TDeviceType, TPrecision > | inline |
| getTrainingMode() const noexcept | Mila::Dnn::Component< TDeviceType, TPrecision > | inline |
| getType() const override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlinevirtual |
| gqa_att_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| gqa_att_decode_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| gqa_preatt_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| gqa_preatt_decode_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| gqa_q_permute_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| gqa_v_out_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| gqa_v_out_decode_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| hasChildren() const noexcept | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| hasComponent(const std::string &name) const | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| hasExecutionContext() const noexcept | Mila::Dnn::Component< TDeviceType, TPrecision > | inlineprotected |
| input_shape_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| isBuilt() const final | Mila::Dnn::Component< TDeviceType, TPrecision > | inlinevirtual |
| isIdentifier(const std::string &s) noexcept | Mila::Dnn::Component< TDeviceType, TPrecision > | inlineprivatestatic |
| isInferenceMode() const noexcept | Mila::Dnn::Component< TDeviceType, TPrecision > | inline |
| isTrainingMode() const noexcept | Mila::Dnn::Component< TDeviceType, TPrecision > | inline |
| LanguageNetwork(const std::string &name) | Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision > | inlineexplicit |
| LinearType typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| LlamaTransformer(const std::string &name, const LlamaConfig &config, DeviceId device_id) | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineexplicit |
| lm_head_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| LmHeadLinearType typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| loadParameter(const std::string &name, const Serialization::ITensorBlob &blob) | Mila::Dnn::Component< TDeviceType, TPrecision > | inlinevirtual |
| loadParameterFromBlob(const std::string &param_name, const Serialization::ITensorBlob &blob, Tensor< TParameterPrecision, TMemoryResource > &target, const shape_t &expected_shape) | Mila::Dnn::Component< TDeviceType, TPrecision > | inlineprotected |
| loadParameters(PretrainedModelReader &reader) | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inline |
| logits_ptr_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| MR typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| name_ | Mila::Dnn::Component< TDeviceType, TPrecision > | private |
| Network(const std::string &name) | Mila::Dnn::Network< TDeviceType, TPrecision > | inlineexplicit |
| NetworkBase typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| normalized_ptr_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| onBuilding(const BuildContext &context) override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprotectedvirtual |
| onExecutionContextSet() override | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inlineprotectedvirtual |
| onTrainingModeChanging(TrainingMode training_mode) override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprotectedvirtual |
| operator=(const CompositeComponent &)=delete | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | |
| operator=(CompositeComponent &&) noexcept=default | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | |
| optimize() | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inlineprotectedvirtual |
| output_shape_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| parameterCount() const override | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inlinevirtual |
| parseLayerIndex(const std::string &name) | Mila::Dnn::Network< TDeviceType, TPrecision > | inlineprivate |
| parseParameterPath(const std::string &full_name) const | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprivate |
| parseTensorName(const std::string &tensor_name) | Mila::Dnn::Network< TDeviceType, TPrecision > | inlineprivate |
| prefill(const TokenIndexType &input) override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inline |
| Mila::Dnn::LanguageNetwork::prefill(const TokenIndexType &input)=0 | Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision > | pure virtual |
| prefill_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| prefill_chunk_size_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| removeComponent(const std::string &name) | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| RmsNormType typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| save(ModelArchive &archive, SerializationMode mode) const | Mila::Dnn::Network< TDeviceType, TPrecision > | inline |
| save_(ModelArchive &archive, SerializationMode) const override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprotectedvirtual |
| saveComponentGraph(ModelArchive &archive, SerializationMode mode) const | Mila::Dnn::Network< TDeviceType, TPrecision > | inlineprivate |
| saveNetworkMetadata(ModelArchive &archive, SerializationMode mode) const | Mila::Dnn::Network< TDeviceType, TPrecision > | inlineprivate |
| seq_length_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| setExecutionContext(IExecutionContext *context) | Mila::Dnn::Component< TDeviceType, TPrecision > | inlineprotected |
| setTrainingMode(TrainingMode mode) | Mila::Dnn::Component< TDeviceType, TPrecision > | inline |
| synchronize() override | Mila::Dnn::Network< TDeviceType, TPrecision > | inlinevirtual |
| TensorType typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| token_embed_out_ptr_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| token_embedding_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| TokenEmbeddingType typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| TokenIndexType typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| toString() const override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlinevirtual |
| training_mode_ | Mila::Dnn::Component< TDeviceType, TPrecision > | private |
| training_mode_mutex_ | Mila::Dnn::Component< TDeviceType, TPrecision > | private |
| transformer_blocks_ | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | private |
| TransformerBlockType typedef | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| tryFindComponent(const std::string &path) const | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | inline |
| validateBuildContext(const BuildContext &context) const | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprivate |
| validateLeadingShape(const shape_t &leading_shape) const | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlineprivate |
| validateName(const std::string &name) | Mila::Dnn::Component< TDeviceType, TPrecision > | inlineprivatestatic |
| verifyArchitectureCompatibility(const PretrainedMetadata &metadata) | Mila::Dnn::Network< TDeviceType, TPrecision > | inlineprotected |
| zeroGradients() override | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | inlinevirtual |
| ~Component()=default | Mila::Dnn::Component< TDeviceType, TPrecision > | virtual |
| ~CompositeComponent()=default | Mila::Dnn::CompositeComponent< TDeviceType, TPrecision > | virtual |
| ~LanguageNetwork() override=default | Mila::Dnn::LanguageNetwork< TDeviceType, TPrecision > | |
| ~LlamaTransformer() override=default | Mila::Dnn::LlamaTransformer< TDeviceType, TPrecision, TWeightQuantization, TKvCachePolicy > | |
| ~Network() override=default | Mila::Dnn::Network< TDeviceType, TPrecision > | |