| Base typedef | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | |
| config_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private |
| configFromMetadata(const PretrainedMetadata &metadata) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private static |
| decode_token_device_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private |
| decode_token_staging_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private |
| ensureTrainingMode(const char *method) const | Mila::Dnn::Model< TDeviceType, TPrecision > | inline private |
| eosToken() const noexcept override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private virtual |
| fromPretrained(const std::filesystem::path &path, const LlamaModelConfig &model_config, DeviceId device_id=DeviceId{ TDeviceType, 0 }) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline static |
| fromPretrainedImpl(const std::filesystem::path &path, const LlamaModelConfig &model_config, DeviceId device_id) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private static |
| generate(const std::vector< int32_t > &prompt_tokens, size_t max_new_tokens=64, float temperature=1.0f, int top_k=0) | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline |
| generateStreaming(const std::vector< int32_t > &prompt_tokens, std::function< void(int32_t)> on_token, size_t max_new_tokens=64, float temperature=1.0f, int top_k=0, std::stop_token stop={}) | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline |
| getConfig() const noexcept | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline |
| getDeviceId() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| getLanguageNetwork() noexcept | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline protected |
| getLanguageNetwork() const noexcept | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline protected |
| getLastGenerationStatistics() const noexcept | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline |
| getMemoryStats() const | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| getRuntimeMode() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| isEval() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| isInferenceMode() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| isTrainingMode() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| LanguageModel(const LanguageModel &)=delete | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | |
| LanguageModel(LanguageModel &&)=default | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | |
| LanguageModel(std::unique_ptr< LanguageNetwork< TDeviceType, TPrecision > > network, RuntimeMode runtime_mode) | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline explicit protected |
| last_generation_statistics_ | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | protected |
| LlamaModel(const LlamaModel &)=delete | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| LlamaModel(LlamaModel &&)=default | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| LlamaModel(std::unique_ptr< LanguageNetwork< TDeviceType, TPrecision > > network, const LlamaConfig &config, RuntimeMode runtime_mode) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline explicit private |
| logits_staging_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private |
| makeTokenTensor(const std::vector< int32_t > &token_ids) const | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private |
| maxSequenceLength() const noexcept override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline protected virtual |
| Model(const Model &)=delete | Mila::Dnn::Model< TDeviceType, TPrecision > | |
| Model(Model &&)=default | Mila::Dnn::Model< TDeviceType, TPrecision > | |
| Model(std::unique_ptr< NetworkType > network, RuntimeMode runtime_mode) | Mila::Dnn::Model< TDeviceType, TPrecision > | inline explicit protected |
| ModelBase typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| MR typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| network_ | Mila::Dnn::Model< TDeviceType, TPrecision > | protected |
| NetworkType typedef | Mila::Dnn::Model< TDeviceType, TPrecision > | |
| onGenerating(const std::vector< int32_t > &prompt_tokens, const std::function< void(int32_t)> &on_token, size_t max_new_tokens, float temperature, int top_k, std::stop_token stop) override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline protected virtual |
| onTraining() override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline protected virtual |
| operator=(const LlamaModel &)=delete | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| operator=(LlamaModel &&)=default | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| Mila::Dnn::LanguageModel::operator=(const LanguageModel &)=delete | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | |
| Mila::Dnn::LanguageModel::operator=(LanguageModel &&)=default | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | |
| Mila::Dnn::Model::operator=(const Model &)=delete | Mila::Dnn::Model< TDeviceType, TPrecision > | |
| Mila::Dnn::Model::operator=(Model &&)=default | Mila::Dnn::Model< TDeviceType, TPrecision > | |
| profilePrefill(const std::vector< int32_t > &token_ids) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline |
| runtime_mode_ | Mila::Dnn::Model< TDeviceType, TPrecision > | private |
| sampleFromLogits(const TensorType &logits, int64_t position, float temperature, int top_k, std::mt19937 &rng) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private |
| sampleToken(const float *logits, size_t vocab_size, float temperature, int top_k, std::mt19937 &rng) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private static |
| setEval(bool eval) | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| StagingMR typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| stopTokens() const override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private virtual |
| TensorType typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| TokenIndexType typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| toString() const override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline virtual |
| train() | Mila::Dnn::Model< TDeviceType, TPrecision > | inline |
| truncateIfNeeded(std::vector< int32_t > &tokens) const | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline private |
| vocabSize() const noexcept override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline protected virtual |
| ~LanguageModel()=default | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | virtual |
| ~LlamaModel()=default | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | |
| ~Model()=default | Mila::Dnn::Model< TDeviceType, TPrecision > | virtual |