Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Mila::Dnn::LlamaModel< TDeviceType, TPrecision > Member List

This is the complete list of members for Mila::Dnn::LlamaModel< TDeviceType, TPrecision >, including all inherited members.

Base typedef | Mila::Dnn::LanguageModel< TDeviceType, TPrecision >
config_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private
configFromMetadata(const PretrainedMetadata &metadata) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private, static
decode_token_device_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private
decode_token_staging_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private
ensureTrainingMode(const char *method) const | Mila::Dnn::Model< TDeviceType, TPrecision > | inline, private
eosToken() const noexcept override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private, virtual
fromPretrained(const std::filesystem::path &path, const LlamaModelConfig &model_config, DeviceId device_id=DeviceId{ TDeviceType, 0 }) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, static
fromPretrainedImpl(const std::filesystem::path &path, const LlamaModelConfig &model_config, DeviceId device_id) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private, static
generate(const std::vector< int32_t > &prompt_tokens, size_t max_new_tokens=64, float temperature=1.0f, int top_k=0) | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline
generateStreaming(const std::vector< int32_t > &prompt_tokens, std::function< void(int32_t)> on_token, size_t max_new_tokens=64, float temperature=1.0f, int top_k=0, std::stop_token stop={}) | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline
getConfig() const noexcept | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline
getDeviceId() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
getLanguageNetwork() noexcept | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline, protected
getLanguageNetwork() const noexcept | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline, protected
getLastGenerationStatistics() const noexcept | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline
getMemoryStats() const | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
getRuntimeMode() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
isEval() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
isInferenceMode() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
isTrainingMode() const noexcept | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
LanguageModel(const LanguageModel &)=delete | Mila::Dnn::LanguageModel< TDeviceType, TPrecision >
LanguageModel(LanguageModel &&)=default | Mila::Dnn::LanguageModel< TDeviceType, TPrecision >
LanguageModel(std::unique_ptr< LanguageNetwork< TDeviceType, TPrecision > > network, RuntimeMode runtime_mode) | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | inline, explicit, protected
last_generation_statistics_ | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | protected
LlamaModel(const LlamaModel &)=delete | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
LlamaModel(LlamaModel &&)=default | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
LlamaModel(std::unique_ptr< LanguageNetwork< TDeviceType, TPrecision > > network, const LlamaConfig &config, RuntimeMode runtime_mode) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, explicit, private
logits_staging_ | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | private
makeTokenTensor(const std::vector< int32_t > &token_ids) const | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private
maxSequenceLength() const noexcept override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, protected, virtual
Model(const Model &)=delete | Mila::Dnn::Model< TDeviceType, TPrecision >
Model(Model &&)=default | Mila::Dnn::Model< TDeviceType, TPrecision >
Model(std::unique_ptr< NetworkType > network, RuntimeMode runtime_mode) | Mila::Dnn::Model< TDeviceType, TPrecision > | inline, explicit, protected
ModelBase typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
MR typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
network_ | Mila::Dnn::Model< TDeviceType, TPrecision > | protected
NetworkType typedef | Mila::Dnn::Model< TDeviceType, TPrecision >
onGenerating(const std::vector< int32_t > &prompt_tokens, const std::function< void(int32_t)> &on_token, size_t max_new_tokens, float temperature, int top_k, std::stop_token stop) override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, protected, virtual
onTraining() override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, protected, virtual
operator=(const LlamaModel &)=delete | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
operator=(LlamaModel &&)=default | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
Mila::Dnn::LanguageModel::operator=(const LanguageModel &)=delete | Mila::Dnn::LanguageModel< TDeviceType, TPrecision >
Mila::Dnn::LanguageModel::operator=(LanguageModel &&)=default | Mila::Dnn::LanguageModel< TDeviceType, TPrecision >
Mila::Dnn::Model::operator=(const Model &)=delete | Mila::Dnn::Model< TDeviceType, TPrecision >
Mila::Dnn::Model::operator=(Model &&)=default | Mila::Dnn::Model< TDeviceType, TPrecision >
profilePrefill(const std::vector< int32_t > &token_ids) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline
runtime_mode_ | Mila::Dnn::Model< TDeviceType, TPrecision > | private
sampleFromLogits(const TensorType &logits, int64_t position, float temperature, int top_k, std::mt19937 &rng) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private
sampleToken(const float *logits, size_t vocab_size, float temperature, int top_k, std::mt19937 &rng) | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private, static
setEval(bool eval) | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
StagingMR typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
stopTokens() const override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private, virtual
TensorType typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
TokenIndexType typedef | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
toString() const override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, virtual
train() | Mila::Dnn::Model< TDeviceType, TPrecision > | inline
truncateIfNeeded(std::vector< int32_t > &tokens) const | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, private
vocabSize() const noexcept override | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > | inline, protected, virtual
~LanguageModel()=default | Mila::Dnn::LanguageModel< TDeviceType, TPrecision > | virtual
~LlamaModel()=default | Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
~Model()=default | Mila::Dnn::Model< TDeviceType, TPrecision > | virtual