Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Mila::Data::BpeVocabulary Member List

This is the complete list of members for Mila::Data::BpeVocabulary, including all inherited members.

addSpecialToken(const std::string &token, TokenId id) | Mila::Data::BpeVocabulary | private
addSpecialTokensFromConfig() | Mila::Data::BpeVocabulary | private
applyMergeAndUpdateCounts(std::vector< std::vector< std::string > > &corpus, const std::string &left, const std::string &right, const std::string &merged, std::unordered_map< std::pair< std::string, std::string >, size_t, PairHash > &pair_counts) | Mila::Data::BpeVocabulary | private
BpeVocabulary()=delete | Mila::Data::BpeVocabulary
BpeVocabulary(const BpeVocabularyConfig &config) | Mila::Data::BpeVocabulary | inline, explicit, private
buildFromText(const std::string &corpus) | Mila::Data::BpeVocabulary | private
buildMergeMap() | Mila::Data::BpeVocabulary | private
buildSpecialTokenList() | Mila::Data::BpeVocabulary | private
config_ | Mila::Data::BpeVocabulary | private
convertToTokenSequences(const std::vector< std::string > &words) | Mila::Data::BpeVocabulary | private
countPairs(const std::vector< std::vector< std::string > > &corpus) const | Mila::Data::BpeVocabulary | private
current_id_ | Mila::Data::BpeVocabulary | private
getByteDecoder() | Mila::Data::BpeVocabulary | static
getByteEncoder() | Mila::Data::BpeVocabulary | static
getConfig() const | Mila::Data::BpeVocabulary | inline
getMergePriority(const std::string &left, const std::string &right) const | Mila::Data::BpeVocabulary | inline
getMergeRules() const | Mila::Data::BpeVocabulary | inline
getMostFrequentPair(const std::unordered_map< std::pair< std::string, std::string >, size_t, PairHash > &counts) const | Mila::Data::BpeVocabulary | private
getSize() const override | Mila::Data::BpeVocabulary | inline, virtual
getSpecialTokenId(const std::string &token_str) const | Mila::Data::BpeVocabulary | inline
getSpecialTokenList() const | Mila::Data::BpeVocabulary | inline
id_to_token_ | Mila::Data::BpeVocabulary | private
idToToken(TokenId id) const override | Mila::Data::BpeVocabulary | inline, virtual
initializeBaseVocabulary() | Mila::Data::BpeVocabulary | private
isByteLevel() const | Mila::Data::BpeVocabulary | inline
load(const fs::path &path) | Mila::Data::BpeVocabulary | inline, static
loadContent(std::istream &file) | Mila::Data::BpeVocabulary | private
loadGpt2(const fs::path &tokenizer_path) | Mila::Data::BpeVocabulary | static
loadLlama32(const fs::path &path) | Mila::Data::BpeVocabulary | static
loadMistral(const fs::path &vocab_path, const fs::path &merges_path) | Mila::Data::BpeVocabulary | static
logTrainingComplete(std::chrono::steady_clock::time_point start_time) | Mila::Data::BpeVocabulary | private
merge_map_ | Mila::Data::BpeVocabulary | private
merges_ | Mila::Data::BpeVocabulary | private
preTokenize(const std::string &text) const | Mila::Data::BpeVocabulary | private
preTokenizeCorpus(const std::string &text) | Mila::Data::BpeVocabulary | private
runBpeMergeLoop(std::vector< std::vector< std::string > > &corpus_tokens, std::chrono::steady_clock::time_point start_time) | Mila::Data::BpeVocabulary | private
save(const fs::path &path) const override | Mila::Data::BpeVocabulary | inline
Mila::Data::TokenizerVocabulary::save(const std::filesystem::path &path) const =0 | Mila::Data::TokenizerVocabulary | pure virtual
saveContent(std::ostream &file) const | Mila::Data::BpeVocabulary | private
special_token_ids_ | Mila::Data::BpeVocabulary | private
special_token_list_ | Mila::Data::BpeVocabulary | private
token_to_id_ | Mila::Data::BpeVocabulary | private
tokenToId(const std::string &token) const override | Mila::Data::BpeVocabulary | inline, virtual
train(const std::string &corpus, const BpeVocabularyConfig &config) | Mila::Data::BpeVocabulary | inline, static
trainFromFile(const fs::path &corpus_path, const BpeVocabularyConfig &config) | Mila::Data::BpeVocabulary | inline, static
~TokenizerVocabulary()=default | Mila::Data::TokenizerVocabulary | virtual