This is the complete list of members for Mila::Data::BpeVocabulary, including all inherited members.

addSpecialToken(const std::string &token, TokenId id)	Mila::Data::BpeVocabulary	private
addSpecialTokensFromConfig()	Mila::Data::BpeVocabulary	private
applyMergeAndUpdateCounts(std::vector< std::vector< std::string > > &corpus, const std::string &left, const std::string &right, const std::string &merged, std::unordered_map< std::pair< std::string, std::string >, size_t, PairHash > &pair_counts)	Mila::Data::BpeVocabulary	private
BpeVocabulary()=delete	Mila::Data::BpeVocabulary
BpeVocabulary(const BpeVocabularyConfig &config)	Mila::Data::BpeVocabulary	inline explicit private
buildFromText(const std::string &corpus)	Mila::Data::BpeVocabulary	private
buildMergeMap()	Mila::Data::BpeVocabulary	private
buildSpecialTokenList()	Mila::Data::BpeVocabulary	private
config_	Mila::Data::BpeVocabulary	private
convertToTokenSequences(const std::vector< std::string > &words)	Mila::Data::BpeVocabulary	private
countPairs(const std::vector< std::vector< std::string > > &corpus) const	Mila::Data::BpeVocabulary	private
current_id_	Mila::Data::BpeVocabulary	private
getByteDecoder()	Mila::Data::BpeVocabulary	static
getByteEncoder()	Mila::Data::BpeVocabulary	static
getConfig() const	Mila::Data::BpeVocabulary	inline
getMergePriority(const std::string &left, const std::string &right) const	Mila::Data::BpeVocabulary	inline
getMergeRules() const	Mila::Data::BpeVocabulary	inline
getMostFrequentPair(const std::unordered_map< std::pair< std::string, std::string >, size_t, PairHash > &counts) const	Mila::Data::BpeVocabulary	private
getSize() const override	Mila::Data::BpeVocabulary	inline virtual
getSpecialTokenId(const std::string &token_str) const	Mila::Data::BpeVocabulary	inline
getSpecialTokenList() const	Mila::Data::BpeVocabulary	inline
id_to_token_	Mila::Data::BpeVocabulary	private
idToToken(TokenId id) const override	Mila::Data::BpeVocabulary	inline virtual
initializeBaseVocabulary()	Mila::Data::BpeVocabulary	private
isByteLevel() const	Mila::Data::BpeVocabulary	inline
load(const fs::path &path)	Mila::Data::BpeVocabulary	inline static
loadContent(std::istream &file)	Mila::Data::BpeVocabulary	private
loadGpt2(const fs::path &tokenizer_path)	Mila::Data::BpeVocabulary	static
loadLlama32(const fs::path &path)	Mila::Data::BpeVocabulary	static
loadMistral(const fs::path &vocab_path, const fs::path &merges_path)	Mila::Data::BpeVocabulary	static
logTrainingComplete(std::chrono::steady_clock::time_point start_time)	Mila::Data::BpeVocabulary	private
merge_map_	Mila::Data::BpeVocabulary	private
merges_	Mila::Data::BpeVocabulary	private
preTokenize(const std::string &text) const	Mila::Data::BpeVocabulary	private
preTokenizeCorpus(const std::string &text)	Mila::Data::BpeVocabulary	private
runBpeMergeLoop(std::vector< std::vector< std::string > > &corpus_tokens, std::chrono::steady_clock::time_point start_time)	Mila::Data::BpeVocabulary	private
save(const fs::path &path) const override	Mila::Data::BpeVocabulary	inline
Mila::Data::TokenizerVocabulary::save(const std::filesystem::path &path) const =0	Mila::Data::TokenizerVocabulary	pure virtual
saveContent(std::ostream &file) const	Mila::Data::BpeVocabulary	private
special_token_ids_	Mila::Data::BpeVocabulary	private
special_token_list_	Mila::Data::BpeVocabulary	private
token_to_id_	Mila::Data::BpeVocabulary	private
tokenToId(const std::string &token) const override	Mila::Data::BpeVocabulary	inline virtual
train(const std::string &corpus, const BpeVocabularyConfig &config)	Mila::Data::BpeVocabulary	inline static
trainFromFile(const fs::path &corpus_path, const BpeVocabularyConfig &config)	Mila::Data::BpeVocabulary	inline static
~TokenizerVocabulary()=default	Mila::Data::TokenizerVocabulary	virtual