Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Mila::Data::BpeTokenizer Member List

This is the complete list of members for Mila::Data::BpeTokenizer, including all inherited members.

BpeTokenizer(BpeVocabulary vocab) | Mila::Data::BpeTokenizer | inline, explicit
decode(std::span< const TokenId > tokens) override | Mila::Data::BpeTokenizer | inline, virtual
decodeToken(const std::string &token, std::string &out) | Mila::Data::BpeTokenizer | inline, private
encode(const std::string &text) override | Mila::Data::BpeTokenizer | inline, virtual
encodeSegment(const std::string &text, std::vector< TokenId > &out) | Mila::Data::BpeTokenizer | inline, private
encodeSegmentBpe(const std::vector< std::string > &words, std::vector< TokenId > &out) | Mila::Data::BpeTokenizer | inline, private
encodeSegmentMaxMunch(const std::vector< std::string > &words, std::vector< TokenId > &out) | Mila::Data::BpeTokenizer | inline, private
getBosTokenId() const override | Mila::Data::BpeTokenizer | inline, virtual
getEosTokenId() const override | Mila::Data::BpeTokenizer | inline, virtual
getPadTokenId() const override | Mila::Data::BpeTokenizer | inline, virtual
getVocab() const | Mila::Data::BpeTokenizer | inline
getVocabSize() const override | Mila::Data::BpeTokenizer | inline, virtual
initializePreTokenization() | Mila::Data::BpeTokenizer | inline, private
isValidToken(TokenId tokenId) const override | Mila::Data::BpeTokenizer | inline, virtual
load(const std::filesystem::path &path) | Mila::Data::BpeTokenizer | inline, static
loadGpt2(const std::filesystem::path &path) | Mila::Data::BpeTokenizer | inline, static
loadLlama32(const std::filesystem::path &path) | Mila::Data::BpeTokenizer | inline, static
loadMistral(const std::filesystem::path &vocab_path, const std::filesystem::path &merges_path) | Mila::Data::BpeTokenizer | inline, static
pre_tokenization_regex_ | Mila::Data::BpeTokenizer | private
preTokenize(const std::string &text) | Mila::Data::BpeTokenizer | inline, private
tokenToString(TokenId tokenId) const override | Mila::Data::BpeTokenizer | inline, virtual
utf8CharLength(unsigned char first_byte) | Mila::Data::BpeTokenizer | inline, private, static
vocab_ | Mila::Data::BpeTokenizer | private
~Tokenizer()=default | Mila::Data::Tokenizer | virtual