◆ ~Tokenizer()
| virtual Mila::Data::Tokenizer::~Tokenizer ( ) |
| virtual, default |
◆ decode()
| virtual std::string Mila::Data::Tokenizer::decode ( std::span< const TokenId > tokens ) |
| pure virtual |
◆ encode()
| virtual std::vector< TokenId > Mila::Data::Tokenizer::encode ( const std::string & text ) |
| pure virtual |
◆ getBosTokenId()
| virtual std::optional< TokenId > Mila::Data::Tokenizer::getBosTokenId ( ) const |
| pure virtual |
◆ getEosTokenId()
| virtual std::optional< TokenId > Mila::Data::Tokenizer::getEosTokenId ( ) const |
| pure virtual |
◆ getPadTokenId()
| virtual std::optional< TokenId > Mila::Data::Tokenizer::getPadTokenId ( ) const |
| pure virtual |
◆ getVocabSize()
| virtual size_t Mila::Data::Tokenizer::getVocabSize ( ) const |
| pure virtual |
◆ isValidToken()
| virtual bool Mila::Data::Tokenizer::isValidToken ( TokenId tokenId ) const |
| pure virtual |
◆ tokenToString()
| virtual std::string Mila::Data::Tokenizer::tokenToString ( TokenId tokenId ) const |
| pure virtual |
The documentation for this class was generated from the following file: