|
| | TokenSequenceLoader (const std::filesystem::path &tokens_file, int64_t batch_size, int64_t seq_length, bool is_training, DeviceId device, const TokenSequenceLoaderConfig &config=TokenSequenceLoaderConfig()) |
| | Constructs a streaming autoregressive sequence loader.
|
| | TokenSequenceLoader (const TokenSequenceLoader &)=delete |
| | TokenSequenceLoader (TokenSequenceLoader &&)=delete |
| | ~TokenSequenceLoader () noexcept |
| void | allocateBuffers () |
| void | cleanupBuffers () noexcept |
| void | fillBatch (const TokenId *window_buffer, size_t batch_idx, HostType *input_dest, HostType *target_dest) |
| | Fills a batch from the current window buffer.
|
| void | initializeDataset () |
| const TensorType & | inputs () const override |
| | Provides immutable access to the input tensor for the current batch.
|
| TensorType & | inputs () override |
| | Provides mutable access to the input tensor for the current batch.
|
| void | loadWindowFromFile (std::ifstream &file, TokenId *buffer, size_t window_idx) |
| | Loads a window from the token file.
|
| void | nextBatch () override |
| | Loads the next batch of data from the dataset.
|
| int64_t | numBatches () const override |
| | Returns the total number of batches in the dataset.
|
| size_t | numTokens () const |
| size_t | numWindows () const |
| TokenSequenceLoader & | operator= (const TokenSequenceLoader &)=delete |
| TokenSequenceLoader & | operator= (TokenSequenceLoader &&)=delete |
| void | prepareSequenceIndices () |
| void | producerThreadFunc () noexcept |
| | Producer thread: streams windows from disk and fills batches.
|
| void | reset () override |
| | Resets the loader to the beginning of the dataset.
|
| int64_t | sequenceLength () const |
| void | shuffleSequenceIndices () |
| void | swapBuffers () noexcept |
| const TensorType & | targets () const override |
| | Provides immutable access to the target tensor for the current batch.
|
| TensorType & | targets () override |
| | Provides mutable access to the target tensor for the current batch.
|
| static DeviceId | validateDeviceId (DeviceId device) |
| size_t | windowSizeTokens () const |