|
Mila 0.13.48
Deep Neural Network Library
|
#include <cuda_runtime.h>
#include <cublasLt.h>
#include <vector>
#include <unordered_map>
#include <algorithm>
#include <string>
#include <format>
#include <functional>
#include <stdexcept>
import Logging.Logger;
Classes | |
| class | Mila::Dnn::Compute::Cuda::CublasLtPlanCache< TPlan > |
| Generic plan cache keyed on batch size bucket. More... | |
Namespaces | |
| namespace | Mila |
| Mila main API namespace. | |
| namespace | Mila::Dnn |
| namespace | Mila::Dnn::Compute |
| namespace | Mila::Dnn::Compute::Cuda |
Functions | |
| std::vector< int > | Mila::Dnn::Compute::Cuda::computeArchitectureBuckets (int max_batch_size) |
| Computes optimal bucket boundaries for cuBLASLt plan caching based on CUDA device architecture. | |
| int | Mila::Dnn::Compute::Cuda::getBucket (const std::vector< int > &buckets, int batch_size) |
| Fast O(log N) bucket lookup. | |