#include <cuda_runtime.h>
#include <cublasLt.h>
#include <vector>
#include <unordered_map>
#include <algorithm>
#include <string>
#include <format>
#include <functional>
#include <stdexcept>
import Logging.Logger;

Classes
class	Mila::Dnn::Compute::Cuda::CublasLtPlanCache< TPlan >
	Generic plan cache keyed on batch-size bucket.

Namespaces
namespace	Mila
	Mila main API namespace.
namespace	Mila::Dnn
namespace	Mila::Dnn::Compute
namespace	Mila::Dnn::Compute::Cuda

Functions
std::vector< int >	Mila::Dnn::Compute::Cuda::computeArchitectureBuckets (int max_batch_size)
	Computes optimal bucket boundaries for cuBLASLt plan caching based on CUDA device architecture.
int	Mila::Dnn::Compute::Cuda::getBucket (const std::vector< int > &buckets, int batch_size)
	Fast O(log N) bucket lookup.

Classes

Namespaces

Functions