Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
CublasLtPlanCache.ixx File Reference
#include <cuda_runtime.h>
#include <cublasLt.h>
#include <vector>
#include <unordered_map>
#include <algorithm>
#include <string>
#include <format>
#include <functional>
#include <stdexcept>
import Logging.Logger;

Classes

class  Mila::Dnn::Compute::Cuda::CublasLtPlanCache< TPlan >
 Generic plan cache keyed on batch-size bucket. More...

Namespaces

namespace  Mila
 Mila main API namespace.
namespace  Mila::Dnn
namespace  Mila::Dnn::Compute
namespace  Mila::Dnn::Compute::Cuda

Functions

std::vector< int > Mila::Dnn::Compute::Cuda::computeArchitectureBuckets (int max_batch_size)
 Computes optimal bucket boundaries for cuBLASLt plan caching based on CUDA device architecture.
int Mila::Dnn::Compute::Cuda::getBucket (const std::vector< int > &buckets, int batch_size)
 Fast O(log N) bucket lookup.