3 #include <cuda_runtime.h>
21 template <
typename SrcT,
typename DstT>
Definition CublasLt.Utils.ixx:15
void launch_convert_copy_kernel(const SrcT *d_src, DstT *d_or_h_dst, size_t n, cudaStream_t stream)
Launch a type-converting copy kernel between tensors.
void launch_fast_copy_kernel(const T *d_src, T *d_dst, size_t n, cudaStream_t stream)
Launch an optimized same-type copy kernel.