Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Transfer.Copy.h
Go to the documentation of this file.
1#pragma once
2
3#include <cuda_runtime.h>
4#include <cstddef>
5
7{
/**
 * @brief Launch type-converting copy kernel between tensors.
 *
 * Declaration only: no definition is visible in this listing, so the
 * template must be defined or explicitly instantiated elsewhere.
 *
 * @tparam SrcT Element type read through @p d_src.
 * @tparam DstT Element type written through @p d_or_h_dst.
 *
 * @param d_src      Read-only source pointer.
 *                   NOTE(review): the `d_` prefix suggests device memory - confirm.
 * @param d_or_h_dst Destination pointer.
 *                   NOTE(review): the name suggests either device or host memory
 *                   is accepted - confirm which kinds of host allocation are valid.
 * @param n          NOTE(review): presumably the number of elements (not bytes) - confirm.
 * @param stream     CUDA stream passed to the launcher.
 *                   NOTE(review): presumably the kernel is enqueued on this stream and
 *                   the call may return before the copy completes - confirm.
 */
21 template <typename SrcT, typename DstT>
22 void launch_convert_copy_kernel( const SrcT* d_src, DstT* d_or_h_dst, size_t n, cudaStream_t stream );
23
/**
 * @brief Launch optimized same-type copy kernel.
 *
 * Declaration only: no definition is visible in this listing, so the
 * template must be defined or explicitly instantiated elsewhere.
 *
 * @tparam T Element type shared by source and destination.
 *
 * @param d_src  Read-only source pointer.
 *               NOTE(review): the `d_` prefix suggests device memory - confirm.
 * @param d_dst  Destination pointer.
 *               NOTE(review): the `d_` prefix suggests device memory - confirm.
 * @param n      NOTE(review): presumably the number of elements (not bytes) - confirm.
 * @param stream CUDA stream passed to the launcher.
 *               NOTE(review): presumably the kernel is enqueued on this stream and
 *               the call may return before the copy completes - confirm.
 */
36 template <typename T>
37 void launch_fast_copy_kernel( const T* d_src, T* d_dst, size_t n, cudaStream_t stream );
38}
Definition CublasLt.Utils.ixx:15
void launch_convert_copy_kernel(const SrcT *d_src, DstT *d_or_h_dst, size_t n, cudaStream_t stream)
Launch type-converting copy kernel between tensors.
void launch_fast_copy_kernel(const T *d_src, T *d_dst, size_t n, cudaStream_t stream)
Launch optimized same-type copy kernel.