|
template<TensorDataType TSrcDataType, typename TSrcMemoryResource, TensorDataType TDstDataType, typename TDstMemoryResource>
requires isValidTensor<TSrcDataType, TSrcMemoryResource> && isValidTensor<TDstDataType, TDstMemoryResource> |
| static void | copy (const Tensor< TSrcDataType, TSrcMemoryResource > &src, Tensor< TDstDataType, TDstMemoryResource > &dst, IExecutionContext *exec_context=nullptr) |
| | Copies tensor data from a source tensor to a destination tensor, optionally using a caller-supplied ExecutionContext.
|
template<TensorDataType TDstDataType, typename TDstMemoryResource>
requires isValidTensor<TDstDataType, TDstMemoryResource> |
| static void | copyFromBlob (const Serialization::ITensorBlob &blob, Tensor< TDstDataType, TDstMemoryResource > &dst, IExecutionContext *exec_context=nullptr) |
template<TensorDataType TSrcDataType, TensorDataType TDstDataType, typename TDstMemoryResource>
requires isValidTensor<TDstDataType, TDstMemoryResource> |
| static void | copyFromBlobWithConversion (const Serialization::ITensorBlob &blob, Tensor< TDstDataType, TDstMemoryResource > &dst, IExecutionContext *exec_context=nullptr) |
| | Copies a blob into a CUDA device tensor with element-wise type conversion.
|
|
| template<TensorDataType TDataType> |
| static void | copyDeviceToDevice (const void *src_data, void *dst_data, size_t count, cudaStream_t stream, int device_id) |
| template<TensorDataType TSrcDataType, TensorDataType TDstDataType> |
| static void | copyDeviceToDeviceWithConversion (const void *src_data, void *dst_data, size_t count, cudaStream_t stream, int device_id) |
| template<TensorDataType TDataType> |
| static void | copyDeviceToHost (const void *src_data, void *dst_data, size_t count, cudaStream_t stream, int device_id) |
| template<TensorDataType TSrcDataType, TensorDataType TDstDataType> |
| static void | copyDeviceToHostWithConversion (const void *src_data, void *dst_data, size_t count, cudaStream_t stream, int device_id) |
| template<TensorDataType TDataType> |
| static void | copyHostToDevice (const void *src_data, void *dst_data, size_t count, cudaStream_t stream, int device_id) |
| template<TensorDataType TSrcDataType, TensorDataType TDstDataType> |
| static void | copyHostToDeviceWithConversion (const void *src_data, void *dst_data, size_t count, cudaStream_t stream, int device_id) |
| template<TensorDataType TDataType> |
| static void | copyHostToHost (const void *src_data, void *dst_data, size_t count) |
| template<TensorDataType TSrcDataType, TensorDataType TDstDataType> |
| static void | copyHostToHostWithConversion (const void *src_data, void *dst_data, size_t count) |
template<TensorDataType TDataType, typename TMemoryResource>
requires isValidTensor<TDataType, TMemoryResource> |
| static const void * | getDataPointer (const Tensor< TDataType, TMemoryResource > &tensor) |
| | Gets the raw data pointer from a tensor.
|
CUDA specialization of TensorOps for tensor transfer operations.
Provides CUDA-specific implementations of tensor transfer operations, with automatic optimization based on memory types and optional type conversion. The ExecutionContext is borrowed through a raw pointer, with zero overhead, for stream control.
Key features:
- Automatic transfer direction detection (H2D, D2H, D2D, H2H)
- Optional type conversion during transfer using CUDA kernels
- Stream-based asynchronous execution
- Zero-overhead ExecutionContext borrowing (raw pointer)
- Automatic fallback to the default stream with explicit synchronization
- Memory-efficient staging for host-device conversions