Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Math.Elementwise.h File Reference

CUDA kernel declarations for element-wise tensor mathematical operations. More...

#include <cuda_runtime.h>
#include <cstddef>

Go to the source code of this file.

Namespaces

namespace  Mila
 Mila main API namespace.
namespace  Mila::Dnn
namespace  Mila::Dnn::Compute
namespace  Mila::Dnn::Compute::Cuda
namespace  Mila::Dnn::Compute::Cuda::Kernels

Functions

template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_abs_kernel (const T *src, T *dst, size_t n, cudaStream_t stream)
 Launch absolute value: dst = abs(src).
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_add_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise tensor addition: dst = src1 + src2.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_divide_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise tensor division: dst = src1 / src2.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_equal_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise equality comparison: dst receives, as a value of type T, a true/false indicator of (src1 == src2) for each element.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_greater_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise greater-than comparison: dst receives, as a value of type T, a true/false indicator of (src1 > src2) for each element.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_less_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise less-than comparison: dst receives, as a value of type T, a true/false indicator of (src1 < src2) for each element.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_max_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise maximum: dst = max(src1, src2).
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_min_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise minimum: dst = min(src1, src2).
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_multiply_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise tensor multiplication: dst = src1 * src2.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_elementwise_subtract_kernel (const T *src1, const T *src2, T *dst, size_t n, cudaStream_t stream)
 Launch element-wise tensor subtraction: dst = src1 - src2.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_negate_kernel (const T *src, T *dst, size_t n, cudaStream_t stream)
 Launch negation: dst = -src.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_scalar_add_kernel (const T *src, T *dst, T scalar, size_t n, cudaStream_t stream)
 Launch scalar addition: dst = src + scalar.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_scalar_divide_kernel (const T *src, T *dst, T scalar, size_t n, cudaStream_t stream)
 Launch scalar division: dst = src / scalar.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_scalar_multiply_kernel (const T *src, T *dst, T scalar, size_t n, cudaStream_t stream)
 Launch scalar multiplication: dst = src * scalar.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_scalar_subtract_kernel (const T *src, T *dst, T scalar, size_t n, cudaStream_t stream)
 Launch scalar subtraction: dst = src - scalar.
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_sqrt_kernel (const T *src, T *dst, size_t n, cudaStream_t stream)
 Launch square root: dst = sqrt(src).
template<typename T>
void Mila::Dnn::Compute::Cuda::Kernels::launch_square_kernel (const T *src, T *dst, size_t n, cudaStream_t stream)
 Launch square: dst = src * src.

Detailed Description

CUDA kernel declarations for element-wise tensor mathematical operations.

Provides launch function declarations for optimized CUDA kernels that perform element-wise operations on tensors, including binary arithmetic (add, subtract, multiply, divide), element-wise minimum and maximum, comparison functions (equal, greater, less), tensor-scalar arithmetic, and unary operations (abs, negate, sqrt, square). All kernels are optimized for coalesced memory access and support various data types through template specialization.