12 #include <cuda_runtime.h>
32 float* d_partial_sums,
37 cudaStream_t stream );
48 float* d_partial_means,
53 cudaStream_t stream );
75 cudaStream_t stream );
90 cudaStream_t stream );
Definition Math.Elementwise.h:16
void launch_max_reduction_kernel(const T *src, T *d_partial_maxes, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch max reduction kernel producing per-block partial maxima.
void launch_mean_reduction_kernel(const T *src, float *d_partial_means, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch mean reduction kernel producing per-block partial sums (float).
void launch_sum_reduction_kernel(const T *src, float *d_partial_sums, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch sum reduction kernel producing per-block partial sums (float).
void launch_min_reduction_kernel(const T *src, T *d_partial_mins, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch min reduction kernel producing per-block partial minima.