Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
Math.Reduction.h
Go to the documentation of this file.
1
9
10#pragma once
11
12#include <cuda_runtime.h>
13#include <cstddef>
14
16{
29 template<typename T>
30 void launch_sum_reduction_kernel(
31 const T* src,
32 float* d_partial_sums,
33 size_t count,
34 int grid,
35 int block,
36 size_t shared_bytes,
37 cudaStream_t stream );
38
45 template<typename T>
46 void launch_mean_reduction_kernel(
47 const T* src,
48 float* d_partial_means,
49 size_t count,
50 int grid,
51 int block,
52 size_t shared_bytes,
53 cudaStream_t stream );
54
67 template<typename T>
68 void launch_max_reduction_kernel(
69 const T* src,
70 T* d_partial_maxes,
71 size_t count,
72 int grid,
73 int block,
74 size_t shared_bytes,
75 cudaStream_t stream );
76
82 template<typename T>
83 void launch_min_reduction_kernel(
84 const T* src,
85 T* d_partial_mins,
86 size_t count,
87 int grid,
88 int block,
89 size_t shared_bytes,
90 cudaStream_t stream );
91}
Definition Math.Elementwise.h:16
void launch_max_reduction_kernel(const T *src, T *d_partial_maxes, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch max reduction kernel producing per-block partial maxima.
void launch_mean_reduction_kernel(const T *src, float *d_partial_means, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch mean reduction kernel producing per-block partial sums (float).
void launch_sum_reduction_kernel(const T *src, float *d_partial_sums, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch sum reduction kernel producing per-block partial sums (float).
void launch_min_reduction_kernel(const T *src, T *d_partial_mins, size_t count, int grid, int block, size_t shared_bytes, cudaStream_t stream)
Launch min reduction kernel producing per-block partial minima.