CUDA implementation of the Lpe (token + positional embedding) operation.

#include <cuda_fp16.h>
#include <string>
#include <stdexcept>
#include <cstdint>
import Compute.OperationRegistrarHelpers;
import Compute.OperationType;
import Compute.CudaLpeOp:Dispatch;
import Dnn.Components.LpeConfig;
import Dnn.TensorDataTypeTraits;
import Dnn.ITensor;
import Compute.CudaTensorDataType;
import Compute.UnaryOperation;
import Dnn.TensorTypes;
import Compute.CudaDeviceMemoryResource;
import Dnn.TensorDataType;
import Dnn.Tensor;
import Compute.DeviceType;
import Compute.IPositionalDecode;
import Dnn.Component;
import Compute.IExecutionContext;
import Compute.ExecutionContext;

Classes
class	Mila::Dnn::Compute::Cuda::Lpe::CudaLpeOp< TInput, TPrecision >
	CUDA implementation of the Lpe (token + positional embedding) operation.
class	Mila::Dnn::Compute::Cuda::Lpe::CudaLpeOpRegistrar

Namespaces
namespace	Mila
	Mila main API namespace.
namespace	Mila::Dnn
namespace	Mila::Dnn::Compute
namespace	Mila::Dnn::Compute::Cuda
namespace	Mila::Dnn::Compute::Cuda::Lpe

Detailed Description

CUDA implementation of the Lpe (token + positional embedding) operation.

Supports full-sequence forward/backward passes and a position-aware single-token decode pass via IPositionalDecode.

Classes

Namespaces

Detailed Description