| backward(const TensorType &input, const TensorType &output_grad, TensorType &input_grad) const | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline |
| backward_input_plan_cache_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| backward_weight_plan_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| bias_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| bias_grad_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| build(const BuildContext &build_context) override | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, virtual |
| buildCublasLtPlans() | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, private |
| cached_cublaslt_handle_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| cached_in_features_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| cached_outer_size_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| clearGradients() noexcept | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | inline, virtual |
| compute_type_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| ComputeType typedef | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | |
| config_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| context_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| cuda_data_type_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| cuda_weight_data_type_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| CudaExecutionContext typedef | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | |
| CudaLinearOp(IExecutionContext *context, const LinearConfig &config) | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline |
| data_type | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | static |
| DataTypeTraits typedef | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | |
| device_type | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | static |
| forward(const TensorType &input, TensorType &output) const | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline |
| forward_plan_cache_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| getActivationCudaDataType() const | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, private |
| getComputeTypes(cublasComputeType_t &compute_type, cudaDataType_t &scale_type) const | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, private |
| getConfig() const | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline |
| getDataType() const | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | inline, virtual |
| getDeviceType() const | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | inline, virtual |
| getName() const override | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, virtual |
| getOperationType() const override | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, virtual |
| getStateMemorySize() const | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | inline, virtual |
| getWeightCudaDataType() const | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, private |
| is_built_ | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | protected |
| isBuilt() const | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | inline, virtual |
| isEvalMode() const | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | inline, virtual |
| kIsPerChannelQuantized | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | static |
| kIsPerGroupQuantized | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | static |
| kIsQuantized | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | static |
| kUseW8A16Gemm | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | static |
| kWeightDtype | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | static |
| MR typedef | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | |
| out_features_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| quantize(const ITensorBlob &blob, ITensor &weight_out, ITensor &scales_out, const shape_t &expected_shape) | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline |
| scale_type_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| setGradients(ITensor *weight_grad, ITensor *bias_grad) override | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, virtual |
| setParameters(ITensor *weight, ITensor *bias) override | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, virtual |
| setTrainingMode(TrainingMode training_mode) | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | inline, virtual |
| setWeightScales(ITensor *scales) | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline |
| setWeightZeroPoints(ITensor *zero_points) | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline |
| supportsCuBLASLt() const | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | inline, private |
| TensorType typedef | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | |
| training_mode_ | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | protected |
| use_cublaslt_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| use_wmma_fp4_gemm_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| weight_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| weight_grad_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| weight_group_size_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| weight_in_features_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| weight_out_features_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| weight_scales_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| weight_zero_points_ | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | private |
| WeightType typedef | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | |
| ~CudaLinearOp()=default | Mila::Dnn::Compute::Cuda::Linear::CudaLinearOp< TComputePrecision, TWeightQuant > | |
| ~Operation()=default | Mila::Dnn::Compute::Operation< DeviceType::Cuda, TComputePrecision > | virtual |