|
Mila 0.13.48
Deep Neural Network Library
|
LLaMA inference model. More...
#include <memory>
#include <vector>
#include <unordered_set>
#include <string>
#include <sstream>
#include <cstdint>
#include <stdexcept>
#include <filesystem>
#include <format>
#include <random>
#include <chrono>
#include <algorithm>
#include <numeric>
#include <functional>
#include <stop_token>
#include <cstring>
#include <type_traits>
import Compute.ExecutionContextFactory;
import Compute.CpuMemoryResource;
import Compute.DeviceTypeTraits;
import Compute.DeviceId;
import Dnn.RuntimeMode;
import Dnn.Components.LlamaTransformer;
import Dnn.Models.LlamaModelConfig;
import Dnn.LanguageNetwork;
import Serialization.Mode;
import Dnn.TensorDataType;
import Serialization.PretrainedReader;
import Dnn.LanguageModel;
import Logging.Logger;
import Compute.DeviceTypeTraits.Cpu;
import Dnn.LanguageModelConfig;
import Dnn.TensorTypes;
import Dnn.Quantization.KvCache.QuantPolicy;
import Compute.DeviceType;
import Dnn.Quantization.Weight.Policies;
import Dnn.Quantization.KvCache.Policy;
import Compute.Device;
import Dnn.Tensor;
import Dnn.ITensor;
import Dnn.TensorDataTypeTraits;
import Dnn.Component;
Classes | |
| class | Mila::Dnn::LlamaModel< TDeviceType, TPrecision > |
| LLaMA 3-compatible inference model. More... | |
Namespaces | |
| namespace | Mila |
| Mila main API namespace. | |
| namespace | Mila::Dnn |
LLaMA inference model.
Inference-only wrapper around a loaded LlamaTransformer network.