Mila 0.13.48
Deep Neural Network Library
Loading...
Searching...
No Matches
LlamaModel.ixx File Reference

LLaMA inference model. More...

#include <memory>
#include <vector>
#include <unordered_set>
#include <string>
#include <sstream>
#include <cstdint>
#include <stdexcept>
#include <filesystem>
#include <format>
#include <random>
#include <chrono>
#include <algorithm>
#include <numeric>
#include <functional>
#include <stop_token>
#include <cstring>
#include <type_traits>
import Compute.ExecutionContextFactory;
import Compute.CpuMemoryResource;
import Compute.DeviceTypeTraits;
import Compute.DeviceId;
import Dnn.RuntimeMode;
import Dnn.Components.LlamaTransformer;
import Dnn.Models.LlamaModelConfig;
import Dnn.LanguageNetwork;
import Serialization.Mode;
import Dnn.TensorDataType;
import Serialization.PretrainedReader;
import Dnn.LanguageModel;
import Logging.Logger;
import Compute.DeviceTypeTraits.Cpu;
import Dnn.LanguageModelConfig;
import Dnn.TensorTypes;
import Dnn.Quantization.KvCache.QuantPolicy;
import Compute.DeviceType;
import Dnn.Quantization.Weight.Policies;
import Dnn.Quantization.KvCache.Policy;
import Compute.Device;
import Dnn.Tensor;
import Dnn.ITensor;
import Dnn.TensorDataTypeTraits;
import Dnn.Component;

Classes

class  Mila::Dnn::LlamaModel< TDeviceType, TPrecision >
 LLaMA 3-compatible inference model. More...

Namespaces

namespace  Mila
 Mila main API namespace.
namespace  Mila::Dnn

Detailed Description

LLaMA inference model.

Inference-only wrapper around a loaded LlamaTransformer network.