using System.Text;
using LLama.Native;
namespace LLama.Abstractions;

/// <summary>
/// The parameters for initializing a LLama context from a model.
/// </summary>
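/// <example>
/// A minimal configuration sketch, assuming LLamaSharp's concrete <c>ModelParams</c>
/// implementation of this interface (the type name and model path are assumptions):
/// <code>
/// var parameters = new ModelParams("model.gguf")
/// {
///     ContextSize = 4096, // n_ctx
///     BatchSize = 512,    // n_batch
///     Seed = 1337,
///     Threads = 8,        // null would autodetect
/// };
/// </code>
/// </example>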
public interface IContextParams
{

/// <summary>
/// Model context size (n_ctx)
/// </summary>
uint? ContextSize { get; set; }

/// <summary>
/// Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)
/// </summary>
uint BatchSize { get; set; }

/// <summary>
/// Seed for the random number generator (seed)
/// </summary>
uint Seed { get; set; }

/// <summary>
/// Use f16 instead of f32 for the KV cache (memory_f16)
/// </summary>
bool UseFp16Memory { get; set; }

/// <summary>
/// Compute perplexity over the prompt (perplexity)
/// </summary>
bool Perplexity { get; set; }

/// <summary>
/// Whether to use embedding mode. (embedding) Note that if this is set to true,
/// the LLamaModel won't produce a text response anymore.
/// </summary>
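/// <example>
/// A minimal sketch of embedding extraction, assuming LLamaSharp's <c>ModelParams</c>,
/// <c>LLamaWeights</c> and <c>LLamaEmbedder</c> types (exact signatures vary between versions):
/// <code>
/// // EmbeddingMode must be true before the context is created.
/// var parameters = new ModelParams("model.gguf") { EmbeddingMode = true };
/// using var weights = LLamaWeights.LoadFromFile(parameters);
/// using var embedder = new LLamaEmbedder(weights, parameters);
/// float[] embedding = embedder.GetEmbeddings("Hello, world.");
/// </code>
/// </example>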
bool EmbeddingMode { get; set; }

/// <summary>
/// RoPE base frequency (null to fetch from the model)
/// </summary>
float? RopeFrequencyBase { get; set; }

/// <summary>
/// RoPE frequency scaling factor (null to fetch from the model)
/// </summary>
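/// <remarks>
/// As a rule of thumb for linear RoPE scaling, the factor is roughly
/// trained context / desired context: e.g. 0.5 stretches a model trained
/// on 4096 tokens to roughly 8192. (Illustrative only; output quality
/// tends to degrade as the scale moves further from 1.0.)
/// </remarks>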
float? RopeFrequencyScale { get; set; }

/// <summary>
/// Use experimental mul_mat_q kernels
/// </summary>
bool MulMatQ { get; set; }

/// <summary>
/// The encoding to use for models
/// </summary>
Encoding Encoding { get; set; }

/// <summary>
/// Number of threads (null = autodetect) (n_threads)
/// </summary>
uint? Threads { get; set; }

/// <summary>
/// Number of threads to use for batch processing (null = autodetect) (n_threads_batch)
/// </summary>
uint? BatchThreads { get; set; }

/// <summary>
/// YaRN extrapolation mix factor
/// </summary>
float? YarnExtrapolationFactor { get; set; }

/// <summary>
/// YaRN magnitude scaling factor
/// </summary>
float? YarnAttentionFactor { get; set; }

/// <summary>
/// YaRN low correction dim
/// </summary>
float? YarnBetaFast { get; set; }

/// <summary>
/// YaRN high correction dim
/// </summary>
float? YarnBetaSlow { get; set; }

/// <summary>
/// YaRN original context length
/// </summary>
uint? YarnOriginalContext { get; set; }

/// <summary>
/// YaRN scaling method to use.
/// </summary>
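/// <example>
/// A YaRN configuration sketch for running a model beyond its trained context,
/// assuming the concrete <c>ModelParams</c> implementation (enum member names
/// follow llama.cpp and may differ between versions):
/// <code>
/// var parameters = new ModelParams("model.gguf")
/// {
///     ContextSize = 16384,
///     YarnScalingType = RopeScalingType.LLAMA_ROPE_SCALING_YARN,
///     YarnOriginalContext = 4096, // context length the model was trained with
/// };
/// </code>
/// </example>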
RopeScalingType? YarnScalingType { get; set; }
}