using System.Collections.Generic;
using LLama.Common;
using LLama.Native;

namespace LLama.Abstractions
{
    /// <summary>
    /// The parameters used for inference.
    /// </summary>
    public interface IInferenceParams
    {
        /// <summary>
        /// number of tokens to keep from initial prompt
        /// </summary>
        public int TokensKeep { get; set; }

        /// <summary>
        /// how many new tokens to predict (n_predict), set to -1 to infinitely generate response
        /// until it completes.
        /// </summary>
        public int MaxTokens { get; set; }

        /// <summary>
        /// logit bias for specific tokens
        /// </summary>
        // NOTE(review): the generic arguments were lost in the mangled source; restored as
        // <LLamaToken, float> per the LLamaSharp API — confirm against LLama.Native.
        public Dictionary<LLamaToken, float>? LogitBias { get; set; }

        /// <summary>
        /// Sequences where the model will stop generating further tokens.
        /// </summary>
        public IReadOnlyList<string> AntiPrompts { get; set; }

        /// <summary>
        /// 0 or lower to use vocab size
        /// </summary>
        public int TopK { get; set; }

        /// <summary>
        /// llama_eval
        /// 1.0 = disabled
        /// </summary>
        public float TopP { get; set; }

        /// <summary>
        /// llama_eval
        /// 0.0 = disabled
        /// </summary>
        public float MinP { get; set; }

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float TfsZ { get; set; }

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float TypicalP { get; set; }

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float Temperature { get; set; }

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float RepeatPenalty { get; set; }

        /// <summary>
        /// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n)
        /// </summary>
        public int RepeatLastTokensCount { get; set; }

        /// <summary>
        /// frequency penalty coefficient
        /// 0.0 = disabled
        /// </summary>
        public float FrequencyPenalty { get; set; }

        /// <summary>
        /// presence penalty coefficient
        /// 0.0 = disabled
        /// </summary>
        public float PresencePenalty { get; set; }

        /// <summary>
        /// Mirostat uses tokens instead of words.
        /// algorithm described in the paper https://arxiv.org/abs/2007.14966.
        /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
        /// </summary>
        public MirostatType Mirostat { get; set; }

        /// <summary>
        /// target entropy
        /// </summary>
        public float MirostatTau { get; set; }

        /// <summary>
        /// learning rate
        /// </summary>
        public float MirostatEta { get; set; }

        /// <summary>
        /// consider newlines as a repeatable token (penalize_nl)
        /// </summary>
        public bool PenalizeNL { get; set; }

        /// <summary>
        /// Grammar to constrain possible tokens
        /// </summary>
        public SafeLLamaGrammarHandle? Grammar { get; set; }
    }
}