You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

IInferenceParams.cs 3.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. using System.Collections.Generic;
  2. using LLama.Common;
  3. using LLama.Control;
  4. using LLama.Native;
  5. using LLama.Sampling;
  6. using LLama.Transform;
  7. namespace LLama.Abstractions
  8. {
  9. /// <summary>
  10. /// The paramters used for inference.
  11. /// </summary>
  12. public interface IInferenceParams
  13. {
  14. /// <summary>
  15. /// number of tokens to keep from initial prompt
  16. /// </summary>
  17. public int TokensKeep { get; set; }
  18. /// <summary>
  19. /// how many new tokens to predict (n_predict), set to -1 to inifinitely generate response
  20. /// until it complete.
  21. /// </summary>
  22. public int MaxTokens { get; set; }
  23. /// <summary>
  24. /// logit bias for specific tokens
  25. /// </summary>
  26. public Dictionary<int, float>? LogitBias { get; set; }
  27. /// <summary>
  28. /// Sequences where the model will stop generating further tokens.
  29. /// </summary>
  30. public IReadOnlyList<string> AntiPrompts { get; set; }
  31. /// <summary>
  32. /// 0 or lower to use vocab size
  33. /// </summary>
  34. public int TopK { get; set; }
  35. /// <summary>llama_eval
  36. /// 1.0 = disabled
  37. /// </summary>
  38. public float TopP { get; set; }
  39. /// <summary>llama_eval
  40. /// 0.0 = disabled
  41. /// </summary>
  42. public float MinP { get; set; }
  43. /// <summary>
  44. /// 1.0 = disabled
  45. /// </summary>
  46. public float TfsZ { get; set; }
  47. /// <summary>
  48. /// 1.0 = disabled
  49. /// </summary>
  50. public float TypicalP { get; set; }
  51. /// <summary>
  52. /// 1.0 = disabled
  53. /// </summary>
  54. public float Temperature { get; set; }
  55. /// <summary>
  56. /// 1.0 = disabled
  57. /// </summary>
  58. public float RepeatPenalty { get; set; }
  59. /// <summary>
  60. /// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n)
  61. /// </summary>
  62. public int RepeatLastTokensCount { get; set; }
  63. /// <summary>
  64. /// frequency penalty coefficient
  65. /// 0.0 = disabled
  66. /// </summary>
  67. public float FrequencyPenalty { get; set; }
  68. /// <summary>
  69. /// presence penalty coefficient
  70. /// 0.0 = disabled
  71. /// </summary>
  72. public float PresencePenalty { get; set; }
  73. /// <summary>
  74. /// Mirostat uses tokens instead of words.
  75. /// algorithm described in the paper https://arxiv.org/abs/2007.14966.
  76. /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
  77. /// </summary>
  78. public MirostatType Mirostat { get; set; }
  79. /// <summary>
  80. /// target entropy
  81. /// </summary>
  82. public float MirostatTau { get; set; }
  83. /// <summary>
  84. /// learning rate
  85. /// </summary>
  86. public float MirostatEta { get; set; }
  87. /// <summary>
  88. /// consider newlines as a repeatable token (penalize_nl)
  89. /// </summary>
  90. public bool PenalizeNL { get; set; }
  91. /// <summary>
  92. /// Grammar to constrain possible tokens
  93. /// </summary>
  94. SafeLLamaGrammarHandle? Grammar { get; set; }
  95. /// <summary>
  96. /// Set a custom sampling pipeline to use. <b>If this is set All other sampling parameters are ignored!</b>
  97. /// </summary>
  98. ISamplingPipeline? SamplingPipeline { get; set; }
  99. /// <summary>
  100. /// Set a custom generation control to use. <b>If this is set antiprompt will be ignored!</b>
  101. /// </summary>
  102. IGenerationControl GenerationControl { get; set; }
  103. /// <summary>
  104. /// Set a custom tokenizer to use.
  105. /// </summary>
  106. ITokenizer Tokenizer { get; set; }
  107. }
  108. }