You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

GptParams.cs 6.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Collections.Generic;
  4. using System.Text;
  namespace LLama
  {
      using llama_token = Int32;

      /// <summary>
      /// Plain container for the model-loading, prompt and sampling settings.
      /// </summary>
      /// <remarks>
      /// Both <c>new GptParams()</c> and <c>new GptParams(...)</c> produce a fully
      /// initialized value. <c>default(GptParams)</c> is still the zeroed struct
      /// (numeric fields 0, reference fields null) because the language never runs
      /// a constructor for it.
      /// </remarks>
      public struct GptParams
      {
          public int seed; // RNG seed
          public int n_threads = Math.Max(Environment.ProcessorCount / 2, 1); // number of threads (constructor argument -1 keeps this autodetected value)
          public int n_predict = -1; // new tokens to predict
          public int n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
          public int n_ctx = 512; // context size
          public int n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
          public int n_keep = 0; // number of tokens to keep from initial prompt

          // sampling parameters
          public Dictionary<llama_token, float> logit_bias; // logit bias for specific tokens
          public int top_k = 40; // <= 0 to use vocab size
          public float top_p = 0.95f; // 1.0 = disabled
          public float tfs_z = 1.00f; // 1.0 = disabled
          public float typical_p = 1.00f; // 1.0 = disabled
          public float temp = 0.80f; // 1.0 = disabled
          public float repeat_penalty = 1.10f; // 1.0 = disabled
          public int repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
          public float frequency_penalty = 0.00f; // 0.0 = disabled
          public float presence_penalty = 0.00f; // 0.0 = disabled
          public int mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
          public float mirostat_tau = 5.00f; // target entropy
          public float mirostat_eta = 0.10f; // learning rate

          public string model = "models/lamma-7B/ggml-model.bin"; // model path
          public string prompt = ""; // initial prompt (set to empty string for interactive mode)
          public string path_session = ""; // path to file for saving/loading model eval state
          public string input_prefix = ""; // string to prefix user inputs with
          public string input_suffix = ""; // string to suffix user inputs with
          public List<string> antiprompt; // string upon seeing which more user input is prompted

          public string lora_adapter = ""; // lora adapter path
          public string lora_base = ""; // base model path for the lora adapter

          public bool memory_f16 = true; // use f16 instead of f32 for memory kv
          public bool random_prompt = false; // randomize prompt if none provided
          public bool use_color = false; // use color to distinguish generations and inputs
          public bool interactive = false; // interactive mode

          public bool embedding = false; // get only sentence embedding
          public bool interactive_first = false; // wait for user input immediately

          public bool instruct = false; // instruction mode (used for Alpaca models)
          public bool penalize_nl = true; // consider newlines as a repeatable token
          public bool perplexity = false; // compute perplexity over the prompt
          public bool use_mmap = true; // use mmap for faster loads
          public bool use_mlock = false; // use mlock to keep model in memory
          public bool mem_test = false; // compute maximum memory usage
          public bool verbose_prompt = false; // print prompt tokens before generation

          /// <summary>
          /// Creates a parameter set populated with the default values.
          /// </summary>
          /// <remarks>
          /// For a struct, <c>new GptParams()</c> never binds to a constructor whose
          /// parameters are merely optional; without this explicit parameterless
          /// constructor it would return the zeroed struct with null collections.
          /// Chaining to the full constructor applies the same defaults as calling
          /// it with no overrides.
          /// </remarks>
          public GptParams() : this(seed: 0)
          {
          }

          /// <summary>
          /// Creates a parameter set, overriding only the values that are passed.
          /// </summary>
          /// <param name="seed">RNG seed.</param>
          /// <param name="n_threads">
          /// Number of threads; -1 keeps the autodetected value
          /// (half the processor count, at least 1).
          /// </param>
          /// <param name="logit_bias">Per-token logit bias; null is replaced with an empty dictionary.</param>
          /// <param name="antiprompt">Antiprompt strings; null is replaced with an empty list.</param>
          public GptParams(int seed = 0, int n_threads = -1, int n_predict = -1,
              int n_parts = -1, int n_ctx = 512, int n_batch = 512, int n_keep = 0,
              Dictionary<llama_token, float> logit_bias = null, int top_k = 40, float top_p = 0.95f,
              float tfs_z = 1.00f, float typical_p = 1.00f, float temp = 0.80f, float repeat_penalty = 1.10f,
              int repeat_last_n = 64, float frequency_penalty = 0.00f, float presence_penalty = 0.00f,
              int mirostat = 0, float mirostat_tau = 5.00f, float mirostat_eta = 0.10f,
              string model = "models/lamma-7B/ggml-model.bin", string prompt = "",
              string path_session = "", string input_prefix = "", string input_suffix = "",
              List<string> antiprompt = null, string lora_adapter = "", string lora_base = "",
              bool memory_f16 = true, bool random_prompt = false, bool use_color = false, bool interactive = false,
              bool embedding = false, bool interactive_first = false, bool instruct = false, bool penalize_nl = true,
              bool perplexity = false, bool use_mmap = true, bool use_mlock = false, bool mem_test = false,
              bool verbose_prompt = false)
          {
              this.seed = seed;

              // -1 is the "autodetect" sentinel: leave the field initializer's value in place.
              if (n_threads != -1)
              {
                  this.n_threads = n_threads;
              }

              this.n_predict = n_predict;
              this.n_parts = n_parts;
              this.n_ctx = n_ctx;
              this.n_batch = n_batch;
              this.n_keep = n_keep;

              // Never expose a null collection; fall back to an empty one.
              this.logit_bias = logit_bias ?? new Dictionary<llama_token, float>();
              this.top_k = top_k;
              this.top_p = top_p;
              this.tfs_z = tfs_z;
              this.typical_p = typical_p;
              this.temp = temp;
              this.repeat_penalty = repeat_penalty;
              this.repeat_last_n = repeat_last_n;
              this.frequency_penalty = frequency_penalty;
              this.presence_penalty = presence_penalty;
              this.mirostat = mirostat;
              this.mirostat_tau = mirostat_tau;
              this.mirostat_eta = mirostat_eta;

              this.model = model;
              this.prompt = prompt;
              this.path_session = path_session;
              this.input_prefix = input_prefix;
              this.input_suffix = input_suffix;
              this.antiprompt = antiprompt ?? new List<string>();

              this.lora_adapter = lora_adapter;
              this.lora_base = lora_base;

              this.memory_f16 = memory_f16;
              this.random_prompt = random_prompt;
              this.use_color = use_color;
              this.interactive = interactive;
              this.embedding = embedding;
              this.interactive_first = interactive_first;
              this.instruct = instruct;
              this.penalize_nl = penalize_nl;
              this.perplexity = perplexity;
              this.use_mmap = use_mmap;
              this.use_mlock = use_mlock;
              this.mem_test = mem_test;
              this.verbose_prompt = verbose_prompt;
          }
      }
  }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。

Contributors (1)