You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

LLamaTransforms.cs 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. using LLama.Abstractions;
  2. using LLama.Common;
  3. using Microsoft.VisualBasic;
  4. using System;
  5. using System.Collections.Generic;
  6. using System.IO;
  7. using System.Linq;
  8. using System.Security;
  9. using System.Security.Cryptography.X509Certificates;
  10. using System.Text;
  11. namespace LLama
  12. {
  13. /// <summary>
  14. /// A class that contains all the transforms provided internally by LLama.
  15. /// </summary>
  16. public class LLamaTransforms
  17. {
  /// <summary>
  /// The default history transform.
  /// Uses plain text with the following format:
  /// [Author]: [Message]
  /// </summary>
  public class DefaultHistoryTransform : IHistoryTransform
  {
      private const string DefaultUserName = "User";
      private const string DefaultAssistantName = "Assistant";
      private const string DefaultSystemName = "System";
      private const string DefaultUnknownName = "??";

      // Trailing text stripped from assistant output when instruct mode is enabled.
      private const string InstructModeSuffix = "\n> ";

      private readonly string _userName;
      private readonly string _assistantName;
      private readonly string _systemName;
      private readonly string _unknownName;
      private readonly bool _isInstructMode;

      /// <summary>
      /// Creates a history transform with the display names used for each author role.
      /// </summary>
      /// <param name="userName">Name written in front of user messages; "User" when null.</param>
      /// <param name="assistantName">Name written in front of assistant messages; "Assistant" when null.</param>
      /// <param name="systemName">Name written in front of system messages; "System" when null.</param>
      /// <param name="unknownName">Name written in front of messages with an unknown role; "??" when null.</param>
      /// <param name="isInstructMode">When true, a trailing "\n> " is additionally removed from assistant text
      /// in <see cref="TrimNamesFromText"/>.</param>
      public DefaultHistoryTransform(string? userName = null, string? assistantName = null,
          string? systemName = null, string? unknownName = null, bool isInstructMode = false)
      {
          _userName = userName ?? DefaultUserName;
          _assistantName = assistantName ?? DefaultAssistantName;
          _systemName = systemName ?? DefaultSystemName;
          _unknownName = unknownName ?? DefaultUnknownName;
          _isInstructMode = isInstructMode;
      }

      /// <inheritdoc />
      public virtual string HistoryToText(ChatHistory history)
      {
          StringBuilder sb = new();
          foreach (var message in history.Messages)
          {
              var author = GetAuthorName(message.AuthorRole);

              // Messages whose role is none of the four handled roles are skipped.
              if (author is null)
              {
                  continue;
              }

              sb.AppendLine($"{author}: {message.Content}");
          }
          return sb.ToString();
      }

      /// <inheritdoc />
      public virtual ChatHistory TextToHistory(AuthorRole role, string text)
      {
          ChatHistory history = new ChatHistory();
          history.AddMessage(role, TrimNamesFromText(text, role));
          return history;
      }

      /// <summary>
      /// Removes role-name markers from the edges of the text:
      /// a leading "[user name]:" for user text, or a trailing "[assistant name]:" for assistant text.
      /// In instruct mode a trailing "\n> " is also removed from assistant text.
      /// </summary>
      /// <param name="text">The text to clean up.</param>
      /// <param name="role">The role the text belongs to; it selects which markers are removed.</param>
      /// <returns>The text without the matched markers and without the whitespace that adjoined them.</returns>
      public virtual string TrimNamesFromText(string text, AuthorRole role)
      {
          var userPrefix = $"{_userName}:";
          var assistantSuffix = $"{_assistantName}:";

          // Ordinal comparisons are used on purpose: the Substring calls below rely on the
          // marker's exact char length, which only an ordinal match guarantees.
          if (role == AuthorRole.User && text.StartsWith(userPrefix, StringComparison.Ordinal))
          {
              text = text.Substring(userPrefix.Length).TrimStart();
          }
          else if (role == AuthorRole.Assistant && text.EndsWith(assistantSuffix, StringComparison.Ordinal))
          {
              text = text.Substring(0, text.Length - assistantSuffix.Length).TrimEnd();
          }

          if (_isInstructMode && role == AuthorRole.Assistant
              && text.EndsWith(InstructModeSuffix, StringComparison.Ordinal))
          {
              text = text.Substring(0, text.Length - InstructModeSuffix.Length).TrimEnd();
          }

          return text;
      }

      // Maps a role to its configured display name, or null for a role this transform does not handle.
      private string? GetAuthorName(AuthorRole role)
      {
          if (role == AuthorRole.User)
          {
              return _userName;
          }
          if (role == AuthorRole.System)
          {
              return _systemName;
          }
          if (role == AuthorRole.Unknown)
          {
              return _unknownName;
          }
          if (role == AuthorRole.Assistant)
          {
              return _assistantName;
          }
          return null;
      }
  }
  /// <summary>
  /// A text input transform whose only effect is to strip leading and trailing whitespace.
  /// </summary>
  public class NaiveTextInputTransform : ITextTransform
  {
      /// <summary>
      /// Creates the transform; there is nothing to configure.
      /// </summary>
      public NaiveTextInputTransform()
      {
      }

      /// <inheritdoc />
      public string Transform(string text) => text.Trim();
  }
  /// <summary>
  /// A pass-through text output stream transform: the token sequence is handed back unchanged.
  /// </summary>
  public class EmptyTextOutputStreamTransform : ITextStreamTransform
  {
      /// <inheritdoc />
      public IEnumerable<string> Transform(IEnumerable<string> tokens) => tokens;

      /// <inheritdoc />
      public IAsyncEnumerable<string> TransformAsync(IAsyncEnumerable<string> tokens) => tokens;
  }
  /// <summary>
  /// A text output transform that removes the keywords from the response.
  /// Tokens are buffered in a sliding window so that a keyword split across several tokens can still be found.
  /// </summary>
  public class KeywordTextOutputStreamTransform : ITextStreamTransform
  {
      private readonly HashSet<string> _keywords;
      private readonly int _maxKeywordLength;
      private readonly bool _removeAllMatchedTokens;

      /// <summary>
      /// Creates a transform that strips the given keywords from a token stream.
      /// </summary>
      /// <param name="keywords">Keywords that you want to remove from the response.
      /// Null or empty entries are ignored; with no usable keyword the tokens pass through unchanged.</param>
      /// <param name="redundancyLength">The extra length allowed when searching for the keyword.
      /// Buffered tokens are released once their joined length reaches the length of the longest keyword
      /// plus redundancyLength without containing a keyword.
      /// For example, if your only keyword is "highlight" (9 characters) and the tokens are "\r\nhighligh"
      /// (10 characters) followed by "t", then with redundancyLength=0 the first token is released before the
      /// keyword is complete and the keyword is never matched. Setting redundancyLength >= 2 keeps the first
      /// token buffered, which leads to a successful match.
      /// The larger the redundancyLength is, the lower the processing speed. But as an experience, it won't
      /// introduce too much performance impact when redundancyLength is 5 or less.</param>
      /// <param name="removeAllMatchedTokens">If set to true, when getting a matched keyword, all the related tokens will be removed. Otherwise only the part of keyword will be removed.</param>
      /// <exception cref="ArgumentNullException"><paramref name="keywords"/> is null.</exception>
      public KeywordTextOutputStreamTransform(IEnumerable<string> keywords, int redundancyLength = 3, bool removeAllMatchedTokens = false)
      {
          if (keywords is null)
          {
              throw new ArgumentNullException(nameof(keywords));
          }

          // Materialize once (the source is enumerated a single time) and drop null/empty entries:
          // an empty keyword "matches" every text and makes string.Replace throw.
          _keywords = new HashSet<string>(keywords.Where(k => !string.IsNullOrEmpty(k)));

          // DefaultIfEmpty keeps Max() from throwing when there is no keyword at all.
          _maxKeywordLength = _keywords.Select(k => k.Length).DefaultIfEmpty(0).Max() + redundancyLength;
          _removeAllMatchedTokens = removeAllMatchedTokens;
      }

      /// <inheritdoc />
      public IEnumerable<string> Transform(IEnumerable<string> tokens)
      {
          var window = new Queue<string>();

          foreach (var token in tokens)
          {
              window.Enqueue(token);
              var current = string.Join("", window);

              if (TryRemoveKeyword(current, out var cleaned))
              {
                  // The buffered tokens are consumed by the match; emit the cleaned text exactly once.
                  window.Clear();
                  if (!_removeAllMatchedTokens)
                  {
                      yield return cleaned;
                  }
              }
              else if (current.Length >= _maxKeywordLength)
              {
                  // The window is too long to still be the start of a keyword: release it.
                  while (window.Count > 0)
                  {
                      yield return window.Dequeue();
                  }
              }
          }

          // Release whatever is still buffered when the stream ends.
          while (window.Count > 0)
          {
              yield return window.Dequeue();
          }
      }

      /// <inheritdoc />
      public async IAsyncEnumerable<string> TransformAsync(IAsyncEnumerable<string> tokens)
      {
          var window = new Queue<string>();

          await foreach (var token in tokens)
          {
              window.Enqueue(token);
              var current = string.Join("", window);

              if (TryRemoveKeyword(current, out var cleaned))
              {
                  // The buffered tokens are consumed by the match; emit the cleaned text exactly once.
                  window.Clear();
                  if (!_removeAllMatchedTokens)
                  {
                      yield return cleaned;
                  }
              }
              else if (current.Length >= _maxKeywordLength)
              {
                  // The window is too long to still be the start of a keyword: release it.
                  while (window.Count > 0)
                  {
                      yield return window.Dequeue();
                  }
              }
          }

          // Release whatever is still buffered when the stream ends.
          while (window.Count > 0)
          {
              yield return window.Dequeue();
          }
      }

      // Finds the first keyword contained in the text and returns the text with every
      // occurrence of that keyword removed. Returns false when no keyword is present.
      private bool TryRemoveKeyword(string text, out string cleaned)
      {
          foreach (var keyword in _keywords)
          {
              if (text.Contains(keyword))
              {
                  cleaned = text.Replace(keyword, "");
                  return true;
              }
          }

          cleaned = text;
          return false;
      }
  }
  251. }
  252. }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。