You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

LLamaCache.cs 2.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Text;
  6. namespace LLama
  7. {
  8. using llama_token = Int32;
  9. /// <summary>
  10. /// Cache for a llama.cpp model.
  11. /// </summary>
  12. public class LLamaCache
  13. {
  14. private Dictionary<llama_token[], LinkedListNode<KeyValuePair<llama_token[], LLamaState>>> _cacheState;
  15. private LinkedList<KeyValuePair<llama_token[], LLamaState>> _cacheList;
  16. private int _capacity;
  17. public int CacheSize
  18. {
  19. get
  20. {
  21. return _cacheState.Values.Select(s => s.Value.Value.Size).Sum();
  22. }
  23. }
  24. /// <summary>
  25. ///
  26. /// </summary>
  27. /// <param name="capacity">The max capacity (bytes).</param>
  28. public LLamaCache(int capacity = 2 << 30)
  29. {
  30. _cacheState = new();
  31. _cacheList = new();
  32. _capacity = capacity;
  33. }
  34. public LLamaState this[llama_token[] key]
  35. {
  36. get
  37. {
  38. var prefixKey = FindLongestPrefixKey(key);
  39. if(prefixKey is null)
  40. {
  41. throw new KeyNotFoundException();
  42. }
  43. var value = _cacheState[prefixKey];
  44. MoveNodeToEnd(prefixKey);
  45. return value.Value.Value;
  46. }
  47. set
  48. {
  49. var node = _cacheList.AddLast(new KeyValuePair<llama_token[], LLamaState>(key, value));
  50. _cacheState[key] = node;
  51. while(CacheSize > _capacity && _cacheList.Count > 0)
  52. {
  53. var topop = _cacheList.First;
  54. _cacheState.Remove(topop.Value.Key);
  55. _cacheList.RemoveFirst();
  56. }
  57. }
  58. }
  59. public bool Contains(llama_token[] key)
  60. {
  61. return FindLongestPrefixKey(key) is not null;
  62. }
  63. private llama_token[]? FindLongestPrefixKey(llama_token[] key)
  64. {
  65. int minLen = 0;
  66. llama_token[]? minKey = null;
  67. var keys = _cacheState.Keys.Select(k => (k, LLamaModelV1.LongestTokenPrefix(k, key)));
  68. foreach(var (k, prefixLen) in keys)
  69. {
  70. if(prefixLen > minLen)
  71. {
  72. minLen = prefixLen;
  73. minKey = k;
  74. }
  75. }
  76. return minKey;
  77. }
  78. private void MoveNodeToEnd(llama_token[] key)
  79. {
  80. if (!_cacheState.TryGetValue(key, out var node))
  81. {
  82. return;
  83. }
  84. _cacheState.Remove(key);
  85. _cacheList.Remove(node);
  86. var newNode = _cacheList.AddLast(new KeyValuePair<llama_token[], LLamaState>(key, node.Value.Value));
  87. _cacheState.Add(key, newNode);
  88. }
  89. }
  90. }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。