You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

Utils.cs 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. using LLama.Common;
  2. using LLama.Exceptions;
  3. using LLama.Native;
  4. using System;
  5. using System.Collections.Generic;
  6. using System.IO;
  7. using System.Linq;
  8. using System.Runtime.InteropServices;
  9. using System.Text;
  10. namespace LLama
  11. {
  12. using llama_token = Int32;
  13. internal static class Utils
  14. {
  15. public static SafeLLamaContextHandle InitLLamaContextFromModelParams(ModelParams @params)
  16. {
  17. var lparams = NativeApi.llama_context_default_params();
  18. lparams.n_ctx = @params.ContextSize;
  19. lparams.n_batch = @params.BatchSize;
  20. lparams.main_gpu = @params.MainGpu;
  21. lparams.n_gpu_layers = @params.GpuLayerCount;
  22. lparams.seed = @params.Seed;
  23. lparams.f16_kv = @params.UseFp16Memory;
  24. lparams.use_mmap = @params.UseMemoryLock;
  25. lparams.use_mlock = @params.UseMemoryLock;
  26. lparams.logits_all = @params.Perplexity;
  27. lparams.embedding = @params.EmbeddingMode;
  28. lparams.low_vram = @params.LowVram;
  29. if(@params.TensorSplits.Length != 1)
  30. {
  31. throw new ArgumentException("Currently multi-gpu support is not supported by " +
  32. "both llama.cpp and LLamaSharp.");
  33. }
  34. lparams.tensor_split = new TensorSplits()
  35. {
  36. Item1 = @params.TensorSplits[0]
  37. };
  38. if (!File.Exists(@params.ModelPath))
  39. {
  40. throw new FileNotFoundException($"The model file does not exist: {@params.ModelPath}");
  41. }
  42. var ctx_ptr = NativeApi.llama_init_from_file(@params.ModelPath, lparams);
  43. if (ctx_ptr == IntPtr.Zero)
  44. {
  45. throw new RuntimeError($"Failed to load model {@params.ModelPath}.");
  46. }
  47. SafeLLamaContextHandle ctx = new(ctx_ptr);
  48. if (!string.IsNullOrEmpty(@params.LoraAdapter))
  49. {
  50. int err = NativeApi.llama_apply_lora_from_file(ctx, @params.LoraAdapter,
  51. string.IsNullOrEmpty(@params.LoraBase) ? null : @params.LoraBase, @params.Threads);
  52. if (err != 0)
  53. {
  54. throw new RuntimeError("Failed to apply lora adapter.");
  55. }
  56. }
  57. return ctx;
  58. }
  59. public static IEnumerable<llama_token> Tokenize(SafeLLamaContextHandle ctx, string text, bool add_bos, Encoding encoding)
  60. {
  61. var cnt = encoding.GetByteCount(text);
  62. llama_token[] res = new llama_token[cnt + (add_bos ? 1 : 0)];
  63. int n = NativeApi.llama_tokenize(ctx, text, encoding, res, res.Length, add_bos);
  64. if (n < 0)
  65. {
  66. throw new RuntimeError("Error happened during tokenization. It's possibly caused by wrong encoding. Please try to " +
  67. "specify the encoding.");
  68. }
  69. return res.Take(n);
  70. }
  71. public unsafe static Span<float> GetLogits(SafeLLamaContextHandle ctx, int length)
  72. {
  73. var logits = NativeApi.llama_get_logits(ctx);
  74. return new Span<float>(logits, length);
  75. }
  76. public static unsafe int Eval(SafeLLamaContextHandle ctx, llama_token[] tokens, int startIndex, int n_tokens, int n_past, int n_threads)
  77. {
  78. int result;
  79. fixed(llama_token* p = tokens)
  80. {
  81. result = NativeApi.llama_eval_with_pointer(ctx, p + startIndex, n_tokens, n_past, n_threads);
  82. }
  83. return result;
  84. }
  85. public static string TokenToString(llama_token token, SafeLLamaContextHandle ctx, Encoding encoding)
  86. {
  87. return PtrToString(NativeApi.llama_token_to_str(ctx, token), encoding);
  88. }
  89. public static unsafe string PtrToString(IntPtr ptr, Encoding encoding)
  90. {
  91. #if NET6_0_OR_GREATER
  92. if(encoding == Encoding.UTF8)
  93. {
  94. return Marshal.PtrToStringUTF8(ptr);
  95. }
  96. else if(encoding == Encoding.Unicode)
  97. {
  98. return Marshal.PtrToStringUni(ptr);
  99. }
  100. else
  101. {
  102. return Marshal.PtrToStringAuto(ptr);
  103. }
  104. #else
  105. byte* tp = (byte*)ptr.ToPointer();
  106. List<byte> bytes = new();
  107. while (true)
  108. {
  109. byte c = *tp++;
  110. if (c == '\0')
  111. {
  112. break;
  113. }
  114. else
  115. {
  116. bytes.Add(c);
  117. }
  118. }
  119. return encoding.GetString(bytes.ToArray());
  120. #endif
  121. }
  122. }
  123. }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。