You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

Utils.cs 3.3 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. using LLama.Native;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using LLama.Exceptions;
  6. using System.Diagnostics;
  7. using System.Linq;
  8. using System.Runtime.InteropServices;
  9. using System.IO;
  10. namespace LLama
  11. {
  12. using llama_token = Int32;
  13. internal static class Utils
  14. {
  15. public static SafeLLamaContextHandle llama_init_from_gpt_params(ref LLamaParams @params)
  16. {
  17. var lparams = NativeApi.llama_context_default_params();
  18. lparams.n_ctx = @params.n_ctx;
  19. lparams.n_gpu_layers = @params.n_gpu_layers;
  20. lparams.seed = @params.seed;
  21. lparams.f16_kv = @params.memory_f16;
  22. lparams.use_mmap = @params.use_mmap;
  23. lparams.use_mlock = @params.use_mlock;
  24. lparams.logits_all = @params.perplexity;
  25. lparams.embedding = @params.embedding;
  26. if (!File.Exists(@params.model))
  27. {
  28. throw new FileNotFoundException($"The model file does not exist: {@params.model}");
  29. }
  30. var ctx_ptr = NativeApi.llama_init_from_file(@params.model, lparams);
  31. if(ctx_ptr == IntPtr.Zero )
  32. {
  33. throw new RuntimeError($"Failed to load model {@params.model}.");
  34. }
  35. SafeLLamaContextHandle ctx = new(ctx_ptr);
  36. if (!string.IsNullOrEmpty(@params.lora_adapter))
  37. {
  38. int err = NativeApi.llama_apply_lora_from_file(ctx, @params.lora_adapter,
  39. string.IsNullOrEmpty(@params.lora_base) ? null : @params.lora_base, @params.n_threads);
  40. if(err != 0)
  41. {
  42. throw new RuntimeError("Failed to apply lora adapter.");
  43. }
  44. }
  45. return ctx;
  46. }
  47. public static List<llama_token> llama_tokenize(SafeLLamaContextHandle ctx, string text, bool add_bos, string encodingName)
  48. {
  49. var encoding = Encoding.GetEncoding(encodingName);
  50. var cnt = encoding.GetByteCount(text);
  51. llama_token[] res = new llama_token[cnt + (add_bos ? 1 : 0)];
  52. int n = NativeApi.llama_tokenize(ctx, text, encoding, res, res.Length, add_bos);
  53. if(n < 0)
  54. {
  55. throw new RuntimeError("Error happened during tokenization. It's possibly caused by wrong encoding. Please try to " +
  56. "specify the encoding.");
  57. }
  58. return res.Take(n).ToList();
  59. }
  60. public unsafe static Span<float> llama_get_logits(SafeLLamaContextHandle ctx, int length)
  61. {
  62. var logits = NativeApi.llama_get_logits(ctx);
  63. return new Span<float>(logits, length);
  64. }
  65. public static unsafe string PtrToStringUTF8(IntPtr ptr)
  66. {
  67. #if NET6_0_OR_GREATER
  68. return Marshal.PtrToStringUTF8(ptr);
  69. #else
  70. byte* tp = (byte*)ptr.ToPointer();
  71. List<byte> bytes = new();
  72. while (true)
  73. {
  74. byte c = *tp++;
  75. if(c == '\0')
  76. {
  77. break;
  78. }
  79. else
  80. {
  81. bytes.Add(c);
  82. }
  83. }
  84. return Encoding.UTF8.GetString(bytes.ToArray());
  85. #endif
  86. }
  87. }
  88. }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。