You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

Utils.cs 3.1 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. using LLama.Native;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using LLama.Exceptions;
  6. using System.Diagnostics;
  7. using System.Linq;
  8. using System.Runtime.InteropServices;
  9. namespace LLama
  10. {
  11. using llama_token = Int32;
  12. internal static class Utils
  13. {
  14. public static SafeLLamaContextHandle llama_init_from_gpt_params(ref LLamaParams @params)
  15. {
  16. var lparams = NativeApi.llama_context_default_params();
  17. lparams.n_ctx = @params.n_ctx;
  18. lparams.n_parts = @params.n_parts;
  19. lparams.n_gpu_layers = @params.n_gpu_layers;
  20. lparams.seed = @params.seed;
  21. lparams.f16_kv = @params.memory_f16;
  22. lparams.use_mmap = @params.use_mmap;
  23. lparams.use_mlock = @params.use_mlock;
  24. lparams.logits_all = @params.perplexity;
  25. lparams.embedding = @params.embedding;
  26. var ctx_ptr = NativeApi.llama_init_from_file(@params.model, lparams);
  27. if(ctx_ptr == IntPtr.Zero )
  28. {
  29. throw new RuntimeError($"Failed to load model {@params.model}.");
  30. }
  31. SafeLLamaContextHandle ctx = new(ctx_ptr);
  32. if (!string.IsNullOrEmpty(@params.lora_adapter))
  33. {
  34. int err = NativeApi.llama_apply_lora_from_file(ctx, @params.lora_adapter,
  35. string.IsNullOrEmpty(@params.lora_base) ? null : @params.lora_base, @params.n_threads);
  36. if(err != 0)
  37. {
  38. throw new RuntimeError("Failed to apply lora adapter.");
  39. }
  40. }
  41. return ctx;
  42. }
  43. public static List<llama_token> llama_tokenize(SafeLLamaContextHandle ctx, string text, bool add_bos, string encoding)
  44. {
  45. var cnt = Encoding.GetEncoding(encoding).GetByteCount(text);
  46. llama_token[] res = new llama_token[cnt + (add_bos ? 1 : 0)];
  47. int n = NativeApi.llama_tokenize(ctx, text, res, res.Length, add_bos);
  48. if(n < 0)
  49. {
  50. throw new RuntimeError("Error happened during tokenization. It's possibly caused by wrong encoding. Please try to " +
  51. "specify the encoding.");
  52. }
  53. return res.Take(n).ToList();
  54. }
  55. public unsafe static Span<float> llama_get_logits(SafeLLamaContextHandle ctx, int length)
  56. {
  57. var logits = NativeApi.llama_get_logits(ctx);
  58. return new Span<float>(logits, length);
  59. }
  60. public static unsafe string PtrToStringUTF8(IntPtr ptr)
  61. {
  62. #if NET6_0_OR_GREATER
  63. return Marshal.PtrToStringUTF8(ptr);
  64. #else
  65. byte* tp = (byte*)ptr.ToPointer();
  66. List<byte> bytes = new();
  67. while (true)
  68. {
  69. byte c = *tp++;
  70. if(c == '\0')
  71. {
  72. break;
  73. }
  74. else
  75. {
  76. bytes.Add(c);
  77. }
  78. }
  79. return Encoding.UTF8.GetString(bytes.ToArray());
  80. #endif
  81. }
  82. }
  83. }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。