You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

IModelParamsExtensions.cs 2.1 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. using System.IO;
  2. using System;
  3. using System.Buffers;
  4. using LLama.Abstractions;
  5. using LLama.Native;
  6. namespace LLama.Extensions
  7. {
  /// <summary>
  /// Extension methods for the <see cref="IModelParams"/> interface.
  /// </summary>
  public static class IModelParamsExtensions
  {
      /// <summary>
      /// Convert the given `IModelParams` into a `LLamaContextParams`.
      /// </summary>
      /// <remarks>
      /// <paramref name="result"/> is initialised from <c>NativeApi.llama_context_default_params()</c>
      /// and then overwritten field by field from <paramref name="params"/>.
      /// Its <c>tensor_split</c> field holds a raw pointer into the pinned <c>TensorSplits</c> array,
      /// so that pointer is only usable while the returned handle has not been disposed.
      /// </remarks>
      /// <param name="params">The model parameters to convert.</param>
      /// <param name="result">Receives the populated native context parameters.</param>
      /// <returns>
      /// The <see cref="MemoryHandle"/> pinning <c>TensorSplits</c>. The caller owns it and should
      /// dispose it once <paramref name="result"/> is no longer in use, which releases the pin.
      /// </returns>
      /// <exception cref="ArgumentNullException"><paramref name="params"/> is null.</exception>
      /// <exception cref="FileNotFoundException">The file at <c>ModelPath</c> does not exist.</exception>
      /// <exception cref="ArgumentException"><c>TensorSplits</c> is non-null and does not contain exactly one element.</exception>
      public static MemoryHandle ToLlamaContextParams(this IModelParams @params, out LLamaContextParams result)
      {
          // Fail with a descriptive argument error instead of a NullReferenceException further down.
          if (@params is null)
          {
              throw new ArgumentNullException(nameof(@params));
          }

          if (!File.Exists(@params.ModelPath))
          {
              // Pass the path as the file name too, so FileNotFoundException.FileName is populated for callers.
              throw new FileNotFoundException($"The model file does not exist: {@params.ModelPath}", @params.ModelPath);
          }

          if (@params.TensorSplits != null && @params.TensorSplits.Length != 1)
          {
              throw new ArgumentException("Currently multi-gpu support is not supported by both llama.cpp and LLamaSharp.");
          }

          result = NativeApi.llama_context_default_params();
          result.n_ctx = @params.ContextSize;
          result.n_batch = @params.BatchSize;
          result.main_gpu = @params.MainGpu;
          result.n_gpu_layers = @params.GpuLayerCount;
          result.seed = @params.Seed;
          result.f16_kv = @params.UseFp16Memory;
          result.use_mmap = @params.UseMemorymap;
          result.use_mlock = @params.UseMemoryLock;
          result.logits_all = @params.Perplexity;
          result.embedding = @params.EmbeddingMode;
          result.low_vram = @params.LowVram;
          result.rope_freq_base = @params.RopeFrequencyBase;
          result.rope_freq_scale = @params.RopeFrequencyScale;
          result.mul_mat_q = @params.MulMatQ;

          // Pin the managed array so native code can read it through a stable address.
          // Pinning happens last so that no handle is left undisposed if an earlier step throws.
          // NOTE(review): AsMemory() on a null array should yield an empty Memory whose Pin()
          // gives a default handle with a null pointer - confirm against the .NET documentation.
          var pin = @params.TensorSplits.AsMemory().Pin();
          unsafe
          {
              result.tensor_split = (nint)pin.Pointer;
          }

          return pin;
      }
  }
  50. }