You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

IModelParamsExtensions.cs 3.3 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. using System.IO;
  2. using System;
  3. using System.Text;
  4. using LLama.Abstractions;
  5. using LLama.Native;
  6. namespace LLama.Extensions;
  7. /// <summary>
  8. /// Extention methods to the IModelParams interface
  9. /// </summary>
  10. public static class IModelParamsExtensions
  11. {
  12. /// <summary>
  13. /// Convert the given `IModelParams` into a `LLamaModelParams`
  14. /// </summary>
  15. /// <param name="params"></param>
  16. /// <param name="result"></param>
  17. /// <returns></returns>
  18. /// <exception cref="FileNotFoundException"></exception>
  19. /// <exception cref="ArgumentException"></exception>
  20. public static IDisposable ToLlamaModelParams(this IModelParams @params, out LLamaModelParams result)
  21. {
  22. if (@params.UseMemoryLock && !NativeApi.llama_supports_mlock())
  23. throw new NotSupportedException("'UseMemoryLock' is not supported (llama_supports_mlock() == false)");
  24. if (@params.UseMemorymap && !NativeApi.llama_supports_mmap())
  25. throw new NotSupportedException("'UseMemorymap' is not supported (llama_supports_mmap() == false)");
  26. var disposer = new GroupDisposable();
  27. result = NativeApi.llama_model_default_params();
  28. result.main_gpu = @params.MainGpu;
  29. result.split_mode = @params.SplitMode;
  30. result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;
  31. result.use_mlock = @params.UseMemoryLock;
  32. result.use_mmap = @params.UseMemorymap;
  33. result.vocab_only = @params.VocabOnly;
  34. unsafe
  35. {
  36. result.tensor_split = (float*)disposer.Add(@params.TensorSplits.Pin()).Pointer;
  37. }
  38. if (@params.MetadataOverrides.Count == 0)
  39. {
  40. unsafe
  41. {
  42. result.kv_overrides = (LLamaModelMetadataOverride*)IntPtr.Zero;
  43. }
  44. }
  45. else
  46. {
  47. // Allocate enough space for all the override items. Pin it in place so we can safely pass it to llama.cpp
  48. // This is one larger than necessary. The last item indicates the end of the overrides.
  49. var overrides = new LLamaModelMetadataOverride[@params.MetadataOverrides.Count + 1];
  50. unsafe
  51. {
  52. result.kv_overrides = (LLamaModelMetadataOverride*)disposer.Add(overrides.AsMemory().Pin()).Pointer;
  53. }
  54. // Convert each item
  55. for (var i = 0; i < @params.MetadataOverrides.Count; i++)
  56. {
  57. unsafe
  58. {
  59. // Get the item to convert
  60. var item = @params.MetadataOverrides[i];
  61. // Create the "native" representation to fill in
  62. var native = new LLamaModelMetadataOverride
  63. {
  64. Tag = item.Type
  65. };
  66. // Write the value into the native struct
  67. item.WriteValue(ref native);
  68. // Convert key chars to bytes
  69. var srcSpan = item.Key.AsSpan();
  70. var dstSpan = new Span<byte>(native.key, 128);
  71. Encoding.UTF8.GetBytes(srcSpan, dstSpan);
  72. // Store it in the array
  73. overrides[i] = native;
  74. }
  75. }
  76. }
  77. return disposer;
  78. }
  79. }