You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

IModelParamsExtensions.cs 3.3 kB

1 year ago
April 2024 Binary Update (#662) * Updated binaries, using [this build](https://github.com/SciSharp/LLamaSharp/actions/runs/8654672719/job/23733195669) for llama.cpp commit `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7`. - Added all new functions. - Moved some functions (e.g. `SafeLlamaModelHandle` specific functions) into `SafeLlamaModelHandle.cs` - Exposed tokens on `SafeLlamaModelHandle` and `LLamaWeights` through a `Tokens` property. As new special tokens are added in the future they can be added here. - Changed all token properties to return nullable tokens, to handle some models not having some tokens. - Fixed `DefaultSamplingPipeline` to handle no newline token in some models. * Moved native methods to more specific locations. - Context specific things have been moved into `SafeLLamaContextHandle.cs` and made private - they're exposed through C# properties and methods already. - Checking that GPU layer count is zero if GPU offload is not supported. - Moved methods for creating default structs (`llama_model_quantize_default_params` and `llama_context_default_params`) into relevant structs. * Removed exception if `GpuLayerCount > 0` when GPU is not supported. * - Added low level wrapper methods for new per-sequence state load/save in `SafeLLamaContextHandle` - Added high level wrapper methods (save/load with `State` object or memory mapped file) in `LLamaContext` - Moved native methods for per-sequence state load/save into `SafeLLamaContextHandle` * Added update and defrag methods for KV cache in `SafeLLamaContextHandle` * Updated submodule to `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7` * Passing the sequence ID when saving a single sequence state
1 year ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. using System.IO;
  2. using System;
  3. using System.Text;
  4. using LLama.Abstractions;
  5. using LLama.Native;
  6. namespace LLama.Extensions;
  7. /// <summary>
  8. /// Extension methods to the IModelParams interface
  9. /// </summary>
  10. public static class IModelParamsExtensions
  11. {
  12. /// <summary>
  13. /// Convert the given `IModelParams` into a `LLamaModelParams`
  14. /// </summary>
  15. /// <param name="params"></param>
  16. /// <param name="result"></param>
  17. /// <returns></returns>
  18. /// <exception cref="FileNotFoundException"></exception>
  19. /// <exception cref="ArgumentException"></exception>
  20. public static IDisposable ToLlamaModelParams(this IModelParams @params, out LLamaModelParams result)
  21. {
  22. if (@params.UseMemoryLock && !NativeApi.llama_supports_mlock())
  23. throw new NotSupportedException("'UseMemoryLock' is not supported (llama_supports_mlock() == false)");
  24. if (@params.UseMemorymap && !NativeApi.llama_supports_mmap())
  25. throw new NotSupportedException("'UseMemorymap' is not supported (llama_supports_mmap() == false)");
  26. var disposer = new GroupDisposable();
  27. result = LLamaModelParams.Default();
  28. result.main_gpu = @params.MainGpu;
  29. result.split_mode = @params.SplitMode;
  30. result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;
  31. result.use_mlock = @params.UseMemoryLock;
  32. result.use_mmap = @params.UseMemorymap;
  33. result.vocab_only = @params.VocabOnly;
  34. unsafe
  35. {
  36. result.tensor_split = (float*)disposer.Add(@params.TensorSplits.Pin()).Pointer;
  37. }
  38. if (@params.MetadataOverrides.Count == 0)
  39. {
  40. unsafe
  41. {
  42. result.kv_overrides = (LLamaModelMetadataOverride*)IntPtr.Zero;
  43. }
  44. }
  45. else
  46. {
  47. // Allocate enough space for all the override items. Pin it in place so we can safely pass it to llama.cpp
  48. // This is one larger than necessary. The last item indicates the end of the overrides.
  49. var overrides = new LLamaModelMetadataOverride[@params.MetadataOverrides.Count + 1];
  50. unsafe
  51. {
  52. result.kv_overrides = (LLamaModelMetadataOverride*)disposer.Add(overrides.AsMemory().Pin()).Pointer;
  53. }
  54. // Convert each item
  55. for (var i = 0; i < @params.MetadataOverrides.Count; i++)
  56. {
  57. unsafe
  58. {
  59. // Get the item to convert
  60. var item = @params.MetadataOverrides[i];
  61. // Create the "native" representation to fill in
  62. var native = new LLamaModelMetadataOverride
  63. {
  64. Tag = item.Type
  65. };
  66. // Write the value into the native struct
  67. item.WriteValue(ref native);
  68. // Convert key chars to bytes
  69. var srcSpan = item.Key.AsSpan();
  70. var dstSpan = new Span<byte>(native.key, 128);
  71. Encoding.UTF8.GetBytes(srcSpan, dstSpan);
  72. // Store it in the array
  73. overrides[i] = native;
  74. }
  75. }
  76. }
  77. return disposer;
  78. }
  79. }