You cannot select more than 25 topics.
Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.
|
- using System;
- using System.IO;
- using LLama.Abstractions;
- using LLama.Native;
-
- namespace LLama.Extensions
- {
/// <summary>
/// Extension methods for the <see cref="IContextParams"/> interface
/// </summary>
public static class IContextParamsExtensions
{
    /// <summary>
    /// Fill a <see cref="LLamaContextParams"/> from the given <see cref="IContextParams"/>.
    /// The result starts as the value returned by <c>NativeApi.llama_context_default_params()</c>
    /// and each field assigned below overrides that default.
    /// </summary>
    /// <param name="params">Managed context settings to read from</param>
    /// <param name="result">Receives the populated native context parameters</param>
    public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
    {
        // Begin with the native defaults so any field not touched here keeps its library value
        result = NativeApi.llama_context_default_params();

        // Settings copied straight across
        result.n_ctx = @params.ContextSize;
        result.n_batch = @params.BatchSize;
        result.seed = @params.Seed;
        result.f16_kv = @params.UseFp16Memory;
        result.logits_all = @params.Perplexity;
        result.embedding = @params.EmbeddingMode;

        // RoPE frequency settings: an unset (null) value is passed to the native side as 0
        result.rope_freq_base = @params.RopeFrequencyBase ?? 0;
        result.rope_freq_scale = @params.RopeFrequencyScale ?? 0;

        // Default YaRN values copied from here: https://github.com/ggerganov/llama.cpp/blob/381efbf480959bb6d1e247a8b0c2328f22e350f8/common/common.h#L67
        result.yarn_ext_factor = @params.YarnExtrapolationFactor ?? -1f;
        result.yarn_attn_factor = @params.YarnAttentionFactor ?? 1f;
        result.yarn_beta_fast = @params.YarnBetaFast ?? 32f;
        result.yarn_beta_slow = @params.YarnBetaSlow ?? 1f;
        result.yarn_orig_ctx = @params.YarnOriginalContext ?? 0;
        result.rope_scaling_type = @params.YarnScalingType ?? RopeScalingType.LLAMA_ROPE_SCALING_UNSPECIFIED;

        result.mul_mat_q = @params.MulMatQ;

        // Thread counts: a missing or zero request is replaced by a machine-derived default
        result.n_threads = Threads(@params.Threads);
        result.n_threads_batch = Threads(@params.BatchThreads);
    }

    /// <summary>
    /// Resolve a requested thread count. A positive request is returned unchanged;
    /// a null or zero request yields half of <see cref="Environment.ProcessorCount"/>, but never less than 1.
    /// </summary>
    /// <param name="value">Requested number of threads, or null when unspecified</param>
    /// <returns>The thread count to use (always at least 1)</returns>
    private static uint Threads(uint? value)
    {
        // null collapses to 0 here, so "unspecified" and "explicitly zero" take the same path
        var requested = value.GetValueOrDefault();
        if (requested == 0)
        {
            var halfOfCores = Environment.ProcessorCount / 2;

            // Integer division gives 0 on a single-core machine; clamp up to 1
            return (uint)Math.Max(halfOfCores, 1);
        }

        return requested;
    }
}
- }
|