Add an example and modifications to the interactive executor to enable LLava models. Just a preview / demo. (tag: 0.11.0)
@@ -0,0 +1 @@
+<image>\nUSER:\nProvide a full description.\nASSISTANT:\n
@@ -13,6 +13,7 @@ public class ExampleRunner
     { "Chat Session: Automatic conversation", TalkToYourself.Run },
     { "Chat Session: Chinese characters", ChatChineseGB2312.Run },
     { "Executor: Interactive mode chat", InteractiveModeExecute.Run },
+    { "Executor: Llava Interactive mode chat", LlavaInteractiveModeExecute.Run },
     { "Executor: Instruct mode chat", InstructModeExecute.Run },
     { "Executor: Stateless mode chat", StatelessModeExecute.Run },
     { "Save and Load: chat session", SaveAndLoadSession.Run },
@@ -0,0 +1,51 @@
+using LLama.Common;
+
+namespace LLama.Examples.Examples
+{
+    public class LlavaInteractiveModeExecute
+    {
+        public static async Task Run()
+        {
+            string multiModalProj = UserSettings.GetMMProjPath();
+            string modelPath = UserSettings.GetModelPath();
+            string imagePath = UserSettings.GetImagePath();
+
+            var prompt = (await File.ReadAllTextAsync("Assets/vicuna-llava-v16.txt")).Trim();
+
+            var parameters = new ModelParams(modelPath)
+            {
+                ContextSize = 4096,
+                Seed = 1337,
+                GpuLayerCount = 5
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+
+            // Llava Init
+            using var clipModel = LLavaWeights.LoadFromFile(multiModalProj);
+
+            var ex = new InteractiveExecutor(context, clipModel);
+            ex.ImagePath = imagePath;
+
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 1024 and the context size is 4096.");
+            Console.ForegroundColor = ConsoleColor.White;
+            Console.Write(prompt);
+
+            var inferenceParams = new InferenceParams() { Temperature = 0.1f, AntiPrompts = new List<string> { "USER:" }, MaxTokens = 1024 };
+
+            while (true)
+            {
+                await foreach (var text in ex.InferAsync(prompt, inferenceParams))
+                {
+                    Console.Write(text);
+                }
+                Console.ForegroundColor = ConsoleColor.Green;
+                prompt = Console.ReadLine();
+                Console.ForegroundColor = ConsoleColor.White;
+            }
+        }
+    }
+}
@@ -21,7 +21,7 @@ namespace LLama.Examples.Examples
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("The executor has been enabled. In this example, the inference is an one-time job. That says, the previous input and response has " +
                 "no impact on the current response. Now you can ask it questions. Note that in this example, no prompt was set for LLM and the maximum response tokens is 50. " +
-                "It may not perform well because of lack of prompt. This is also an example that could indicate the improtance of prompt in LLM. To improve it, you can add " +
+                "It may not perform well because of lack of prompt. This is also an example that could indicate the importance of prompt in LLM. To improve it, you can add " +
                 "a prompt for it yourself!");
             Console.ForegroundColor = ConsoleColor.White;
@@ -67,6 +67,9 @@
     <None Update="Assets\chat-with-kunkun-chinese.txt">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Update="Assets\vicuna-llava-v16.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
   </ItemGroup>
</Project>
@@ -4,54 +4,84 @@ namespace LLama.Examples;

 internal static class UserSettings
 {
-    private static readonly string SettingsFilePath = Path.Join(AppContext.BaseDirectory, "DefaultModel.env");
+    private static readonly string SettingsModelPath = Path.Join(AppContext.BaseDirectory, "DefaultModel.env");
+    private static readonly string SettingsMMprojPath = Path.Join(AppContext.BaseDirectory, "DefaultMMProj.env");
+    private static readonly string SettingsImagePath = Path.Join(AppContext.BaseDirectory, "DefaultImage.env");

-    private static string? ReadDefaultModelPath()
+    private static string? ReadDefaultPath(string file)
     {
-        if (!File.Exists(SettingsFilePath))
+        if (!File.Exists(file))
             return null;

-        string path = File.ReadAllText(SettingsFilePath).Trim();
+        string path = File.ReadAllText(file).Trim();
         if (!File.Exists(path))
             return null;

         return path;
     }

-    private static void WriteDefaultModelPath(string path)
+    private static void WriteDefaultPath(string settings, string path)
     {
-        File.WriteAllText(SettingsFilePath, path);
+        File.WriteAllText(settings, path);
     }

     public static string GetModelPath(bool alwaysPrompt = false)
     {
-        var defaultPath = ReadDefaultModelPath();
+        var defaultPath = ReadDefaultPath(SettingsModelPath);
         var path = defaultPath is null || alwaysPrompt
             ? PromptUserForPath()
             : PromptUserForPathWithDefault(defaultPath);

         if (File.Exists(path))
-            WriteDefaultModelPath(path);
+            WriteDefaultPath(SettingsModelPath, path);

         return path;
     }

+    // TODO: Refactor
+    public static string GetMMProjPath(bool alwaysPrompt = false)
+    {
+        var defaultPath = ReadDefaultPath(SettingsMMprojPath);
+        var path = defaultPath is null || alwaysPrompt
+            ? PromptUserForPath("MMProj")
+            : PromptUserForPathWithDefault(defaultPath, "MMProj");
+
+        if (File.Exists(path))
+            WriteDefaultPath(SettingsMMprojPath, path);
+
+        return path;
+    }
+
+    // TODO: Refactor
+    public static string GetImagePath(bool alwaysPrompt = false)
+    {
+        var defaultPath = ReadDefaultPath(SettingsImagePath);
+        var path = defaultPath is null || alwaysPrompt
+            ? PromptUserForPath("image")
+            : PromptUserForPathWithDefault(defaultPath, "image");
+
+        if (File.Exists(path))
+            WriteDefaultPath(SettingsImagePath, path);
+
+        return path;
+    }
+
-    private static string PromptUserForPath()
+    private static string PromptUserForPath(string text = "model")
     {
         return AnsiConsole.Prompt(
-            new TextPrompt<string>("Please input your model path:")
+            new TextPrompt<string>(string.Format("Please input your {0} path:", text))
                 .PromptStyle("white")
-                .Validate(File.Exists, "[red]ERROR: invalid model file path - file does not exist[/]")
+                .Validate(File.Exists, string.Format("[red]ERROR: invalid {0} file path - file does not exist[/]", text))
         );
     }

-    private static string PromptUserForPathWithDefault(string defaultPath)
+    private static string PromptUserForPathWithDefault(string defaultPath, string text = "model")
     {
         return AnsiConsole.Prompt(
-            new TextPrompt<string>("Please input your model path (or ENTER for default):")
+            new TextPrompt<string>(string.Format("Please input your {0} path (or ENTER for default):", text))
                 .DefaultValue(defaultPath)
                 .PromptStyle("white")
-                .Validate(File.Exists, "[red]ERROR: invalid model file path - file does not exist[/]")
+                .Validate(File.Exists, string.Format("[red]ERROR: invalid {0} file path - file does not exist[/]", text))
         );
     }
 }
@@ -31,23 +31,23 @@ namespace LLama.Unittest
         _llamaWeights.Dispose();
         _lLavaWeights.Dispose();
     }

-    [Fact(Skip = "Very slow in CI")]
+    [Fact]
     public void EmbedImageAsFileName()
     {
         int n_past = 0;
-        Assert.True( _lLavaWeights.EmbedImage( _context, Constants.LLavaImage, ref n_past ) );
+        SafeLlavaImageEmbedHandle emb = _lLavaWeights.CreateImageEmbeddings(_context, Constants.LLavaImage);
+        Assert.True( _lLavaWeights.EvalImageEmbed( _context, emb, ref n_past ) );
     }

-    [Fact(Skip = "Very slow in CI")]
+    [Fact]
     public void EmbedImageAsBinary()
     {
         int n_past = 0;
         byte[] image = System.IO.File.ReadAllBytes(Constants.LLavaImage);
-        Assert.True( _lLavaWeights.EmbedImage( _context, image, ref n_past ) );
+        SafeLlavaImageEmbedHandle emb = _lLavaWeights.CreateImageEmbeddings(_context, image);
+        Assert.True( _lLavaWeights.EvalImageEmbed( _context, emb, ref n_past ) );
     }
 }
@@ -12,7 +12,14 @@ namespace LLama.Abstractions
     /// The loaded context for this executor.
     /// </summary>
     public LLamaContext Context { get; }

+    // LLava Section
+    public bool IsMultiModal { get; }
+    public bool MultiModalProject { get; }
+    public LLavaWeights? ClipModel { get; }
+    public string ImagePath { get; set; }
+
     /// <summary>
     /// Asynchronously infers a response from the model.
     /// </summary>
@@ -64,6 +64,18 @@ namespace LLama
     /// </summary>
     public LLamaContext Context { get; }

+    // LLava Section
+    public bool IsMultiModal
+    {
+        get
+        {
+            return ClipModel != null && !string.IsNullOrEmpty(ImagePath);
+        }
+    }
+    public bool MultiModalProject { get; }
+    public LLavaWeights? ClipModel { get; }
+    public string ImagePath { get; set; }
+
     /// <summary>
     /// Current "mu" value for mirostat sampling
     /// </summary>
@@ -86,6 +98,13 @@ namespace LLama
         _last_n_tokens = new FixedSizeQueue<LLamaToken>((int)Context.ContextSize);
         _decoder = new StreamingTokenDecoder(context);
     }

+    public StatefulExecutorBase(LLamaContext context, LLavaWeights lLavaWeights, ILogger? logger = null) :
+        this( context, logger )
+    {
+        ClipModel = lLavaWeights;
+        MultiModalProject = true;
+    }
+
     /// <summary>
     /// This API is currently not verified.
@@ -21,6 +21,11 @@ namespace LLama
     {
         private bool _is_prompt_run = true;
         private readonly LLamaToken _llama_token_newline;

+        // LLava
+        private int _EmbedImagePosition = -1;
+        private SafeLlavaImageEmbedHandle _imageEmbedHandle = null;
+        private bool _imageInPrompt = false;
+
         /// <summary>
         ///
@@ -32,6 +37,12 @@ namespace LLama
         {
             _llama_token_newline = NativeApi.llama_token_nl(Context.NativeHandle.ModelHandle);
         }

+        public InteractiveExecutor(LLamaContext context, LLavaWeights clipModel, ILogger? logger = null)
+            : base(context, clipModel, logger)
+        {
+            _llama_token_newline = NativeApi.llama_token_nl(Context.NativeHandle.ModelHandle);
+        }
+
         /// <inheritdoc />
         public override ExecutorBaseState GetStateData()
@@ -107,8 +118,38 @@ namespace LLama
         {
             if (_is_prompt_run)
             {
-                // When running the first input (prompt) in inteactive mode, we should specially process it.
-                _embed_inps = Context.Tokenize(text, true).ToList();
+                // When running the first input (prompt) in interactive mode, we should specially process it.
+                if (!this.IsMultiModal)
+                {
+                    _embed_inps = Context.Tokenize(text, true).ToList();
+                }
+                else
+                {
+                    // If the prompt contains the tag <image>, extract it.
+                    _imageInPrompt = text.Contains("<image>");
+                    if (_imageInPrompt)
+                    {
+                        if (!string.IsNullOrEmpty(ImagePath))
+                        {
+                            _imageEmbedHandle = SafeLlavaImageEmbedHandle.CreateFromFileName( ClipModel.NativeHandle, Context, ImagePath);
+                        }
+
+                        int imageIndex = text.IndexOf("<image>");
+                        // Tokenize segment 1 (before the <image> tag)
+                        string preImagePrompt = text.Substring(0, imageIndex);
+                        var segment1 = Context.Tokenize(preImagePrompt, true);
+                        // Remember the position at which to insert the image embeddings
+                        _EmbedImagePosition = segment1.Length;
+                        string postImagePrompt = text.Substring(imageIndex + 7);
+                        var segment2 = Context.Tokenize(postImagePrompt, false);
+                        _embed_inps.AddRange(segment1);
+                        _embed_inps.AddRange(segment2);
+                    }
+                    else
+                    {
+                        _embed_inps = Context.Tokenize(text, true).ToList();
+                    }
+                }
             }
             else
             {
@@ -170,9 +211,30 @@ namespace LLama
                 TryReuseMathingPrefix();

-                var (result, _) = Context.NativeHandle.Decode(_embeds, LLamaSeqId.Zero, batch, ref _pastTokensCount);
-                if (result != DecodeResult.Ok)
-                    throw new LLamaDecodeError(result);
+                // Changes to support multi-modal LLMs.
+                (DecodeResult, int) header, end, result;
+                if (IsMultiModal && _EmbedImagePosition > 0)
+                {
+                    // Tokens before the image
+                    header = Context.NativeHandle.Decode(_embeds.GetRange(0, _EmbedImagePosition), LLamaSeqId.Zero, batch, ref _pastTokensCount);
+                    if (header.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(header.Item1);
+
+                    // Image
+                    ClipModel.EvalImageEmbed(Context, _imageEmbedHandle, ref _pastTokensCount);
+
+                    // Tokens after the image
+                    end = Context.NativeHandle.Decode(_embeds.GetRange(_EmbedImagePosition, _embeds.Count - _EmbedImagePosition), LLamaSeqId.Zero, batch, ref _pastTokensCount);
+
+                    _EmbedImagePosition = -1;
+                }
+                else
+                {
+                    result = Context.NativeHandle.Decode(_embeds, LLamaSeqId.Zero, batch, ref _pastTokensCount);
+                    if (result.Item1 != DecodeResult.Ok) throw new LLamaDecodeError(result.Item1);
+                }

                 if (_embeds.Count > 0 && !string.IsNullOrEmpty(_pathSession))
                 {
@@ -23,7 +23,13 @@ namespace LLama
     private readonly IContextParams _params;
     private readonly ILogger? _logger;
     private readonly LLamaBatch _batch;

+    // LLava Section
+    public bool IsMultiModal => false;
+    public bool MultiModalProject { get; }
+    public LLavaWeights ClipModel { get; }
+    public string ImagePath { get; set; }
+
     /// <summary>
     /// The context used by the executor when running the inference.
     /// </summary>
@@ -46,6 +52,7 @@ namespace LLama
         Context.Dispose();
     }
+
     /// <inheritdoc />
     public async IAsyncEnumerable<string> InferAsync(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
@@ -19,30 +19,21 @@ public sealed class LLavaWeights : IDisposable
         return new LLavaWeights(weights);
     }

-    /// <summary>
-    /// Embed the image from file into llama context
-    /// </summary>
-    /// <param name="ctxLlama"></param>
-    /// <param name="Image"></param>
-    /// <param name="n_past"></param>
-    /// <returns></returns>
-    public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past )
+    public SafeLlavaImageEmbedHandle CreateImageEmbeddings(LLamaContext ctxLlama, Byte[] image )
     {
-        return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
+        return NativeHandle.CreateImageEmbeddings(ctxLlama, image );
     }

-    /// <summary>
-    /// Embed the image from binary into llama context.
-    /// </summary>
-    /// <param name="ctxLlama"></param>
-    /// <param name="Image"></param>
-    /// <param name="n_past"></param>
-    /// <returns></returns>
-    public bool EmbedImage(LLamaContext ctxLlama, Byte[] Image, ref int n_past )
+    public SafeLlavaImageEmbedHandle CreateImageEmbeddings(LLamaContext ctxLlama, string image )
     {
-        return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past );
+        return NativeHandle.CreateImageEmbeddings(ctxLlama, image );
     }

+    public bool EvalImageEmbed(LLamaContext ctxLlama, SafeLlavaImageEmbedHandle imageEmbed, ref int n_past)
+    {
+        return NativeHandle.EvalImageEmbed( ctxLlama, imageEmbed, ref n_past );
+    }
+
     public void Dispose()
     {
         NativeHandle.Dispose();
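Splitting the old one-shot `EmbedImage` into `CreateImageEmbeddings` plus `EvalImageEmbed` lets callers build an embedding once and evaluate it later, which is exactly what the interactive executor needs. A minimal usage sketch of the new surface (assuming `context`, `multiModalProj` and `imagePath` as in the example above):

    using var clipModel = LLavaWeights.LoadFromFile(multiModalProj);

    int n_past = 0;
    // From a file name; the byte[] overload accepts raw jpeg bytes instead.
    SafeLlavaImageEmbedHandle embed = clipModel.CreateImageEmbeddings(context, imagePath);
    if (!clipModel.EvalImageEmbed(context, embed, ref n_past))
        throw new InvalidOperationException("Failed to evaluate the image embeddings.");
    // n_past now points just past the image in the context.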
@@ -9,18 +9,20 @@ public static unsafe partial class NativeApi
     /// <summary>
     /// Sanity check for clip <-> llava embed size match
     /// </summary>
-    /// <returns></returns>
+    /// <param name="ctxLlama">LLama context</param>
+    /// <param name="ctxClip">Llava model</param>
+    /// <returns>True if validated successfully</returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)]
     public static extern bool llava_validate_embed_size( SafeLLamaContextHandle ctxLlama, SafeLlavaModelHandle ctxClip);

     /// <summary>
     /// Build an image embed from image file bytes
     /// </summary>
-    /// <param name="ctx_clip"></param>
-    /// <param name="n_threads"></param>
-    /// <param name="image_bytes"></param>
-    /// <param name="image_bytes_length"></param>
-    /// <returns></returns>
+    /// <param name="ctx_clip">SafeHandle to the Clip model</param>
+    /// <param name="n_threads">Number of threads</param>
+    /// <param name="image_bytes">Binary image in jpeg format</param>
+    /// <param name="image_bytes_length">Byte length of the image</param>
+    /// <returns>SafeHandle to the embeddings</returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes",
         CallingConvention = CallingConvention.Cdecl)]
     public static extern
@@ -30,10 +32,10 @@ public static unsafe partial class NativeApi
     /// <summary>
     /// Build an image embed from a path to an image filename
     /// </summary>
-    /// <param name="ctx_clip"></param>
-    /// <param name="n_threads"></param>
-    /// <param name="image_path"></param>
-    /// <returns></returns>
+    /// <param name="ctx_clip">SafeHandle to the Clip model</param>
+    /// <param name="n_threads">Number of threads</param>
+    /// <param name="image_path">Image filename (jpeg) to generate embeddings from</param>
+    /// <returns>SafeHandle to the embeddings</returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename", CallingConvention = CallingConvention.Cdecl)]
     public static extern
     SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename(SafeLlavaModelHandle ctx_clip, int n_threads,
@@ -42,19 +44,19 @@ public static unsafe partial class NativeApi
     /// <summary>
     /// Free an embedding made with llava_image_embed_make_*
     /// </summary>
-    /// <param name="embed"></param>
-    /// <returns></returns>
+    /// <param name="embed">Embeddings to release</param>
     [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)]
-    public static extern SafeLlavaImageEmbedHandle llava_image_embed_free(IntPtr embed);
+    public static extern void llava_image_embed_free(IntPtr embed);

     /// <summary>
     /// Write the image represented by embed into the llama context with batch size n_batch, starting at context
     /// pos n_past. on completion, n_past points to the next position in the context after the image embed.
     /// </summary>
-    /// <param name="embed">ctx_llama</param>
-    /// <returns></returns>
+    /// <param name="ctx_llama">Llama context</param>
+    /// <param name="embed">Embedding handle</param>
+    /// <returns>True on success</returns>
     [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)]
-    public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, SafeLlavaImageEmbedHandle embed,
+    public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctx_llama, SafeLlavaImageEmbedHandle embed,
         int n_batch, ref int n_past);
 }
@@ -11,7 +11,7 @@ using LLama.Exceptions;
 namespace LLama.Native
 {
     /// <summary>
-    /// A reference to a set of llava model weights
+    /// A reference to a set of llava model weights.
     /// </summary>
     public sealed class SafeLlavaModelHandle
         : SafeLLamaHandleBase
@@ -36,9 +36,10 @@ namespace LLama.Native
     /// <summary>
     /// Load a model from the given file path into memory
     /// </summary>
-    /// <param name="modelPath"></param>
-    /// <param name="lparams"></param>
-    /// <returns></returns>
+    /// <param name="modelPath">MMProj file (multi-modal projections)</param>
+    /// <param name="verbosity">Verbosity level</param>
+    /// <returns>SafeHandle of the Clip model</returns>
+    /// <exception cref="InvalidOperationException"></exception>
     /// <exception cref="RuntimeError"></exception>
     public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity )
     {
@@ -56,31 +57,37 @@ namespace LLama.Native
     }

     /// <summary>
-    /// Embed the image from file in llama context
+    /// Create the image embeddings.
     /// </summary>
-    /// <param name="ctxLlama"></param>
-    /// <param name="image"></param>
-    /// <param name="n_past"></param>
-    /// <returns></returns>
-    public bool EmbedImage(LLamaContext ctxLlama, string image, ref int n_past)
+    /// <param name="ctxLlama">LLama context</param>
+    /// <param name="image">Image filename (it supports jpeg format only)</param>
+    /// <returns>The SafeHandle of these embeddings</returns>
+    public SafeLlavaImageEmbedHandle CreateImageEmbeddings(LLamaContext ctxLlama, string image)
     {
-        var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromFileName(this, ctxLlama, image);
-        bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past );
-        return result;
+        return SafeLlavaImageEmbedHandle.CreateFromFileName(this, ctxLlama, image);
     }

     /// <summary>
-    /// Embed the image from binary in llama context
+    /// Create the image embeddings.
+    /// </summary>
+    /// <param name="ctxLlama">LLama context</param>
+    /// <param name="image">Image in binary format (it supports jpeg format only)</param>
+    /// <returns>The SafeHandle of these embeddings</returns>
+    public SafeLlavaImageEmbedHandle CreateImageEmbeddings(LLamaContext ctxLlama, byte[] image )
+    {
+        return SafeLlavaImageEmbedHandle.CreateFromMemory(this, ctxLlama, image );
+    }
+
+    /// <summary>
+    /// Evaluate the image embeddings.
     /// </summary>
-    /// <param name="ctxLlama"></param>
-    /// <param name="image">jpeg image</param>
+    /// <param name="ctxLlama">Llama context</param>
+    /// <param name="imageEmbed">The current embeddings to evaluate</param>
     /// <param name="n_past"></param>
-    /// <returns></returns>
-    public bool EmbedImage(LLamaContext ctxLlama, Byte[] image, ref int n_past )
+    /// <returns>True on success</returns>
+    public bool EvalImageEmbed(LLamaContext ctxLlama, SafeLlavaImageEmbedHandle imageEmbed, ref int n_past)
     {
-        var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromMemory(this, ctxLlama, image );
-        bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past );
-        return result;
+        return NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, imageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past );
     }

     /// <summary>
@@ -95,7 +102,7 @@ namespace LLama.Native
     /// <summary>
     /// Frees MULTI MODAL PROJECTIONS model / Clip Model
     /// </summary>
-    /// <param name="ctx"></param>
+    /// <param name="ctx">Internal pointer to the model</param>
     [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)]
     private static extern void clip_free(IntPtr ctx);