* Add llava_binaries, update all binaries to make the test
* Llava API + LlavaTest
Preliminary
* First prototype of Load + Unit Test
* Temporarily run test on branch LlavaAPI
* Disable Embed test to review the rest of the test
* Restore Embedding test
* Use BatchThread to eval image embeddings
Test Threads default value to ensure it doesn't produce problems.
* Rename test file
* Update action versions
* Test only one method, no release embeddings
* Revert "Test only one method, no release embeddings"
This reverts commit 264e176dcc.
* Correct API call
* Only test llava related functionality
* Cuda and Cblast binaries
* Restore build policy
* Changes related with code review
* Add SafeHandles
* Set overwrite to upload-artifact@v4
* Revert to upload-artifact@v3
* revert to upload-artifact@v3
tags/0.11.0
| @@ -48,12 +48,12 @@ jobs: | |||
| cd build | |||
| cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }} | |||
| cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} | |||
| - uses: actions/upload-artifact@v4 | |||
| - uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: ./build/libllama.so | |||
| name: llama-bin-linux-${{ matrix.build }}-x64.so | |||
| - name: Upload Llava | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: ./build/examples/llava/libllava_shared.so | |||
| name: llava-bin-linux-${{ matrix.build }}-x64.so | |||
| @@ -89,13 +89,13 @@ jobs: | |||
| cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} | |||
| - name: Upload artifacts | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: .\build\bin\Release\llama.dll | |||
| name: llama-bin-win-${{ matrix.build }}-x64.dll | |||
| - name: Upload Llava | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: .\build\bin\Release\llava_shared.dll | |||
| name: llava-bin-win-${{ matrix.build }}-x64.dll | |||
| @@ -169,20 +169,35 @@ jobs: | |||
| ls -R | |||
| - name: Upload artifacts (Windows) | |||
| if: ${{ matrix.os == 'windows-latest' }} | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: | | |||
| .\build\bin\Release\llama.dll | |||
| .\build\bin\Release\clblast.dll | |||
| name: llama-bin-win-clblast-x64.dll | |||
| - name: Upload llava artifacts (Windows) | |||
| if: ${{ matrix.os == 'windows-latest' }} | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: | | |||
| .\build\bin\Release\llava_shared.dll | |||
| name: llava-bin-win-clblast-x64.dll | |||
| - name: Upload artifacts (linux) | |||
| if: ${{ matrix.os == 'ubuntu-22.04' }} | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: | | |||
| ./build/libllama.so | |||
| # ./build/libclblast.so | |||
| name: llama-bin-linux-clblast-x64.so | |||
| - name: Upload llava artifacts (linux) | |||
| if: ${{ matrix.os == 'ubuntu-22.04' }} | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: | | |||
| ./build/examples/llava/libllava_shared.so | |||
| name: llava-bin-linux-clblast-x64.so | |||
| compile-cublas: | |||
| name: Compile (cublas) | |||
| @@ -228,16 +243,29 @@ jobs: | |||
| - name: Upload artifacts (Windows) | |||
| if: ${{ matrix.os == 'windows-latest' }} | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: .\build\bin\Release\llama.dll | |||
| name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll | |||
| - name: Upload llava artifacts (Windows) | |||
| if: ${{ matrix.os == 'windows-latest' }} | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: .\build\bin\Release\llava_shared.dll | |||
| name: llava-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll | |||
| - name: Upload artifacts (Linux) | |||
| if: ${{ matrix.os == 'ubuntu-20.04' }} | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: ./build/libllama.so | |||
| name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so | |||
| - name: Upload llava artifacts (Linux) | |||
| if: ${{ matrix.os == 'ubuntu-20.04' }} | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: ./build/examples/llava/libllava_shared.so | |||
| name: llava-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so | |||
| compile-macos: | |||
| name: Compile (MacOS) | |||
| @@ -268,18 +296,18 @@ jobs: | |||
| cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }} | |||
| cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} | |||
| - name: Upload artifacts | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: ./build/libllama.dylib | |||
| name: llama-bin-osx-${{ matrix.build }}.dylib | |||
| - name: Upload Llava | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: ./build/examples/llava/libllava_shared.dylib | |||
| name: llava-bin-osx-${{ matrix.build }}.dylib | |||
| - name: Upload Metal | |||
| if: ${{ matrix.build != 'x64' }} | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: ./build/bin/ggml-metal.metal | |||
| name: ggml-metal.metal | |||
| @@ -347,11 +375,12 @@ jobs: | |||
| cp artifacts/llama-bin-linux-clblast-x64.so/libllama.so deps/clblast/ | |||
| - name: Upload artifacts | |||
| uses: actions/upload-artifact@v4 | |||
| uses: actions/upload-artifact@v3 | |||
| with: | |||
| path: deps/ | |||
| name: deps | |||
| - name: Remove Artifacts | |||
| uses: geekyeggo/delete-artifact@v2 | |||
| with: | |||
| @@ -28,14 +28,14 @@ jobs: | |||
| os: windows-2019 | |||
| config: release | |||
| steps: | |||
| - uses: actions/checkout@v3 | |||
| - uses: actions/setup-dotnet@v3 | |||
| - uses: actions/checkout@v4 | |||
| - uses: actions/setup-dotnet@v4 | |||
| with: | |||
| dotnet-version: | | |||
| 7.0.x | |||
| 8.0.x | |||
| - name: Cache Packages | |||
| uses: actions/cache@v3 | |||
| uses: actions/cache@v4 | |||
| with: | |||
| key: "unit_test_models" | |||
| path: LLama.Unittest/Models | |||
| @@ -3,5 +3,8 @@ | |||
| internal static class Constants | |||
| { | |||
| public static string ModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf"; | |||
| public static string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf"; | |||
| public static string LLavaMmpPath = "Models/mmproj-model-f16.gguf"; | |||
| public static string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg"; | |||
| } | |||
| } | |||
| @@ -27,8 +27,9 @@ | |||
| </ItemGroup> | |||
| <Target Name="DownloadContentFiles" BeforeTargets="Build"> | |||
| <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"> | |||
| </DownloadFile> | |||
| <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile> | |||
| <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile> | |||
| <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile> | |||
| </Target> | |||
| <ItemGroup> | |||
| @@ -44,5 +45,14 @@ | |||
| <None Update="Models\llama-2-7b-chat.Q3_K_S.gguf"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| </None> | |||
| <None Update="Models\llava-v1.6-mistral-7b.Q3_K_XS.gguf"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| </None> | |||
| <None Update="Models\mmproj-model-f16.gguf"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| </None> | |||
| <None Update="Models\extreme-ironing-taxi-610x427.jpg"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| </None> | |||
| </ItemGroup> | |||
| </Project> | |||
| @@ -14,6 +14,8 @@ public sealed class LLamaEmbedderTests | |||
| _testOutputHelper = testOutputHelper; | |||
| var @params = new ModelParams(Constants.ModelPath) | |||
| { | |||
| ContextSize = 4096, | |||
| Threads = 5, | |||
| EmbeddingMode = true, | |||
| }; | |||
| using var weights = LLamaWeights.LoadFromFile(@params); | |||
| @@ -31,6 +33,7 @@ public sealed class LLamaEmbedderTests | |||
| return a.Zip(b, (x, y) => x * y).Sum(); | |||
| } | |||
| [Fact] | |||
| public async Task EmbedCompare() | |||
| { | |||
| @@ -0,0 +1,53 @@ | |||
| using LLama.Common; | |||
| using LLama.Native; | |||
| namespace LLama.Unittest | |||
| { | |||
| // Test the same things as llama model + image embeddings | |||
| // | |||
| public sealed class LLavaWeightTests | |||
| : IDisposable | |||
| { | |||
| private readonly LLamaWeights _llamaWeights; | |||
| private readonly LLavaWeights _lLavaWeights; | |||
| private readonly LLamaContext _context; | |||
| public LLavaWeightTests() | |||
| { | |||
| var @params = new ModelParams(Constants.ModelPath) | |||
| { | |||
| // Llava models require a big context | |||
| ContextSize = 4096 | |||
| }; | |||
| _llamaWeights = LLamaWeights.LoadFromFile(@params); | |||
| _lLavaWeights = LLavaWeights.LoadFromFile(Constants.LLavaMmpPath); | |||
| _context = _llamaWeights.CreateContext(@params); | |||
| } | |||
| public void Dispose() | |||
| { | |||
| _llamaWeights.Dispose(); | |||
| _lLavaWeights.Dispose(); | |||
| } | |||
| [Fact] | |||
| public void EmbedImageAsFileName() | |||
| { | |||
| int n_past = 0; | |||
| Assert.True( _lLavaWeights.EmbedImage( _context, Constants.LLavaImage, ref n_past ) ); | |||
| } | |||
| [Fact] | |||
| public void EmbedImageAsBinary() | |||
| { | |||
| int n_past = 0; | |||
| byte[] image = System.IO.File.ReadAllBytes(Constants.LLavaImage); | |||
| Assert.True( _lLavaWeights.EmbedImage( _context, image, ref n_past ) ); | |||
| } | |||
| } | |||
| } | |||
| @@ -67,5 +67,51 @@ | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/osx-x64/native/libllama.dylib</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/llava_shared.dll"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/win-x64/native/noavx/llava_shared.dll</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/llava_shared.dll"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/win-x64/native/avx/llava_shared.dll</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/llava_shared.dll"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/win-x64/native/avx2/llava_shared.dll</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/llava_shared.dll"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/win-x64/native/avx512/llava_shared.dll</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/libllava_shared.so"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/linux-x64/native/noavx/libllava_shared.so</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/libllava_shared.so"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/linux-x64/native/avx/libllava_shared.so</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/libllava_shared.so"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/linux-x64/native/avx2/libllava_shared.so</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/libllava_shared.so"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/linux-x64/native/avx512/libllava_shared.so</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-arm64/libllava_shared.dylib"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/osx-arm64/native/libllava_shared.dylib</Link> | |||
| </None> | |||
| <None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-x64/libllava_shared.dylib"> | |||
| <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | |||
| <Link>runtimes/osx-x64/native/libllava_shared.dylib</Link> | |||
| </None> | |||
| </ItemGroup> | |||
| </Project> | |||
| @@ -0,0 +1,51 @@ | |||
| using System; | |||
| using LLama.Native; | |||
| namespace LLama; | |||
| public sealed class LLavaWeights : IDisposable | |||
| { | |||
| public SafeLlavaModelHandle NativeHandle { get; } | |||
| internal LLavaWeights(SafeLlavaModelHandle weights) | |||
| { | |||
| NativeHandle = weights; | |||
| } | |||
| public static LLavaWeights LoadFromFile(string mmProject) | |||
| { | |||
| var weights = SafeLlavaModelHandle.LoadFromFile(mmProject, 1); | |||
| return new LLavaWeights(weights); | |||
| } | |||
| /// <summary> | |||
| /// Embed the image from file into llama context | |||
| /// </summary> | |||
| /// <param name="ctxLlama"></param> | |||
| /// <param name="Image"></param> | |||
| /// <param name="n_past"></param> | |||
| /// <returns></returns> | |||
| public bool EmbedImage(LLamaContext ctxLlama, string Image, ref int n_past ) | |||
| { | |||
| return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past ); | |||
| } | |||
| /// <summary> | |||
| /// Embed the image from binary into llama context. | |||
| /// </summary> | |||
| /// <param name="ctxLlama"></param> | |||
| /// <param name="Image"></param> | |||
| /// <param name="n_past"></param> | |||
| /// <returns></returns> | |||
| public bool EmbedImage(LLamaContext ctxLlama, Byte[] Image, ref int n_past ) | |||
| { | |||
| return NativeHandle.EmbedImage(ctxLlama, Image, ref n_past ); | |||
| } | |||
| public void Dispose() | |||
| { | |||
| NativeHandle.Dispose(); | |||
| } | |||
| } | |||
| @@ -0,0 +1,13 @@ | |||
| using System.Runtime.InteropServices; | |||
| namespace LLama.Native; | |||
| /// <summary> | |||
| /// LLaVa Image embeddings | |||
| /// </summary> | |||
| [StructLayout(LayoutKind.Sequential)] | |||
| unsafe public struct LLavaImageEmbed | |||
| { | |||
| public float* embed; | |||
| public int n_image_pos; | |||
| } | |||
| @@ -0,0 +1,60 @@ | |||
| using System; | |||
| using System.Runtime.InteropServices; | |||
| namespace LLama.Native; | |||
| using clip_ctx = IntPtr; | |||
| public static unsafe partial class NativeApi | |||
| { | |||
| /// <summary> | |||
| /// Sanity check for clip <-> llava embed size match | |||
| /// </summary> | |||
| /// <returns></returns> | |||
| [DllImport(llavaLibraryName, EntryPoint = "llava_validate_embed_size", CallingConvention = CallingConvention.Cdecl)] | |||
| public static extern bool llava_validate_embed_size( SafeLLamaContextHandle ctxLlama, SafeLlavaModelHandle ctxClip); | |||
| /// <summary> | |||
| /// Build an image embed from image file bytes | |||
| /// </summary> | |||
| /// <param name="ctx_clip"></param> | |||
| /// <param name="n_threads"></param> | |||
| /// <param name="image_bytes"></param> | |||
| /// <param name="image_bytes_length"></param> | |||
| /// <returns></returns> | |||
| [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_bytes", | |||
| CallingConvention = CallingConvention.Cdecl)] | |||
| public static extern | |||
| SafeLlavaImageEmbedHandle llava_image_embed_make_with_bytes(SafeLlavaModelHandle ctx_clip, int n_threads, | |||
| byte[] image_bytes, int image_bytes_length); | |||
| /// <summary> | |||
| /// Build an image embed from a path to an image filename | |||
| /// </summary> | |||
| /// <param name="ctx_clip"></param> | |||
| /// <param name="n_threads"></param> | |||
| /// <param name="image_path"></param> | |||
| /// <returns></returns> | |||
| [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_make_with_filename", CallingConvention = CallingConvention.Cdecl)] | |||
| public static extern | |||
| SafeLlavaImageEmbedHandle llava_image_embed_make_with_filename(SafeLlavaModelHandle ctx_clip, int n_threads, | |||
| [MarshalAs(UnmanagedType.LPStr)] string image_path); | |||
| /// <summary> | |||
| /// Free an embedding made with llava_image_embed_make_* | |||
| /// </summary> | |||
| /// <param name="embed"></param> | |||
| /// <returns></returns> | |||
| [DllImport(llavaLibraryName, EntryPoint = "llava_image_embed_free", CallingConvention = CallingConvention.Cdecl)] | |||
| public static extern SafeLlavaImageEmbedHandle llava_image_embed_free(IntPtr embed); | |||
| /// <summary> | |||
| /// Write the image represented by embed into the llama context with batch size n_batch, starting at context | |||
| /// pos n_past. on completion, n_past points to the next position in the context after the image embed. | |||
| /// </summary> | |||
| /// <param name="embed">ctx_llama</param> | |||
| /// <returns></returns> | |||
| [DllImport(llavaLibraryName, EntryPoint = "llava_eval_image_embed", CallingConvention = CallingConvention.Cdecl)] | |||
| public static extern bool llava_eval_image_embed(SafeLLamaContextHandle ctc_llama, SafeLlavaImageEmbedHandle embed, | |||
| int n_batch, ref int n_past); | |||
| } | |||
| @@ -235,6 +235,7 @@ namespace LLama.Native | |||
| if (platform == OSPlatform.OSX) | |||
| { | |||
| result.Add($"{prefix}{libraryNamePrefix}{libraryName}{suffix}"); | |||
| result.Add($"{prefix}{libraryNamePrefix}{llavaLibraryName}{suffix}"); | |||
| } | |||
| return result; | |||
| @@ -303,6 +304,11 @@ namespace LLama.Native | |||
| if (result is not null && result != IntPtr.Zero) | |||
| { | |||
| Log($"{fullPath} is selected and loaded successfully.", LogLevel.Information); | |||
| // Once detection is complete and llama has loaded successfully, we load LLaVa if it exists on the | |||
| // same path. | |||
| TryLoad( libraryPath.Replace("llama", "llava_shared"), true); | |||
| return (IntPtr)result; | |||
| } | |||
| @@ -338,6 +344,7 @@ namespace LLama.Native | |||
| } | |||
| internal const string libraryName = "llama"; | |||
| internal const string llavaLibraryName = "llava_shared"; | |||
| private const string cudaVersionFile = "version.json"; | |||
| private const string loggingPrefix = "[LLamaSharp Native]"; | |||
| private static bool enableLogging = false; | |||
| @@ -0,0 +1,45 @@ | |||
| using System; | |||
| using System.Collections.Generic; | |||
| using System.IO; | |||
| using System.Linq; | |||
| using System.Text; | |||
| using LLama; | |||
| using LLama.Exceptions; | |||
| namespace LLama.Native | |||
| { | |||
| /// <summary> | |||
| /// A Reference to a set of llava Image Embed handle | |||
| /// </summary> | |||
| public sealed class SafeLlavaImageEmbedHandle | |||
| : SafeLLamaHandleBase | |||
| { | |||
| private SafeLlavaImageEmbedHandle(IntPtr handle) | |||
| : base(handle, true) | |||
| { | |||
| } | |||
| private SafeLlavaImageEmbedHandle() | |||
| {} | |||
| public static SafeLlavaImageEmbedHandle CreateFromFileName( SafeLlavaModelHandle ctxLlava, LLamaContext ctxLlama, string image ) | |||
| { | |||
| return NativeApi.llava_image_embed_make_with_filename(ctxLlava, (int) ctxLlama.BatchThreads, image); | |||
| } | |||
| public static SafeLlavaImageEmbedHandle CreateFromMemory( SafeLlavaModelHandle ctxLlava, LLamaContext ctxLlama, Byte[] image ) | |||
| { | |||
| return NativeApi.llava_image_embed_make_with_bytes(ctxLlava, (int) ctxLlama.BatchThreads, image, image.Length); | |||
| } | |||
| /// <inheritdoc /> | |||
| protected override bool ReleaseHandle() | |||
| { | |||
| NativeApi.llava_image_embed_free(DangerousGetHandle()); | |||
| SetHandle(IntPtr.Zero); | |||
| return true; | |||
| } | |||
| } | |||
| } | |||
| @@ -0,0 +1,104 @@ | |||
| using System; | |||
| using System.Collections.Generic; | |||
| using System.IO; | |||
| using System.Linq; | |||
| using System.Runtime.InteropServices; | |||
| using System.Text; | |||
| using LLama; | |||
| using LLama.Exceptions; | |||
| namespace LLama.Native | |||
| { | |||
| /// <summary> | |||
| /// A reference to a set of llava model weights | |||
| /// </summary> | |||
| public sealed class SafeLlavaModelHandle | |||
| : SafeLLamaHandleBase | |||
| { | |||
| private SafeLlavaModelHandle(IntPtr handle) | |||
| : base(handle, true) | |||
| { | |||
| } | |||
| private SafeLlavaModelHandle() | |||
| {} | |||
| /// <inheritdoc /> | |||
| protected override bool ReleaseHandle() | |||
| { | |||
| clip_free(DangerousGetHandle()); | |||
| SetHandle(IntPtr.Zero); | |||
| return true; | |||
| } | |||
| /// <summary> | |||
| /// Load a model from the given file path into memory | |||
| /// </summary> | |||
| /// <param name="modelPath"></param> | |||
| /// <param name="lparams"></param> | |||
| /// <returns></returns> | |||
| /// <exception cref="RuntimeError"></exception> | |||
| public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity ) | |||
| { | |||
| // Try to open the model file, this will check: | |||
| // - File exists (automatically throws FileNotFoundException) | |||
| // - File is readable (explicit check) | |||
| // This provides better error messages than llama.cpp, which would throw an access violation exception in both cases. | |||
| using (var fs = new FileStream(modelPath, FileMode.Open)) | |||
| if (!fs.CanRead) | |||
| throw new InvalidOperationException($"Llava MMP Model file '{modelPath}' is not readable"); | |||
| return clip_model_load(modelPath, verbosity) | |||
| ?? throw new RuntimeError($"Failed to load LLaVa model {modelPath}."); | |||
| } | |||
| /// <summary> | |||
| /// Embed the image from file in llama context | |||
| /// </summary> | |||
| /// <param name="ctxLlama"></param> | |||
| /// <param name="image"></param> | |||
| /// <param name="n_past"></param> | |||
| /// <returns></returns> | |||
| public bool EmbedImage(LLamaContext ctxLlama, string image, ref int n_past) | |||
| { | |||
| var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromFileName(this, ctxLlama, image); | |||
| bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past ); | |||
| return result; | |||
| } | |||
| /// <summary> | |||
| /// Embed the image from binary in llama context | |||
| /// </summary> | |||
| /// <param name="ctxLlama"></param> | |||
| /// <param name="image">jpeg image</param> | |||
| /// <param name="n_past"></param> | |||
| /// <returns></returns> | |||
| public bool EmbedImage(LLamaContext ctxLlama, Byte[] image, ref int n_past ) | |||
| { | |||
| var ImageEmbed = SafeLlavaImageEmbedHandle.CreateFromMemory(this, ctxLlama, image ); | |||
| bool result = NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, ImageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past ); | |||
| return result; | |||
| } | |||
| /// <summary> | |||
| /// Load MULTI MODAL PROJECTIONS model / Clip Model | |||
| /// </summary> | |||
| /// <param name="mmProj"> Model path/file</param> | |||
| /// <param name="verbosity">Verbosity level</param> | |||
| /// <returns>SafeLlavaModelHandle</returns> | |||
| [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_model_load", CallingConvention = CallingConvention.Cdecl)] | |||
| private static extern SafeLlavaModelHandle clip_model_load(string mmProj, int verbosity); | |||
| /// <summary> | |||
| /// Frees MULTI MODAL PROJECTIONS model / Clip Model | |||
| /// </summary> | |||
| /// <param name="ctx"></param> | |||
| [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)] | |||
| private static extern void clip_free(IntPtr ctx); | |||
| } | |||
| } | |||