using System;
using System.IO;
using System.Runtime.InteropServices;
using LLama.Exceptions;
namespace LLama.Native
{
    /// <summary>
    /// A reference to a set of llava model weights.
    /// </summary>
    public sealed class SafeLlavaModelHandle
        : SafeLLamaHandleBase
    {
        /// <inheritdoc />
        protected override bool ReleaseHandle()
        {
            clip_free(DangerousGetHandle());
            SetHandle(IntPtr.Zero);
            return true;
        }

        /// <summary>
        /// Load a model from the given file path into memory
        /// </summary>
        /// <param name="modelPath">MMP File (Multi-Modal Projections)</param>
        /// <param name="verbosity">Verbosity level</param>
        /// <returns>SafeHandle of the Clip Model</returns>
        /// <exception cref="InvalidOperationException"></exception>
        /// <exception cref="LoadWeightsFailedException"></exception>
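        /// <example>
        /// A minimal usage sketch (not part of this file): the mmproj path below is a placeholder and the
        /// verbosity value is an assumption.
        /// <code>
        /// // Load the multi-modal projections (CLIP) weights that accompany the main llava model.
        /// var clipModel = SafeLlavaModelHandle.LoadFromFile("path/to/mmproj.gguf", verbosity: 1);
        /// </code>
        /// </example>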
        public static SafeLlavaModelHandle LoadFromFile(string modelPath, int verbosity)
        {
            // Try to open the model file, this will check:
            // - File exists (automatically throws FileNotFoundException)
            // - File is readable (explicit check)
            // This provides better error messages than llama.cpp, which would throw an access violation exception in both cases.
            using (var fs = new FileStream(modelPath, FileMode.Open))
                if (!fs.CanRead)
                    throw new InvalidOperationException($"Llava MMP Model file '{modelPath}' is not readable");

            var handle = clip_model_load(modelPath, verbosity);
            if (handle.IsInvalid)
                throw new LoadWeightsFailedException(modelPath);

            return handle;
        }

        /// <summary>
        /// Create the Image Embeddings.
        /// </summary>
        /// <param name="ctxLlama">LLama Context</param>
        /// <param name="image">Image filename (it supports jpeg format only)</param>
        /// <returns>The SafeHandle of these embeddings</returns>
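        /// <example>
        /// A hedged sketch, assuming <c>clipModel</c> is a loaded <see cref="SafeLlavaModelHandle"/> and
        /// <c>context</c> is an existing LLamaContext; the image path is a placeholder.
        /// <code>
        /// using var embed = clipModel.CreateImageEmbeddings(context, "photo.jpg");
        /// </code>
        /// </example>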
        public SafeLlavaImageEmbedHandle CreateImageEmbeddings(LLamaContext ctxLlama, string image)
        {
            return SafeLlavaImageEmbedHandle.CreateFromFileName(this, ctxLlama, image);
        }

        /// <summary>
        /// Create the Image Embeddings.
        /// </summary>
        /// <param name="ctxLlama">LLama Context</param>
        /// <param name="image">Image in binary format (it supports jpeg format only)</param>
        /// <returns>The SafeHandle of these embeddings</returns>
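        /// <example>
        /// A hedged sketch, assuming <c>clipModel</c> and <c>context</c> exist as above; this overload is
        /// useful when the jpeg bytes come from a stream or network rather than a file on disk.
        /// <code>
        /// byte[] bytes = File.ReadAllBytes("photo.jpg"); // placeholder source of jpeg bytes
        /// using var embed = clipModel.CreateImageEmbeddings(context, bytes);
        /// </code>
        /// </example>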
        public SafeLlavaImageEmbedHandle CreateImageEmbeddings(LLamaContext ctxLlama, byte[] image)
        {
            return SafeLlavaImageEmbedHandle.CreateFromMemory(this, ctxLlama, image);
        }

        /// <summary>
        /// Evaluates the image embeddings.
        /// </summary>
        /// <param name="ctxLlama">Llama Context</param>
        /// <param name="imageEmbed">The current embeddings to evaluate</param>
        /// <param name="n_past">The number of positions already evaluated in the context; updated as the embedding is consumed</param>
        /// <returns>True on success</returns>
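        /// <example>
        /// A hedged sketch of a typical flow, assuming <c>clipModel</c> and <c>context</c> exist as above;
        /// the image path is a placeholder.
        /// <code>
        /// var n_past = 0;
        /// using var embed = clipModel.CreateImageEmbeddings(context, "photo.jpg");
        /// if (!clipModel.EvalImageEmbed(context, embed, ref n_past))
        ///     throw new InvalidOperationException("Failed to evaluate the image embeddings");
        /// // n_past now points just past the image tokens, ready for the text prompt.
        /// </code>
        /// </example>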
        public bool EvalImageEmbed(LLamaContext ctxLlama, SafeLlavaImageEmbedHandle imageEmbed, ref int n_past)
        {
            return NativeApi.llava_eval_image_embed(ctxLlama.NativeHandle, imageEmbed, (int)ctxLlama.Params.BatchSize, ref n_past);
        }

        /// <summary>
        /// Load MULTI MODAL PROJECTIONS model / Clip Model
        /// </summary>
        /// <param name="mmProj">Model path/file</param>
        /// <param name="verbosity">Verbosity level</param>
        /// <returns>SafeLlavaModelHandle</returns>
        [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_model_load", CallingConvention = CallingConvention.Cdecl)]
        private static extern SafeLlavaModelHandle clip_model_load(string mmProj, int verbosity);

        /// <summary>
        /// Frees MULTI MODAL PROJECTIONS model / Clip Model
        /// </summary>
        /// <param name="ctx">Internal Pointer to the model</param>
        [DllImport(NativeApi.llavaLibraryName, EntryPoint = "clip_free", CallingConvention = CallingConvention.Cdecl)]
        private static extern void clip_free(IntPtr ctx);
    }
}