You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

NativeApi.cs 28 kB

2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608
  1. using System;
  2. using System.Buffers;
  3. using System.Runtime.InteropServices;
  4. using System.Text;
  5. #pragma warning disable IDE1006 // Naming Styles
  6. namespace LLama.Native
  7. {
  8. /// <summary>
  9. /// Callback from llama.cpp with log messages
  10. /// </summary>
  11. /// <param name="level"></param>
  12. /// <param name="message"></param>
  13. public delegate void LLamaLogCallback(LLamaLogLevel level, string message);
  14. /// <summary>
  15. /// Direct translation of the llama.cpp API
  16. /// </summary>
  17. public static partial class NativeApi
  18. {
  19. /// <summary>
  20. /// A method that does nothing. This is a native method, calling it will force the llama native dependencies to be loaded.
  21. /// </summary>
  22. /// <returns></returns>
  23. public static void llama_empty_call()
  24. {
  25. llama_mmap_supported();
  26. }
  27. /// <summary>
  28. /// Get the maximum number of devices supported by llama.cpp
  29. /// </summary>
  30. /// <returns></returns>
  31. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  32. public static extern int llama_max_devices();
  33. /// <summary>
  34. /// Create a LLamaModelParams with default values
  35. /// </summary>
  36. /// <returns></returns>
  37. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  38. public static extern LLamaModelParams llama_model_default_params();
  39. /// <summary>
  40. /// Create a LLamaContextParams with default values
  41. /// </summary>
  42. /// <returns></returns>
  43. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  44. public static extern LLamaContextParams llama_context_default_params();
  45. /// <summary>
  46. /// Create a LLamaModelQuantizeParams with default values
  47. /// </summary>
  48. /// <returns></returns>
  49. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  50. public static extern LLamaModelQuantizeParams llama_model_quantize_default_params();
  51. /// <summary>
  52. /// Check if memory mapping is supported
  53. /// </summary>
  54. /// <returns></returns>
  55. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  56. public static extern bool llama_mmap_supported();
  57. /// <summary>
  58. /// Check if memory lockingis supported
  59. /// </summary>
  60. /// <returns></returns>
  61. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  62. public static extern bool llama_mlock_supported();
  63. /// <summary>
  64. /// Initialize the llama + ggml backend
  65. /// Call once at the start of the program
  66. /// </summary>
  67. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  68. private static extern void llama_backend_init(bool numa);
  69. /// <summary>
  70. /// Apply a LoRA adapter to a loaded model
  71. /// path_base_model is the path to a higher quality model to use as a base for
  72. /// the layers modified by the adapter. Can be NULL to use the current loaded model.
  73. /// The model needs to be reloaded before applying a new adapter, otherwise the adapter
  74. /// will be applied on top of the previous one
  75. /// </summary>
  76. /// <param name="model_ptr"></param>
  77. /// <param name="path_lora"></param>
  78. /// <param name="scale"></param>
  79. /// <param name="path_base_model"></param>
  80. /// <param name="n_threads"></param>
  81. /// <returns>Returns 0 on success</returns>
  82. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  83. public static extern int llama_model_apply_lora_from_file(SafeLlamaModelHandle model_ptr, string path_lora, float scale, string? path_base_model, int n_threads);
  84. /// <summary>
  85. /// Sets the current rng seed.
  86. /// </summary>
  87. /// <param name="ctx"></param>
  88. /// <param name="seed"></param>
  89. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  90. public static extern void llama_set_rng_seed(SafeLLamaContextHandle ctx, uint seed);
  91. /// <summary>
  92. /// Returns the maximum size in bytes of the state (rng, logits, embedding
  93. /// and kv_cache) - will often be smaller after compacting tokens
  94. /// </summary>
  95. /// <param name="ctx"></param>
  96. /// <returns></returns>
  97. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  98. public static extern ulong llama_get_state_size(SafeLLamaContextHandle ctx);
  99. /// <summary>
  100. /// Copies the state to the specified destination address.
  101. /// Destination needs to have allocated enough memory.
  102. /// </summary>
  103. /// <param name="ctx"></param>
  104. /// <param name="dest"></param>
  105. /// <returns>the number of bytes copied</returns>
  106. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  107. public static extern unsafe ulong llama_copy_state_data(SafeLLamaContextHandle ctx, byte* dest);
  108. /// <summary>
  109. /// Set the state reading from the specified address
  110. /// </summary>
  111. /// <param name="ctx"></param>
  112. /// <param name="src"></param>
  113. /// <returns>the number of bytes read</returns>
  114. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  115. public static extern unsafe ulong llama_set_state_data(SafeLLamaContextHandle ctx, byte* src);
  116. /// <summary>
  117. /// Load session file
  118. /// </summary>
  119. /// <param name="ctx"></param>
  120. /// <param name="path_session"></param>
  121. /// <param name="tokens_out"></param>
  122. /// <param name="n_token_capacity"></param>
  123. /// <param name="n_token_count_out"></param>
  124. /// <returns></returns>
  125. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  126. public static extern bool llama_load_session_file(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens_out, ulong n_token_capacity, out ulong n_token_count_out);
  127. /// <summary>
  128. /// Save session file
  129. /// </summary>
  130. /// <param name="ctx"></param>
  131. /// <param name="path_session"></param>
  132. /// <param name="tokens"></param>
  133. /// <param name="n_token_count"></param>
  134. /// <returns></returns>
  135. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  136. public static extern bool llama_save_session_file(SafeLLamaContextHandle ctx, string path_session, LLamaToken[] tokens, ulong n_token_count);
  137. /// <summary>
  138. /// Run the llama inference to obtain the logits and probabilities for the next token.
  139. /// tokens + n_tokens is the provided batch of new tokens to process
  140. /// n_past is the number of tokens to use from previous eval calls
  141. /// </summary>
  142. /// <param name="ctx"></param>
  143. /// <param name="tokens"></param>
  144. /// <param name="n_tokens"></param>
  145. /// <param name="n_past"></param>
  146. /// <returns>Returns 0 on success</returns>
  147. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  148. [Obsolete("use llama_decode() instead")]
  149. public static extern unsafe int llama_eval(SafeLLamaContextHandle ctx, LLamaToken* tokens, int n_tokens, int n_past);
  150. /// <summary>
  151. /// Convert the provided text into tokens.
  152. /// </summary>
  153. /// <param name="ctx"></param>
  154. /// <param name="text"></param>
  155. /// <param name="encoding"></param>
  156. /// <param name="tokens"></param>
  157. /// <param name="n_max_tokens"></param>
  158. /// <param name="add_bos"></param>
  159. /// <param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.</param>
  160. /// <returns>Returns the number of tokens on success, no more than n_max_tokens.
  161. /// Returns a negative number on failure - the number of tokens that would have been returned
  162. /// </returns>
  163. public static int llama_tokenize(SafeLLamaContextHandle ctx, string text, Encoding encoding, LLamaToken[] tokens, int n_max_tokens, bool add_bos, bool special)
  164. {
  165. unsafe
  166. {
  167. // Calculate number of bytes in text and borrow an array that large (+1 for nul byte)
  168. var byteCount = encoding.GetByteCount(text);
  169. var array = ArrayPool<byte>.Shared.Rent(byteCount + 1);
  170. try
  171. {
  172. // Convert to bytes
  173. fixed (char* textPtr = text)
  174. fixed (byte* arrayPtr = array)
  175. {
  176. encoding.GetBytes(textPtr, text.Length, arrayPtr, array.Length);
  177. }
  178. // Add a zero byte to the end to terminate the string
  179. array[byteCount] = 0;
  180. // Do the actual tokenization
  181. fixed (byte* arrayPtr = array)
  182. fixed (LLamaToken* tokensPtr = tokens)
  183. return llama_tokenize(ctx.ModelHandle, arrayPtr, byteCount, tokensPtr, n_max_tokens, add_bos, special);
  184. }
  185. finally
  186. {
  187. ArrayPool<byte>.Shared.Return(array);
  188. }
  189. }
  190. }
  191. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  192. public static extern unsafe byte* llama_token_get_text(SafeLlamaModelHandle model, LLamaToken token);
  193. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  194. public static extern float llama_token_get_score(SafeLlamaModelHandle model, LLamaToken token);
  195. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  196. public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, LLamaToken token);
  197. /// <summary>
  198. /// Get the size of the context window for the model for this context
  199. /// </summary>
  200. /// <param name="ctx"></param>
  201. /// <returns></returns>
  202. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  203. public static extern int llama_n_ctx(SafeLLamaContextHandle ctx);
  204. /// <summary>
  205. /// Token logits obtained from the last call to llama_eval()
  206. /// The logits for the last token are stored in the last row
  207. /// Can be mutated in order to change the probabilities of the next token.<br />
  208. /// Rows: n_tokens<br />
  209. /// Cols: n_vocab
  210. /// </summary>
  211. /// <param name="ctx"></param>
  212. /// <returns></returns>
  213. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  214. public static extern unsafe float* llama_get_logits(SafeLLamaContextHandle ctx);
  215. /// <summary>
  216. /// Logits for the ith token. Equivalent to: llama_get_logits(ctx) + i*n_vocab
  217. /// </summary>
  218. /// <param name="ctx"></param>
  219. /// <param name="i"></param>
  220. /// <returns></returns>
  221. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  222. public static extern unsafe float* llama_get_logits_ith(SafeLLamaContextHandle ctx, int i);
  223. /// <summary>
  224. /// Get the embeddings for the input
  225. /// </summary>
  226. /// <param name="ctx"></param>
  227. /// <returns></returns>
  228. public static Span<float> llama_get_embeddings(SafeLLamaContextHandle ctx)
  229. {
  230. unsafe
  231. {
  232. var ptr = llama_get_embeddings_native(ctx);
  233. return new Span<float>(ptr, ctx.EmbeddingSize);
  234. }
  235. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_get_embeddings")]
  236. static extern unsafe float* llama_get_embeddings_native(SafeLLamaContextHandle ctx);
  237. }
  238. /// <summary>
  239. /// Get the "Beginning of sentence" token
  240. /// </summary>
  241. /// <returns></returns>
  242. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  243. public static extern LLamaToken llama_token_bos(SafeLlamaModelHandle model);
  244. /// <summary>
  245. /// Get the "End of sentence" token
  246. /// </summary>
  247. /// <returns></returns>
  248. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  249. public static extern LLamaToken llama_token_eos(SafeLlamaModelHandle model);
  250. /// <summary>
  251. /// Get the "new line" token
  252. /// </summary>
  253. /// <returns></returns>
  254. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  255. public static extern LLamaToken llama_token_nl(SafeLlamaModelHandle model);
  256. /// <summary>
  257. /// Returns -1 if unknown, 1 for true or 0 for false.
  258. /// </summary>
  259. /// <returns></returns>
  260. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  261. public static extern int llama_add_bos_token(SafeLlamaModelHandle model);
  262. /// <summary>
  263. /// Returns -1 if unknown, 1 for true or 0 for false.
  264. /// </summary>
  265. /// <returns></returns>
  266. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  267. public static extern int llama_add_eos_token(SafeLlamaModelHandle model);
  268. /// <summary>
  269. /// codellama infill tokens, Beginning of infill prefix
  270. /// </summary>
  271. /// <returns></returns>
  272. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  273. public static extern int llama_token_prefix(SafeLlamaModelHandle model);
  274. /// <summary>
  275. /// codellama infill tokens, Beginning of infill middle
  276. /// </summary>
  277. /// <returns></returns>
  278. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  279. public static extern int llama_token_middle(SafeLlamaModelHandle model);
  280. /// <summary>
  281. /// codellama infill tokens, Beginning of infill suffix
  282. /// </summary>
  283. /// <returns></returns>
  284. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  285. public static extern int llama_token_suffix(SafeLlamaModelHandle model);
  286. /// <summary>
  287. /// codellama infill tokens, End of infill middle
  288. /// </summary>
  289. /// <returns></returns>
  290. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  291. public static extern int llama_token_eot(SafeLlamaModelHandle model);
  292. /// <summary>
  293. /// Print out timing information for this context
  294. /// </summary>
  295. /// <param name="ctx"></param>
  296. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  297. public static extern void llama_print_timings(SafeLLamaContextHandle ctx);
  298. /// <summary>
  299. /// Reset all collected timing information for this context
  300. /// </summary>
  301. /// <param name="ctx"></param>
  302. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  303. public static extern void llama_reset_timings(SafeLLamaContextHandle ctx);
  304. /// <summary>
  305. /// Print system information
  306. /// </summary>
  307. /// <returns></returns>
  308. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  309. public static extern IntPtr llama_print_system_info();
  310. /// <summary>
  311. /// Get the number of tokens in the model vocabulary
  312. /// </summary>
  313. /// <param name="model"></param>
  314. /// <returns></returns>
  315. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  316. public static extern int llama_n_vocab(SafeLlamaModelHandle model);
  317. /// <summary>
  318. /// Get the size of the context window for the model
  319. /// </summary>
  320. /// <param name="model"></param>
  321. /// <returns></returns>
  322. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  323. public static extern int llama_n_ctx_train(SafeLlamaModelHandle model);
  324. /// <summary>
  325. /// Get the dimension of embedding vectors from this model
  326. /// </summary>
  327. /// <param name="model"></param>
  328. /// <returns></returns>
  329. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  330. public static extern int llama_n_embd(SafeLlamaModelHandle model);
  331. /// <summary>
  332. /// Get the model's RoPE frequency scaling factor
  333. /// </summary>
  334. /// <param name="model"></param>
  335. /// <returns></returns>
  336. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  337. public static extern float llama_rope_freq_scale_train(SafeLlamaModelHandle model);
  338. /// <summary>
  339. /// Get metadata value as a string by key name
  340. /// </summary>
  341. /// <param name="model"></param>
  342. /// <param name="key"></param>
  343. /// <param name="buf"></param>
  344. /// <param name="buf_size"></param>
  345. /// <returns>The length of the string on success, or -1 on failure</returns>
  346. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  347. public static extern unsafe int llama_model_meta_val_str(SafeLlamaModelHandle model, byte* key, byte* buf, long buf_size);
  348. /// <summary>
  349. /// Get the number of metadata key/value pairs
  350. /// </summary>
  351. /// <param name="model"></param>
  352. /// <returns></returns>
  353. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  354. public static extern int llama_model_meta_count(SafeLlamaModelHandle model);
  355. /// <summary>
  356. /// Get metadata key name by index
  357. /// </summary>
  358. /// <param name="model">Model to fetch from</param>
  359. /// <param name="index">Index of key to fetch</param>
  360. /// <param name="dest">buffer to write result into</param>
  361. /// <returns>The length of the string on success, or -1 on failure</returns>
  362. public static int llama_model_meta_key_by_index(SafeLlamaModelHandle model, int index, Span<byte> dest)
  363. {
  364. unsafe
  365. {
  366. fixed (byte* destPtr = dest)
  367. {
  368. return llama_model_meta_key_by_index_native(model, index, destPtr, dest.Length);
  369. }
  370. }
  371. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_model_meta_key_by_index")]
  372. static extern unsafe int llama_model_meta_key_by_index_native(SafeLlamaModelHandle model, int index, byte* buf, long buf_size);
  373. }
  374. /// <summary>
  375. /// Get metadata value as a string by index
  376. /// </summary>
  377. /// <param name="model">Model to fetch from</param>
  378. /// <param name="index">Index of val to fetch</param>
  379. /// <param name="dest">Buffer to write result into</param>
  380. /// <returns>The length of the string on success, or -1 on failure</returns>
  381. public static int llama_model_meta_val_str_by_index(SafeLlamaModelHandle model, int index, Span<byte> dest)
  382. {
  383. unsafe
  384. {
  385. fixed (byte* destPtr = dest)
  386. {
  387. return llama_model_meta_val_str_by_index_native(model, index, destPtr, dest.Length);
  388. }
  389. }
  390. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_model_meta_val_str_by_index")]
  391. static extern unsafe int llama_model_meta_val_str_by_index_native(SafeLlamaModelHandle model, int index, byte* buf, long buf_size);
  392. }
  393. /// <summary>
  394. /// Get a string describing the model type
  395. /// </summary>
  396. /// <param name="model"></param>
  397. /// <param name="buf"></param>
  398. /// <param name="buf_size"></param>
  399. /// <returns>The length of the string on success, or -1 on failure</returns>
  400. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  401. public static extern unsafe int llama_model_desc(SafeLlamaModelHandle model, byte* buf, long buf_size);
  402. /// <summary>
  403. /// Get the size of the model in bytes
  404. /// </summary>
  405. /// <param name="model"></param>
  406. /// <returns>The size of the model</returns>
  407. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  408. public static extern ulong llama_model_size(SafeLlamaModelHandle model);
  409. /// <summary>
  410. /// Get the number of parameters in this model
  411. /// </summary>
  412. /// <param name="model"></param>
  413. /// <returns>The functions return the length of the string on success, or -1 on failure</returns>
  414. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  415. public static extern ulong llama_model_n_params(SafeLlamaModelHandle model);
  416. /// <summary>
  417. /// Convert a single token into text
  418. /// </summary>
  419. /// <param name="model"></param>
  420. /// <param name="llamaToken"></param>
  421. /// <param name="buffer">buffer to write string into</param>
  422. /// <returns>The length written, or if the buffer is too small a negative that indicates the length required</returns>
  423. public static int llama_token_to_piece(SafeLlamaModelHandle model, LLamaToken llamaToken, Span<byte> buffer)
  424. {
  425. unsafe
  426. {
  427. fixed (byte* bufferPtr = buffer)
  428. {
  429. return llama_token_to_piece_native(model, llamaToken, bufferPtr, buffer.Length);
  430. }
  431. }
  432. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "llama_token_to_piece")]
  433. static extern unsafe int llama_token_to_piece_native(SafeLlamaModelHandle model, LLamaToken llamaToken, byte* buffer, int length);
  434. }
  435. /// <summary>
  436. /// Convert text into tokens
  437. /// </summary>
  438. /// <param name="model"></param>
  439. /// <param name="text"></param>
  440. /// <param name="text_len"></param>
  441. /// <param name="tokens"></param>
  442. /// <param name="n_max_tokens"></param>
  443. /// <param name="add_bos"></param>
  444. /// <param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.</param>
  445. /// <returns>Returns the number of tokens on success, no more than n_max_tokens.
  446. /// Returns a negative number on failure - the number of tokens that would have been returned
  447. /// </returns>
  448. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  449. public static extern unsafe int llama_tokenize(SafeLlamaModelHandle model, byte* text, int text_len, LLamaToken* tokens, int n_max_tokens, bool add_bos, bool special);
  450. /// <summary>
  451. /// Register a callback to receive llama log messages
  452. /// </summary>
  453. /// <param name="logCallback"></param>
  454. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  455. public static extern void llama_log_set(LLamaLogCallback logCallback);
  456. /// <summary>
  457. /// Clear the KV cache
  458. /// </summary>
  459. /// <param name="ctx"></param>
  460. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  461. public static extern void llama_kv_cache_clear(SafeLLamaContextHandle ctx);
  462. /// <summary>
  463. /// Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
  464. /// </summary>
  465. /// <param name="ctx"></param>
  466. /// <param name="seq"></param>
  467. /// <param name="p0"></param>
  468. /// <param name="p1"></param>
  469. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  470. public static extern void llama_kv_cache_seq_rm(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1);
  471. /// <summary>
  472. /// Copy all tokens that belong to the specified sequence to another sequence
  473. /// Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
  474. /// </summary>
  475. /// <param name="ctx"></param>
  476. /// <param name="src"></param>
  477. /// <param name="dest"></param>
  478. /// <param name="p0"></param>
  479. /// <param name="p1"></param>
  480. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  481. public static extern void llama_kv_cache_seq_cp(SafeLLamaContextHandle ctx, LLamaSeqId src, LLamaSeqId dest, LLamaPos p0, LLamaPos p1);
  482. /// <summary>
  483. /// Removes all tokens that do not belong to the specified sequence
  484. /// </summary>
  485. /// <param name="ctx"></param>
  486. /// <param name="seq"></param>
  487. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  488. public static extern void llama_kv_cache_seq_keep(SafeLLamaContextHandle ctx, LLamaSeqId seq);
  489. /// <summary>
  490. /// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
  491. /// If the KV cache is RoPEd, the KV data is updated accordingly
  492. /// </summary>
  493. /// <param name="ctx"></param>
  494. /// <param name="seq"></param>
  495. /// <param name="p0"></param>
  496. /// <param name="p1"></param>
  497. /// <param name="delta"></param>
  498. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  499. public static extern void llama_kv_cache_seq_shift(SafeLLamaContextHandle ctx, LLamaSeqId seq, LLamaPos p0, LLamaPos p1, LLamaPos delta);
  500. /// <summary>
  501. /// Allocates a batch of tokens on the heap
  502. /// Each token can be assigned up to n_seq_max sequence ids
  503. /// The batch has to be freed with llama_batch_free()
  504. /// If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
  505. /// Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
  506. /// The rest of the llama_batch members are allocated with size n_tokens
  507. /// All members are left uninitialized
  508. /// </summary>
  509. /// <param name="n_tokens"></param>
  510. /// <param name="embd"></param>
  511. /// <param name="n_seq_max">Each token can be assigned up to n_seq_max sequence ids</param>
  512. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  513. public static extern LLamaNativeBatch llama_batch_init(int n_tokens, int embd, int n_seq_max);
  514. /// <summary>
  515. /// Frees a batch of tokens allocated with llama_batch_init()
  516. /// </summary>
  517. /// <param name="batch"></param>
  518. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  519. public static extern void llama_batch_free(LLamaNativeBatch batch);
  520. /// <summary>
  521. /// </summary>
  522. /// <param name="ctx"></param>
  523. /// <param name="batch"></param>
  524. /// <returns>Positive return values does not mean a fatal error, but rather a warning:<br />
  525. /// - 0: success<br />
  526. /// - 1: could not find a KV slot for the batch (try reducing the size of the batch or increase the context)<br />
  527. /// - &lt; 0: error<br />
  528. /// </returns>
  529. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  530. public static extern int llama_decode(SafeLLamaContextHandle ctx, LLamaNativeBatch batch);
  531. /// <summary>
  532. /// Set the number of threads used for decoding
  533. /// </summary>
  534. /// <param name="ctx"></param>
  535. /// <param name="n_threads">n_threads is the number of threads used for generation (single token)</param>
  536. /// <param name="n_threads_batch">n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)</param>
  537. /// <returns></returns>
  538. [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
  539. public static extern int llama_set_n_threads(SafeLLamaContextHandle ctx, uint n_threads, uint n_threads_batch);
  540. }
  541. }