
Merge branch 'master' into master

tags/0.9.1
Martin Evans, 1 year ago
parent commit 4fc743c9ba
11 changed files with 184 additions and 19 deletions
1. .github/prepare_release.sh (+0 -4)
2. LLama.Examples/Assets/chat-with-kunkun-chinese.txt (+8 -0)
3. LLama.Examples/Examples/ChatChineseGB2312.cs (+69 -0)
4. LLama.Examples/Examples/Runner.cs (+1 -0)
5. LLama.Examples/LLama.Examples.csproj (+4 -1)
6. LLama.Unittest/LLama.Unittest.csproj (+1 -1)
7. LLama.WebAPI/LLama.WebAPI.csproj (+1 -1)
8. LLama/Native/NativeApi.Load.cs (+13 -7)
9. LLama/Native/NativeApi.cs (+14 -0)
10. LLama/Native/NativeLibraryConfig.cs (+70 -3)
11. README.md (+3 -2)

.github/prepare_release.sh (+0 -4)

@@ -23,11 +23,7 @@ fi
mkdir ./temp;
mkdir ./temp/runtimes;
# For sure it could be done better but cp -R did not work on osx
-mkdir ./temp/runtimes/osx-arm64
-mkdir ./temp/runtimes/osx-x64
cp ./LLama/runtimes/*.* ./temp/runtimes/;
-cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
-cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
cp ./LLama/runtimes/build/*.* ./temp/;

# get the current version


LLama.Examples/Assets/chat-with-kunkun-chinese.txt (+8 -0)

@@ -0,0 +1,8 @@
指令:下面是一段你和用户的对话,你叫坤坤,是一个在各方面都拥有丰富经验的助理,你非常乐于回答用户的问题和帮助用户。

用户:你好,坤坤。
坤坤:你好,有什么我能帮助你的吗?
用户:中国的首都是哪座城市?
坤坤:中国的首都是北京市。
用户:特朗普是谁?
坤坤:
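(English gloss of the prompt above: "Instruction: Below is a conversation between you and a user. You are called Kunkun, an assistant with rich experience in every area, and you are very glad to answer the user's questions and help the user. / User: Hello, Kunkun. / Kunkun: Hello, is there anything I can help you with? / User: Which city is the capital of China? / Kunkun: The capital of China is Beijing. / User: Who is Trump? / Kunkun:")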

LLama.Examples/Examples/ChatChineseGB2312.cs (+69 -0)

@@ -0,0 +1,69 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using LLama.Common;

namespace LLama.Examples.Examples
{
public class ChatChineseGB2312
{
private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
{
byte[] bytes = original.GetBytes(input);
var convertedBytes = Encoding.Convert(original, target, bytes);
return target.GetString(convertedBytes);
}

public static async Task Run()
{
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
Console.Write("Please input your model path: ");
var modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

var parameters = new ModelParams(modelPath)
{
ContextSize = 1024,
Seed = 1337,
GpuLayerCount = 20,
Encoding = Encoding.UTF8
};
using var model = LLamaWeights.LoadFromFile(parameters);
using var context = model.CreateContext(parameters);
var executor = new InteractiveExecutor(context);

var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in windows. It's recommended" +
" to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
Console.ForegroundColor = ConsoleColor.White;

// show the prompt
Console.Write(prompt);
while (true)
{
await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
{
Temperature = 0.3f,
TopK = 5,
TopP = 0.85f,
AntiPrompts = new List<string> { "用户:" },
MaxTokens = 2048,
RepeatPenalty = 1.05f
}))
{
//Console.Write(text);
Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
}

Console.ForegroundColor = ConsoleColor.Green;
prompt = Console.ReadLine();
Console.ForegroundColor = ConsoleColor.White;
}
}
}
}
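Note: the core trick in this example is an encoding round-trip: the prompt file is read as GB2312 and converted to UTF-8 for the model, and the model's UTF-8 output is converted back to GB2312 for the console. A minimal standalone sketch of that round-trip, assuming only a reference to the System.Text.Encoding.CodePages package:

using System;
using System.Text;

class Gb2312RoundTrip
{
    static void Main()
    {
        // gb2312 is a code-pages encoding and is unavailable until the provider is registered.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        var gb2312 = Encoding.GetEncoding("gb2312");

        // llama.cpp expects UTF-8, while Chinese Windows consoles commonly use gb2312.
        string text = "你好,坤坤。";
        byte[] gbBytes = gb2312.GetBytes(text);
        byte[] utf8Bytes = Encoding.Convert(gb2312, Encoding.UTF8, gbBytes);
        Console.WriteLine(Encoding.UTF8.GetString(utf8Bytes)); // round-trips cleanly
    }
}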

LLama.Examples/Examples/Runner.cs (+1 -0)

@@ -24,6 +24,7 @@ public class Runner
{ "Coding Assistant.", CodingAssistant.Run },
{ "Batch Decoding.", BatchedDecoding.Run },
{ "SK Kernel Memory.", KernelMemory.Run },
{ "Chinese gb2312 chat", ChatChineseGB2312.Run },
{ "Exit", async () => Environment.Exit(0) }
};



LLama.Examples/LLama.Examples.csproj (+4 -1)

@@ -31,7 +31,7 @@
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.12.231123.1-preview" />
<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta8" />
<PackageReference Include="Spectre.Console" Version="0.47.0" />
<PackageReference Include="Spectre.Console" Version="0.48.0" />
</ItemGroup>

<ItemGroup>
@@ -71,6 +71,9 @@
<None Update="Assets\sample-SK-Readme.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Assets\chat-with-kunkun-chinese.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>

LLama.Unittest/LLama.Unittest.csproj (+1 -1)

@@ -16,7 +16,7 @@
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
<PackageReference Include="xunit" Version="2.6.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.5.3">
<PackageReference Include="xunit.runner.visualstudio" Version="2.5.4">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>


LLama.WebAPI/LLama.WebAPI.csproj (+1 -1)

@@ -7,7 +7,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.6.11" />
<PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.8.8" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
</ItemGroup>



LLama/Native/NativeApi.Load.cs (+13 -7)

@@ -4,6 +4,7 @@ using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
+using System.Linq;
using System.Runtime.InteropServices;
using System.Text.Json;

@@ -258,6 +259,7 @@ namespace LLama.Native
enableLogging = configuration.Logging;
// We move the flag to avoid loading the library when the variable is accessed elsewhere.
NativeLibraryConfig.LibraryHasLoaded = true;
+Log(configuration.ToString(), LogLevel.Information);

if (!string.IsNullOrEmpty(configuration.Path))
{
@@ -273,6 +275,7 @@ namespace LLama.Native

var libraryTryLoadOrder = GetLibraryTryOrder(configuration);

+string[] preferredPaths = configuration.SearchDirectories;
string[] possiblePathPrefix = new string[] {
System.AppDomain.CurrentDomain.BaseDirectory,
Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? ""
@@ -280,19 +283,22 @@ namespace LLama.Native

var tryFindPath = (string filename) =>
{
-   int i = 0;
-   while (!File.Exists(filename))
+   foreach(var path in preferredPaths)
    {
-       if (i < possiblePathPrefix.Length)
+       if (File.Exists(Path.Combine(path, filename)))
        {
-           filename = Path.Combine(possiblePathPrefix[i], filename);
-           i++;
+           return Path.Combine(path, filename);
        }
-       else
-       {
-           break;
-       }
    }
+
+   foreach(var path in possiblePathPrefix)
+   {
+       if (File.Exists(Path.Combine(path, filename)))
+       {
+           return Path.Combine(path, filename);
+       }
+   }

return filename;
};
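Note: the rewritten tryFindPath gives a fixed probing order: user-configured search directories first, then the default prefixes (the application base directory and the executing assembly's directory), and finally the bare filename so the OS loader can apply its own search rules. A standalone sketch of that order; the names here are illustrative, not the library's API:

using System;
using System.IO;
using System.Linq;

static class ProbeDemo
{
    // Mirrors the resolution order above: preferred directories win,
    // then default prefixes, then the bare name as a last resort.
    static string TryFindPath(string filename, string[] preferred, string[] defaults)
    {
        foreach (var dir in preferred.Concat(defaults))
        {
            var candidate = Path.Combine(dir, filename);
            if (File.Exists(candidate))
                return candidate;
        }
        return filename;
    }

    static void Main()
    {
        var result = TryFindPath("libllama.so",
            new[] { "./my-runtimes" },                        // hypothetical user directory
            new[] { AppDomain.CurrentDomain.BaseDirectory }); // default prefix
        Console.WriteLine(result);
    }
}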



LLama/Native/NativeApi.cs (+14 -0)

@@ -9,6 +9,17 @@ namespace LLama.Native
{
using llama_token = Int32;

public enum LLamaTokenType
{
LLAMA_TOKEN_TYPE_UNDEFINED = 0,
LLAMA_TOKEN_TYPE_NORMAL = 1,
LLAMA_TOKEN_TYPE_UNKNOWN = 2,
LLAMA_TOKEN_TYPE_CONTROL = 3,
LLAMA_TOKEN_TYPE_USER_DEFINED = 4,
LLAMA_TOKEN_TYPE_UNUSED = 5,
LLAMA_TOKEN_TYPE_BYTE = 6,
}

/// <summary>
/// Callback from llama.cpp with log messages
/// </summary>
@@ -243,6 +254,9 @@ namespace LLama.Native
}
}

[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token);

/// <summary>
/// Get the size of the context window for the model for this context
/// </summary>
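Note: a hedged sketch of how the new binding might be called from user code; LLamaWeights.NativeHandle exposing a SafeLlamaModelHandle is an assumption based on the rest of the LLamaSharp API, not something this diff shows:

using System;
using LLama;
using LLama.Common;
using LLama.Native;

class TokenTypeDemo
{
    static void Main()
    {
        var parameters = new ModelParams("path/to/model.gguf"); // hypothetical path
        using var model = LLamaWeights.LoadFromFile(parameters);

        // Ask llama.cpp to classify token id 1; in most llama vocabularies this
        // is BOS, so a control token type would be the expected answer.
        LLamaTokenType type = NativeApi.llama_token_get_type(model.NativeHandle, 1);
        Console.WriteLine($"token 1 -> {type}");
    }
}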


LLama/Native/NativeLibraryConfig.cs (+70 -3)

@@ -1,4 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace LLama.Native
{
@@ -27,6 +29,10 @@ namespace LLama.Native
private bool _allowFallback = true;
private bool _skipCheck = false;
private bool _logging = false;
/// <summary>
/// search directory -> priority level, 0 is the lowest.
/// </summary>
private List<string> _searchDirectories = new List<string>();

private static void ThrowIfLoaded()
{
@@ -120,13 +126,50 @@ namespace LLama.Native
return this;
}

/// <summary>
/// Add self-defined search directories. Note that the file structure of the added
/// directories must be the same as the default directory. Besides, the directory
/// won't be used recursively.
/// </summary>
/// <param name="directories"></param>
/// <returns></returns>
public NativeLibraryConfig WithSearchDirectories(IEnumerable<string> directories)
{
ThrowIfLoaded();

_searchDirectories.AddRange(directories);
return this;
}

/// <summary>
/// Add self-defined search directories. Note that the file structure of the added
/// directories must be the same as the default directory. Besides, the directory
/// won't be used recursively.
/// </summary>
/// <param name="directory"></param>
/// <returns></returns>
public NativeLibraryConfig WithSearchDirectory(string directory)
{
ThrowIfLoaded();

_searchDirectories.Add(directory);
return this;
}

internal static Description CheckAndGatherDescription()
{
if (Instance._allowFallback && Instance._skipCheck)
{
throw new ArgumentException("Cannot skip the check when fallback is allowed.");
}
-return new Description(Instance._libraryPath, Instance._useCuda, Instance._avxLevel, Instance._allowFallback, Instance._skipCheck, Instance._logging);
+return new Description(
+    Instance._libraryPath,
+    Instance._useCuda,
+    Instance._avxLevel,
+    Instance._allowFallback,
+    Instance._skipCheck,
+    Instance._logging,
+    Instance._searchDirectories.Concat(new string[] { "./" }).ToArray());
}

internal static string AvxLevelToString(AvxLevel level)
@@ -183,7 +226,31 @@ namespace LLama.Native
Avx512,
}

-internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging);
+internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging, string[] SearchDirectories)
{
public override string ToString()
{
string avxLevelString = AvxLevel switch
{
AvxLevel.None => "NoAVX",
AvxLevel.Avx => "AVX",
AvxLevel.Avx2 => "AVX2",
AvxLevel.Avx512 => "AVX512",
_ => "Unknown"
};

string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }";

return $"NativeLibraryConfig Description:\n" +
$"- Path: {Path}\n" +
$"- PreferCuda: {UseCuda}\n" +
$"- PreferredAvxLevel: {avxLevelString}\n" +
$"- AllowFallback: {AllowFallback}\n" +
$"- SkipCheck: {SkipCheck}\n" +
$"- Logging: {Logging}\n" +
$"- SearchDirectories and Priorities: {searchDirectoriesString}";
}
}
}
#endif
}
}
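Note: a short usage sketch for the new search-directory API (both paths are hypothetical). It has to run before anything triggers native library loading, since ThrowIfLoaded() rejects configuration once LibraryHasLoaded is set:

using LLama.Native;

class ConfigureNativeSearch
{
    static void Main()
    {
        NativeLibraryConfig.Instance
            .WithSearchDirectory("/opt/llama/runtimes")                 // probed first
            .WithSearchDirectories(new[] { "./runtimes", "./native" }); // probed next, in order

        // "./" is always appended with the lowest priority (see CheckAndGatherDescription),
        // so the current directory keeps working even with custom directories set.
    }
}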

README.md (+3 -2)

@@ -11,7 +11,7 @@


**The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+both Windows, Linux and MAC without requirement for compiling llama.cpp yourself. Even without a GPU or not enough GPU memory, you can still apply LLaMA models well with this repo. 🤗**

**Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**

@@ -129,7 +129,7 @@ Console.Write(prompt);
// run the inference in a loop to chat with LLM
while (prompt != "stop")
{
-foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+await foreach (var text in session.ChatAsync(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
{
Console.Write(text);
}
@@ -246,6 +246,7 @@ The llama.cpp commit id will help if you want to compile a DLL yourself.
| v0.5.1 | [Llama2 7b GGUF](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF)| 6b73ef1 |
| v0.6.0 | | [cb33f43](https://github.com/ggerganov/llama.cpp/commit/cb33f43a2a9f5a5a5f8d290dd97c625d9ba97a2f) |
| v0.7.0, v0.8.0 | [Thespis-13B](https://huggingface.co/TheBloke/Thespis-13B-v0.5-GGUF/tree/main?not-for-all-audiences=true), [LLaMA2-7B](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | [207b519](https://github.com/ggerganov/llama.cpp/commit/207b51900e15cc7f89763a3bb1c565fe11cbb45d) |
+| v0.8.1 | | [e937066](https://github.com/ggerganov/llama.cpp/commit/e937066420b79a757bf80e9836eb12b88420a218) |

## License


