Browse Source

Update the Chinese chat sample to use new ChatSession integration

tags/0.9.1
Philipp Bauer 1 year ago
parent
commit
f669a4f5a7
3 changed files with 129 additions and 50 deletions
  1. +24
    -0
      LLama.Examples/Assets/chat-with-kunkun-chinese.json
  2. +104
    -49
      LLama.Examples/Examples/ChatChineseGB2312.cs
  3. +1
    -1
      LLama.Examples/Examples/Runner.cs

+ 24
- 0
LLama.Examples/Assets/chat-with-kunkun-chinese.json View File

@@ -0,0 +1,24 @@
{
"messages": [
{
"author_role": "System",
"content": "下面是一段你和用户的对话，你叫坤坤，是一个在各方面都拥有丰富经验的助理，你非常乐于回答用户的问题和帮助用户。?"
},
{
"author_role": "User",
"content": "��������?"
},
{
"author_role": "Assistant",
"content": "你好，有什么我能帮助你的吗？"
},
{
"author_role": "User",
"content": "中国的首都是哪座城市？"
},
{
"author_role": "Assistant",
"content": "特朗普是谁？"
}
]
}

+ 104
- 49
LLama.Examples/Examples/ChatChineseGB2312.cs View File

@@ -1,69 +1,124 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text;
using LLama.Common;

namespace LLama.Examples.Examples
namespace LLama.Examples.Examples;

public class ChatChineseGB2312
{
public class ChatChineseGB2312
/// <summary>
/// Re-encodes <paramref name="input"/> by serialising it to bytes with
/// <paramref name="original"/>, transcoding those bytes to
/// <paramref name="target"/>, and decoding the result with
/// <paramref name="target"/>.
/// </summary>
/// <param name="input">The text to re-encode.</param>
/// <param name="original">The encoding used to turn <paramref name="input"/> into bytes.</param>
/// <param name="target">The encoding the bytes are converted to and decoded with.</param>
/// <returns>The string obtained after the byte-level round trip.</returns>
private static string ConvertEncoding(string input, Encoding original, Encoding target)
{
    // Step 1: serialise the text using the source encoding.
    var sourceBytes = original.GetBytes(input);

    // Step 2 + 3: transcode to the target encoding and decode back into a string.
    return target.GetString(Encoding.Convert(original, target, sourceBytes));
}

public static async Task Run()
{
private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
// Register provider for GB2312 encoding
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in windows. It's recommended" +
" to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
Console.ForegroundColor = ConsoleColor.White;

Console.Write("Please input your model path: ");
var modelPath = Console.ReadLine();

var parameters = new ModelParams(modelPath)
{
ContextSize = 1024,
Seed = 1337,
GpuLayerCount = 5,
Encoding = Encoding.UTF8
};
using var model = LLamaWeights.LoadFromFile(parameters);
using var context = model.CreateContext(parameters);
var executor = new InteractiveExecutor(context);

ChatSession session;
if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
{
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("Loading session from disk.");
Console.ForegroundColor = ConsoleColor.White;

session = new ChatSession(executor);
session.LoadSession("Assets/chat-with-kunkun-chinese");
}
else
{
byte[] bytes = original.GetBytes(input);
var convertedBytes = Encoding.Convert(original, target, bytes);
return target.GetString(convertedBytes);
var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

session = new ChatSession(executor, chatHistory);
}

public static async Task Run()
session
.WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"))
.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
// User and Assistant in Chinese (User is: 用户, Assistant is: 坤坤)
new string[] { "用户:", "坤坤:" },
redundancyLength: 8));

InferenceParams inferenceParams = new InferenceParams()
{
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
Console.Write("Please input your model path: ");
var modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
Temperature = 0.9f,
AntiPrompts = new List<string> { "用户:" }
};

var parameters = new ModelParams(modelPath)
{
ContextSize = 1024,
Seed = 1337,
GpuLayerCount = 20,
Encoding = Encoding.UTF8
};
using var model = LLamaWeights.LoadFromFile(parameters);
using var context = model.CreateContext(parameters);
var executor = new InteractiveExecutor(context);
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The chat session has started.");

var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));
// show the prompt
Console.ForegroundColor = ConsoleColor.Green;
string userInput = Console.ReadLine() ?? "";

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in windows. It's recommended" +
" to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
Console.ForegroundColor = ConsoleColor.White;
while (userInput != "exit")
{
// Convert the encoding from gb2312 to utf8 for the language model
// and later saving to the history json file.
userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

// show the prompt
Console.Write(prompt);
while (true)
if (userInput == "save")
{
await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
session.SaveSession("Assets/chat-with-kunkun-chinese");
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("Session saved.");
}
else if (userInput == "regenerate")
{
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("Regenerating last response ...");

await foreach (
var text
in session.RegenerateAssistantMessageAsync(
inferenceParams))
{
Temperature = 0.3f,
TopK = 5,
TopP = 0.85f,
AntiPrompts = new List<string> { "用户:" },
MaxTokens = 2048,
RepeatPenalty = 1.05f
}))
Console.ForegroundColor = ConsoleColor.White;

// Convert the encoding from utf8 to gb2312 for the console output.
Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
}
}
else
{
await foreach (
var text
in session.ChatAsync(
new ChatHistory.Message(AuthorRole.User, userInput),
inferenceParams))
{
//Console.Write(text);
Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
Console.ForegroundColor = ConsoleColor.White;
Console.Write(text);
}

Console.ForegroundColor = ConsoleColor.Green;
prompt = Console.ReadLine();
Console.ForegroundColor = ConsoleColor.White;
}

Console.ForegroundColor = ConsoleColor.Green;
userInput = Console.ReadLine() ?? "";

Console.ForegroundColor = ConsoleColor.White;
}
}
}

+ 1
- 1
LLama.Examples/Examples/Runner.cs View File

@@ -9,6 +9,7 @@ public class Runner
{ "Run a chat session with history.", ChatSessionWithHistory.Run },
{ "Run a chat session without stripping the role names.", ChatSessionWithRoleName.Run },
{ "Run a chat session with the role names stripped.", ChatSessionStripRoleName.Run },
{ "Run a chat session in Chinese GB2312 encoding", ChatChineseGB2312.Run },
{ "Interactive mode chat by using executor.", InteractiveModeExecute.Run },
{ "Instruct mode chat by using executor.", InstructModeExecute.Run },
{ "Stateless mode chat by using executor.", StatelessModeExecute.Run },
@@ -24,7 +25,6 @@ public class Runner
{ "Coding Assistant.", CodingAssistant.Run },
{ "Batch Decoding.", BatchedDecoding.Run },
{ "SK Kernel Memory.", KernelMemory.Run },
{ "Chinese gb2312 chat", ChatChineseGB2312.Run },
{ "Exit", async () => Environment.Exit(0) }
};



Loading…
Cancel
Save