using System;
using System.Collections.Generic;
using System.Text;

namespace LLama.Transform
{
    /// <summary>
    /// The default tokenizer of LLamaSharp. This class is sealed and cannot be inherited.
    /// </summary>
    /// <remarks>
    /// This class is stateful: every token passed to a <c>Detokenize</c> overload is fed
    /// into one shared <see cref="StreamingTokenDecoder"/>, so tokens supplied by earlier
    /// calls can affect the text returned by later ones. If an instance is used in a
    /// session, do not reuse it for another session unless that carry-over is intended.
    /// </remarks>
    public sealed class DefaultTokenizer
        : ITokenizer
    {
        // Stored by the constructor; no member of this class reads it afterwards.
        private readonly Encoding _encoding;

        // Single decoder shared by both Detokenize overloads; it holds the state
        // described in the class remarks.
        private readonly StreamingTokenDecoder _tokenDecoder;

        /// <summary>
        /// Initialize a new tokenizer with the specified encoding.
        /// </summary>
        /// <param name="encoding">The text encoding handed to the internal token decoder.</param>
        /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
        public DefaultTokenizer(Encoding encoding)
        {
            // Fail fast with a descriptive exception rather than passing null on to the decoder.
            _encoding = encoding ?? throw new ArgumentNullException(nameof(encoding));
            _tokenDecoder = new StreamingTokenDecoder(_encoding);
        }

        /// <summary>
        /// Tokenize <paramref name="text"/> by delegating to <c>context.Tokenize</c>.
        /// </summary>
        /// <param name="context">The context that performs the tokenization.</param>
        /// <param name="text">The text to tokenize.</param>
        /// <param name="addBos">
        /// Forwarded unchanged to the context (presumably whether a beginning-of-sequence
        /// token is prepended; confirm against <c>LLamaContext.Tokenize</c>).
        /// </param>
        /// <param name="special">
        /// Forwarded unchanged to the context (presumably whether special tokens in the
        /// text are parsed; confirm against <c>LLamaContext.Tokenize</c>).
        /// </param>
        /// <returns>The tokens produced by the context.</returns>
        public IEnumerable<int> Tokenize(LLamaContext context, string text, bool addBos = true, bool special = false)
        {
            // NOTE(review): the generic argument was missing from the source as received;
            // <int> is restored to match the int-based Detokenize overload. Confirm against ITokenizer.
            return context.Tokenize(text, addBos, special);
        }

        /// <summary>
        /// Feed a single token into the shared decoder and return the text it yields.
        /// </summary>
        /// <param name="context">The context whose model handle is used to decode the token.</param>
        /// <param name="token">The token to decode.</param>
        /// <returns>
        /// The string read back from the decoder after the token was added. Because the
        /// decoder is shared and stateful, this may also reflect tokens from earlier calls.
        /// </returns>
        public string Detokenize(LLamaContext context, int token)
        {
            _tokenDecoder.Add(token, context.NativeHandle.ModelHandle);
            return _tokenDecoder.Read();
        }

        /// <summary>
        /// Feed a sequence of tokens into the shared decoder and return the text it yields.
        /// </summary>
        /// <param name="context">The context whose model handle is used to decode the tokens.</param>
        /// <param name="tokens">The tokens to decode, in order.</param>
        /// <returns>
        /// The string read back from the decoder after the tokens were added. Because the
        /// decoder is shared and stateful, this may also reflect tokens from earlier calls.
        /// </returns>
        public string Detokenize(LLamaContext context, IEnumerable<int> tokens)
        {
            _tokenDecoder.AddRange(tokens, context.NativeHandle.ModelHandle);
            return _tokenDecoder.Read();
        }
    }
}