@@ -0,0 +1,55 @@
name: CI
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
jobs:
  build:
    name: Test
    runs-on: ${{ matrix.os }}
    strategy:
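      # Let the remaining matrix legs finish even if one OS/config combination fails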
      fail-fast: false
      matrix:
        build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release]
        include:
          - build: linux-debug
            os: ubuntu-latest
            config: debug
          - build: linux-release
            os: ubuntu-latest
            config: release
          - build: macos-debug
            os: macos-latest
            config: debug
          - build: macos-release
            os: macos-latest
            config: release
          - build: windows-debug
            os: windows-2019
            config: debug
          - build: windows-release
            os: windows-2019
            config: release
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-dotnet@v1
        with:
          dotnet-version: |
            6.0.x
            7.0.x
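      # Cache the GGML model(s) downloaded into LLama.Unittest/Models so CI runs don't re-download them from HuggingFace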
      - name: Cache unit test models
        uses: actions/cache@v3
        with:
          key: "unit_test_models"
          path: LLama.Unittest/Models
      # workaround for actions/setup-dotnet#155
      - name: Clear package cache
        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
      - name: Restore packages
        run: dotnet restore LLamaSharp.sln
      - name: Build
        run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
      - name: Test
        run: dotnet test LLamaSharp.sln -c ${{ matrix.config }}
| @@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist | |||||
| *.xsd | *.xsd | ||||
| # docs | # docs | ||||
| site/ | |||||
| site/ | |||||
| /LLama.Unittest/Models/*.bin | |||||
| @@ -1,11 +1,15 @@ | |||||
| using LLama; | |||||
| using LLama.Common; | |||||
| namespace LLama.Unittest | namespace LLama.Unittest | ||||
| { | { | ||||
| public class BasicTest | public class BasicTest | ||||
| { | { | ||||
| [Fact] | [Fact] | ||||
| public void SimpleQA() | |||||
| public void LoadModel() | |||||
| { | { | ||||
            var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
            model.Dispose();
        }
    }
}
| @@ -23,8 +23,22 @@ | |||||
| </PackageReference> | </PackageReference> | ||||
| </ItemGroup> | </ItemGroup> | ||||
| <Target Name="DownloadContentFiles" BeforeTargets="Build"> | |||||
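    <!-- Fetch the quantized test model from HuggingFace before the build;
         SkipUnchangedFiles avoids re-downloading when the file is already up to date -->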
    <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q3_K_S.bin" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.ggmlv3.q3_K_S.bin" SkipUnchangedFiles="true">
    </DownloadFile>
  </Target>
  <ItemGroup>
    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
  </ItemGroup>
  <ItemGroup>
    <Folder Include="Models\" />
  </ItemGroup>
  <ItemGroup>
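    <!-- Copy the downloaded model next to the test binaries so the test can load it via the relative "Models/..." path -->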
    <None Update="Models\llama-2-7b-chat.ggmlv3.q3_K_S.bin">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
  </ItemGroup>
</Project>
| @@ -30,6 +30,7 @@ namespace LLama.Common | |||||
| /// <param name="data"></param> | /// <param name="data"></param> | ||||
| public FixedSizeQueue(int size, IEnumerable<T> data) | public FixedSizeQueue(int size, IEnumerable<T> data) | ||||
| { | { | ||||
| #if NETCOREAPP3_0_OR_GREATER | |||||
| // Try an early check on the amount of data supplied (if possible) | // Try an early check on the amount of data supplied (if possible) | ||||
| #if NETSTANDARD2_0 | #if NETSTANDARD2_0 | ||||
| var dataCount = data.Count(); | var dataCount = data.Count(); | ||||
| @@ -52,7 +53,7 @@ namespace LLama.Common | |||||
| throw new ArgumentException($"The max size set for the quene is {size}, but got {count} initial values."); | throw new ArgumentException($"The max size set for the quene is {size}, but got {count} initial values."); | ||||
| #endif | #endif | ||||
| } | } | ||||
| / | |||||
| /// <summary> | /// <summary> | ||||
| /// Replace every item in the queue with the given value | /// Replace every item in the queue with the given value | ||||
| /// </summary> | /// </summary> | ||||
| @@ -84,7 +84,7 @@ namespace LLama.Common | |||||
| /// <summary> | /// <summary> | ||||
| /// how split tensors should be distributed across GPUs | /// how split tensors should be distributed across GPUs | ||||
| /// </summary> | /// </summary> | ||||
| public float[] TensorSplits { get; set; } = new float[] { 0 }; | |||||
| public nint TensorSplits { get; set; } | |||||
| /// <summary> | /// <summary> | ||||
| /// | /// | ||||
| @@ -47,7 +47,8 @@ namespace LLama.Native | |||||
| /// <summary> | /// <summary> | ||||
| /// how to split layers across multiple GPUs | /// how to split layers across multiple GPUs | ||||
| /// </summary> | /// </summary> | ||||
| public float[] tensor_split; | |||||
| public nint tensor_split; | |||||
| /// <summary> | /// <summary> | ||||
| /// ref: https://github.com/ggerganov/llama.cpp/pull/2054 | /// ref: https://github.com/ggerganov/llama.cpp/pull/2054 | ||||
| @@ -78,6 +79,11 @@ namespace LLama.Native | |||||
| [MarshalAs(UnmanagedType.I1)] | [MarshalAs(UnmanagedType.I1)] | ||||
| public bool low_vram; | public bool low_vram; | ||||
| /// <summary> | |||||
| /// if true, use experimental mul_mat_q kernels | |||||
| /// </summary> | |||||
| [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q; | |||||
| /// <summary> | /// <summary> | ||||
| /// use fp16 for KV cache | /// use fp16 for KV cache | ||||
| /// </summary> | /// </summary> | ||||
| @@ -114,9 +120,5 @@ namespace LLama.Native | |||||
| [MarshalAs(UnmanagedType.I1)] | [MarshalAs(UnmanagedType.I1)] | ||||
| public bool embedding; | public bool embedding; | ||||
| } | } | ||||
| public struct TensorSplits | |||||
| { | |||||
| public float Item1; | |||||
| } | |||||
| } | } | ||||
| @@ -28,12 +28,14 @@ namespace LLama | |||||
| lparams.logits_all = @params.Perplexity; | lparams.logits_all = @params.Perplexity; | ||||
| lparams.embedding = @params.EmbeddingMode; | lparams.embedding = @params.EmbeddingMode; | ||||
| lparams.low_vram = @params.LowVram; | lparams.low_vram = @params.LowVram; | ||||
| /* | |||||
| if (@params.TensorSplits.Length != 1) | if (@params.TensorSplits.Length != 1) | ||||
| { | { | ||||
| throw new ArgumentException("Currently multi-gpu support is not supported by " + | throw new ArgumentException("Currently multi-gpu support is not supported by " + | ||||
| "both llama.cpp and LLamaSharp."); | "both llama.cpp and LLamaSharp."); | ||||
| } | |||||
| }*/ | |||||
            lparams.tensor_split = @params.TensorSplits;
            if (!File.Exists(@params.ModelPath))