
Merge branch 'master' of github.com:SciSharp/LLamaSharp into rinne-dev

tags/v0.4.2-preview^2
Yaohui Liu, 2 years ago
commit 9fcbd16b74
9 changed files with 94 additions and 13 deletions:

  1. .github/workflows/main.yml (+55, -0)
  2. .gitignore (+4, -1)
  3. LLama.Unittest/BasicTest.cs (+6, -2)
  4. LLama.Unittest/LLama.Unittest.csproj (+14, -0)
  5. LLama/Common/FixedSizeQueue.cs (+2, -1)
  6. LLama/Common/ModelParams.cs (+1, -1)
  7. LLama/Native/LLamaContextParams.cs (+8, -6)
  8. LLama/Utils.cs (+4, -2)
  9. LLama/runtimes/libllama.dylib (BIN)

.github/workflows/main.yml (+55, -0)

@@ -0,0 +1,55 @@
name: CI
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  build:
    name: Test
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        build: [linux-debug, linux-release, macos-debug, macos-release, windows-debug, windows-release]
        include:
          - build: linux-debug
            os: ubuntu-latest
            config: debug
          - build: linux-release
            os: ubuntu-latest
            config: release
          - build: macos-debug
            os: macos-latest
            config: debug
          - build: macos-release
            os: macos-latest
            config: release
          - build: windows-debug
            os: windows-2019
            config: debug
          - build: windows-release
            os: windows-2019
            config: release
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-dotnet@v1
        with:
          dotnet-version: |
            6.0.x
            7.0.x
      - name: Cache Gradle packages
        uses: actions/cache@v3
        with:
          key: "unit_test_models"
          path: LLama.Unittest/Models
      # workaround for actions/setup-dotnet#155
      - name: Clear package cache
        run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
      - name: Restore packages
        run: dotnet restore LLamaSharp.sln
      - name: Build
        run: dotnet build LLamaSharp.sln -c ${{ matrix.config }} --no-restore
      - name: Test
        run: dotnet test LLamaSharp.sln -c ${{ matrix.config }}

.gitignore (+4, -1)

@@ -341,4 +341,7 @@ test/TensorFlowNET.Examples/mnist
*.xsd

# docs
site/

/LLama.Unittest/Models/*.bin


LLama.Unittest/BasicTest.cs (+6, -2)

@@ -1,11 +1,15 @@
using LLama;
using LLama.Common;

namespace LLama.Unittest
{
    public class BasicTest
    {
        [Fact]
        public void SimpleQA()
        public void LoadModel()
        {
            var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
            model.Dispose();
        }
    }
}
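
Note: the renamed LoadModel test only checks that a model can be constructed and released. Below is a minimal sketch of the same pattern, assuming only the types visible in the diff above (LLamaModel, ModelParams) plus xUnit; it uses a using declaration instead of the explicit Dispose call, and the class/method names are illustrative.

    // Minimal sketch of the pattern exercised by LoadModel above; assumes the
    // model file has already been downloaded to Models/ (see the csproj target below).
    using LLama;
    using LLama.Common;
    using Xunit;

    namespace LLama.Unittest
    {
        public class BasicTestSketch
        {
            [Fact]
            public void LoadAndDispose()
            {
                var @params = new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256);

                // The using declaration releases the native model when the test
                // method exits, matching the explicit model.Dispose() call above.
                using var model = new LLamaModel(@params);
            }
        }
    }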

LLama.Unittest/LLama.Unittest.csproj (+14, -0)

@@ -23,8 +23,22 @@
    </PackageReference>
  </ItemGroup>

  <Target Name="DownloadContentFiles" BeforeTargets="Build">
    <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q3_K_S.bin" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.ggmlv3.q3_K_S.bin" SkipUnchangedFiles="true">
    </DownloadFile>
  </Target>

  <ItemGroup>
    <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
  </ItemGroup>

  <ItemGroup>
    <Folder Include="Models\" />
  </ItemGroup>

  <ItemGroup>
    <None Update="Models\llama-2-7b-chat.ggmlv3.q3_K_S.bin">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
  </ItemGroup>
</Project>

LLama/Common/FixedSizeQueue.cs (+2, -1)

@@ -30,6 +30,7 @@ namespace LLama.Common
        /// <param name="data"></param>
        public FixedSizeQueue(int size, IEnumerable<T> data)
        {
#if NETCOREAPP3_0_OR_GREATER
            // Try an early check on the amount of data supplied (if possible)
#if NETSTANDARD2_0
            var dataCount = data.Count();
@@ -52,7 +53,7 @@ namespace LLama.Common
                throw new ArgumentException($"The max size set for the quene is {size}, but got {count} initial values.");
#endif
        }
/
        /// <summary>
        /// Replace every item in the queue with the given value
        /// </summary>
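
Note: the conditional-compilation lines above guard an early size check on the constructor's data argument. The sketch below illustrates that check with the two target-dependent paths; it is not the library's exact code, and SizeCheck/EnsureFits are made-up names.

    // Illustrative only: an early size check like the one FixedSizeQueue performs.
    using System;
    using System.Collections.Generic;
    using System.Linq;

    internal static class SizeCheck
    {
        public static void EnsureFits<T>(int size, IEnumerable<T> data)
        {
#if NET6_0_OR_GREATER
            // Newer targets can often read the count without enumerating the sequence.
            if (data.TryGetNonEnumeratedCount(out var count) && count > size)
                throw new ArgumentException($"The max size set for the queue is {size}, but got {count} initial values.");
#else
            // On netstandard2.0 the sequence has to be enumerated to count it.
            var count = data.Count();
            if (count > size)
                throw new ArgumentException($"The max size set for the queue is {size}, but got {count} initial values.");
#endif
        }
    }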


LLama/Common/ModelParams.cs (+1, -1)

@@ -84,7 +84,7 @@ namespace LLama.Common
        /// <summary>
        /// how split tensors should be distributed across GPUs
        /// </summary>
        public float[] TensorSplits { get; set; } = new float[] { 0 };
        public nint TensorSplits { get; set; }

        /// <summary>
        ///


LLama/Native/LLamaContextParams.cs (+8, -6)

@@ -47,7 +47,8 @@ namespace LLama.Native
        /// <summary>
        /// how to split layers across multiple GPUs
        /// </summary>
        public float[] tensor_split;
        public nint tensor_split;


        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
@@ -78,6 +79,11 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool low_vram;

        /// <summary>
        /// if true, use experimental mul_mat_q kernels
        /// </summary>
        [MarshalAs(UnmanagedType.I1)] public bool mul_mat_q;

        /// <summary>
        /// use fp16 for KV cache
        /// </summary>
@@ -114,9 +120,5 @@ namespace LLama.Native
        [MarshalAs(UnmanagedType.I1)]
        public bool embedding;
    }

    public struct TensorSplits
    {
        public float Item1;
    }
}


LLama/Utils.cs (+4, -2)

@@ -28,12 +28,14 @@ namespace LLama
            lparams.logits_all = @params.Perplexity;
            lparams.embedding = @params.EmbeddingMode;
            lparams.low_vram = @params.LowVram;

            /*
            if (@params.TensorSplits.Length != 1)
            {
                throw new ArgumentException("Currently multi-gpu support is not supported by " +
                    "both llama.cpp and LLamaSharp.");
            }
            }*/

            lparams.tensor_split = @params.TensorSplits;

            if (!File.Exists(@params.ModelPath))
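
Note: with TensorSplits/tensor_split now typed as nint, the per-GPU split values have to reach llama.cpp as a raw pointer rather than a marshalled float[]. The sketch below shows one way a caller could produce such a pointer by pinning a managed array; TensorSplitInterop and PinSplits are illustrative names, not part of this commit.

    using System;
    using System.Runtime.InteropServices;

    internal static class TensorSplitInterop
    {
        // Pins a managed float[] so native code can read it, and returns the
        // address as nint. The caller must keep the GCHandle alive while
        // llama.cpp may read the buffer and call handle.Free() afterwards.
        public static nint PinSplits(float[] splits, out GCHandle handle)
        {
            handle = GCHandle.Alloc(splits, GCHandleType.Pinned);
            return handle.AddrOfPinnedObject();
        }
    }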


LLama/runtimes/libllama.dylib (BIN)

