SciSharp
diff --git a/‎.github/prepare_release.sh
Lines changed: 3 additions & 0 deletions b/‎.github/prepare_release.sh
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/workflows/release-minor.yml
Lines changed: 24 additions & 1 deletion b/‎.github/workflows/release-minor.yml
Lines changed: 24 additions & 1 deletion
diff --git a/‎.github/workflows/release-patch.yml
Lines changed: 23 additions & 1 deletion b/‎.github/workflows/release-patch.yml
Lines changed: 23 additions & 1 deletion
diff --git a/‎Assets/LLamaSharp-Integrations.png
28.8 KB b/‎Assets/LLamaSharp-Integrations.png
28.8 KB
diff --git a/‎Assets/LLamaSharp-Integrations.vsdx
24.7 KB b/‎Assets/LLamaSharp-Integrations.vsdx
24.7 KB
diff --git a/‎Assets/llava_demo.gif
6.98 MB b/‎Assets/llava_demo.gif
6.98 MB
diff --git a/‎CONTRIBUTING.md
Lines changed: 58 additions & 14 deletions b/‎CONTRIBUTING.md
Lines changed: 58 additions & 14 deletions
diff --git a/‎LLama.Examples/Examples/ChatChineseGB2312.cs
Lines changed: 1 addition & 0 deletions b/‎LLama.Examples/Examples/ChatChineseGB2312.cs
Lines changed: 1 addition & 0 deletions
diff --git a/‎LLama.Examples/Examples/ChatSessionStripRoleName.cs
Lines changed: 2 additions & 0 deletions b/‎LLama.Examples/Examples/ChatSessionStripRoleName.cs
Lines changed: 2 additions & 0 deletions
diff --git a/‎LLama.Examples/Examples/InstructModeExecute.cs
Lines changed: 1 addition & 0 deletions b/‎LLama.Examples/Examples/InstructModeExecute.cs
Lines changed: 1 addition & 0 deletions
diff --git a/‎LLama.Examples/Examples/InteractiveModeExecute.cs
Lines changed: 1 addition & 0 deletions b/‎LLama.Examples/Examples/InteractiveModeExecute.cs
Lines changed: 1 addition & 0 deletions
diff --git a/‎LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
Lines changed: 2 additions & 0 deletions b/‎LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
Lines changed: 2 additions & 0 deletions
diff --git a/‎LLama.Examples/Examples/LoadAndSaveState.cs
Lines changed: 1 addition & 0 deletions b/‎LLama.Examples/Examples/LoadAndSaveState.cs
Lines changed: 1 addition & 0 deletions
diff --git a/‎LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
Lines changed: 2 additions & 2 deletions b/‎LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
Lines changed: 2 additions & 2 deletions
diff --git a/‎LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
Lines changed: 2 additions & 2 deletions b/‎LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
Lines changed: 2 additions & 2 deletions
diff --git a/‎LLama/LLamaSharp.csproj
Lines changed: 6 additions & 5 deletions b/‎LLama/LLamaSharp.csproj
Lines changed: 6 additions & 5 deletions
@@ -77,5 +77,8 @@ do
   nuget pack $nuspec -version $updated_version
 done
 
+# write the version to the file
+echo $updated_version > version.txt
+
 cd ..
 exit 0
@@ -15,7 +15,11 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
+    - name: Configure Git Credentials
+      run: |
+        git config user.name Rinne
+        git config user.email [email protected]  
     - name: Setup NuGet
       uses: nuget/setup-nuget@v1
       with:
@@ -51,3 +55,22 @@ jobs:
 
     - name: Push LLamaSharp packages to nuget.org
       run: dotnet nuget push ./temp/LLamaSharp*.nupkg --source https://www.nuget.org -k ${{ secrets.LLAMA_SHARP_NUGET_KEY }} --skip-duplicate
+
+    # Deploy the documentation to GitHub Pages
+    - uses: actions/setup-python@v5
+      with:
+        python-version: 3.x
+    - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 
+    - uses: actions/cache@v4
+      with:
+        key: mkdocs-material-${{ env.cache_id }}
+        path: .cache
+        restore-keys: |
+          mkdocs-material-
+    - run: pip install mkdocs==1.4.3 mkdocs-material mike==1.1.2 setuptools
+    - run: |
+        git fetch origin gh-pages --depth=1
+        version=$(cat ./temp/version.txt)
+        mike deploy --push --update-aliases --force $version latest
+        mike set-default --push --force latest
+        
@@ -15,7 +15,11 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
+    - name: Configure Git Credentials
+      run: |
+        git config user.name Rinne
+        git config user.email [email protected]  
     - name: Setup NuGet
       uses: nuget/setup-nuget@v1
       with:
@@ -52,3 +56,21 @@ jobs:
     - name: Push LLamaSharp packages to nuget.org
       run: dotnet nuget push ./temp/LLamaSharp*.nupkg --source https://www.nuget.org -k ${{ secrets.LLAMA_SHARP_NUGET_KEY }} --skip-duplicate
 
+    # Deploy the documentation to GitHub Pages
+    - uses: actions/setup-python@v5
+      with:
+        python-version: 3.x
+    - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 
+    - uses: actions/cache@v4
+      with:
+        key: mkdocs-material-${{ env.cache_id }}
+        path: .cache
+        restore-keys: |
+          mkdocs-material-
+    - run: pip install mkdocs==1.4.3 mkdocs-material mike==1.1.2 setuptools
+    - run: |
+        git fetch origin gh-pages --depth=1
+        version=$(cat ./temp/version.txt)
+        mike deploy --push --update-aliases --force $version latest
+        mike set-default --push --force latest
+
@@ -2,21 +2,65 @@
 
 Hi, welcome to develop LLamaSharp with us together! We are always open for every contributor and any format of contributions! If you want to maintain this library actively together, please contact us to get the write access after some PRs. (Email: [email protected])
 
-In this page, we'd like to introduce how to make contributions here easily. 😊
+On this page, we explain how to make contributions easily. 😊
 
-## Compile the native library from source
+## The goal of LLamaSharp
 
-Firstly, please clone the [llama.cpp](https://github.com/ggerganov/llama.cpp) repository and following the instructions in [llama.cpp readme](https://github.com/ggerganov/llama.cpp#build) to configure your local environment.
+In the beginning, LLamaSharp was a C# binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provided only some wrappers for llama.cpp so that C#/.NET users could run LLM models on their local device efficiently, even without any experience with C++. After around a year of development, more tools and integrations have been added to LLamaSharp, significantly expanding the application of LLamaSharp. Though llama.cpp is still the only backend of LLamaSharp, the goal of this repository is now to be an efficient and easy-to-use library for LLM inference, rather than just a binding of llama.cpp.
 
-If you want to support cublas in the compilation, please make sure that you've installed the cuda.
+In this way, our development of LLamaSharp is divided into two main directions:
 
-When building from source, please add `-DBUILD_SHARED_LIBS=ON` to the cmake instruction. For example, when building with cublas but without openblas, use the following instruction:
+1. To make LLamaSharp more efficient. For example, `BatchedExecutor` can accept multiple queries and generate the responses for them at the same time, which significantly improves the throughput. This part is always related to the native APIs and executors in LLamaSharp.
+2. To make it easier to use LLamaSharp. We believe the best library lets users build powerful functionality with simple code. Higher-level APIs and integrations with other libraries are the key points here.
+
+
+## How to compile the native library from source
+
+If you want to contribute to the first direction of our goal, you may need to compile the native library yourself.
+
+Firstly, please follow the instructions in [llama.cpp readme](https://github.com/ggerganov/llama.cpp#build) to configure your local environment. Most importantly, CMake with version higher than 3.14 should be installed on your device.
+
+Secondly, clone the llama.cpp repository. You could manually clone it and check out the right commit according to [Map of LLamaSharp and llama.cpp versions](https://github.com/SciSharp/LLamaSharp?tab=readme-ov-file#map-of-llamasharp-and-llama.cpp-versions), or clone it as a submodule of LLamaSharp when cloning LLamaSharp.
+
+```shell
+git clone --recursive https://github.com/SciSharp/LLamaSharp.git
+```
+
+If you want to support cublas in the compilation, please make sure that you've installed CUDA. If you are using an Intel CPU, please check the highest AVX ([Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions)) level that is supported by your device.
+
+As shown in [llama.cpp cmake file](https://github.com/ggerganov/llama.cpp/blob/master/CMakeLists.txt), there are many options that could be enabled or disabled when building the library. The following ones are commonly used when using it as a native library of LLamaSharp.
+
+```cpp
+option(BUILD_SHARED_LIBS                "build shared libraries") // Please always enable it 
+option(LLAMA_NATIVE                     "llama: enable -march=native flag") // Could be disabled
+option(LLAMA_AVX                        "llama: enable AVX") // Enable it if the highest supported avx level is AVX
+option(LLAMA_AVX2                       "llama: enable AVX2") // Enable it if the highest supported avx level is AVX2
+option(LLAMA_AVX512                     "llama: enable AVX512") // Enable it if the highest supported avx level is AVX512
+option(LLAMA_BLAS                       "llama: use BLAS") // Enable it if you want to use BLAS library to accelerate the computation on CPU
+option(LLAMA_CUDA                       "llama: use CUDA") // Enable it if you have CUDA device
+option(LLAMA_CLBLAST                    "llama: use CLBlast") // Enable it if you have a device with CLBLast or OpenCL support, for example, some AMD GPUs.
+option(LLAMA_VULKAN                     "llama: use Vulkan") // Enable it if you have a device with Vulkan support
+option(LLAMA_METAL                      "llama: use Metal") // Enable it if you are using a MAC with Metal device.
+option(LLAMA_BUILD_TESTS                "llama: build tests") // Please disable it.
+option(LLAMA_BUILD_EXAMPLES             "llama: build examples") // Please disable it.
+option(LLAMA_BUILD_SERVER               "llama: build server example")// Please disable it.
+```
+
+Most importantly, `-DBUILD_SHARED_LIBS=ON` must be added to the cmake instruction; the other options depend on your needs. For example, when building with cublas but without openblas, use the following instruction:
 
 ```bash
+mkdir build && cd build
 cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
+cmake --build . --config Release
 ```
 
-After running `cmake --build . --config Release`, you could find the `llama.dll`, `llama.so` or `llama.dylib` in your build directory. After pasting it to `LLamaSharp/LLama/runtimes` you can use it as the native library in LLamaSharp.
+Now you can find `llama.dll`, `libllama.so` or `libllama.dylib` in your build directory (or `build/bin`).
+
+To load the compiled native library, please add the following code to the very beginning of your code.
+
+```cs
+NativeLibraryConfig.Instance.WithLibrary("<Your native library path>");
+```
 
 
 ## Add a new feature to LLamaSharp
@@ -39,19 +83,19 @@ You could use exactly the same prompt, the same model and the same parameters to
 
 If the experiment showed that it worked well in llama.cpp but didn't in LLamaSharp, a search for the problem could be started. While the reason of the problem could be various, the best way I think is to add log-print in the code of llama.cpp and use it in LLamaSharp after compilation. Thus, when running LLamaSharp, you could see what happened in the native library.
 
-After finding out the reason, a painful but happy process comes. When working on the BUG fix, there's only one rule to follow, that is keeping the examples working well. If the modification fixed the BUG but impact on other functions, it would not be a good fix.
-
-During the BUG fix process, please don't hesitate to discuss together when you stuck on something.
+During the BUG fix process, please don't hesitate to start a discussion when you are blocked.
 
 ## Add integrations
 
-All kinds of integration are welcomed here! Currently the following integrations are under work or on our schedule:
+All kinds of integration are welcomed here! Currently the following integrations have been added but still need improvement:
+
+1. semantic-kernel
+2. kernel-memory
+3. BotSharp (maintained in SciSharp/BotSharp repo)
+4. Langchain (maintained in tryAGI/LangChain repo)
 
-1. BotSharp
-2. semantic-kernel
-3. Unity
+If you find another library that would be good to integrate, please open an issue to let us know!
 
-Besides, for some other integrations, like `ASP.NET core`, `SQL`, `Blazor` and so on, we'll appreciate it if you could help with that. If the time is limited for you, providing an example for it also means a lot!
 
 ## Add examples
 
 
@@ -3,6 +3,7 @@
 
 namespace LLama.Examples.Examples;
 
+// This example shows how to deal with Chinese input with gb2312 encoding.
 public class ChatChineseGB2312
 {
     private static string ConvertEncoding(string input, Encoding original, Encoding target)
 
@@ -2,6 +2,8 @@
 
 namespace LLama.Examples.Examples;
 
+// When using chatsession, it's a common case that you want to strip the role names
+// rather than display them. This example shows how to use transforms to strip them.
 public class ChatSessionStripRoleName
 {
     public static async Task Run()
 
@@ -2,6 +2,7 @@
 
 namespace LLama.Examples.Examples
 {
+    // This example shows how to use InstructExecutor to generate the response.
     public class InstructModeExecute
     {
         public static async Task Run()
 
@@ -2,6 +2,7 @@
 
 namespace LLama.Examples.Examples
 {
+    // This is an example which shows how to chat with LLM with InteractiveExecutor.
     public class InteractiveModeExecute
     {
         public static async Task Run()
 
@@ -5,6 +5,8 @@
 
 namespace LLama.Examples.Examples
 {
+    // This example shows how to chat with LLaVA model with both image and text as input.
+    // It uses the interactive executor for inference.
     public class LlavaInteractiveModeExecute
     {
         public static async Task Run()
 
@@ -2,6 +2,7 @@
 
 namespace LLama.Examples.Examples
 {
+    // This example shows how to save/load state of the executor.
     public class LoadAndSaveState
     {
         public static async Task Run()
 
@@ -4,7 +4,7 @@
     <TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
-    <Version>0.8.0</Version>
+    <Version>0.11.0</Version>
     <Authors>Xbotter</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
@@ -17,7 +17,7 @@
       The integration of LLamaSharp and Microsoft kernel-memory. It could make it easy to support document search for LLamaSharp model inference.
     </Description>
     <PackageReleaseNotes>
-      Support integration with kernel-memory
+      v0.11.0 updated the kernel-memory package and Fixed System.ArgumentException: EmbeddingMode must be true.
     </PackageReleaseNotes>
     <PackageLicenseExpression>MIT</PackageLicenseExpression>
     <PackageOutputPath>packages</PackageOutputPath>
 
@@ -10,7 +10,7 @@
 		<ImplicitUsings>enable</ImplicitUsings>
 		<Nullable>enable</Nullable>
 
-		<Version>0.8.0</Version>
+		<Version>0.11.0</Version>
 		<Authors>Tim Miller, Xbotter</Authors>
 		<Company>SciSharp STACK</Company>
 		<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
@@ -23,7 +23,7 @@
 			The integration of LLamaSharp and Microsoft semantic-kernel.
 		</Description>
 		<PackageReleaseNotes>
-			Support integration with semantic-kernel
+			v0.11.0 updates the semantic-kernel package.
 		</PackageReleaseNotes>
 		<PackageLicenseExpression>MIT</PackageLicenseExpression>
 		<PackageOutputPath>packages</PackageOutputPath>
 
@@ -7,8 +7,8 @@
     <Platforms>AnyCPU;x64;Arm64</Platforms>
     <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
 
-    <Version>0.10.0</Version>
-    <Authors>Yaohui Liu, Martin Evans, Haiping Chen</Authors>
+    <Version>0.11.0</Version>
+    <Authors>Rinne, Martin Evans, jlsantiago and all the other contributors in https://github.com/SciSharp/LLamaSharp/graphs/contributors.</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
     <Copyright>MIT, SciSharp STACK $([System.DateTime]::UtcNow.ToString(yyyy))</Copyright>
@@ -17,11 +17,12 @@
     <PackageIconUrl>https://avatars3.githubusercontent.com/u/44989469?s=200&amp;v=4</PackageIconUrl>
     <PackageTags>LLama, LLM, GPT, ChatGPT, NLP, AI, Chat Bot, SciSharp</PackageTags>
     <Description>
-      The .NET binding of LLama.cpp, making LLM inference and deployment easy and fast. For model
-      weights to run, please go to https://github.com/SciSharp/LLamaSharp for more information.
+      LLamaSharp is a cross-platform library to run 🦙LLaMA/LLaVA model (and others) in your local device. 
+      Based on [llama.cpp](https://github.com/ggerganov/llama.cpp), inference with LLamaSharp is efficient on both CPU and GPU. 
+      With the higher-level APIs and RAG support, it's convenient to deploy LLM (Large Language Model) in your application with LLamaSharp.
     </Description>
     <PackageReleaseNotes>
-      LLamaSharp 0.10.0 supports automatically device feature detection, adds integration with kernel-memory and fixes some performance issues.
+      LLamaSharp 0.11.0 added support for multi-modal (LLaVA), improved the BatchedExecutor and added state management of `ChatSession`.
     </PackageReleaseNotes>
     <PackageLicenseExpression>MIT</PackageLicenseExpression>
     <PackageOutputPath>packages</PackageOutputPath>
Original file line number	Diff line number	Diff line change
`@@ -3,6 +3,7 @@`
`3`	`3`
`4`	`4`	`namespace LLama.Examples.Examples;`
`5`	`5`
	`6`	`+// This example shows how to deal with Chinese input with gb2312 encoding.`
`6`	`7`	`public class ChatChineseGB2312`
`7`	`8`	`{`
`8`	`9`	`private static string ConvertEncoding(string input, Encoding original, Encoding target)`
Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,8 @@`
`2`	`2`
`3`	`3`	`namespace LLama.Examples.Examples;`
`4`	`4`
	`5`	`+// When using chatsession, it's a common case that you want to strip the role names`
	`6`	`+// rather than display them. This example shows how to use transforms to strip them.`
`5`	`7`	`public class ChatSessionStripRoleName`
`6`	`8`	`{`
`7`	`9`	`public static async Task Run()`
Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`
`3`	`3`	`namespace LLama.Examples.Examples`
`4`	`4`	`{`
	`5`	`+ // This example shows how to use InstructExecutor to generate the response.`
`5`	`6`	`public class InstructModeExecute`
`6`	`7`	`{`
`7`	`8`	`public static async Task Run()`
Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`
`3`	`3`	`namespace LLama.Examples.Examples`
`4`	`4`	`{`
	`5`	`+ // This is an example which shows how to chat with LLM with InteractiveExecutor.`
`5`	`6`	`public class InteractiveModeExecute`
`6`	`7`	`{`
`7`	`8`	`public static async Task Run()`
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,8 @@`
`5`	`5`
`6`	`6`	`namespace LLama.Examples.Examples`
`7`	`7`	`{`
	`8`	`+ // This example shows how to chat with LLaVA model with both image and text as input.`
	`9`	`+ // It uses the interactive executor to inference.`
`8`	`10`	`public class LlavaInteractiveModeExecute`
`9`	`11`	`{`
`10`	`12`	`public static async Task Run()`
Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`
`3`	`3`	`namespace LLama.Examples.Examples`
`4`	`4`	`{`
	`5`	`+ // This example shows how to save/load state of the executor.`
`5`	`6`	`public class LoadAndSaveState`
`6`	`7`	`{`
`7`	`8`	`public static async Task Run()`