@@ -12,13 +12,15 @@ namespace LLama.Examples.Examples;
 /// </summary>
 public class BatchedExecutorGuidance
 {
-    private const int n_len = 32;
+    /// <summary>
+    /// Set how many tokens should be generated
+    /// </summary>
+    private const int TokenCount = 32;
 
     public static async Task Run()
     {
-        string modelPath = UserSettings.GetModelPath();
-
-        var parameters = new ModelParams(modelPath);
+        // Load model weights
+        var parameters = new ModelParams(UserSettings.GetModelPath());
         using var model = await LLamaWeights.LoadFromFileAsync(parameters);
 
         var positivePrompt = AnsiConsole.Ask("Positive Prompt (or ENTER for default):", "My favourite colour is").Trim();
@@ -29,7 +31,7 @@ public static async Task Run()
         using var executor = new BatchedExecutor(model, parameters);
 
         // Print some info
-        var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name");
+        var name = model.Metadata.GetValueOrDefault("general.name", "unknown model name");
         Console.WriteLine($"Created executor with model: {name}");
 
         // Load the two prompts into two conversations
@@ -48,30 +50,32 @@ await AnsiConsole
         using var unguided = guided.Fork();
 
         // Run inference loop
-        var unguidedSampler = new GuidedSampler(null, weight);
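+        // The unguided conversation samples with the stock pipeline, so any difference
+        // between the two output streams below comes from the guidance step alone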
+        var unguidedSampler = new DefaultSamplingPipeline();
         var unguidedDecoder = new StreamingTokenDecoder(executor.Context);
         var guidedSampler = new GuidedSampler(guidance, weight);
         var guidedDecoder = new StreamingTokenDecoder(executor.Context);
         await AnsiConsole
             .Progress()
             .StartAsync(async progress =>
             {
-                var reporter = progress.AddTask("Running Inference", maxValue: n_len);
+                var reporter = progress.AddTask("Running Inference", maxValue: TokenCount);
 
-                for (var i = 0; i < n_len; i++)
+                for (var i = 0; i < TokenCount; i++)
                 {
                     if (i != 0)
                         await executor.Infer();
 
                     // Sample from the "unguided" conversation. This is just a conversation using the same prompt, without any
                     // guidance. This serves as a comparison to show the effect of guidance.
-                    var u = unguidedSampler.Sample(executor.Context.NativeHandle, unguided.Sample(), Array.Empty<LLamaToken>());
+                    var u = unguidedSampler.Sample(executor.Context.NativeHandle, unguided.Sample(), []);
                     unguidedDecoder.Add(u);
                     unguided.Prompt(u);
 
                     // Sample from the "guided" conversation. This sampler will internally use the "guidance" conversation
-                    // to steer the conversation. See how this is done in GuidedSampler.ProcessLogits (bottom of this file).
-                    var g = guidedSampler.Sample(executor.Context.NativeHandle, guided.Sample(), Array.Empty<LLamaToken>());
+                    // to steer the conversation. See how this is done in GuidedSampler.ProcessTokenDataArray (bottom of this file).
+                    var g = guidedSampler.Sample(executor.Context.NativeHandle, guided.Sample(), []);
                     guidedDecoder.Add(g);
 
                     // Use this token to advance both guided _and_ guidance. Keeping them in sync (except for the initial prompt).
@@ -91,37 +93,37 @@ await AnsiConsole
         AnsiConsole.MarkupLine($"[green]Guided:[/][white]{guidedDecoder.Read().ReplaceLineEndings(" ")}[/]");
     }
 
-    private class GuidedSampler(Conversation? guidance, float weight)
+    private class GuidedSampler(Conversation guidance, float weight)
         : BaseSamplingPipeline
     {
+        protected override LLamaToken ProcessTokenDataArray(SafeLLamaContextHandle ctx, LLamaTokenDataArray candidates, ReadOnlySpan<LLamaToken> lastTokens)
+        {
+            // Get the logits generated by the guidance sequences
+            var guidanceLogits = guidance.Sample();
+
+            // Modify these logits based on the guidance logits
+            candidates.Guidance(ctx, guidanceLogits, weight);
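+            // A larger weight steers sampling more strongly towards the guidance conversation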
+
+            // Basic sampling
+            candidates.Temperature(ctx, 0.8f);
+            candidates.TopK(ctx, 25);
+            return candidates.SampleToken(ctx);
+        }
+
         public override void Accept(SafeLLamaContextHandle ctx, LLamaToken token)
         {
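+            // Nothing to do - this pipeline keeps no per-token sampling state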
         }
-
+
         public override ISamplingPipeline Clone()
         {
             throw new NotSupportedException();
         }
-
+
         protected override void ProcessLogits(SafeLLamaContextHandle ctx, Span<float> logits, ReadOnlySpan<LLamaToken> lastTokens)
         {
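+            // Nothing to do - guidance is applied in ProcessTokenDataArray instead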
         }
-
-        protected override LLamaToken ProcessTokenDataArray(SafeLLamaContextHandle ctx, LLamaTokenDataArray candidates, ReadOnlySpan<LLamaToken> lastTokens)
-        {
-            if (guidance != null)
-            {
-                // Get the logits generated by the guidance sequences
-                var guidanceLogits = guidance.Sample();
-
-                // Modify these logits based on the guidance logits
-                candidates.Guidance(ctx, guidanceLogits, weight);
-            }
-
-            candidates.Temperature(ctx, 0.8f);
-            candidates.TopK(ctx, 25);
-
-            return candidates.SampleToken(ctx);
-        }
     }
 }