Commit a7e6170: Implement MinP for ollamaChat

1 parent: 150d9c1

3 files changed: 48 additions, 3 deletions

+llms/+internal/callOllamaChatAPI.m (2 additions, 0 deletions)

@@ -29,6 +29,7 @@
     messages
     nvp.Temperature
     nvp.TopP
+    nvp.MinP
     nvp.TopK
     nvp.TailFreeSamplingZ
     nvp.StopSequences

@@ -103,6 +104,7 @@
     dict = dictionary();
     dict("Temperature") = "temperature";
     dict("TopP") = "top_p";
+    dict("MinP") = "min_p";
     dict("TopK") = "top_k";
     dict("TailFreeSamplingZ") = "tfs_z";
     dict("StopSequences") = "stop";

ollamaChat.m (10 additions, 1 deletion)

@@ -23,6 +23,12 @@
 %                       words can appear in any particular place.
 %                       This is also known as top-p sampling.
 %
+%   MinP              - Minimum probability ratio for controlling the
+%                       diversity of the output. Default value is 0;
+%                       higher values imply that only the more likely
+%                       words can appear in any particular place.
+%                       This is also known as min-p sampling.
+%
 %   TopK              - Maximum number of most likely tokens that are
 %                       considered for output. Default is Inf, allowing
 %                       all tokens. Smaller values reduce diversity in

@@ -67,6 +73,7 @@
     Model (1,1) string
     Endpoint (1,1) string
     TopK (1,1) {mustBeReal,mustBePositive} = Inf
+    MinP (1,1) {llms.utils.mustBeValidTopP} = 0
     TailFreeSamplingZ (1,1) {mustBeReal} = 1
 end

@@ -77,6 +84,7 @@
     systemPrompt {llms.utils.mustBeTextOrEmpty} = []
     nvp.Temperature {llms.utils.mustBeValidTemperature} = 1
     nvp.TopP {llms.utils.mustBeValidTopP} = 1
+    nvp.MinP {llms.utils.mustBeValidTopP} = 0
     nvp.TopK (1,1) {mustBeReal,mustBePositive} = Inf
     nvp.StopSequences {llms.utils.mustBeValidStop} = {}
     nvp.ResponseFormat (1,1) string {mustBeMember(nvp.ResponseFormat,["text","json"])} = "text"

@@ -103,6 +111,7 @@
     this.ResponseFormat = nvp.ResponseFormat;
     this.Temperature = nvp.Temperature;
     this.TopP = nvp.TopP;
+    this.MinP = nvp.MinP;
     this.TopK = nvp.TopK;
     this.TailFreeSamplingZ = nvp.TailFreeSamplingZ;
     this.StopSequences = nvp.StopSequences;

@@ -146,7 +155,7 @@
     [text, message, response] = llms.internal.callOllamaChatAPI(...
         this.Model, messagesStruct, ...
         Temperature=this.Temperature, ...
-        TopP=this.TopP, TopK=this.TopK,...
+        TopP=this.TopP, MinP=this.MinP, TopK=this.TopK,...
         TailFreeSamplingZ=this.TailFreeSamplingZ,...
         StopSequences=this.StopSequences, MaxNumTokens=nvp.MaxNumTokens, ...
         ResponseFormat=this.ResponseFormat,Seed=nvp.Seed, ...
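
Taken together, the ollamaChat.m changes expose MinP both as a documented constructor argument and as a property forwarded on every call into llms.internal.callOllamaChatAPI. A usage sketch (the model name "mistral" is borrowed from the tests below; any model pulled into a running Ollama server should behave the same):

% Assumes a running Ollama server with the "mistral" model pulled locally.
chat = ollamaChat("mistral", MinP=0.05);   % drop tokens far less likely than the top token
response = generate(chat, "Say hello in one short sentence.");

% MinP=1 keeps only tokens tied with the most likely one, so repeated
% generations should agree; this is the behavior the new test relies on.
chat = ollamaChat("mistral", MinP=1);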

tests/tollamaChat.m (36 additions, 2 deletions)

@@ -50,7 +50,7 @@ function extremeTopK(testCase)
     %% This should work, and it does on some computers. On others, Ollama
     %% receives the parameter, but either Ollama or llama.cpp fails to
     %% honor it correctly.
-    testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+    % testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");

     % setting top-k to k=1 leaves no random choice,
     % so we expect to get a fixed response.

@@ -61,11 +61,27 @@ function extremeTopK(testCase)
     testCase.verifyEqual(response1,response2);
 end

+function extremeMinP(testCase)
+    %% This should work, and it does on some computers. On others, Ollama
+    %% receives the parameter, but either Ollama or llama.cpp fails to
+    %% honor it correctly.
+    % testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+
+    % setting min-p to p=1 means only tokens with the same logit as
+    % the most likely one can be chosen, which will almost certainly
+    % only ever be one, so we expect to get a fixed response.
+    chat = ollamaChat("mistral",MinP=1);
+    prompt = "Min-p sampling with p=1 returns a definite answer.";
+    response1 = generate(chat,prompt);
+    response2 = generate(chat,prompt);
+    testCase.verifyEqual(response1,response2);
+end
+
 function extremeTfsZ(testCase)
     %% This should work, and it does on some computers. On others, Ollama
     %% receives the parameter, but either Ollama or llama.cpp fails to
     %% honor it correctly.
-    testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+    % testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");

     % setting tfs_z to z=0 leaves no random choice, but degrades to
     % greedy sampling, so we expect to get a fixed response.

@@ -235,6 +251,16 @@ function queryModels(testCase)
         "Value", -20, ...
         "Error", "MATLAB:expectedNonnegative"), ...
     ...
+    "MinPTooLarge", struct( ...
+        "Property", "MinP", ...
+        "Value", 20, ...
+        "Error", "MATLAB:notLessEqual"), ...
+    ...
+    "MinPTooSmall", struct( ...
+        "Property", "MinP", ...
+        "Value", -20, ...
+        "Error", "MATLAB:expectedNonnegative"), ...
+    ...
     "WrongTypeStopSequences", struct( ...
         "Property", "StopSequences", ...
         "Value", 123, ...

@@ -329,6 +355,14 @@ function queryModels(testCase)
         "Input",{{ "TopP" -20 }},...
         "Error","MATLAB:expectedNonnegative"),...
     ...
+    "MinPTooLarge",struct( ...
+        "Input",{{ "MinP" 20 }},...
+        "Error","MATLAB:notLessEqual"),...
+    ...
+    "MinPTooSmall",struct( ...
+        "Input",{{ "MinP" -20 }},...
+        "Error","MATLAB:expectedNonnegative"),...
+    ...
     "WrongTypeStopSequences",struct( ...
         "Input",{{ "StopSequences" 123}},...
         "Error","MATLAB:validators:mustBeNonzeroLengthText"),...
