
Commit 8a2ea28

Remove GPT specific penalties from ollamaChat
1 parent cd9bbe2 commit 8a2ea28

File tree

9 files changed: 36 additions, 62 deletions


llms/+internal/callAzureChatAPI.m
Lines changed: 2 additions & 0 deletions

@@ -1,4 +1,6 @@
 function [text, message, response] = callAzureChatAPI(endpoint, deploymentID, messages, functions, nvp)
+% This function is undocumented and will change in a future release
+
 %callAzureChatAPI Calls the openAI chat completions API on Azure.
 %
 % MESSAGES and FUNCTIONS should be structs matching the json format

llms/+internal/callOllamaChatAPI.m
Lines changed: 2 additions & 0 deletions

@@ -1,4 +1,6 @@
 function [text, message, response] = callOllamaChatAPI(model, messages, nvp)
+% This function is undocumented and will change in a future release
+
 %callOllamaChatAPI Calls the ollama chat completions API.
 %
 % MESSAGES and FUNCTIONS should be structs matching the json format

llms/+internal/callOpenAIChatAPI.m
Lines changed: 2 additions & 0 deletions

@@ -1,4 +1,6 @@
 function [text, message, response] = callOpenAIChatAPI(messages, functions, nvp)
+% This function is undocumented and will change in a future release
+
 %callOpenAIChatAPI Calls the openAI chat completions API.
 %
 % MESSAGES and FUNCTIONS should be structs matching the json format

llms/+internal/gptPenalties.m
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+classdef (Abstract) gptPenalties
+% This class is undocumented and will change in a future release
+
+% Copyright 2024 The MathWorks, Inc.
+    properties
+        %PRESENCEPENALTY Penalty for using a token in the response that has already been used.
+        PresencePenalty {llms.utils.mustBeValidPenalty} = 0
+
+        %FREQUENCYPENALTY Penalty for using a token that is frequent in the training data.
+        FrequencyPenalty {llms.utils.mustBeValidPenalty} = 0
+    end
+end
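
The commit moves the two penalty properties out of the shared textGenerator base class into this abstract mixin, so each chat class opts in explicitly. A minimal sketch of the pattern, using a hypothetical subclass name (myChat is not part of this commit):

    % Hypothetical subclass: inheriting llms.internal.gptPenalties adds the
    % two validated penalty properties on top of the textGenerator base;
    % ollamaChat simply omits the mixin and no longer carries them.
    classdef (Sealed) myChat < llms.internal.textGenerator & llms.internal.gptPenalties
    end

An instance starts with both penalties at their default of 0, and any assignment is checked by llms.utils.mustBeValidPenalty.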

llms/+internal/sendRequest.m
Lines changed: 3 additions & 1 deletion

@@ -1,9 +1,11 @@
 function [response, streamedText] = sendRequest(parameters, token, endpoint, timeout, streamFun)
+% This function is undocumented and will change in a future release
+
 %sendRequest Sends a request to an ENDPOINT using PARAMETERS and
 % api key TOKEN. TIMEOUT is the number of seconds to wait for initial
 % server connection. STREAMFUN is an optional callback function.
 
-% Copyright 2023 The MathWorks, Inc.
+% Copyright 2023-2024 The MathWorks, Inc.
 
 arguments
     parameters

llms/+internal/textGenerator.m
Lines changed: 1 addition & 7 deletions

@@ -1,5 +1,5 @@
 classdef (Abstract) textGenerator
-%
+% This class is undocumented and will change in a future release
 
 % Copyright 2023-2024 The MathWorks, Inc.
 

@@ -12,12 +12,6 @@
 
         %STOPSEQUENCES Sequences to stop the generation of tokens.
         StopSequences {llms.utils.mustBeValidStop} = {}
-
-        %PRESENCEPENALTY Penalty for using a token in the response that has already been used.
-        PresencePenalty {llms.utils.mustBeValidPenalty} = 0
-
-        %FREQUENCYPENALTY Penalty for using a token that is frequent in the training data.
-        FrequencyPenalty {llms.utils.mustBeValidPenalty} = 0
     end
 
     properties (SetAccess=protected)

azureChat.m
Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-classdef(Sealed) azureChat < llms.internal.textGenerator
+classdef(Sealed) azureChat < llms.internal.textGenerator & llms.internal.gptPenalties
 %azureChat Chat completion API from Azure.
 %
 % CHAT = azureChat(endpoint, deploymentID) creates an azureChat object with the

ollamaChat.m
Lines changed: 12 additions & 52 deletions

@@ -13,9 +13,16 @@
 %                        of the output. Default value depends on the model;
 %                        if not specified in the model, defaults to 0.8.
 %
-%   TODO: TopK and TopP, how do they relate to this?
 %   TopProbabilityMass - Top probability mass value for controlling the
-%                        diversity of the output. Default value is 1.
+%                        diversity of the output. Default value is 1; with
+%                        smaller value TopProbabilityMass=p, only the most
+%                        probable tokens up to a cumulative probability p
+%                        are used.
+%
+%   TopProbabilityNum  - Maximum number of most likely tokens that are
+%                        considered for output. Default is Inf, allowing
+%                        all tokens. Smaller values reduce diversity in
+%                        the output.
 %
 %   StopSequences      - Vector of strings that when encountered, will
 %                        stop the generation of tokens. Default

@@ -24,8 +31,6 @@
 %   ResponseFormat     - The format of response the model returns.
 %                        "text" (default) | "json"
 %
-%   Seed               - TODO: Seems to have no effect whatsoever (tested via curl) - cf. https://github.com/ollama/ollama/issues/4660
-%
 %   Mirostat           - 0/1/2, eta, tau
 %
 %   RepeatLastN        - find a better name! “Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)”

@@ -34,8 +39,6 @@
 %
 %   TailFreeSamplingZ
 %
-%   MaxNumTokens
-%
 %   StreamFun          - Function to callback when streaming the
 %                        result
 %

@@ -48,18 +51,14 @@
 %   generate           - Generate a response using the ollamaChat instance.
 %
 % ollamaChat Properties: TODO TODO
+%   Model              - Model name (as expected by ollama server)
+%
 %   Temperature        - Temperature of generation.
 %
 %   TopProbabilityMass - Top probability mass to consider for generation.
 %
 %   StopSequences      - Sequences to stop the generation of tokens.
 %
-%   PresencePenalty    - Penalty for using a token in the
-%                        response that has already been used.
-%
-%   FrequencyPenalty   - Penalty for using a token that is
-%                        frequent in the training data.
-%
 %   SystemPrompt       - System prompt.
 %
 %   ResponseFormat     - Specifies the response format, text or json

@@ -159,52 +158,13 @@
                 TimeOut=this.TimeOut, StreamFun=this.StreamFun);
         end
     end
-
-    methods(Hidden)
-        function mustBeValidFunctionCall(this, functionCall)
-            if ~isempty(functionCall)
-                mustBeTextScalar(functionCall);
-                if isempty(this.FunctionNames)
-                    error("llms:mustSetFunctionsForCall", llms.utils.errorMessageCatalog.getMessage("llms:mustSetFunctionsForCall"));
-                end
-                mustBeMember(functionCall, ["none","auto", this.FunctionNames]);
-            end
-        end
-
-        function toolChoice = convertToolChoice(this, toolChoice)
-            % if toolChoice is empty
-            if isempty(toolChoice)
-                % if Tools is not empty, the default is 'auto'.
-                if ~isempty(this.Tools)
-                    toolChoice = "auto";
-                end
-            elseif ToolChoice ~= "auto"
-                % if toolChoice is not empty, then it must be in the format
-                % {"type": "function", "function": {"name": "my_function"}}
-                toolChoice = struct("type","function","function",struct("name",toolChoice));
-            end
-
-        end
-    end
 end
 
 function mustBeNonzeroLengthTextScalar(content)
     mustBeNonzeroLengthText(content)
     mustBeTextScalar(content)
 end
 
-function [functionsStruct, functionNames] = functionAsStruct(functions)
-    numFunctions = numel(functions);
-    functionsStruct = cell(1, numFunctions);
-    functionNames = strings(1, numFunctions);
-
-    for i = 1:numFunctions
-        functionsStruct{i} = struct('type','function', ...
-            'function',encodeStruct(functions(i))) ;
-        functionNames(i) = functions(i).FunctionName;
-    end
-end
-
 function mustBeValidMsgs(value)
     if isa(value, "openAIMessages")
         if numel(value.Messages) == 0

@@ -223,4 +183,4 @@ function mustBeIntegerOrEmpty(value)
     if ~isempty(value)
         mustBeInteger(value)
     end
-end
+end
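
With the penalties gone, ollama-backed chats are tuned through the sampling controls documented above. A usage sketch, assuming a running local ollama server and that the model named here has been pulled ("mistral" is illustrative):

    % Keep only the most probable tokens up to 90% cumulative probability.
    % PresencePenalty and FrequencyPenalty are no longer ollamaChat
    % properties after this commit.
    chat = ollamaChat("mistral", TopProbabilityMass=0.9, Temperature=0.7);
    txt = generate(chat, "Why is the sky blue?");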

openAIChat.m
Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-classdef(Sealed) openAIChat < llms.internal.textGenerator
+classdef(Sealed) openAIChat < llms.internal.textGenerator & llms.internal.gptPenalties
 %openAIChat Chat completion API from OpenAI.
 %
 % CHAT = openAIChat(systemPrompt) creates an openAIChat object with the
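
openAIChat (like azureChat above) keeps the penalties by inheriting the new mixin, so existing code is unaffected. A minimal sketch, assuming the constructor still forwards these name-value arguments and a valid API key is configured:

    % PresencePenalty and FrequencyPenalty now come from
    % llms.internal.gptPenalties rather than textGenerator.
    chat = openAIChat("You are a helpful assistant", PresencePenalty=0.3);
    chat.FrequencyPenalty   % 0, the mixin default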
