Commit a6b8d51

add ollamaChat class
1 parent e009e86 commit a6b8d51

4 files changed: +611 additions, -6 deletions

+llms/+internal/callOllamaChatAPI.m

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
function [text, message, response] = callOllamaChatAPI(model, messages, nvp)
%callOllamaChatAPI Calls the Ollama chat completions API.
%
%   MESSAGES should be structs matching the JSON format required by the
%   Ollama chat completions API.
%   Ref: https://github.com/ollama/ollama/blob/main/docs/api.md
%
%   Currently, the supported NVP are, including the equivalent name in the API:
%   TODO TODO TODO
%    - Temperature (temperature)
%    - TopProbabilityMass (top_p)
%    - NumCompletions (n)
%    - StopSequences (stop)
%    - MaxNumTokens (max_tokens)
%    - PresencePenalty (presence_penalty)
%    - FrequencyPenalty (frequency_penalty)
%    - ResponseFormat (response_format)
%    - Seed (seed)
%    - TimeOut
%    - StreamFun
%   More details on the parameters: https://platform.openai.com/docs/api-reference/chat/create
%
%   Example
%
%   % Create messages struct
%   messages = {struct("role", "system",...
%       "content", "You are a helpful assistant");
%       struct("role", "user", ...
%       "content", "What is the edit distance between hi and hello?")};
%
%   % Send a request to a local model, e.g. "mistral"
%   [text, message] = llms.internal.callOllamaChatAPI("mistral", messages)

%   Copyright 2023-2024 The MathWorks, Inc.

arguments
    model
    messages
    nvp.Temperature = 1
    nvp.TopProbabilityMass = 1
    nvp.NumCompletions = 1
    nvp.StopSequences = []
    nvp.MaxNumTokens = inf
    nvp.PresencePenalty = 0
    nvp.FrequencyPenalty = 0
    nvp.ResponseFormat = "text"
    nvp.Seed = []
    nvp.TimeOut = 10
    nvp.StreamFun = []
end

URL = "http://localhost:11434/api/chat"; % TODO: model parameter

parameters = buildParametersCall(model, messages, nvp);

[response, streamedText] = llms.internal.sendRequest(parameters,[],URL,nvp.TimeOut,nvp.StreamFun);

% If the call errors, "message" will not be part of response.Body.Data;
% instead we get response.Body.Data.error
if response.StatusCode=="OK"
    % Output the first generation
    if isempty(nvp.StreamFun)
        message = response.Body.Data.message;
    else
        message = struct("role", "assistant", ...
            "content", streamedText);
    end
    text = string(message.content);
else
    text = "";
    message = struct();
end
end

function parameters = buildParametersCall(model, messages, nvp)
% Builds a struct in the format that is expected by the API, combining
% MESSAGES and the parameters in NVP.

parameters = struct();
parameters.model = model;
parameters.messages = messages;

parameters.stream = ~isempty(nvp.StreamFun);

options = struct;
if ~isempty(nvp.Seed)
    options.seed = nvp.Seed;
end

% Translate the MATLAB-style NVP names to the option names the API expects
dict = mapNVPToParameters;

nvpOptions = keys(dict);
for opt = nvpOptions.'
    if isfield(nvp, opt)
        options.(dict(opt)) = nvp.(opt);
    end
end

parameters.options = options;
end

function dict = mapNVPToParameters()
dict = dictionary();
dict("Temperature") = "temperature";
dict("TopProbabilityMass") = "top_p";
dict("NumCompletions") = "n";
dict("StopSequences") = "stop";
dict("MaxNumTokens") = "num_predict";
end

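For context, a minimal sketch of calling the new wrapper directly (it lives in an internal package, so end users would normally go through the ollamaChat class below). It assumes a local Ollama server is running on the default port and that the "mistral" model has been pulled; both are assumptions, not part of this commit:

% Minimal usage sketch. Assumes `ollama serve` is running locally and
% the "mistral" model has been pulled (illustrative, not from this commit).
messages = {struct("role", "user", "content", "Say hello in five words.")};
[text, message, response] = llms.internal.callOllamaChatAPI( ...
    "mistral", messages, Temperature=0.5, TimeOut=30);
disp(text)                  % generated text as a string scalar
disp(response.StatusCode)   % "OK" on success
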
+llms/+internal/sendRequest.m

Lines changed: 8 additions & 6 deletions
@@ -15,18 +15,20 @@

 % Define the headers for the API request

-headers = [matlab.net.http.HeaderField('Content-Type', 'application/json')...
-    matlab.net.http.HeaderField('Authorization', "Bearer " + token)...
-    matlab.net.http.HeaderField('api-key',token)];
+headers = matlab.net.http.HeaderField('Content-Type', 'application/json');
+if ~isempty(token)
+    headers = [headers ...
+        matlab.net.http.HeaderField('Authorization', "Bearer " + token)...
+        matlab.net.http.HeaderField('api-key',token)];
+end

 % Define the request message
 request = matlab.net.http.RequestMessage('post',headers,parameters);

-% Create a HTTPOptions object;
+% set the timeout
 httpOpts = matlab.net.http.HTTPOptions;
-
-% Set the ConnectTimeout option
 httpOpts.ConnectTimeout = timeout;
+httpOpts.ResponseTimeout = timeout;

 % Send the request and store the response
 if isempty(streamFun)

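The practical effect of this diff: when the token argument is empty (as it is for a local Ollama server, which needs no API key), only the Content-Type header is sent, and the response timeout now honors the same timeout value as the connection. A small sketch of the new header logic in isolation (the token value is illustrative):

% Sketch of the new header-building logic on its own.
token = "";   % empty token: no Authorization/api-key headers are added
headers = matlab.net.http.HeaderField('Content-Type', 'application/json');
if ~isempty(token)
    headers = [headers ...
        matlab.net.http.HeaderField('Authorization', "Bearer " + token) ...
        matlab.net.http.HeaderField('api-key', token)];
end
disp(numel(headers))   % 1: only Content-Type is sent
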
ollamaChat.m

Lines changed: 226 additions & 0 deletions
@@ -0,0 +1,226 @@
classdef(Sealed) ollamaChat < llms.internal.textGenerator
%ollamaChat Chat completion API from Ollama.
%
%   CHAT = ollamaChat(modelName) creates an ollamaChat object for the given model.
%
%   CHAT = ollamaChat(__,systemPrompt) creates an ollamaChat object with the
%   specified system prompt.
%
%   CHAT = ollamaChat(__,Name=Value) specifies additional options
%   using one or more name-value arguments:
%
%   Temperature        - Temperature value for controlling the randomness
%                        of the output. The default depends on the model;
%                        if not specified in the model, it is 0.8.
%
%   TODO: TopK and TopP, how do they relate to this?
%   TopProbabilityMass - Top probability mass value for controlling the
%                        diversity of the output. Default value is 1.
%
%   StopSequences      - Vector of strings that, when encountered, will
%                        stop the generation of tokens. Default
%                        value is empty.
%
%   ResponseFormat     - The format of response the model returns.
%                        "text" (default) | "json"
%
%   Seed               - TODO: Seems to have no effect whatsoever (tested via curl) - cf. https://github.com/ollama/ollama/issues/4660
%
%   Mirostat           - 0/1/2, eta, tau
%
%   RepeatLastN        - find a better name! "Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"
%
%   RepeatPenalty
%
%   TailFreeSamplingZ
%
%   MaxNumTokens
%
%   StreamFun          - Function to callback when streaming the result.
%
%   TimeOut            - Connection timeout in seconds (default: 10 secs).
%
%   ollamaChat Functions:
%       ollamaChat         - Chat completion API from Ollama.
%       generate           - Generate a response using the ollamaChat instance.
%
%   ollamaChat Properties: TODO TODO
%       Temperature        - Temperature of generation.
%       TopProbabilityMass - Top probability mass to consider for generation.
%       StopSequences      - Sequences to stop the generation of tokens.
%       PresencePenalty    - Penalty for using a token in the response
%                            that has already been used.
%       FrequencyPenalty   - Penalty for using a token that is frequent
%                            in the training data.
%       SystemPrompt       - System prompt.
%       ResponseFormat     - Specifies the response format, "text" or "json".
%       TimeOut            - Connection timeout in seconds (default: 10 secs).

%   Copyright 2024 The MathWorks, Inc.

    properties(SetAccess=private)
        Model (1,1) string
    end

    methods
        function this = ollamaChat(modelName, systemPrompt, nvp)
            arguments
                modelName {mustBeTextScalar}
                systemPrompt {llms.utils.mustBeTextOrEmpty} = []
                nvp.Temperature {llms.utils.mustBeValidTemperature} = 1
                nvp.TopProbabilityMass {llms.utils.mustBeValidTopP} = 1
                nvp.StopSequences {llms.utils.mustBeValidStop} = {}
                nvp.ResponseFormat (1,1) string {mustBeMember(nvp.ResponseFormat,["text","json"])} = "text"
                nvp.TimeOut (1,1) {mustBeReal,mustBePositive} = 10
                nvp.StreamFun (1,1) {mustBeA(nvp.StreamFun,'function_handle')}
            end

            if isfield(nvp,"StreamFun")
                this.StreamFun = nvp.StreamFun;
            else
                this.StreamFun = [];
            end

            if ~isempty(systemPrompt)
                systemPrompt = string(systemPrompt);
                if ~(strlength(systemPrompt)==0)
                    this.SystemPrompt = {struct("role", "system", "content", systemPrompt)};
                end
            end

            this.Model = modelName;
            this.ResponseFormat = nvp.ResponseFormat;
            this.Temperature = nvp.Temperature;
            this.TopProbabilityMass = nvp.TopProbabilityMass;
            this.StopSequences = nvp.StopSequences;
            this.TimeOut = nvp.TimeOut;
        end

        function [text, message, response] = generate(this, messages, nvp)
            %generate Generate a response using the ollamaChat instance.
            %
            %   [TEXT, MESSAGE, RESPONSE] = generate(CHAT, MESSAGES) generates a response
            %   with the specified MESSAGES.
            %
            %   [TEXT, MESSAGE, RESPONSE] = generate(__, Name=Value) specifies additional options
            %   using one or more name-value arguments:
            %
            %   NumCompletions - Number of completions to generate.
            %                    Default value is 1.
            %
            %   MaxNumTokens   - Maximum number of tokens in the generated response.
            %                    Default value is inf.
            %
            %   Seed           - An integer value to use to obtain
            %                    reproducible responses.

            arguments
                this (1,1) ollamaChat
                messages (1,1) {mustBeValidMsgs}
                nvp.NumCompletions (1,1) {mustBePositive, mustBeInteger} = 1
                nvp.MaxNumTokens (1,1) {mustBePositive} = inf
                nvp.Seed {mustBeIntegerOrEmpty(nvp.Seed)} = []
            end

            if isstring(messages) && isscalar(messages)
                messagesStruct = {struct("role", "user", "content", messages)};
            else
                messagesStruct = messages.Messages;
            end

            if ~isempty(this.SystemPrompt)
                messagesStruct = horzcat(this.SystemPrompt, messagesStruct);
            end

            [text, message, response] = llms.internal.callOllamaChatAPI(...
                this.Model, messagesStruct, ...
                Temperature=this.Temperature, ...
                NumCompletions=nvp.NumCompletions,...
                StopSequences=this.StopSequences, MaxNumTokens=nvp.MaxNumTokens, ...
                ResponseFormat=this.ResponseFormat,Seed=nvp.Seed, ...
                TimeOut=this.TimeOut, StreamFun=this.StreamFun);
        end
    end

    methods(Hidden)
        function mustBeValidFunctionCall(this, functionCall)
            if ~isempty(functionCall)
                mustBeTextScalar(functionCall);
                if isempty(this.FunctionNames)
                    error("llms:mustSetFunctionsForCall", llms.utils.errorMessageCatalog.getMessage("llms:mustSetFunctionsForCall"));
                end
                mustBeMember(functionCall, ["none","auto", this.FunctionNames]);
            end
        end

        function toolChoice = convertToolChoice(this, toolChoice)
            % if toolChoice is empty
            if isempty(toolChoice)
                % if Tools is not empty, the default is 'auto'
                if ~isempty(this.Tools)
                    toolChoice = "auto";
                end
            elseif toolChoice ~= "auto"
                % if toolChoice is not empty, then it must be in the format
                % {"type": "function", "function": {"name": "my_function"}}
                toolChoice = struct("type","function","function",struct("name",toolChoice));
            end
        end
    end
end

function mustBeNonzeroLengthTextScalar(content)
mustBeNonzeroLengthText(content)
mustBeTextScalar(content)
end

function [functionsStruct, functionNames] = functionAsStruct(functions)
numFunctions = numel(functions);
functionsStruct = cell(1, numFunctions);
functionNames = strings(1, numFunctions);

for i = 1:numFunctions
    functionsStruct{i} = struct('type','function', ...
        'function',encodeStruct(functions(i))) ;
    functionNames(i) = functions(i).FunctionName;
end
end

function mustBeValidMsgs(value)
if isa(value, "openAIMessages")
    if numel(value.Messages) == 0
        error("llms:mustHaveMessages", llms.utils.errorMessageCatalog.getMessage("llms:mustHaveMessages"));
    end
else
    try
        llms.utils.mustBeNonzeroLengthTextScalar(value);
    catch ME
        error("llms:mustBeMessagesOrTxt", llms.utils.errorMessageCatalog.getMessage("llms:mustBeMessagesOrTxt"));
    end
end
end

function mustBeIntegerOrEmpty(value)
if ~isempty(value)
    mustBeInteger(value)
end
end

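Putting the new class together end to end, a minimal sketch of the intended workflow. The model name "mistral", the prompts, and a running local Ollama server are illustrative assumptions, not part of this commit:

% End-to-end sketch (assumes a local Ollama server with "mistral" pulled).
chat = ollamaChat("mistral", "You are a concise assistant.", ...
    Temperature=0.7, TimeOut=60);
[text, message] = generate(chat, "What is the capital of France?");
disp(text)

% Streaming variant: the StreamFun callback prints tokens as they arrive.
chatStreamed = ollamaChat("mistral", StreamFun=@(token) fprintf("%s", token));
generate(chatStreamed, "Count from 1 to 5.");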