Commit e229935

add TailFreeSampling_Z, add comment about currently unsupported ollama settings
1 parent 8d351a2 commit e229935

3 files changed: +24 -25 lines changed

llms/+internal/callOllamaChatAPI.m

Lines changed: 2 additions & 0 deletions

@@ -44,6 +44,7 @@
     nvp.Temperature = 1
     nvp.TopProbabilityMass = 1
     nvp.TopProbabilityNum = Inf
+    nvp.TailFreeSamplingZ = 1
     nvp.NumCompletions = 1
     nvp.StopSequences = []
     nvp.MaxNumTokens = inf
@@ -116,6 +117,7 @@
     dict("Temperature") = "temperature";
     dict("TopProbabilityMass") = "top_p";
     dict("TopProbabilityNum") = "top_k";
+    dict("TailFreeSamplingZ") = "tfs_z";
     dict("NumCompletions") = "n";
     dict("StopSequences") = "stop";
     dict("MaxNumTokens") = "num_predict";

ollamaChat.m

Lines changed: 12 additions & 25 deletions

@@ -31,13 +31,8 @@
     % ResponseFormat - The format of response the model returns.
     %                  "text" (default) | "json"
     %
-    % Mirostat - 0/1/2, eta, tau
-    %
-    % RepeatLastN - find a better name! “Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)”
-    %
-    % RepeatPenalty
-    %
-    % TailFreeSamplingZ
+    % TailFreeSamplingZ - Reduce the use of less probable tokens, based on
+    %                     the second-order differences of ordered probabilities.
     %
     % StreamFun - Function to callback when streaming the
     %             result
@@ -50,29 +45,22 @@
     % ollamaChat - Chat completion API from Ollama.
     % generate - Generate a response using the ollamaChat instance.
     %
-    % ollamaChat Properties:
+    % ollamaChat Properties, in addition to the name-value pairs above:
     % Model - Model name (as expected by ollama server)
     %
-    % Temperature - Temperature of generation.
-    %
-    % TopProbabilityMass - Top probability mass to consider for generation (top-p sampling).
-    %
-    % TopProbabilityNum - Only consider the k most likely tokens for generation (top-k sampling).
-    %
-    % StopSequences - Sequences to stop the generation of tokens.
-    %
     % SystemPrompt - System prompt.
-    %
-    % ResponseFormat - Specifies the response format, text or json
-    %
-    % TimeOut - Connection Timeout in seconds (default: 120 secs)
-    %
+
+    % Ollama model properties not exposed:
+    % repeat_last_n, repeat_penalty - could not find an example where they made a difference
+    % mirostat, mirostat_eta, mirostat_tau - looking for the best API design
+

    % Copyright 2024 The MathWorks, Inc.

     properties
         Model (1,1) string
         TopProbabilityNum (1,1) {mustBeReal,mustBePositive} = Inf
+        TailFreeSamplingZ (1,1) {mustBeReal} = 1
     end

     methods
@@ -86,6 +74,7 @@
             nvp.StopSequences {llms.utils.mustBeValidStop} = {}
             nvp.ResponseFormat (1,1) string {mustBeMember(nvp.ResponseFormat,["text","json"])} = "text"
             nvp.TimeOut (1,1) {mustBeReal,mustBePositive} = 120
+            nvp.TailFreeSamplingZ (1,1) {mustBeReal} = 1
             nvp.StreamFun (1,1) {mustBeA(nvp.StreamFun,'function_handle')}
         end

@@ -107,6 +96,7 @@
             this.Temperature = nvp.Temperature;
             this.TopProbabilityMass = nvp.TopProbabilityMass;
             this.TopProbabilityNum = nvp.TopProbabilityNum;
+            this.TailFreeSamplingZ = nvp.TailFreeSamplingZ;
             this.StopSequences = nvp.StopSequences;
             this.TimeOut = nvp.TimeOut;
         end
@@ -131,10 +121,6 @@
             %
             % Seed - An integer value to use to obtain
             % reproducible responses
-            %
-            % Currently, GPT-4 Turbo with vision does not support the message.name
-            % parameter, functions/tools, response_format parameter, stop
-            % sequences, and max_tokens

             arguments
                 this (1,1) ollamaChat
@@ -158,6 +144,7 @@
                 this.Model, messagesStruct, ...
                 Temperature=this.Temperature, ...
                 TopProbabilityMass=this.TopProbabilityMass, TopProbabilityNum=this.TopProbabilityNum,...
+                TailFreeSamplingZ=this.TailFreeSamplingZ,...
                 NumCompletions=nvp.NumCompletions,...
                 StopSequences=this.StopSequences, MaxNumTokens=nvp.MaxNumTokens, ...
                 ResponseFormat=this.ResponseFormat,Seed=nvp.Seed, ...
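Taken together, the ollamaChat.m changes expose the new setting end to end: it can be set at construction, is stored as a property, and is forwarded on every generate call. A minimal usage sketch (assumes a running Ollama server with the "mistral" model pulled, as in the tests below):

    % Hypothetical usage of the new name-value pair; z < 1 trims the
    % low-probability tail, z = 1 (the default) leaves sampling unchanged.
    chat = ollamaChat("mistral", TailFreeSamplingZ=0.95);
    response = generate(chat, "Why is the sky blue?");
    disp(response)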

tests/tollamaChat.m

Lines changed: 10 additions & 0 deletions

@@ -48,6 +48,16 @@ function extremeTopK(testCase)
             testCase.verifyEqual(response1,response2);
         end

+        function extremeTfsZ(testCase)
+            % setting tfs_z to z=0 leaves no random choice,
+            % so we expect to get a fixed response.
+            chat = ollamaChat("mistral",TailFreeSamplingZ=0);
+            prompt = "Sampling with tfs_z=0 returns a definite answer.";
+            response1 = generate(chat,prompt);
+            response2 = generate(chat,prompt);
+            testCase.verifyEqual(response1,response2);
+        end
+
         function stopSequences(testCase)
             chat = ollamaChat("mistral",TopProbabilityNum=1);
             prompt = "Top-k sampling with k=1 returns a definite answer.";
