@@ ... @@
% ResponseFormat - The format of response the model returns.
%       "text" (default) | "json"
%
-% Mirostat - 0/1/2, eta, tau
-%
-% RepeatLastN - find a better name! "Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"
-%
-% RepeatPenalty
-%
-% TailFreeSamplingZ
+% TailFreeSamplingZ - Reduce the use of less probable tokens, based on
+%       the second-order differences of ordered probabilities.
%
% StreamFun - Function to call back when streaming the
%       result
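(The sampling itself runs inside the Ollama server; this class only forwards the parameter. As a rough sketch of what tail-free sampling does, assuming token probabilities p are already available and there are at least three tokens, the helper name sampleTailFree is ours, not part of this change:)

function idx = sampleTailFree(p, z)
    % Sort probabilities in descending order and weight tokens by the
    % absolute second-order differences of that sorted curve.
    [pSorted, order] = sort(p, 'descend');
    d2 = abs(diff(pSorted, 2));
    w = d2 ./ sum(d2);                  % normalize weights to sum to 1
    lastKept = find(cumsum(w) > z, 1);  % cut the tail once mass exceeds z
    if isempty(lastKept)
        lastKept = numel(pSorted) - 2;  % z = 1 (the default) keeps all tokens
    end
    keep = order(1:lastKept + 2);       % +2: each diff() shortens the vector
    pKept = p(keep) ./ sum(p(keep));    % renormalize the surviving tokens
    idx = keep(find(rand <= cumsum(pKept), 1));
end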
@@ ... @@
% ollamaChat - Chat completion API for a local Ollama server.
% generate - Generate a response using the ollamaChat instance.
%
-% ollamaChat Properties:
+% ollamaChat Properties, in addition to the name-value pairs above:
% Model - Model name (as expected by ollama server)
%
-% Temperature - Temperature of generation.
-%
-% TopProbabilityMass - Top probability mass to consider for generation (top-p sampling).
-%
-% TopProbabilityNum - Only consider the k most likely tokens for generation (top-k sampling).
-%
-% StopSequences - Sequences to stop the generation of tokens.
-%
% SystemPrompt - System prompt.
-%
-% ResponseFormat - Specifies the response format, text or json
-%
-% TimeOut - Connection Timeout in seconds (default: 120 secs)
-%
+
+% Ollama model properties not exposed:
+%     repeat_last_n, repeat_penalty - could not find an example where they made a difference
+%     mirostat, mirostat_eta, mirostat_tau - looking for the best API design
+

% Copyright 2024 The MathWorks, Inc.

properties
    Model (1,1) string
    TopProbabilityNum (1,1) {mustBeReal,mustBePositive} = Inf
+   TailFreeSamplingZ (1,1) {mustBeReal} = 1
end

methods
@@ ... @@
    nvp.StopSequences {llms.utils.mustBeValidStop} = {}
    nvp.ResponseFormat (1,1) string {mustBeMember(nvp.ResponseFormat,["text","json"])} = "text"
    nvp.TimeOut (1,1) {mustBeReal,mustBePositive} = 120
+   nvp.TailFreeSamplingZ (1,1) {mustBeReal} = 1
    nvp.StreamFun (1,1) {mustBeA(nvp.StreamFun,'function_handle')}
end
@@ ... @@
    this.Temperature = nvp.Temperature;
    this.TopProbabilityMass = nvp.TopProbabilityMass;
    this.TopProbabilityNum = nvp.TopProbabilityNum;
+   this.TailFreeSamplingZ = nvp.TailFreeSamplingZ;
    this.StopSequences = nvp.StopSequences;
    this.TimeOut = nvp.TimeOut;
end
@@ ... @@
%
% Seed - An integer value to use to obtain
%       reproducible responses
-%
-% Currently, GPT-4 Turbo with vision does not support the message.name
-% parameter, functions/tools, response_format parameter, stop
-% sequences, and max_tokens

arguments
    this (1,1) ollamaChat
@@ ... @@
    this.Model, messagesStruct, ...
    Temperature=this.Temperature, ...
    TopProbabilityMass=this.TopProbabilityMass, TopProbabilityNum=this.TopProbabilityNum, ...
+   TailFreeSamplingZ=this.TailFreeSamplingZ, ...
    NumCompletions=nvp.NumCompletions, ...
    StopSequences=this.StopSequences, MaxNumTokens=nvp.MaxNumTokens, ...
    ResponseFormat=this.ResponseFormat, Seed=nvp.Seed, ...
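(A hedged usage sketch of the new option: the model name, prompt, and z value below are placeholders, and it assumes a local Ollama server with that model pulled and that the constructor takes the model name as its first argument, as the Model property suggests:)

% Trim the low-probability tail (z < 1; the default z = 1 disables it).
chat = ollamaChat("mistral", TailFreeSamplingZ=0.95);

% Seed makes the response reproducible, per the generate documentation above.
response = generate(chat, "Write a haiku about spring.", Seed=42);
disp(response)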