@@ -10,6 +10,22 @@ import { ModelConfig } from './config';
10
10
import { isLanguageModelImagePart , LanguageModelImagePart } from './languageModelParts.js' ;
11
11
import { isChatImagePart , processMessages } from './utils.js' ;
12
12
import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js' ;
13
+ import { log } from './extension.js' ;
14
+
15
/**
 * Switches that decide where prompt-cache control points are placed in a
 * request sent by the Anthropic language model.
 *
 * Every flag is optional; an omitted flag falls back to the default noted on
 * that flag.
 */
interface CacheControlOptions {
	/**
	 * Whether the last tool description gets a cache control point.
	 * Defaults to `true`.
	 */
	lastTool?: boolean;

	/**
	 * Whether the system prompt gets a cache control point.
	 * Defaults to `true`.
	 */
	system?: boolean;

	/**
	 * Whether the last user message gets a cache control point.
	 * Defaults to `false`.
	 */
	lastUserMessage?: boolean;
}
28
+
13
29
14
30
export class AnthropicLanguageModel implements positron . ai . LanguageModelChatProvider {
15
31
name : string ;
@@ -56,16 +72,45 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
56
72
progress : vscode . Progress < vscode . ChatResponseFragment2 > ,
57
73
token : vscode . CancellationToken
58
74
) {
59
- const anthropicMessages = processMessages ( messages ) . map ( toAnthropicMessage ) ;
60
- const tools = options . tools ?. map ( tool => toAnthropicTool ( tool ) ) ;
75
+ const cacheControlOptions = isCacheControlOptions ( options . modelOptions ?. cacheControl )
76
+ ? options . modelOptions . cacheControl
77
+ : undefined ;
78
+ const tools = options . tools && toAnthropicTools ( options . tools , cacheControlOptions ?. lastTool ) ;
61
79
const tool_choice = options . toolMode && toAnthropicToolChoice ( options . toolMode ) ;
62
- const stream = this . _client . messages . stream ( {
80
+ const system = options . modelOptions ?. system &&
81
+ toAnthropicSystem ( options . modelOptions . system , cacheControlOptions ?. system ) ;
82
+ const anthropicMessages = toAnthropicMessages ( messages , cacheControlOptions ?. lastUserMessage ) ;
83
+
84
+ const body : Anthropic . MessageStreamParams = {
63
85
model : this . _config . model ,
64
86
max_tokens : options . modelOptions ?. maxTokens ?? this . maxOutputTokens ,
65
- messages : anthropicMessages ,
66
- tool_choice,
67
87
tools,
68
- system : options . modelOptions ?. system ,
88
+ tool_choice,
89
+ system,
90
+ messages : anthropicMessages ,
91
+ } ;
92
+ const stream = this . _client . messages . stream ( body ) ;
93
+
94
+ // Log request information - the request ID is only available upon connection.
95
+ stream . on ( 'connect' , ( ) => {
96
+ if ( log . logLevel <= vscode . LogLevel . Trace ) {
97
+ log . trace ( `[anthropic] SEND messages.stream [${ stream . request_id } ]: ${ JSON . stringify ( body ) } ` ) ;
98
+ } else {
99
+ const userMessages = body . messages . filter ( m => m . role === 'user' ) ;
100
+ const assistantMessages = body . messages . filter ( m => m . role === 'assistant' ) ;
101
+ log . debug (
102
+ `[anthropic] SEND messages.stream [${ stream . request_id } ]: ` +
103
+ `model: ${ body . model } ; ` +
104
+ `cache options: ${ cacheControlOptions ? JSON . stringify ( cacheControlOptions ) : 'default' } ; ` +
105
+ `tools: ${ body . tools ?. map ( t => t . name ) . sort ( ) . join ( ', ' ) ?? 'none' } ; ` +
106
+ `tool choice: ${ body . tool_choice ? JSON . stringify ( body . tool_choice ) : 'default' } ; ` +
107
+ `system chars: ${ body . system ? JSON . stringify ( body . system ) . length : 0 } ; ` +
108
+ `user messages: ${ userMessages . length } ; ` +
109
+ `user message characters: ${ JSON . stringify ( userMessages ) . length } ; ` +
110
+ `assistant messages: ${ assistantMessages . length } ; ` +
111
+ `assistant message characters: ${ JSON . stringify ( assistantMessages ) . length } `
112
+ ) ;
113
+ }
69
114
} ) ;
70
115
71
116
token . onCancellationRequested ( ( ) => {
@@ -102,20 +147,31 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
102
147
}
103
148
throw error ;
104
149
}
150
+
151
+ // Log usage information.
152
+ const message = await stream . finalMessage ( ) ;
153
+ if ( log . logLevel <= vscode . LogLevel . Trace ) {
154
+ log . trace ( `[anthropic] RECV messages.stream [${ stream . request_id } ]: ${ JSON . stringify ( message ) } ` ) ;
155
+ } else {
156
+ log . debug (
157
+ `[anthropic] RECV messages.stream [${ stream . request_id } ]: ` +
158
+ `usage: ${ JSON . stringify ( message . usage ) } `
159
+ ) ;
160
+ }
105
161
}
106
162
107
163
/**
 * Display name of the provider, read from the static `source` descriptor
 * on `AnthropicLanguageModel`.
 */
get providerName(): string {
	const { displayName } = AnthropicLanguageModel.source.provider;
	return displayName;
}
110
166
111
- private onContentBlock ( block : Anthropic . Messages . ContentBlock , progress : vscode . Progress < vscode . ChatResponseFragment2 > ) : void {
167
/**
 * Dispatch a content block from the response to its handler.
 *
 * Only `tool_use` blocks are handled; every other block type is ignored here.
 *
 * @param block The content block to dispatch.
 * @param progress Progress sink handed on to the block handler.
 */
private onContentBlock(block: Anthropic.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
	if (block.type === 'tool_use') {
		this.onToolUseBlock(block, progress);
	}
}
117
173
118
- private onToolUseBlock ( block : Anthropic . Messages . ToolUseBlock , progress : vscode . Progress < vscode . ChatResponseFragment2 > ) : void {
174
+ private onToolUseBlock ( block : Anthropic . ToolUseBlock , progress : vscode . Progress < vscode . ChatResponseFragment2 > ) : void {
119
175
progress . report ( {
120
176
index : 0 ,
121
177
part : new vscode . LanguageModelToolCallPart ( block . id , block . name , block . input as any ) ,
@@ -170,6 +226,49 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
170
226
}
171
227
}
172
228
229
/**
 * Convert chat messages into Anthropic message params, optionally marking the
 * last eligible user message as a prompt-cache control point.
 *
 * Messages are first passed through `processMessages` and then converted one
 * by one with `toAnthropicMessage`.
 *
 * When `cacheLastUserMessage` is set, the converted messages are scanned from
 * newest to oldest and the first eligible user message receives the cache
 * control point:
 * - string content is replaced by a single text block carrying the marker;
 * - array content gets the marker on its last block.
 *
 * A user message is skipped (and the scan continues with older messages) when
 * its content array is empty or when its last block is a `thinking` /
 * `redacted_thinking` block.
 *
 * @param messages The chat messages to convert.
 * @param cacheLastUserMessage Whether to add the cache control point (default: false).
 * @returns The converted messages, with at most one cache control point added.
 */
function toAnthropicMessages(messages: vscode.LanguageModelChatMessage2[], cacheLastUserMessage = false): Anthropic.MessageParam[] {
	const anthropicMessages = processMessages(messages).map(toAnthropicMessage);

	if (!cacheLastUserMessage) {
		return anthropicMessages;
	}

	// Scan from the newest message backwards for the last eligible user message.
	for (let i = anthropicMessages.length - 1; i >= 0; i--) {
		const message = anthropicMessages[i];

		// Skip non-user messages.
		if (message.role !== 'user') {
			continue;
		}

		if (typeof message.content === 'string') {
			// Content is a single string, make it a text block with a cache control point.
			message.content = [{
				type: 'text',
				text: message.content,
				cache_control: { type: 'ephemeral' },
			}];
		} else {
			// Content is an array, try to add a cache control point to the last content block.
			const lastContentBlock = message.content[message.content.length - 1];

			// An empty content array has no block to mark; indexing it yields
			// undefined, so skip the message instead of dereferencing it.
			if (lastContentBlock === undefined) {
				continue;
			}

			// Thinking blocks cannot be cache control points.
			if (lastContentBlock.type === 'thinking'
				|| lastContentBlock.type === 'redacted_thinking') {
				continue;
			}

			lastContentBlock.cache_control = { type: 'ephemeral' };
		}

		log.debug(`[anthropic] Adding cache control point to last user message block`);
		break;
	}

	return anthropicMessages;
}
271
+
173
272
function toAnthropicMessage ( message : vscode . LanguageModelChatMessage2 ) : Anthropic . MessageParam {
174
273
switch ( message . role ) {
175
274
case vscode . LanguageModelChatMessageRole . Assistant :
@@ -281,6 +380,25 @@ function languageModelImagePartToAnthropicImageBlock(part: LanguageModelImagePar
281
380
} ;
282
381
}
283
382
383
/**
 * Convert chat tools into Anthropic tool definitions, sorted by name.
 *
 * Each tool is converted with `toAnthropicTool`. The converted list is sorted
 * with `localeCompare` on the tool name so that the order sent to the API is
 * stable, which prompt caching relies on. When `cacheLastTool` is set and there
 * is at least one tool, the last tool in sorted order receives an ephemeral
 * cache control point.
 *
 * @param tools The tools to convert; an empty list yields an empty list.
 * @param cacheLastTool Whether to add the cache control point (default: true).
 * @returns The converted tools in sorted order.
 */
function toAnthropicTools(tools: vscode.LanguageModelChatTool[], cacheLastTool = true): Anthropic.ToolUnion[] {
	// Ensure a stable sort order for prompt caching.
	const sortedTools = tools
		.map(tool => toAnthropicTool(tool))
		.sort((left, right) => left.name.localeCompare(right.name));

	if (cacheLastTool && sortedTools.length > 0) {
		// The cache control point goes on the final tool in sorted order.
		const finalTool = sortedTools[sortedTools.length - 1];
		log.debug(`[anthropic] Adding cache control point to last tool: ${finalTool.name}`);
		finalTool.cache_control = { type: 'ephemeral' };
	}

	return sortedTools;
}
401
+
284
402
function toAnthropicTool ( tool : vscode . LanguageModelChatTool ) : Anthropic . ToolUnion {
285
403
const input_schema = tool . inputSchema as Anthropic . Tool . InputSchema ?? {
286
404
type : 'object' ,
@@ -308,3 +426,36 @@ function toAnthropicToolChoice(toolMode: vscode.LanguageModelChatToolMode): Anth
308
426
throw new Error ( `Unsupported tool mode: ${ toolMode } ` ) ;
309
427
}
310
428
}
429
+
430
/**
 * Convert a system prompt into the shape used for the Anthropic `system`
 * request parameter.
 *
 * A string prompt is wrapped in a single text block; when `cacheSystem` is set
 * that block also carries an ephemeral cache control point. Any non-string
 * value is returned untouched and unvalidated, so an invalid prompt is left
 * for Anthropic to reject.
 *
 * @param system The system prompt taken from the model options.
 * @param cacheSystem Whether to add the cache control point (default: true).
 * @returns The system prompt to send with the request.
 */
function toAnthropicSystem(system: unknown, cacheSystem = true): Anthropic.MessageCreateParams['system'] {
	if (typeof system !== 'string') {
		// Pass the system prompt through as-is.
		// We may pass an invalid system prompt; let Anthropic throw the error.
		return system as Anthropic.MessageCreateParams['system'];
	}

	if (!cacheSystem) {
		return [{ type: 'text', text: system }];
	}

	// The prompt is a single block, so that block is also the last one and
	// carries the cache control point.
	log.debug(`[anthropic] Adding cache control point to system prompt`);
	return [{
		type: 'text',
		text: system,
		cache_control: { type: 'ephemeral' },
	}];
}
450
+
451
/**
 * Type guard for `CacheControlOptions`.
 *
 * A value qualifies when it is a non-null, non-array object whose `lastTool`,
 * `system` and `lastUserMessage` properties are each either absent
 * (`undefined`) or a boolean. Other properties on the object are not inspected.
 *
 * Arrays are rejected explicitly: `typeof [] === 'object'`, so without the
 * check an array would be narrowed to `CacheControlOptions`.
 *
 * @param options The value to test.
 * @returns `true` when `options` can be used as `CacheControlOptions`.
 */
function isCacheControlOptions(options: unknown): options is CacheControlOptions {
	if (typeof options !== 'object' || options === null || Array.isArray(options)) {
		return false;
	}

	// Typed against the interface so a renamed flag is caught at compile time.
	const flagNames: readonly (keyof CacheControlOptions)[] = ['lastTool', 'system', 'lastUserMessage'];
	const candidate = options as Record<string, unknown>;

	return flagNames.every(flag => {
		const value = candidate[flag];
		return value === undefined || typeof value === 'boolean';
	});
}
0 commit comments