Skip to content

Commit 3699e0d

Browse files
committed
anthropic prompt caching
1 parent 06d80ec commit 3699e0d

File tree

2 files changed

+212
-44
lines changed

2 files changed

+212
-44
lines changed

extensions/positron-assistant/src/anthropic.ts

Lines changed: 159 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@ import { ModelConfig } from './config';
1010
import { isLanguageModelImagePart, LanguageModelImagePart } from './languageModelParts.js';
1111
import { isChatImagePart, processMessages } from './utils.js';
1212
import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js';
13+
import { log } from './extension.js';
14+
15+
/**
 * Options for controlling cache behavior in the Anthropic language model.
 *
 * Each flag toggles an "ephemeral" cache control point (a cache breakpoint)
 * on part of the outgoing request — see Anthropic's prompt caching
 * documentation for how breakpoints affect caching.
 */
interface CacheControlOptions {
	/** Add a cache control point to the last tool description (default: true). */
	lastTool?: boolean;

	/** Add a cache control point to the system prompt (default: true). */
	system?: boolean;

	/** Add a cache control point to the last user message (default: false). */
	lastUserMessage?: boolean;
}
28+
1329

1430
export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProvider {
1531
name: string;
@@ -56,16 +72,45 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
5672
progress: vscode.Progress<vscode.ChatResponseFragment2>,
5773
token: vscode.CancellationToken
5874
) {
59-
const anthropicMessages = processMessages(messages).map(toAnthropicMessage);
60-
const tools = options.tools?.map(tool => toAnthropicTool(tool));
75+
const cacheControlOptions = isCacheControlOptions(options.modelOptions?.cacheControl)
76+
? options.modelOptions.cacheControl
77+
: undefined;
78+
const tools = options.tools && toAnthropicTools(options.tools, cacheControlOptions?.lastTool);
6179
const tool_choice = options.toolMode && toAnthropicToolChoice(options.toolMode);
62-
const stream = this._client.messages.stream({
80+
const system = options.modelOptions?.system &&
81+
toAnthropicSystem(options.modelOptions.system, cacheControlOptions?.system);
82+
const anthropicMessages = toAnthropicMessages(messages, cacheControlOptions?.lastUserMessage);
83+
84+
const body: Anthropic.MessageStreamParams = {
6385
model: this._config.model,
6486
max_tokens: options.modelOptions?.maxTokens ?? this.maxOutputTokens,
65-
messages: anthropicMessages,
66-
tool_choice,
6787
tools,
68-
system: options.modelOptions?.system,
88+
tool_choice,
89+
system,
90+
messages: anthropicMessages,
91+
};
92+
const stream = this._client.messages.stream(body);
93+
94+
// Log request information - the request ID is only available upon connection.
95+
stream.on('connect', () => {
96+
if (log.logLevel <= vscode.LogLevel.Trace) {
97+
log.trace(`[anthropic] SEND messages.stream [${stream.request_id}]: ${JSON.stringify(body)}`);
98+
} else {
99+
const userMessages = body.messages.filter(m => m.role === 'user');
100+
const assistantMessages = body.messages.filter(m => m.role === 'assistant');
101+
log.debug(
102+
`[anthropic] SEND messages.stream [${stream.request_id}]: ` +
103+
`model: ${body.model}; ` +
104+
`cache options: ${cacheControlOptions ? JSON.stringify(cacheControlOptions) : 'default'}; ` +
105+
`tools: ${body.tools?.map(t => t.name).sort().join(', ') ?? 'none'}; ` +
106+
`tool choice: ${body.tool_choice ? JSON.stringify(body.tool_choice) : 'default'}; ` +
107+
`system chars: ${body.system ? JSON.stringify(body.system).length : 0}; ` +
108+
`user messages: ${userMessages.length}; ` +
109+
`user message characters: ${JSON.stringify(userMessages).length}; ` +
110+
`assistant messages: ${assistantMessages.length}; ` +
111+
`assistant message characters: ${JSON.stringify(assistantMessages).length}`
112+
);
113+
}
69114
});
70115

71116
token.onCancellationRequested(() => {
@@ -102,20 +147,31 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
102147
}
103148
throw error;
104149
}
150+
151+
// Log usage information.
152+
const message = await stream.finalMessage();
153+
if (log.logLevel <= vscode.LogLevel.Trace) {
154+
log.trace(`[anthropic] RECV messages.stream [${stream.request_id}]: ${JSON.stringify(message)}`);
155+
} else {
156+
log.debug(
157+
`[anthropic] RECV messages.stream [${stream.request_id}]: ` +
158+
`usage: ${JSON.stringify(message.usage)}`
159+
);
160+
}
105161
}
106162

107163
get providerName(): string {
108164
return AnthropicLanguageModel.source.provider.displayName;
109165
}
110166

111-
private onContentBlock(block: Anthropic.Messages.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
167+
/**
 * Handle a completed content block from the Anthropic message stream.
 *
 * Currently only `tool_use` blocks are forwarded; other block types are
 * reported through the stream's text events elsewhere.
 */
private onContentBlock(block: Anthropic.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
	if (block.type === 'tool_use') {
		this.onToolUseBlock(block, progress);
	}
}
117173

118-
private onToolUseBlock(block: Anthropic.Messages.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
174+
private onToolUseBlock(block: Anthropic.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
119175
progress.report({
120176
index: 0,
121177
part: new vscode.LanguageModelToolCallPart(block.id, block.name, block.input as any),
@@ -170,6 +226,49 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
170226
}
171227
}
172228

229+
function toAnthropicMessages(messages: vscode.LanguageModelChatMessage2[], cacheLastUserMessage = false): Anthropic.MessageParam[] {
230+
const anthropicMessages = processMessages(messages).map(toAnthropicMessage);
231+
232+
if (cacheLastUserMessage) {
233+
// Add a cache control point to the last valid user message.
234+
for (let i = anthropicMessages.length - 1; i >= 0; i--) {
235+
const message = anthropicMessages[i];
236+
237+
// Skip non-user messages.
238+
if (message.role !== 'user') {
239+
continue;
240+
}
241+
242+
if (typeof message.content === 'string') {
243+
// Content is a single string, make it a text block with a cache control point.
244+
const text = message.content;
245+
message.content = [{
246+
type: 'text',
247+
text,
248+
cache_control: { type: 'ephemeral' },
249+
}];
250+
log.debug(`[anthropic] Adding cache control point to last user message block`);
251+
break;
252+
} else {
253+
// Content is an array, try to add a cache control point to the last content block.
254+
const lastContentBlock = message.content[message.content.length - 1];
255+
256+
// Thinking blocks cannot be cache control points.
257+
if (lastContentBlock.type === 'thinking'
258+
|| lastContentBlock.type === 'redacted_thinking') {
259+
continue;
260+
}
261+
262+
lastContentBlock.cache_control = { type: 'ephemeral' };
263+
log.debug(`[anthropic] Adding cache control point to last user message block`);
264+
break;
265+
}
266+
}
267+
}
268+
269+
return anthropicMessages;
270+
}
271+
173272
function toAnthropicMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam {
174273
switch (message.role) {
175274
case vscode.LanguageModelChatMessageRole.Assistant:
@@ -281,6 +380,25 @@ function languageModelImagePartToAnthropicImageBlock(part: LanguageModelImagePar
281380
};
282381
}
283382

383+
function toAnthropicTools(tools: vscode.LanguageModelChatTool[], cacheLastTool = true): Anthropic.ToolUnion[] {
384+
if (tools.length === 0) {
385+
return [];
386+
}
387+
const anthropicTools = tools.map(tool => toAnthropicTool(tool));
388+
389+
// Ensure a stable sort order for prompt caching.
390+
anthropicTools.sort((a, b) => a.name.localeCompare(b.name));
391+
392+
if (cacheLastTool) {
393+
// Add a cache control point to the last tool description.
394+
const lastTool = anthropicTools[anthropicTools.length - 1];
395+
log.debug(`[anthropic] Adding cache control point to last tool: ${lastTool.name}`);
396+
lastTool.cache_control = { type: 'ephemeral' };
397+
}
398+
399+
return anthropicTools;
400+
}
401+
284402
function toAnthropicTool(tool: vscode.LanguageModelChatTool): Anthropic.ToolUnion {
285403
const input_schema = tool.inputSchema as Anthropic.Tool.InputSchema ?? {
286404
type: 'object',
@@ -308,3 +426,36 @@ function toAnthropicToolChoice(toolMode: vscode.LanguageModelChatToolMode): Anth
308426
throw new Error(`Unsupported tool mode: ${toolMode}`);
309427
}
310428
}
429+
430+
function toAnthropicSystem(system: unknown, cacheSystem = true): Anthropic.MessageCreateParams['system'] {
431+
if (typeof system === 'string') {
432+
const anthropicSystem: Anthropic.MessageCreateParams['system'] = [{
433+
type: 'text',
434+
text: system,
435+
}];
436+
437+
if (cacheSystem) {
438+
// Add a cache control point to the last system prompt block.
439+
const lastSystemBlock = anthropicSystem[anthropicSystem.length - 1];
440+
lastSystemBlock.cache_control = { type: 'ephemeral' };
441+
log.debug(`[anthropic] Adding cache control point to system prompt`);
442+
}
443+
444+
return anthropicSystem;
445+
}
446+
// Pass the system prompt through as-is.
447+
// We may pass an invalid system prompt; let Anthropic throw the error.
448+
return system as Anthropic.MessageCreateParams['system'];
449+
}
450+
451+
function isCacheControlOptions(options: unknown): options is CacheControlOptions {
452+
if (typeof options !== 'object' || options === null) {
453+
return false;
454+
}
455+
const cacheControlOptions = options as CacheControlOptions;
456+
return (
457+
(cacheControlOptions.lastTool === undefined || typeof cacheControlOptions.lastTool === 'boolean') &&
458+
(cacheControlOptions.system === undefined || typeof cacheControlOptions.system === 'boolean') &&
459+
(cacheControlOptions.lastUserMessage === undefined || typeof cacheControlOptions.lastUserMessage === 'boolean')
460+
);
461+
}

0 commit comments

Comments
 (0)