Skip to content

Commit 3315562

Browse files
authored
Assistant: Basic Anthropic prompt caching (#8246)
This PR uses Anthropic's prompt caching API to reduce costs and alleviate organization rate limit pressure – particularly in Databot. By default, we add a cache control point after the system prompt. Callers (e.g. Databot) can disable that if needed.

This PR also adds more Assistant logging:

* The entire Anthropic API request/response at trace level, and summaries (including token usage) at debug level.
* Elements added to the user context message.

### Release Notes

#### New Features

- Assistant caches prompts for Anthropic models (#8077).

#### Bug Fixes

- N/A

### QA Notes

Try out Assistant and Databot and check the cache read/write debug logs in the "Assistant" output channel.
1 parent cabd23b commit 3315562

File tree

3 files changed

+312
-70
lines changed

3 files changed

+312
-70
lines changed

extensions/positron-assistant/src/anthropic.ts

Lines changed: 109 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@ import { ModelConfig } from './config';
1010
import { isLanguageModelImagePart, LanguageModelImagePart } from './languageModelParts.js';
1111
import { isChatImagePart, processMessages } from './utils.js';
1212
import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js';
13+
import { log } from './extension.js';
14+
15+
/**
 * Options for controlling cache behavior in the Anthropic language model.
 *
 * Callers supply these through `modelOptions.cacheControl` on a chat request.
 */
export interface CacheControlOptions {
    /**
     * Add a cache control point to the system prompt (default: true).
     * Set to `false` to send the system prompt without a cache control point.
     */
    system?: boolean;
}
22+
1323

1424
export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProvider {
1525
name: string;
@@ -39,11 +49,14 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
3949
},
4050
};
4151

42-
constructor(private readonly _config: ModelConfig) {
52+
constructor(
53+
private readonly _config: ModelConfig,
54+
client?: Anthropic,
55+
) {
4356
this.name = _config.name;
4457
this.provider = _config.provider;
4558
this.identifier = _config.id;
46-
this._client = new Anthropic({
59+
this._client = client ?? new Anthropic({
4760
apiKey: _config.apiKey,
4861
});
4962
this.maxOutputTokens = _config.maxOutputTokens ?? DEFAULT_MAX_TOKEN_OUTPUT;
@@ -56,16 +69,45 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
5669
progress: vscode.Progress<vscode.ChatResponseFragment2>,
5770
token: vscode.CancellationToken
5871
) {
59-
const anthropicMessages = processMessages(messages).map(toAnthropicMessage);
60-
const tools = options.tools?.map(tool => toAnthropicTool(tool));
72+
const cacheControlOptions = isCacheControlOptions(options.modelOptions?.cacheControl)
73+
? options.modelOptions.cacheControl
74+
: undefined;
75+
const tools = options.tools && toAnthropicTools(options.tools);
6176
const tool_choice = options.toolMode && toAnthropicToolChoice(options.toolMode);
62-
const stream = this._client.messages.stream({
77+
const system = options.modelOptions?.system &&
78+
toAnthropicSystem(options.modelOptions.system, cacheControlOptions?.system);
79+
const anthropicMessages = toAnthropicMessages(messages);
80+
81+
const body: Anthropic.MessageStreamParams = {
6382
model: this._config.model,
6483
max_tokens: options.modelOptions?.maxTokens ?? this.maxOutputTokens,
65-
messages: anthropicMessages,
66-
tool_choice,
6784
tools,
68-
system: options.modelOptions?.system,
85+
tool_choice,
86+
system,
87+
messages: anthropicMessages,
88+
};
89+
const stream = this._client.messages.stream(body);
90+
91+
// Log request information - the request ID is only available upon connection.
92+
stream.on('connect', () => {
93+
if (log.logLevel <= vscode.LogLevel.Trace) {
94+
log.trace(`[anthropic] SEND messages.stream [${stream.request_id}]: ${JSON.stringify(body)}`);
95+
} else {
96+
const userMessages = body.messages.filter(m => m.role === 'user');
97+
const assistantMessages = body.messages.filter(m => m.role === 'assistant');
98+
log.debug(
99+
`[anthropic] SEND messages.stream [${stream.request_id}]: ` +
100+
`model: ${body.model}; ` +
101+
`cache options: ${cacheControlOptions ? JSON.stringify(cacheControlOptions) : 'default'}; ` +
102+
`tools: ${body.tools?.map(t => t.name).sort().join(', ') ?? 'none'}; ` +
103+
`tool choice: ${body.tool_choice ? JSON.stringify(body.tool_choice) : 'default'}; ` +
104+
`system chars: ${body.system ? JSON.stringify(body.system).length : 0}; ` +
105+
`user messages: ${userMessages.length}; ` +
106+
`user message characters: ${JSON.stringify(userMessages).length}; ` +
107+
`assistant messages: ${assistantMessages.length}; ` +
108+
`assistant message characters: ${JSON.stringify(assistantMessages).length}`
109+
);
110+
}
69111
});
70112

71113
token.onCancellationRequested(() => {
@@ -102,20 +144,31 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
102144
}
103145
throw error;
104146
}
147+
148+
// Log usage information.
149+
const message = await stream.finalMessage();
150+
if (log.logLevel <= vscode.LogLevel.Trace) {
151+
log.trace(`[anthropic] RECV messages.stream [${stream.request_id}]: ${JSON.stringify(message)}`);
152+
} else {
153+
log.debug(
154+
`[anthropic] RECV messages.stream [${stream.request_id}]: ` +
155+
`usage: ${JSON.stringify(message.usage)}`
156+
);
157+
}
105158
}
106159

107160
get providerName(): string {
108161
return AnthropicLanguageModel.source.provider.displayName;
109162
}
110163

111-
private onContentBlock(block: Anthropic.Messages.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
164+
private onContentBlock(block: Anthropic.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
112165
switch (block.type) {
113166
case 'tool_use':
114167
return this.onToolUseBlock(block, progress);
115168
}
116169
}
117170

118-
private onToolUseBlock(block: Anthropic.Messages.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
171+
private onToolUseBlock(block: Anthropic.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
119172
progress.report({
120173
index: 0,
121174
part: new vscode.LanguageModelToolCallPart(block.id, block.name, block.input as any),
@@ -170,6 +223,11 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
170223
}
171224
}
172225

226+
/**
 * Convert VS Code chat messages into Anthropic message params.
 *
 * The messages are first run through `processMessages`, and each resulting
 * message is then converted with `toAnthropicMessage`.
 *
 * @param messages The chat messages for the request.
 * @returns The converted messages, in the order produced by `processMessages`.
 */
function toAnthropicMessages(messages: vscode.LanguageModelChatMessage2[]): Anthropic.MessageParam[] {
    return processMessages(messages).map(toAnthropicMessage);
}
230+
173231
function toAnthropicMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam {
174232
switch (message.role) {
175233
case vscode.LanguageModelChatMessageRole.Assistant:
@@ -281,6 +339,18 @@ function languageModelImagePartToAnthropicImageBlock(part: LanguageModelImagePar
281339
};
282340
}
283341

342+
/**
 * Convert VS Code tool definitions into Anthropic tools, sorted by name.
 *
 * @param tools The tools offered for the request. The input array is not modified.
 * @returns A new array of Anthropic tools ordered by name; empty when `tools` is empty.
 */
function toAnthropicTools(tools: vscode.LanguageModelChatTool[]): Anthropic.ToolUnion[] {
    // Ensure a stable sort order for prompt caching. Names are compared by
    // UTF-16 code unit rather than with localeCompare, so the order does not
    // depend on the host's default locale.
    return tools
        .map(tool => toAnthropicTool(tool))
        .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
}
353+
284354
function toAnthropicTool(tool: vscode.LanguageModelChatTool): Anthropic.ToolUnion {
285355
const input_schema = tool.inputSchema as Anthropic.Tool.InputSchema ?? {
286356
type: 'object',
@@ -308,3 +378,32 @@ function toAnthropicToolChoice(toolMode: vscode.LanguageModelChatToolMode): Anth
308378
throw new Error(`Unsupported tool mode: ${toolMode}`);
309379
}
310380
}
381+
382+
/**
 * Convert a caller-supplied system prompt into Anthropic's `system` parameter.
 *
 * @param system The system prompt from the model options. Only strings are
 *   converted; any other value is passed through unchanged.
 * @param cacheSystem Whether to add a cache control point to the system prompt
 *   (default: true). Only applies when `system` is a non-empty string.
 * @returns A single text block for a non-empty string (with an ephemeral cache
 *   control point when `cacheSystem` is set), `undefined` for an empty string,
 *   or the original value for anything that is not a string.
 */
function toAnthropicSystem(system: unknown, cacheSystem = true): Anthropic.MessageCreateParams['system'] {
    if (typeof system !== 'string') {
        // Pass the system prompt through as-is.
        // We may pass an invalid system prompt; let Anthropic throw the error.
        return system as Anthropic.MessageCreateParams['system'];
    }

    if (system.length === 0) {
        // Nothing to send or cache: omit the system prompt rather than emit an
        // empty text block carrying a cache control point.
        return undefined;
    }

    if (cacheSystem) {
        log.debug(`[anthropic] Adding cache control point to system prompt`);
    }

    // The prompt is a single block, so it is also the block that receives the
    // cache control point.
    const anthropicSystem: Anthropic.MessageCreateParams['system'] = [
        cacheSystem
            ? { type: 'text', text: system, cache_control: { type: 'ephemeral' } }
            : { type: 'text', text: system },
    ];
    return anthropicSystem;
}
402+
403+
/**
 * Type guard for {@link CacheControlOptions}.
 *
 * @param options The value to check, typically taken from untyped model options.
 * @returns `true` when `options` is a non-null, non-array object whose `system`
 *   property is either absent or a boolean.
 */
function isCacheControlOptions(options: unknown): options is CacheControlOptions {
    // Arrays also report typeof 'object' but are never valid options.
    if (typeof options !== 'object' || options === null || Array.isArray(options)) {
        return false;
    }
    const { system } = options as CacheControlOptions;
    return system === undefined || typeof system === 'boolean';
}

extensions/positron-assistant/src/participants.ts

Lines changed: 58 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,9 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
309309
response.reference(llmsDocument.uri);
310310

311311
// Add the contents of the file to the prompt
312-
prompts.push(xml.node('instructions', llmsText));
312+
const instructionsNode = xml.node('instructions', llmsText);
313+
prompts.push(instructionsNode);
314+
log.debug(`[context] adding llms.txt context: ${llmsText.length} characters`);
313315
}
314316
}
315317

@@ -334,20 +336,21 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
334336
response.reference(value.uri);
335337

336338
// Add the visible region prompt.
337-
attachmentPrompts.push(xml.node('attachment', visibleText, {
339+
const rangeAttachmentNode = xml.node('attachment', visibleText, {
338340
filePath: path,
339341
description: 'Visible region of the active file',
340342
language: document.languageId,
341343
startLine: value.range.start.line + 1,
342344
endLine: value.range.end.line + 1,
343-
}));
344-
345-
// Add the full document text prompt.
346-
attachmentPrompts.push(xml.node('attachment', documentText, {
345+
});
346+
const documentAttachmentNode = xml.node('attachment', documentText, {
347347
filePath: path,
348348
description: 'Full contents of the active file',
349349
language: document.languageId,
350-
}));
350+
});
351+
attachmentPrompts.push(rangeAttachmentNode, documentAttachmentNode);
352+
log.debug(`[context] adding file range attachment context: ${rangeAttachmentNode.length} characters`);
353+
log.debug(`[context] adding file attachment context: ${documentAttachmentNode.length} characters`);
351354
} else if (value instanceof vscode.Uri) {
352355
const fileStat = await vscode.workspace.fs.stat(value);
353356
if (fileStat.type === vscode.FileType.Directory) {
@@ -369,10 +372,12 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
369372
// response.reference(value);
370373

371374
// Attach the folder's contents.
372-
attachmentPrompts.push(xml.node('attachment', entriesText, {
375+
const attachmentNode = xml.node('attachment', entriesText, {
373376
filePath: path,
374377
description: 'Contents of the directory',
375-
}));
378+
});
379+
attachmentPrompts.push(attachmentNode);
380+
log.debug(`[context] adding directory attachment context: ${attachmentNode.length} characters`);
376381
} else {
377382
// The user attached a file - usually a manually attached file in the workspace.
378383
const document = await vscode.workspace.openTextDocument(value);
@@ -383,11 +388,13 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
383388
response.reference(value);
384389

385390
// Attach the full document text.
386-
attachmentPrompts.push(xml.node('attachment', documentText, {
391+
const attachmentNode = xml.node('attachment', documentText, {
387392
filePath: path,
388393
description: 'Full contents of the file',
389394
language: document.languageId,
390-
}));
395+
});
396+
attachmentPrompts.push(attachmentNode);
397+
log.debug(`[context] adding file attachment context: ${attachmentNode.length} characters`);
391398
}
392399
} else if (value instanceof vscode.ChatReferenceBinaryData) {
393400
if (isChatImageMimeType(value.mimeType)) {
@@ -400,18 +407,20 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
400407
}
401408

402409
// Attach the image.
403-
attachmentPrompts.push(xml.leaf('img', {
410+
const imageNode = xml.leaf('img', {
404411
src: reference.name,
405-
}));
412+
});
413+
attachmentPrompts.push(imageNode);
414+
log.debug(`[context] adding image attachment context: ${data.length} bytes`);
406415

407416
userDataParts.push(
408417
vscode.LanguageModelDataPart.image(data, value.mimeType),
409418
);
410419
} else {
411-
console.warn(`Positron Assistant: Unsupported chat reference binary data type: ${typeof value}`);
420+
log.warn(`Unsupported chat reference binary data type: ${typeof value}`);
412421
}
413422
} else {
414-
console.warn(`Positron Assistant: Unsupported reference type: ${typeof value}`);
423+
log.warn(`Unsupported reference type: ${typeof value}`);
415424
}
416425
}
417426

@@ -429,15 +438,18 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
429438
const executions = positronContext.activeSession.executions
430439
.map((e) => xml.node('execution', JSON.stringify(e)))
431440
.join('\n');
432-
positronContextPrompts.push(
433-
xml.node('session',
434-
xml.node('executions', executions ?? ''), {
435-
description: 'Current active session',
436-
language: positronContext.activeSession.language,
437-
version: positronContext.activeSession.version,
438-
mode: positronContext.activeSession.mode,
439-
identifier: positronContext.activeSession.identifier,
440-
})
441+
const sessionNode = xml.node('session',
442+
xml.node('executions', executions ?? ''), {
443+
description: 'Current active session',
444+
language: positronContext.activeSession.language,
445+
version: positronContext.activeSession.version,
446+
mode: positronContext.activeSession.mode,
447+
identifier: positronContext.activeSession.identifier,
448+
});
449+
positronContextPrompts.push(sessionNode);
450+
log.debug(
451+
`[context] adding active ${positronContext.activeSession.mode} ${positronContext.activeSession.language} session context: ` +
452+
`${sessionNode.length} characters`
441453
);
442454
}
443455
if (positronContext.variables) {
@@ -447,33 +459,33 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
447459
const description = content.length > 0 ?
448460
'Variables defined in the current session' :
449461
'No variables defined in the current session';
450-
positronContextPrompts.push(
451-
xml.node('variables', content, {
452-
description,
453-
})
454-
);
462+
const variablesNode = xml.node('variables', content, {
463+
description,
464+
});
465+
positronContextPrompts.push(variablesNode);
466+
log.debug(`[context] adding variables context: ${variablesNode.length} characters`);
455467
}
456468
if (positronContext.shell) {
457-
positronContextPrompts.push(
458-
xml.node('shell', positronContext.shell, {
459-
description: 'Current active shell',
460-
})
461-
);
469+
const shellNode = xml.node('shell', positronContext.shell, {
470+
description: 'Current active shell',
471+
});
472+
positronContextPrompts.push(shellNode);
473+
log.debug(`[context] adding shell context: ${shellNode.length} characters`);
462474
}
463475
if (positronContext.plots && positronContext.plots.hasPlots) {
464-
positronContextPrompts.push(
465-
xml.node('plots', 'A plot is visible.')
466-
);
476+
const plotsNode = xml.node('plots', 'A plot is visible.');
477+
positronContextPrompts.push(plotsNode);
478+
log.debug(`[context] adding plots context: ${plotsNode.length} characters`);
467479
}
468480
if (positronContext.positronVersion) {
469-
positronContextPrompts.push(
470-
xml.node('version', `Positron version: ${positronContext.positronVersion}`),
471-
);
481+
const versionNode = xml.node('version', `Positron version: ${positronContext.positronVersion}`);
482+
positronContextPrompts.push(versionNode);
483+
log.debug(`[context] adding positron version context: ${versionNode.length} characters`);
472484
}
473485
if (positronContext.currentDate) {
474-
positronContextPrompts.push(
475-
xml.node('date', `Today's date is: ${positronContext.currentDate}`),
476-
);
486+
const dateNode = xml.node('date', `Today's date is: ${positronContext.currentDate}`);
487+
positronContextPrompts.push(dateNode);
488+
log.debug(`[context] adding date context: ${dateNode.length} characters`);
477489
}
478490
if (positronContextPrompts.length > 0) {
479491
prompts.push(xml.node('context', positronContextPrompts.join('\n\n')));
@@ -738,7 +750,7 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
738750
const selectedText = document.getText(selection);
739751
const documentText = document.getText();
740752
const filePath = uriToString(document.uri);
741-
return xml.node('editor',
753+
const editorNode = xml.node('editor',
742754
[
743755
xml.node('document', documentText, {
744756
description: 'Full contents of the active file',
@@ -756,6 +768,8 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
756768
documentOffset: document.offsetAt(selection.active),
757769
},
758770
);
771+
log.debug(`[context] adding editor context: ${editorNode.length} characters`);
772+
return editorNode;
759773
}
760774

761775
}

0 commit comments

Comments
 (0)