Skip to content

Assistant: Basic Anthropic prompt caching #8246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 109 additions & 10 deletions extensions/positron-assistant/src/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ import { ModelConfig } from './config';
import { isLanguageModelImagePart, LanguageModelImagePart } from './languageModelParts.js';
import { isChatImagePart, processMessages } from './utils.js';
import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js';
import { log } from './extension.js';

/**
* Options for controlling prompt-cache behavior in the Anthropic language model.
*
* Callers supply this object as `modelOptions.cacheControl`. A value that does not
* pass `isCacheControlOptions` is ignored, in which case the defaults below apply.
*/
export interface CacheControlOptions {
/**
* Whether to add an `ephemeral` cache control point to the system prompt.
* Treated as `true` when omitted. Only string system prompts are affected;
* a non-string system prompt is passed through to Anthropic unchanged.
*/
system?: boolean;
}


export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProvider {
name: string;
Expand Down Expand Up @@ -39,11 +49,14 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
},
};

constructor(private readonly _config: ModelConfig) {
constructor(
private readonly _config: ModelConfig,
client?: Anthropic,
) {
this.name = _config.name;
this.provider = _config.provider;
this.identifier = _config.id;
this._client = new Anthropic({
this._client = client ?? new Anthropic({
apiKey: _config.apiKey,
});
this.maxOutputTokens = _config.maxOutputTokens ?? DEFAULT_MAX_TOKEN_OUTPUT;
Expand All @@ -56,16 +69,45 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
progress: vscode.Progress<vscode.ChatResponseFragment2>,
token: vscode.CancellationToken
) {
const anthropicMessages = processMessages(messages).map(toAnthropicMessage);
const tools = options.tools?.map(tool => toAnthropicTool(tool));
const cacheControlOptions = isCacheControlOptions(options.modelOptions?.cacheControl)
? options.modelOptions.cacheControl
: undefined;
const tools = options.tools && toAnthropicTools(options.tools);
const tool_choice = options.toolMode && toAnthropicToolChoice(options.toolMode);
const stream = this._client.messages.stream({
const system = options.modelOptions?.system &&
toAnthropicSystem(options.modelOptions.system, cacheControlOptions?.system);
const anthropicMessages = toAnthropicMessages(messages);

const body: Anthropic.MessageStreamParams = {
model: this._config.model,
max_tokens: options.modelOptions?.maxTokens ?? this.maxOutputTokens,
messages: anthropicMessages,
tool_choice,
tools,
system: options.modelOptions?.system,
tool_choice,
system,
messages: anthropicMessages,
};
const stream = this._client.messages.stream(body);

// Log request information - the request ID is only available upon connection.
stream.on('connect', () => {
if (log.logLevel <= vscode.LogLevel.Trace) {
log.trace(`[anthropic] SEND messages.stream [${stream.request_id}]: ${JSON.stringify(body)}`);
} else {
const userMessages = body.messages.filter(m => m.role === 'user');
const assistantMessages = body.messages.filter(m => m.role === 'assistant');
log.debug(
`[anthropic] SEND messages.stream [${stream.request_id}]: ` +
`model: ${body.model}; ` +
`cache options: ${cacheControlOptions ? JSON.stringify(cacheControlOptions) : 'default'}; ` +
`tools: ${body.tools?.map(t => t.name).sort().join(', ') ?? 'none'}; ` +
`tool choice: ${body.tool_choice ? JSON.stringify(body.tool_choice) : 'default'}; ` +
`system chars: ${body.system ? JSON.stringify(body.system).length : 0}; ` +
`user messages: ${userMessages.length}; ` +
`user message characters: ${JSON.stringify(userMessages).length}; ` +
`assistant messages: ${assistantMessages.length}; ` +
`assistant message characters: ${JSON.stringify(assistantMessages).length}`
);
}
});

token.onCancellationRequested(() => {
Expand Down Expand Up @@ -102,20 +144,31 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
}
throw error;
}

// Log usage information.
const message = await stream.finalMessage();
if (log.logLevel <= vscode.LogLevel.Trace) {
log.trace(`[anthropic] RECV messages.stream [${stream.request_id}]: ${JSON.stringify(message)}`);
} else {
log.debug(
`[anthropic] RECV messages.stream [${stream.request_id}]: ` +
`usage: ${JSON.stringify(message.usage)}`
);
}
}

get providerName(): string {
return AnthropicLanguageModel.source.provider.displayName;
}

private onContentBlock(block: Anthropic.Messages.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
private onContentBlock(block: Anthropic.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
switch (block.type) {
case 'tool_use':
return this.onToolUseBlock(block, progress);
}
}

private onToolUseBlock(block: Anthropic.Messages.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
private onToolUseBlock(block: Anthropic.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
progress.report({
index: 0,
part: new vscode.LanguageModelToolCallPart(block.id, block.name, block.input as any),
Expand Down Expand Up @@ -170,6 +223,11 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
}
}

/**
 * Convert a list of VS Code chat messages into Anthropic message params.
 *
 * The messages are first run through `processMessages`, and each resulting
 * message is then converted with `toAnthropicMessage`.
 *
 * @param messages The chat messages to convert.
 * @returns The converted messages, in the order produced by `processMessages`.
 */
function toAnthropicMessages(messages: vscode.LanguageModelChatMessage2[]): Anthropic.MessageParam[] {
	return processMessages(messages).map(toAnthropicMessage);
}

function toAnthropicMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam {
switch (message.role) {
case vscode.LanguageModelChatMessageRole.Assistant:
Expand Down Expand Up @@ -281,6 +339,18 @@ function languageModelImagePartToAnthropicImageBlock(part: LanguageModelImagePar
};
}

/**
 * Convert VS Code tool definitions into Anthropic tools, ordered by name.
 *
 * The tool list is part of the request prefix used for prompt caching, so the
 * order must be identical for the same set of tools on every host.
 *
 * @param tools The tools offered to the model; may be empty.
 * @returns A new array of Anthropic tools sorted by name (empty if `tools` is empty).
 */
function toAnthropicTools(tools: vscode.LanguageModelChatTool[]): Anthropic.ToolUnion[] {
	const anthropicTools = tools.map(tool => toAnthropicTool(tool));

	// Ensure a stable sort order for prompt caching.
	// Compare by UTF-16 code units instead of `localeCompare`: without an explicit
	// locale, `localeCompare` follows the host's default locale and collation data,
	// so the same tools could be ordered differently on different machines.
	anthropicTools.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

	return anthropicTools;
}

function toAnthropicTool(tool: vscode.LanguageModelChatTool): Anthropic.ToolUnion {
const input_schema = tool.inputSchema as Anthropic.Tool.InputSchema ?? {
type: 'object',
Expand Down Expand Up @@ -308,3 +378,32 @@ function toAnthropicToolChoice(toolMode: vscode.LanguageModelChatToolMode): Anth
throw new Error(`Unsupported tool mode: ${toolMode}`);
}
}

/**
 * Build the Anthropic `system` request parameter from a caller-supplied system prompt.
 *
 * @param system The system prompt. A string is wrapped in a single text block;
 *   any other value is forwarded unchanged.
 * @param cacheSystem Whether to mark the system prompt block with an `ephemeral`
 *   cache control point (default: true). Only applies to string prompts.
 * @returns The value to send as the request's `system` field.
 */
function toAnthropicSystem(system: unknown, cacheSystem = true): Anthropic.MessageCreateParams['system'] {
	if (typeof system !== 'string') {
		// Not a plain string: hand it over untouched.
		// It may be an invalid system prompt; let Anthropic throw the error.
		return system as Anthropic.MessageCreateParams['system'];
	}

	const systemBlocks: Anthropic.MessageCreateParams['system'] = [{
		type: 'text',
		text: system,
	}];

	if (cacheSystem) {
		// The array holds exactly one block, so the first block is also the last;
		// the cache control point goes on it.
		const [promptBlock] = systemBlocks;
		promptBlock.cache_control = { type: 'ephemeral' };
		log.debug(`[anthropic] Adding cache control point to system prompt`);
	}

	return systemBlocks;
}

/**
 * Type guard for {@link CacheControlOptions}.
 *
 * Accepts any non-null, non-array object whose `system` property is either
 * absent/`undefined` or a boolean. Other properties are not inspected.
 *
 * @param options The value to check (typically untyped model options).
 * @returns `true` if `options` can be used as `CacheControlOptions`.
 */
function isCacheControlOptions(options: unknown): options is CacheControlOptions {
	// `typeof null` and `typeof []` are both 'object', so exclude them explicitly;
	// an array is never a valid options bag even though `[].system` is undefined.
	if (typeof options !== 'object' || options === null || Array.isArray(options)) {
		return false;
	}
	const { system } = options as CacheControlOptions;
	return system === undefined || typeof system === 'boolean';
}
103 changes: 59 additions & 44 deletions extensions/positron-assistant/src/participants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { PositronAssistantToolName } from './types.js';
import { StreamingTagLexer } from './streamingTagLexer.js';
import { ReplaceStringProcessor } from './replaceStringProcessor.js';
import { ReplaceSelectionProcessor } from './replaceSelectionProcessor.js';
import { log } from './extension.js';

export enum ParticipantID {
/** The participant used in the chat pane in Ask mode. */
Expand Down Expand Up @@ -297,7 +298,9 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
response.reference(llmsDocument.uri);

// Add the contents of the file to the prompt
prompts.push(xml.node('instructions', llmsText));
const instructionsNode = xml.node('instructions', llmsText);
prompts.push(instructionsNode);
log.debug(`[context] adding llms.txt context: ${llmsText.length} characters`);
}
}

Expand All @@ -322,20 +325,21 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
response.reference(value.uri);

// Add the visible region prompt.
attachmentPrompts.push(xml.node('attachment', visibleText, {
const rangeAttachmentNode = xml.node('attachment', visibleText, {
filePath: path,
description: 'Visible region of the active file',
language: document.languageId,
startLine: value.range.start.line + 1,
endLine: value.range.end.line + 1,
}));

// Add the full document text prompt.
attachmentPrompts.push(xml.node('attachment', documentText, {
});
const documentAttachmentNode = xml.node('attachment', documentText, {
filePath: path,
description: 'Full contents of the active file',
language: document.languageId,
}));
});
attachmentPrompts.push(rangeAttachmentNode, documentAttachmentNode);
log.debug(`[context] adding file range attachment context: ${rangeAttachmentNode.length} characters`);
log.debug(`[context] adding file attachment context: ${documentAttachmentNode.length} characters`);
} else if (value instanceof vscode.Uri) {
const fileStat = await vscode.workspace.fs.stat(value);
if (fileStat.type === vscode.FileType.Directory) {
Expand All @@ -357,10 +361,12 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
// response.reference(value);

// Attach the folder's contents.
attachmentPrompts.push(xml.node('attachment', entriesText, {
const attachmentNode = xml.node('attachment', entriesText, {
filePath: path,
description: 'Contents of the directory',
}));
});
attachmentPrompts.push(attachmentNode);
log.debug(`[context] adding directory attachment context: ${attachmentNode.length} characters`);
} else {
// The user attached a file - usually a manually attached file in the workspace.
const document = await vscode.workspace.openTextDocument(value);
Expand All @@ -371,11 +377,13 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
response.reference(value);

// Attach the full document text.
attachmentPrompts.push(xml.node('attachment', documentText, {
const attachmentNode = xml.node('attachment', documentText, {
filePath: path,
description: 'Full contents of the file',
language: document.languageId,
}));
});
attachmentPrompts.push(attachmentNode);
log.debug(`[context] adding file attachment context: ${attachmentNode.length} characters`);
}
} else if (value instanceof vscode.ChatReferenceBinaryData) {
if (isChatImageMimeType(value.mimeType)) {
Expand All @@ -388,18 +396,20 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
}

// Attach the image.
attachmentPrompts.push(xml.leaf('img', {
const imageNode = xml.leaf('img', {
src: reference.name,
}));
});
attachmentPrompts.push(imageNode);
log.debug(`[context] adding image attachment context: ${data.length} bytes`);

userDataParts.push(
vscode.LanguageModelDataPart.image(data, value.mimeType),
);
} else {
console.warn(`Positron Assistant: Unsupported chat reference binary data type: ${typeof value}`);
log.warn(`Unsupported chat reference binary data type: ${typeof value}`);
}
} else {
console.warn(`Positron Assistant: Unsupported reference type: ${typeof value}`);
log.warn(`Unsupported reference type: ${typeof value}`);
}
}

Expand All @@ -417,15 +427,18 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
const executions = positronContext.activeSession.executions
.map((e) => xml.node('execution', JSON.stringify(e)))
.join('\n');
positronContextPrompts.push(
xml.node('session',
xml.node('executions', executions ?? ''), {
description: 'Current active session',
language: positronContext.activeSession.language,
version: positronContext.activeSession.version,
mode: positronContext.activeSession.mode,
identifier: positronContext.activeSession.identifier,
})
const sessionNode = xml.node('session',
xml.node('executions', executions ?? ''), {
description: 'Current active session',
language: positronContext.activeSession.language,
version: positronContext.activeSession.version,
mode: positronContext.activeSession.mode,
identifier: positronContext.activeSession.identifier,
});
positronContextPrompts.push(sessionNode);
log.debug(
`[context] adding active ${positronContext.activeSession.mode} ${positronContext.activeSession.language} session context: ` +
`${sessionNode.length} characters`
);
}
if (positronContext.variables) {
Expand All @@ -435,33 +448,33 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
const description = content.length > 0 ?
'Variables defined in the current session' :
'No variables defined in the current session';
positronContextPrompts.push(
xml.node('variables', content, {
description,
})
);
const variablesNode = xml.node('variables', content, {
description,
});
positronContextPrompts.push(variablesNode);
log.debug(`[context] adding variables context: ${variablesNode.length} characters`);
}
if (positronContext.shell) {
positronContextPrompts.push(
xml.node('shell', positronContext.shell, {
description: 'Current active shell',
})
);
const shellNode = xml.node('shell', positronContext.shell, {
description: 'Current active shell',
});
positronContextPrompts.push(shellNode);
log.debug(`[context] adding shell context: ${shellNode.length} characters`);
}
if (positronContext.plots && positronContext.plots.hasPlots) {
positronContextPrompts.push(
xml.node('plots', 'A plot is visible.')
);
const plotsNode = xml.node('plots', 'A plot is visible.');
positronContextPrompts.push(plotsNode);
log.debug(`[context] adding plots context: ${plotsNode.length} characters`);
}
if (positronContext.positronVersion) {
positronContextPrompts.push(
xml.node('version', `Positron version: ${positronContext.positronVersion}`),
);
const versionNode = xml.node('version', `Positron version: ${positronContext.positronVersion}`);
positronContextPrompts.push(versionNode);
log.debug(`[context] adding positron version context: ${versionNode.length} characters`);
}
if (positronContext.currentDate) {
positronContextPrompts.push(
xml.node('date', `Today's date is: ${positronContext.currentDate}`),
);
const dateNode = xml.node('date', `Today's date is: ${positronContext.currentDate}`);
positronContextPrompts.push(dateNode);
log.debug(`[context] adding date context: ${dateNode.length} characters`);
}
if (positronContextPrompts.length > 0) {
prompts.push(xml.node('context', positronContextPrompts.join('\n\n')));
Expand Down Expand Up @@ -731,7 +744,7 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
const selectedText = document.getText(selection);
const documentText = document.getText();
const filePath = uriToString(document.uri);
return xml.node('editor',
const editorNode = xml.node('editor',
[
xml.node('document', documentText, {
description: 'Full contents of the active file',
Expand All @@ -749,6 +762,8 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
documentOffset: document.offsetAt(selection.active),
},
);
log.debug(`[context] adding editor context: ${editorNode.length} characters`);
return editorNode;
}

}
Expand Down
Loading