diff --git a/extensions/positron-assistant/src/anthropic.ts b/extensions/positron-assistant/src/anthropic.ts
index 5b9aeec0e1a..ff2947e2412 100644
--- a/extensions/positron-assistant/src/anthropic.ts
+++ b/extensions/positron-assistant/src/anthropic.ts
@@ -10,6 +10,16 @@ import { ModelConfig } from './config';
 import { isLanguageModelImagePart, LanguageModelImagePart } from './languageModelParts.js';
 import { isChatImagePart, processMessages } from './utils.js';
 import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js';
+import { log } from './extension.js';
+
+/**
+ * Options for controlling cache behavior in the Anthropic language model.
+ */
+export interface CacheControlOptions {
+	/** Add a cache control point to the system prompt (default: true). */
+	system?: boolean;
+}
+
 export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProvider {
 	name: string;
@@ -39,11 +49,14 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 		},
 	};
 
-	constructor(private readonly _config: ModelConfig) {
+	constructor(
+		private readonly _config: ModelConfig,
+		client?: Anthropic,
+	) {
 		this.name = _config.name;
 		this.provider = _config.provider;
 		this.identifier = _config.id;
-		this._client = new Anthropic({
+		this._client = client ?? new Anthropic({
 			apiKey: _config.apiKey,
 		});
 		this.maxOutputTokens = _config.maxOutputTokens ?? DEFAULT_MAX_TOKEN_OUTPUT;
@@ -56,16 +69,45 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 		progress: vscode.Progress<vscode.ChatResponseFragment2>,
 		token: vscode.CancellationToken
 	) {
-		const anthropicMessages = processMessages(messages).map(toAnthropicMessage);
-		const tools = options.tools?.map(tool => toAnthropicTool(tool));
+		const cacheControlOptions = isCacheControlOptions(options.modelOptions?.cacheControl)
+			? options.modelOptions.cacheControl
+			: undefined;
+		const tools = options.tools && toAnthropicTools(options.tools);
 		const tool_choice = options.toolMode && toAnthropicToolChoice(options.toolMode);
-		const stream = this._client.messages.stream({
+		const system = options.modelOptions?.system &&
+			toAnthropicSystem(options.modelOptions.system, cacheControlOptions?.system);
+		const anthropicMessages = toAnthropicMessages(messages);
+
+		const body: Anthropic.MessageStreamParams = {
 			model: this._config.model,
 			max_tokens: options.modelOptions?.maxTokens ?? this.maxOutputTokens,
-			messages: anthropicMessages,
-			tool_choice,
 			tools,
-			system: options.modelOptions?.system,
+			tool_choice,
+			system,
+			messages: anthropicMessages,
+		};
+		const stream = this._client.messages.stream(body);
+
+		// Log request information - the request ID is only available upon connection.
+		stream.on('connect', () => {
+			if (log.logLevel <= vscode.LogLevel.Trace) {
+				log.trace(`[anthropic] SEND messages.stream [${stream.request_id}]: ${JSON.stringify(body)}`);
+			} else {
+				const userMessages = body.messages.filter(m => m.role === 'user');
+				const assistantMessages = body.messages.filter(m => m.role === 'assistant');
+				log.debug(
+					`[anthropic] SEND messages.stream [${stream.request_id}]: ` +
+					`model: ${body.model}; ` +
+					`cache options: ${cacheControlOptions ? JSON.stringify(cacheControlOptions) : 'default'}; ` +
+					`tools: ${body.tools?.map(t => t.name).sort().join(', ') ?? 'none'}; ` +
+					`tool choice: ${body.tool_choice ? JSON.stringify(body.tool_choice) : 'default'}; ` +
+					`system chars: ${body.system ? JSON.stringify(body.system).length : 0}; ` +
+					`user messages: ${userMessages.length}; ` +
+					`user message characters: ${JSON.stringify(userMessages).length}; ` +
+					`assistant messages: ${assistantMessages.length}; ` +
+					`assistant message characters: ${JSON.stringify(assistantMessages).length}`
+				);
+			}
 		});
 
 		token.onCancellationRequested(() => {
@@ -102,20 +144,31 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 			}
 			throw error;
 		}
+
+		// Log usage information.
+		const message = await stream.finalMessage();
+		if (log.logLevel <= vscode.LogLevel.Trace) {
+			log.trace(`[anthropic] RECV messages.stream [${stream.request_id}]: ${JSON.stringify(message)}`);
+		} else {
+			log.debug(
+				`[anthropic] RECV messages.stream [${stream.request_id}]: ` +
+				`usage: ${JSON.stringify(message.usage)}`
+			);
+		}
 	}
 
 	get providerName(): string {
 		return AnthropicLanguageModel.source.provider.displayName;
 	}
 
-	private onContentBlock(block: Anthropic.Messages.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
+	private onContentBlock(block: Anthropic.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
 		switch (block.type) {
 			case 'tool_use':
 				return this.onToolUseBlock(block, progress);
 		}
 	}
 
-	private onToolUseBlock(block: Anthropic.Messages.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
+	private onToolUseBlock(block: Anthropic.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
 		progress.report({
 			index: 0,
 			part: new vscode.LanguageModelToolCallPart(block.id, block.name, block.input as any),
@@ -170,6 +223,11 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 	}
 }
 
+function toAnthropicMessages(messages: vscode.LanguageModelChatMessage2[]): Anthropic.MessageParam[] {
+	const anthropicMessages = processMessages(messages).map(toAnthropicMessage);
+	return anthropicMessages;
+}
+
 function toAnthropicMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam {
 	switch (message.role) {
 		case vscode.LanguageModelChatMessageRole.Assistant:
@@ -281,6 +339,18 @@ function languageModelImagePartToAnthropicImageBlock(part: LanguageModelImagePar
 	};
 }
 
+function toAnthropicTools(tools: vscode.LanguageModelChatTool[]): Anthropic.ToolUnion[] {
+	if (tools.length === 0) {
+		return [];
+	}
+	const anthropicTools = tools.map(tool => toAnthropicTool(tool));
+
+	// Ensure a stable sort order for prompt caching.
+	anthropicTools.sort((a, b) => a.name.localeCompare(b.name));
+
+	return anthropicTools;
+}
+
 function toAnthropicTool(tool: vscode.LanguageModelChatTool): Anthropic.ToolUnion {
 	const input_schema = tool.inputSchema as Anthropic.Tool.InputSchema ?? {
 		type: 'object',
@@ -308,3 +378,32 @@ function toAnthropicToolChoice(toolMode: vscode.LanguageModelChatToolMode): Anth
 			throw new Error(`Unsupported tool mode: ${toolMode}`);
 	}
 }
+
+function toAnthropicSystem(system: unknown, cacheSystem = true): Anthropic.MessageCreateParams['system'] {
+	if (typeof system === 'string') {
+		const anthropicSystem: Anthropic.MessageCreateParams['system'] = [{
+			type: 'text',
+			text: system,
+		}];
+
+		if (cacheSystem) {
+			// Add a cache control point to the last system prompt block.
+			const lastSystemBlock = anthropicSystem[anthropicSystem.length - 1];
+			lastSystemBlock.cache_control = { type: 'ephemeral' };
+			log.debug(`[anthropic] Adding cache control point to system prompt`);
+		}
+
+		return anthropicSystem;
+	}
+	// Pass the system prompt through as-is.
+	// We may pass an invalid system prompt; let Anthropic throw the error.
+	return system as Anthropic.MessageCreateParams['system'];
+}
+
+function isCacheControlOptions(options: unknown): options is CacheControlOptions {
+	if (typeof options !== 'object' || options === null) {
+		return false;
+	}
+	const cacheControlOptions = options as CacheControlOptions;
+	return cacheControlOptions.system === undefined || typeof cacheControlOptions.system === 'boolean';
+}
diff --git a/extensions/positron-assistant/src/participants.ts b/extensions/positron-assistant/src/participants.ts
index 74d3a5b4a64..1c2cf772c04 100644
--- a/extensions/positron-assistant/src/participants.ts
+++ b/extensions/positron-assistant/src/participants.ts
@@ -16,6 +16,7 @@ import { PositronAssistantToolName } from './types.js';
 import { StreamingTagLexer } from './streamingTagLexer.js';
 import { ReplaceStringProcessor } from './replaceStringProcessor.js';
 import { ReplaceSelectionProcessor } from './replaceSelectionProcessor.js';
+import { log } from './extension.js';
 
 export enum ParticipantID {
 	/** The participant used in the chat pane in Ask mode. */
@@ -297,7 +298,9 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 			response.reference(llmsDocument.uri);
 
 			// Add the contents of the file to the prompt
-			prompts.push(xml.node('instructions', llmsText));
+			const instructionsNode = xml.node('instructions', llmsText);
+			prompts.push(instructionsNode);
+			log.debug(`[context] adding llms.txt context: ${llmsText.length} characters`);
 		}
 	}
 
@@ -322,20 +325,21 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 				response.reference(value.uri);
 
 				// Add the visible region prompt.
-				attachmentPrompts.push(xml.node('attachment', visibleText, {
+				const rangeAttachmentNode = xml.node('attachment', visibleText, {
 					filePath: path,
 					description: 'Visible region of the active file',
 					language: document.languageId,
 					startLine: value.range.start.line + 1,
 					endLine: value.range.end.line + 1,
-				}));
-
-				// Add the full document text prompt.
-				attachmentPrompts.push(xml.node('attachment', documentText, {
+				});
+				const documentAttachmentNode = xml.node('attachment', documentText, {
 					filePath: path,
 					description: 'Full contents of the active file',
 					language: document.languageId,
-				}));
+				});
+				attachmentPrompts.push(rangeAttachmentNode, documentAttachmentNode);
+				log.debug(`[context] adding file range attachment context: ${rangeAttachmentNode.length} characters`);
+				log.debug(`[context] adding file attachment context: ${documentAttachmentNode.length} characters`);
 			} else if (value instanceof vscode.Uri) {
 				const fileStat = await vscode.workspace.fs.stat(value);
 				if (fileStat.type === vscode.FileType.Directory) {
@@ -357,10 +361,12 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 					// response.reference(value);
 
 					// Attach the folder's contents.
-					attachmentPrompts.push(xml.node('attachment', entriesText, {
+					const attachmentNode = xml.node('attachment', entriesText, {
 						filePath: path,
 						description: 'Contents of the directory',
-					}));
+					});
+					attachmentPrompts.push(attachmentNode);
+					log.debug(`[context] adding directory attachment context: ${attachmentNode.length} characters`);
 				} else {
 					// The user attached a file - usually a manually attached file in the workspace.
 					const document = await vscode.workspace.openTextDocument(value);
@@ -371,11 +377,13 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 					response.reference(value);
 
 					// Attach the full document text.
-					attachmentPrompts.push(xml.node('attachment', documentText, {
+					const attachmentNode = xml.node('attachment', documentText, {
 						filePath: path,
 						description: 'Full contents of the file',
 						language: document.languageId,
-					}));
+					});
+					attachmentPrompts.push(attachmentNode);
+					log.debug(`[context] adding file attachment context: ${attachmentNode.length} characters`);
 				}
 			} else if (value instanceof vscode.ChatReferenceBinaryData) {
 				if (isChatImageMimeType(value.mimeType)) {
@@ -388,18 +396,20 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 					}
 
 					// Attach the image.
-					attachmentPrompts.push(xml.leaf('img', {
+					const imageNode = xml.leaf('img', {
 						src: reference.name,
-					}));
+					});
+					attachmentPrompts.push(imageNode);
+					log.debug(`[context] adding image attachment context: ${data.length} bytes`);
 
 					userDataParts.push(
 						vscode.LanguageModelDataPart.image(data, value.mimeType),
 					);
 				} else {
-					console.warn(`Positron Assistant: Unsupported chat reference binary data type: ${typeof value}`);
+					log.warn(`Unsupported chat reference binary data type: ${typeof value}`);
 				}
 			} else {
-				console.warn(`Positron Assistant: Unsupported reference type: ${typeof value}`);
+				log.warn(`Unsupported reference type: ${typeof value}`);
 			}
 		}
 
@@ -417,15 +427,18 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 			const executions = positronContext.activeSession.executions
 				.map((e) => xml.node('execution', JSON.stringify(e)))
 				.join('\n');
-			positronContextPrompts.push(
-				xml.node('session',
-					xml.node('executions', executions ?? ''), {
-					description: 'Current active session',
-					language: positronContext.activeSession.language,
-					version: positronContext.activeSession.version,
-					mode: positronContext.activeSession.mode,
-					identifier: positronContext.activeSession.identifier,
-				})
+			const sessionNode = xml.node('session',
+				xml.node('executions', executions ?? ''), {
+				description: 'Current active session',
+				language: positronContext.activeSession.language,
+				version: positronContext.activeSession.version,
+				mode: positronContext.activeSession.mode,
+				identifier: positronContext.activeSession.identifier,
+			});
+			positronContextPrompts.push(sessionNode);
+			log.debug(
+				`[context] adding active ${positronContext.activeSession.mode} ${positronContext.activeSession.language} session context: ` +
+				`${sessionNode.length} characters`
 			);
 		}
 		if (positronContext.variables) {
@@ -435,33 +448,33 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 			const description = content.length > 0 ?
 				'Variables defined in the current session' :
 				'No variables defined in the current session';
-			positronContextPrompts.push(
-				xml.node('variables', content, {
-					description,
-				})
-			);
+			const variablesNode = xml.node('variables', content, {
+				description,
+			});
+			positronContextPrompts.push(variablesNode);
+			log.debug(`[context] adding variables context: ${variablesNode.length} characters`);
 		}
 		if (positronContext.shell) {
-			positronContextPrompts.push(
-				xml.node('shell', positronContext.shell, {
-					description: 'Current active shell',
-				})
-			);
+			const shellNode = xml.node('shell', positronContext.shell, {
+				description: 'Current active shell',
+			});
+			positronContextPrompts.push(shellNode);
+			log.debug(`[context] adding shell context: ${shellNode.length} characters`);
 		}
 		if (positronContext.plots && positronContext.plots.hasPlots) {
-			positronContextPrompts.push(
-				xml.node('plots', 'A plot is visible.')
-			);
+			const plotsNode = xml.node('plots', 'A plot is visible.');
+			positronContextPrompts.push(plotsNode);
+			log.debug(`[context] adding plots context: ${plotsNode.length} characters`);
 		}
 		if (positronContext.positronVersion) {
-			positronContextPrompts.push(
-				xml.node('version', `Positron version: ${positronContext.positronVersion}`),
-			);
+			const versionNode = xml.node('version', `Positron version: ${positronContext.positronVersion}`);
+			positronContextPrompts.push(versionNode);
+			log.debug(`[context] adding positron version context: ${versionNode.length} characters`);
 		}
 		if (positronContext.currentDate) {
-			positronContextPrompts.push(
-				xml.node('date', `Today's date is: ${positronContext.currentDate}`),
-			);
+			const dateNode = xml.node('date', `Today's date is: ${positronContext.currentDate}`);
+			positronContextPrompts.push(dateNode);
+			log.debug(`[context] adding date context: ${dateNode.length} characters`);
 		}
 		if (positronContextPrompts.length > 0) {
 			prompts.push(xml.node('context', positronContextPrompts.join('\n\n')));
@@ -731,7 +744,7 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
 		const selectedText = document.getText(selection);
 		const documentText = document.getText();
 		const filePath = uriToString(document.uri);
-		return xml.node('editor',
+		const editorNode = xml.node('editor',
 			[
 				xml.node('document', documentText, {
 					description: 'Full contents of the active file',
@@ -749,6 +762,8 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
 				documentOffset: document.offsetAt(selection.active),
 			},
 		);
+		log.debug(`[context] adding editor context: ${editorNode.length} characters`);
+		return editorNode;
 	}
 }
 
diff --git a/extensions/positron-assistant/src/test/anthropic.test.ts b/extensions/positron-assistant/src/test/anthropic.test.ts
index 308b7cae4c0..a7dc308c2c8 100644
--- a/extensions/positron-assistant/src/test/anthropic.test.ts
+++ b/extensions/positron-assistant/src/test/anthropic.test.ts
@@ -7,9 +7,26 @@ import * as assert from 'assert';
 import * as positron from 'positron';
 import * as vscode from 'vscode';
 import * as sinon from 'sinon';
-import { AnthropicLanguageModel } from '../anthropic';
+import { AnthropicLanguageModel, CacheControlOptions } from '../anthropic';
 import { ModelConfig } from '../config';
 import { EMPTY_TOOL_RESULT_PLACEHOLDER } from '../utils.js';
+import Anthropic from '@anthropic-ai/sdk';
+import { MessageStream } from '@anthropic-ai/sdk/lib/MessageStream.js';
+import { mock } from './utils.js';
+
+class MockAnthropicClient {
+	messages = {
+		stream: sinon.stub<
+			Parameters<Anthropic['messages']['stream']>,
+			ReturnType<Anthropic['messages']['stream']>
+		>().returns(mock<MessageStream>({
+			on: (event, listener) => mock({}),
+			abort: () => { },
+			done: () => Promise.resolve(),
+			finalMessage: () => Promise.resolve(mock({})),
+		}))
+	};
+}
 
 type ChatMessageValidateInfo = {
 	testName: string;
@@ -19,21 +36,13 @@ type ChatMessageValidateInfo = {
 
 suite('AnthropicLanguageModel', () => {
 	let model: AnthropicLanguageModel;
-	let mockClient: any;
+	let mockClient: MockAnthropicClient;
 	let mockProgress: vscode.Progress<vscode.ChatResponseFragment2>;
 	let mockCancellationToken: vscode.CancellationToken;
 
 	setup(() => {
 		// Create a mock Anthropic client
-		mockClient = {
-			messages: {
-				stream: sinon.stub().returns({
-					on: sinon.stub(),
-					abort: sinon.stub(),
-					done: sinon.stub().resolves()
-				})
-			}
-		};
+		mockClient = new MockAnthropicClient();
 
 		// Create a mock configuration
 		const config: ModelConfig = {
@@ -46,10 +55,7 @@ suite('AnthropicLanguageModel', () => {
 		};
 
 		// Create an instance of the AnthropicLanguageModel
-		model = new AnthropicLanguageModel(config);
-
-		// Replace the client with our mock
-		(model as any)._client = mockClient;
+		model = new AnthropicLanguageModel(config, mockClient as unknown as Anthropic);
 
 		// Create mock progress
 		mockProgress = {
@@ -232,11 +238,134 @@ suite('AnthropicLanguageModel', () => {
 				const streamCall = mockClient.messages.stream.getCall(0);
 				assert.ok(streamCall, 'Stream method was not called');
 
-				const messagesPassedToAnthropicClient: vscode.LanguageModelChatMessage2[] = streamCall.args[0].messages;
+				const messagesPassedToAnthropicClient = streamCall.args[0].messages;
 				assert.strictEqual(messagesPassedToAnthropicClient.length, 1, 'Exactly one message should be passed to the Anthropic client');
+				assert.ok(typeof messagesPassedToAnthropicClient[0].content !== 'string', 'Expected a content block object, got a string');
 
 				testCase.validate(messagesPassedToAnthropicClient[0].content);
 			});
 		});
 	});
+
+	test('provideLanguageModelResponse cache_control default behavior', async () => {
+		const toolA = {
+			name: 'toolA',
+			description: 'Tool A',
+			inputSchema: { type: 'object' as const, properties: {} }
+		} satisfies vscode.LanguageModelChatTool;
+		const toolB = {
+			name: 'toolB',
+			description: 'Tool B',
+			inputSchema: { type: 'object' as const, properties: {} }
+		} satisfies vscode.LanguageModelChatTool;
+		const system = 'System prompt';
+
+		// Call the method under test.
+		await model.provideLanguageModelResponse(
+			[
+				vscode.LanguageModelChatMessage.User('Hi'),
+				vscode.LanguageModelChatMessage.User('Bye'),
+			],
+			{
+				// Define the request tools, not sorted by name, so we can test sorting behavior.
+				tools: [toolB, toolA],
+				modelOptions: { system },
+			},
+			'test-extension',
+			mockProgress,
+			mockCancellationToken
+		);
+
+		sinon.assert.calledOnce(mockClient.messages.stream);
+		const body = mockClient.messages.stream.getCall(0).args[0];
+
+		assert.deepStrictEqual(body.tools, [
+			{
+				name: toolA.name,
+				description: toolA.description,
+				input_schema: toolA.inputSchema,
+			},
+			{
+				name: toolB.name,
+				description: toolB.description,
+				input_schema: toolB.inputSchema,
+			},
+		] satisfies Anthropic.ToolUnion[], 'Unexpected tools in request body');
+
+		assert.deepStrictEqual(body.system, [
+			{
+				type: 'text',
+				text: system,
+				cache_control: { type: 'ephemeral' },
+			},
+		] satisfies Anthropic.TextBlockParam[], 'Unexpected system prompt in request body');
+
+		assert.deepStrictEqual(body.messages, [
+			{ role: 'user', content: [{ type: 'text', text: 'Hi' }] },
+			{ role: 'user', content: [{ type: 'text', text: 'Bye' }] },
+		] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body');
+	});
+
+	test('provideLanguageModelResponse cache_control all disabled', async () => {
+		const toolA = {
+			name: 'toolA',
+			description: 'Tool A',
+			inputSchema: { type: 'object' as const, properties: {} }
+		} satisfies vscode.LanguageModelChatTool;
+		const toolB = {
+			name: 'toolB',
+			description: 'Tool B',
+			inputSchema: { type: 'object' as const, properties: {} }
+		} satisfies vscode.LanguageModelChatTool;
+		const system = 'System prompt';
+
+		// Call the method under test with caching of the system prompt disabled.
+		await model.provideLanguageModelResponse(
+			[
+				vscode.LanguageModelChatMessage.User('Hi'),
+				vscode.LanguageModelChatMessage.User('Bye'),
+			],
+			{
+				// Define the request tools, not sorted by name, so we can test sorting behavior.
+				tools: [toolB, toolA],
+				modelOptions: {
+					system,
+					cacheControl: {
+						system: false,
+					} satisfies CacheControlOptions,
+				},
+			},
+			'test-extension',
+			mockProgress,
+			mockCancellationToken
+		);
+
+		sinon.assert.calledOnce(mockClient.messages.stream);
+		const body = mockClient.messages.stream.getCall(0).args[0];
+
+		assert.deepStrictEqual(body.tools, [
+			{
+				name: toolA.name,
+				description: toolA.description,
+				input_schema: toolA.inputSchema,
+			},
+			{
+				name: toolB.name,
+				description: toolB.description,
+				input_schema: toolB.inputSchema,
+			},
+		] satisfies Anthropic.ToolUnion[], 'Unexpected tools in request body');
+
+		assert.deepStrictEqual(body.system, [
+			{
+				type: 'text',
+				text: system,
+			},
+		] satisfies Anthropic.TextBlockParam[], 'Unexpected system prompt in request body');
+
+		assert.deepStrictEqual(body.messages, [
+			{ role: 'user', content: [{ type: 'text', text: 'Hi' }] },
+			{ role: 'user', content: [{ type: 'text', text: 'Bye' }] },
+		] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body');
+	});
 });