posit-dev · seeM · Jun 25, 2025 · Jun 23, 2025 · Jun 24, 2025 · Jun 24, 2025
diff --git a/extensions/positron-assistant/src/anthropic.ts b/extensions/positron-assistant/src/anthropic.ts
@@ -10,6 +10,16 @@ import { ModelConfig } from './config';
 import { isLanguageModelImagePart, LanguageModelImagePart } from './languageModelParts.js';
 import { isChatImagePart, processMessages } from './utils.js';
 import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js';
+import { log } from './extension.js';
+
+/**
+ * Options for controlling prompt caching in requests made by the Anthropic language model.
+ */
+export interface CacheControlOptions {
+	/** Add a cache control point to the system prompt (default: true). Only applied to string system prompts. */
+	system?: boolean;
+}
+
 
 export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProvider {
 	name: string;
@@ -39,11 +49,14 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 		},
 	};
 
-	constructor(private readonly _config: ModelConfig) {
+	constructor(
+		private readonly _config: ModelConfig,
+		client?: Anthropic,
+	) {
 		this.name = _config.name;
 		this.provider = _config.provider;
 		this.identifier = _config.id;
-		this._client = new Anthropic({
+		this._client = client ?? new Anthropic({
 			apiKey: _config.apiKey,
 		});
 		this.maxOutputTokens = _config.maxOutputTokens ?? DEFAULT_MAX_TOKEN_OUTPUT;
@@ -56,16 +69,45 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 		progress: vscode.Progress<vscode.ChatResponseFragment2>,
 		token: vscode.CancellationToken
 	) {
-		const anthropicMessages = processMessages(messages).map(toAnthropicMessage);
-		const tools = options.tools?.map(tool => toAnthropicTool(tool));
+		const cacheControlOptions = isCacheControlOptions(options.modelOptions?.cacheControl)
+			? options.modelOptions.cacheControl
+			: undefined;
+		const tools = options.tools && toAnthropicTools(options.tools);
 		const tool_choice = options.toolMode && toAnthropicToolChoice(options.toolMode);
-		const stream = this._client.messages.stream({
+		const system = options.modelOptions?.system &&
+			toAnthropicSystem(options.modelOptions.system, cacheControlOptions?.system);
+		const anthropicMessages = toAnthropicMessages(messages);
+
+		const body: Anthropic.MessageStreamParams = {
 			model: this._config.model,
 			max_tokens: options.modelOptions?.maxTokens ?? this.maxOutputTokens,
-			messages: anthropicMessages,
-			tool_choice,
 			tools,
-			system: options.modelOptions?.system,
+			tool_choice,
+			system,
+			messages: anthropicMessages,
+		};
+		const stream = this._client.messages.stream(body);
+
+		// Log request information - the request ID is only available upon connection.
+		stream.on('connect', () => {
+			if (log.logLevel <= vscode.LogLevel.Trace) {
+				log.trace(`[anthropic] SEND messages.stream [${stream.request_id}]: ${JSON.stringify(body)}`);
+			} else {
+				const userMessages = body.messages.filter(m => m.role === 'user');
+				const assistantMessages = body.messages.filter(m => m.role === 'assistant');
+				log.debug(
+					`[anthropic] SEND messages.stream [${stream.request_id}]: ` +
+					`model: ${body.model}; ` +
+					`cache options: ${cacheControlOptions ? JSON.stringify(cacheControlOptions) : 'default'}; ` +
+					`tools: ${body.tools?.map(t => t.name).sort().join(', ') ?? 'none'}; ` +
+					`tool choice: ${body.tool_choice ? JSON.stringify(body.tool_choice) : 'default'}; ` +
+					`system chars: ${body.system ? JSON.stringify(body.system).length : 0}; ` +
+					`user messages: ${userMessages.length}; ` +
+					`user message characters: ${JSON.stringify(userMessages).length}; ` +
+					`assistant messages: ${assistantMessages.length}; ` +
+					`assistant message characters: ${JSON.stringify(assistantMessages).length}`
+				);
+			}
 		});
 
 		token.onCancellationRequested(() => {
@@ -102,20 +144,31 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 			}
 			throw error;
 		}
+
+		// Log usage information.
+		const message = await stream.finalMessage();
+		if (log.logLevel <= vscode.LogLevel.Trace) {
+			log.trace(`[anthropic] RECV messages.stream [${stream.request_id}]: ${JSON.stringify(message)}`);
+		} else {
+			log.debug(
+				`[anthropic] RECV messages.stream [${stream.request_id}]: ` +
+				`usage: ${JSON.stringify(message.usage)}`
+			);
+		}
 	}
 
 	get providerName(): string {
 		return AnthropicLanguageModel.source.provider.displayName;
 	}
 
-	private onContentBlock(block: Anthropic.Messages.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
+	private onContentBlock(block: Anthropic.ContentBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
 		switch (block.type) {
 			case 'tool_use':
 				return this.onToolUseBlock(block, progress);
 		}
 	}
 
-	private onToolUseBlock(block: Anthropic.Messages.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
+	private onToolUseBlock(block: Anthropic.ToolUseBlock, progress: vscode.Progress<vscode.ChatResponseFragment2>): void {
 		progress.report({
 			index: 0,
 			part: new vscode.LanguageModelToolCallPart(block.id, block.name, block.input as any),
@@ -170,6 +223,11 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
 	}
 }
 
+// Run the chat messages through processMessages, then convert each result to an Anthropic message param.
+function toAnthropicMessages(messages: vscode.LanguageModelChatMessage2[]): Anthropic.MessageParam[] {
+	return processMessages(messages).map(toAnthropicMessage);
+}
+
 function toAnthropicMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam {
 	switch (message.role) {
 		case vscode.LanguageModelChatMessageRole.Assistant:
@@ -281,6 +339,18 @@ function languageModelImagePartToAnthropicImageBlock(part: LanguageModelImagePar
 	};
 }
 
+/**
+ * Convert VS Code tool definitions to Anthropic tools, ordered by name.
+ *
+ * Names are compared by UTF-16 code unit rather than `localeCompare`, so the order never
+ * depends on the host locale; a stable tool order is what prompt caching relies on.
+ */
+function toAnthropicTools(tools: vscode.LanguageModelChatTool[]): Anthropic.ToolUnion[] {
+	return tools
+		.map(tool => toAnthropicTool(tool))
+		.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
+}
+
 function toAnthropicTool(tool: vscode.LanguageModelChatTool): Anthropic.ToolUnion {
 	const input_schema = tool.inputSchema as Anthropic.Tool.InputSchema ?? {
 		type: 'object',
@@ -308,3 +378,32 @@ function toAnthropicToolChoice(toolMode: vscode.LanguageModelChatToolMode): Anth
 			throw new Error(`Unsupported tool mode: ${toolMode}`);
 	}
 }
+
+/**
+ * Convert a system prompt into the form sent to the Anthropic API, optionally adding a
+ * cache control point. Only string prompts are wrapped; anything else is returned unchanged.
+ */
+function toAnthropicSystem(system: unknown, cacheSystem = true): Anthropic.MessageCreateParams['system'] {
+	// Non-string prompts are forwarded untouched. The value may not be a valid
+	// system prompt; in that case Anthropic is left to throw the error.
+	if (typeof system !== 'string') {
+		return system as Anthropic.MessageCreateParams['system'];
+	}
+
+	// Wrap the string in a single text block so a cache control point can be attached.
+	const systemBlocks: Anthropic.MessageCreateParams['system'] = [{ type: 'text', text: system }];
+	if (cacheSystem) {
+		systemBlocks[0].cache_control = { type: 'ephemeral' };
+		log.debug(`[anthropic] Adding cache control point to system prompt`);
+	}
+
+	return systemBlocks;
+}
+
+/** Type guard: accepts any non-null object whose `system` field is absent or a boolean. */
+function isCacheControlOptions(options: unknown): options is CacheControlOptions {
+	if (options === null || typeof options !== 'object') {
+		return false;
+	}
+	return ['boolean', 'undefined'].includes(typeof (options as CacheControlOptions).system);
+}
diff --git a/extensions/positron-assistant/src/participants.ts b/extensions/positron-assistant/src/participants.ts
@@ -16,6 +16,7 @@ import { PositronAssistantToolName } from './types.js';
 import { StreamingTagLexer } from './streamingTagLexer.js';
 import { ReplaceStringProcessor } from './replaceStringProcessor.js';
 import { ReplaceSelectionProcessor } from './replaceSelectionProcessor.js';
+import { log } from './extension.js';
 
 export enum ParticipantID {
 	/** The participant used in the chat pane in Ask mode. */
@@ -297,7 +298,9 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 				response.reference(llmsDocument.uri);
 
 				// Add the contents of the file to the prompt
-				prompts.push(xml.node('instructions', llmsText));
+				const instructionsNode = xml.node('instructions', llmsText);
+				prompts.push(instructionsNode);
+				log.debug(`[context] adding llms.txt context: ${llmsText.length} characters`);
 			}
 		}
 
@@ -322,20 +325,21 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 					response.reference(value.uri);
 
 					// Add the visible region prompt.
-					attachmentPrompts.push(xml.node('attachment', visibleText, {
+					const rangeAttachmentNode = xml.node('attachment', visibleText, {
 						filePath: path,
 						description: 'Visible region of the active file',
 						language: document.languageId,
 						startLine: value.range.start.line + 1,
 						endLine: value.range.end.line + 1,
-					}));
-
-					// Add the full document text prompt.
-					attachmentPrompts.push(xml.node('attachment', documentText, {
+					});
+					const documentAttachmentNode = xml.node('attachment', documentText, {
 						filePath: path,
 						description: 'Full contents of the active file',
 						language: document.languageId,
-					}));
+					});
+					attachmentPrompts.push(rangeAttachmentNode, documentAttachmentNode);
+					log.debug(`[context] adding file range attachment context: ${rangeAttachmentNode.length} characters`);
+					log.debug(`[context] adding file attachment context: ${documentAttachmentNode.length} characters`);
 				} else if (value instanceof vscode.Uri) {
 					const fileStat = await vscode.workspace.fs.stat(value);
 					if (fileStat.type === vscode.FileType.Directory) {
@@ -357,10 +361,12 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 						// response.reference(value);
 
 						// Attach the folder's contents.
-						attachmentPrompts.push(xml.node('attachment', entriesText, {
+						const attachmentNode = xml.node('attachment', entriesText, {
 							filePath: path,
 							description: 'Contents of the directory',
-						}));
+						});
+						attachmentPrompts.push(attachmentNode);
+						log.debug(`[context] adding directory attachment context: ${attachmentNode.length} characters`);
 					} else {
 						// The user attached a file - usually a manually attached file in the workspace.
 						const document = await vscode.workspace.openTextDocument(value);
@@ -371,11 +377,13 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 						response.reference(value);
 
 						// Attach the full document text.
-						attachmentPrompts.push(xml.node('attachment', documentText, {
+						const attachmentNode = xml.node('attachment', documentText, {
 							filePath: path,
 							description: 'Full contents of the file',
 							language: document.languageId,
-						}));
+						});
+						attachmentPrompts.push(attachmentNode);
+						log.debug(`[context] adding file attachment context: ${attachmentNode.length} characters`);
 					}
 				} else if (value instanceof vscode.ChatReferenceBinaryData) {
 					if (isChatImageMimeType(value.mimeType)) {
@@ -388,18 +396,20 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 						}
 
 						// Attach the image.
-						attachmentPrompts.push(xml.leaf('img', {
+						const imageNode = xml.leaf('img', {
 							src: reference.name,
-						}));
+						});
+						attachmentPrompts.push(imageNode);
+						log.debug(`[context] adding image attachment context: ${data.length} bytes`);
 
 						userDataParts.push(
 							vscode.LanguageModelDataPart.image(data, value.mimeType),
 						);
 					} else {
-						console.warn(`Positron Assistant: Unsupported chat reference binary data type: ${typeof value}`);
+						log.warn(`Unsupported chat reference binary data type: ${typeof value}`);
 					}
 				} else {
-					console.warn(`Positron Assistant: Unsupported reference type: ${typeof value}`);
+					log.warn(`Unsupported reference type: ${typeof value}`);
 				}
 			}
 
@@ -417,15 +427,18 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 			const executions = positronContext.activeSession.executions
 				.map((e) => xml.node('execution', JSON.stringify(e)))
 				.join('\n');
-			positronContextPrompts.push(
-				xml.node('session',
-					xml.node('executions', executions ?? ''), {
-					description: 'Current active session',
-					language: positronContext.activeSession.language,
-					version: positronContext.activeSession.version,
-					mode: positronContext.activeSession.mode,
-					identifier: positronContext.activeSession.identifier,
-				})
+			const sessionNode = xml.node('session',
+				xml.node('executions', executions ?? ''), {
+				description: 'Current active session',
+				language: positronContext.activeSession.language,
+				version: positronContext.activeSession.version,
+				mode: positronContext.activeSession.mode,
+				identifier: positronContext.activeSession.identifier,
+			});
+			positronContextPrompts.push(sessionNode);
+			log.debug(
+				`[context] adding active ${positronContext.activeSession.mode} ${positronContext.activeSession.language} session context: ` +
+				`${sessionNode.length} characters`
 			);
 		}
 		if (positronContext.variables) {
@@ -435,33 +448,33 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
 			const description = content.length > 0 ?
 				'Variables defined in the current session' :
 				'No variables defined in the current session';
-			positronContextPrompts.push(
-				xml.node('variables', content, {
-					description,
-				})
-			);
+			const variablesNode = xml.node('variables', content, {
+				description,
+			});
+			positronContextPrompts.push(variablesNode);
+			log.debug(`[context] adding variables context: ${variablesNode.length} characters`);
 		}
 		if (positronContext.shell) {
-			positronContextPrompts.push(
-				xml.node('shell', positronContext.shell, {
-					description: 'Current active shell',
-				})
-			);
+			const shellNode = xml.node('shell', positronContext.shell, {
+				description: 'Current active shell',
+			});
+			positronContextPrompts.push(shellNode);
+			log.debug(`[context] adding shell context: ${shellNode.length} characters`);
 		}
 		if (positronContext.plots && positronContext.plots.hasPlots) {
-			positronContextPrompts.push(
-				xml.node('plots', 'A plot is visible.')
-			);
+			const plotsNode = xml.node('plots', 'A plot is visible.');
+			positronContextPrompts.push(plotsNode);
+			log.debug(`[context] adding plots context: ${plotsNode.length} characters`);
 		}
 		if (positronContext.positronVersion) {
-			positronContextPrompts.push(
-				xml.node('version', `Positron version: ${positronContext.positronVersion}`),
-			);
+			const versionNode = xml.node('version', `Positron version: ${positronContext.positronVersion}`);
+			positronContextPrompts.push(versionNode);
+			log.debug(`[context] adding positron version context: ${versionNode.length} characters`);
 		}
 		if (positronContext.currentDate) {
-			positronContextPrompts.push(
-				xml.node('date', `Today's date is: ${positronContext.currentDate}`),
-			);
+			const dateNode = xml.node('date', `Today's date is: ${positronContext.currentDate}`);
+			positronContextPrompts.push(dateNode);
+			log.debug(`[context] adding date context: ${dateNode.length} characters`);
 		}
 		if (positronContextPrompts.length > 0) {
 			prompts.push(xml.node('context', positronContextPrompts.join('\n\n')));
@@ -731,7 +744,7 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
 		const selectedText = document.getText(selection);
 		const documentText = document.getText();
 		const filePath = uriToString(document.uri);
-		return xml.node('editor',
+		const editorNode = xml.node('editor',
 			[
 				xml.node('document', documentText, {
 					description: 'Full contents of the active file',
@@ -749,6 +762,8 @@ export class PositronAssistantEditorParticipant extends PositronAssistantPartici
 				documentOffset: document.offsetAt(selection.active),
 			},
 		);
+		log.debug(`[context] adding editor context: ${editorNode.length} characters`);
+		return editorNode;
 	}
 
 }