Skip to content

Commit ed39e64

Browse files
committed
initial pass at implementing a data summary tool for Python
1 parent faffd95 commit ed39e64

File tree

12 files changed

+282
-1
lines changed

12 files changed

+282
-1
lines changed

extensions/positron-assistant/package.json

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,39 @@
262262
"positron-assistant"
263263
]
264264
},
265+
{
266+
"name": "getDataSummary",
267+
"displayName": "Get Data Summary",
268+
"modelDescription": "Get structured information about data objects in the current session.",
269+
"inputSchema": {
270+
"type": "object",
271+
"properties": {
272+
"sessionIdentifier": {
273+
"type": "string",
274+
"description": "The identifier of the session that contains the data."
275+
},
276+
"accessKeys": {
277+
"type": "array",
278+
"description": "An array of data variables to summarize.",
279+
"items": {
280+
"type": "array",
281+
"description": "A list of access keys that identify a variable by specifying its path.",
282+
"items": {
283+
"type": "string",
284+
"description": "An access key that uniquely identifies a variable among its siblings."
285+
}
286+
}
287+
}
288+
},
289+
"required": [
290+
"sessionIdentifier",
291+
"accessKeys"
292+
]
293+
},
294+
"tags": [
295+
"positron-assistant"
296+
]
297+
},
265298
{
266299
"name": "getProjectTree",
267300
"displayName": "Get Project Tree",

extensions/positron-assistant/src/md/prompts/chat/agent.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,21 @@ results, generate the code and return it directly without trying to execute it.
2323
<package-management>
2424
You adhere to the following workflow when dealing with package management:
2525

26+
**Data Object Information Workflow:**
27+
28+
When the user asks questions that require detailed information about data objects (DataFrames, arrays, matrices, etc.), use the `getDataSummary` tool to retrieve structured information such as data summaries and statistics.
29+
30+
To use the tool effectively:
31+
32+
1. First ensure you have the correct `sessionIdentifier` from the user context
33+
2. Provide the `accessKeys` array with the path to the specific data objects
34+
- Each access key is an array of strings representing the path to the variable
35+
- If the user references a variable by name, determine the access key from context or previous tool results
36+
3. Do not call this tool when:
37+
- The variables do not appear in the user context
38+
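- There is no active session, or the session is not a Python session (the tool currently returns an empty result for other languages)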
39+
- The user only wants to see the structure/children of objects (use `inspectVariables` instead)
40+
2641
**Package Management Workflow:**
2742

2843
1. Before generating code that requires packages, you must first use the appropriate tool to check if each required package is installed. To do so, first determine the target language from the user's request or context
@@ -44,4 +59,4 @@ You adhere to the following workflow when dealing with package management:
4459
4. Never use Python tools when generating R code, or R tools when generating Python code
4560
5. Never instruct users to install, load, or import packages that are already loaded in their session
4661
6. Do not generate conditional code (if/then statements) to check package availability. Use the provided tools to determine package status and generate only the necessary installation or loading code based on the tool results
47-
</package-management>
62+
</package-management>

extensions/positron-assistant/src/tools.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,55 @@ export function registerAssistantTools(
283283
context.subscriptions.push(inspectVariablesTool);
284284

285285
context.subscriptions.push(ProjectTreeTool);
286+
287+
const getDataSummaryTool = vscode.lm.registerTool<{ sessionIdentifier: string; accessKeys: Array<Array<string>> }>(PositronAssistantToolName.GetDataSummary, {
	/**
	 * Produces data summaries for one or more variables in a runtime session.
	 *
	 * Responds with the literal text `[[]]` when no usable session identifier
	 * is supplied, when no active session matches it, or when the matching
	 * session's language is not Python.
	 *
	 * @param options The tool invocation options; `options.input` carries the
	 *   session identifier and the access-key paths to summarize.
	 * @param token The cancellation token (not consulted by this implementation).
	 * @returns A vscode.LanguageModelToolResult whose single text part is the
	 *   JSON-serialized summaries.
	 */
	invoke: async (options, token) => {
		const { sessionIdentifier, accessKeys } = options.input;

		// Every "nothing to report" path answers with the same placeholder payload.
		const emptyResult = () => new vscode.LanguageModelToolResult([
			new vscode.LanguageModelTextPart('[[]]')
		]);

		// A missing identifier, or the literal string 'undefined', means no session.
		if (!sessionIdentifier || sessionIdentifier === 'undefined') {
			return emptyResult();
		}

		// Look up the active session that matches the requested identifier.
		const activeSessions = await positron.runtime.getActiveSessions();
		const target = activeSessions?.find(
			(candidate) => candidate.metadata.sessionId === sessionIdentifier,
		);

		// Temporarily only enabled for Python sessions.
		if (!target || target.runtimeMetadata.languageId !== 'python') {
			return emptyResult();
		}

		// Ask the Positron API for the summaries and hand them to the model as JSON.
		const summaries = await positron.runtime.getSessionVariableDataSummaries(
			sessionIdentifier,
			accessKeys);

		return new vscode.LanguageModelToolResult([
			new vscode.LanguageModelTextPart(JSON.stringify(summaries))
		]);
	}
});
334+
context.subscriptions.push(getDataSummaryTool);
286335
}
287336

288337
/**

extensions/positron-assistant/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export enum PositronAssistantToolName {
77
DocumentEdit = 'documentEdit',
88
EditFile = 'positron_editFile_internal',
99
ExecuteCode = 'executeCode',
10+
GetDataSummary = 'getDataSummary',
1011
GetPlot = 'getPlot',
1112
InspectVariables = 'inspectVariables',
1213
SelectionEdit = 'selectionEdit',

extensions/positron-python/python_files/posit/positron/variables.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
InspectRequest,
3232
ListRequest,
3333
RefreshParams,
34+
SummarizeDataRequest,
3435
UpdateParams,
3536
Variable,
3637
VariableKind,
@@ -137,6 +138,10 @@ def handle_msg(
137138
elif isinstance(request, ViewRequest):
138139
self._perform_view_action(request.params.path)
139140

141+
elif isinstance(request, SummarizeDataRequest):
142+
for path in request.params.paths:
143+
self._perform_get_variable_summary(path)
144+
140145
else:
141146
logger.warning(f"Unhandled request: {request}")
142147

@@ -707,6 +712,88 @@ def _send_details(self, _path: list[str], value: Any = None):
707712
msg = InspectedVariable(children=children, length=len(children))
708713
self._send_result(msg.dict())
709714

715+
def _perform_get_variable_summary(self, path: list[str]) -> None:
    """
    Handle a request to summarize the variable located at `path`.

    Delegates to `_summarize_data`; any exception it raises is reported through
    `_send_error` as a JSON-RPC internal error instead of propagating.

    NOTE(review): `handle_msg` calls this once per requested path, so a request
    listing several paths produces one result/error per path -- confirm the
    caller expects multiple replies.
    """
    try:
        self._summarize_data(path)
    except Exception as err:
        code = JsonRpcErrorCode.INTERNAL_ERROR
        self._send_error(code, f"Error summarizing variable at '{path}': {err}")
724+
725+
def _summarize_data(self, path: list[str]):
    """
    Build and send a statistical summary of the variable at `path`.

    The variable is looked up, wrapped in a temporary table view created
    without a comm, and a result is sent containing the table's row/column
    counts plus one profile (column name, display type, summary stats) per
    column. Columns whose summary stats cannot be computed are logged and
    left out of the result.

    Raises
    ------
    ValueError
        If the variable cannot be found, its type is not supported, or the
        table view or schema cannot be built.
    """
    from .data_explorer import DataExplorerState, _get_table_view, _value_type_is_supported
    from .data_explorer_comm import FormatOptions

    found, target = self._find_var(path)
    if not found:
        raise ValueError(f"Cannot find variable at '{path}' to summarize")
    if not _value_type_is_supported(target):
        raise ValueError(f"Variable at '{path}' is not supported for summary")

    # Temporary table view: None is passed where the comm would go.
    state = DataExplorerState("temp_summary")
    try:
        view = _get_table_view(target, None, state, self.kernel.job_queue)
    except Exception as e:
        raise ValueError(f"Failed to create table view: {e}") from e

    # Table dimensions and per-column schemas, gathered through the view's
    # internal schema method.
    try:
        n_cols = view.table.shape[1]
        n_rows = view.table.shape[0]
        col_schemas = [
            view._get_single_column_schema(idx)  # noqa: SLF001
            for idx in range(n_cols)
        ]

        from .data_explorer_comm import TableSchema

        schema = TableSchema(columns=col_schemas)
    except Exception as e:
        raise ValueError(f"Failed to get schema: {e}") from e

    # Default formatting applied to the summary statistics.
    default_format = FormatOptions(
        large_num_digits=4,
        small_num_digits=6,
        max_integral_digits=7,
        max_value_length=1000,
        thousands_sep=None,
    )

    column_profiles = []
    for i, column in enumerate(schema.columns):
        try:
            profile = {
                "column_name": column.column_name,
                "type_display": column.type_display,
                "summary_stats": view._prof_summary_stats(i, default_format),  # noqa: SLF001
            }
        except Exception as e:
            # A column that cannot be summarized is skipped, not fatal.
            logger.warning(f"Skipping summary stats for column {i} ({column.column_name}): {e}")
        else:
            column_profiles.append(profile)

    self._send_result(
        {
            "schema": {"num_rows": n_rows, "num_columns": n_cols},
            "column_profiles": column_profiles,
        }
    )
796+
710797

711798
def _summarize_variable(key: Any, value: Any, display_name: str | None = None) -> Variable | None:
712799
"""

extensions/positron-python/python_files/posit/positron/variables_comm.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ class VariablesBackendRequest(str, enum.Enum):
183183
# Request a viewer for a variable
184184
View = "view"
185185

186+
# Summarize data
187+
SummarizeData = "summarize_data"
188+
186189

187190
class ListRequest(BaseModel):
188191
"""
@@ -361,6 +364,7 @@ class VariablesBackendMessageContent(BaseModel):
361364
InspectRequest,
362365
ClipboardFormatRequest,
363366
ViewRequest,
367+
SummarizeDataRequest,
364368
] = Field(..., discriminator="method")
365369

366370

@@ -417,6 +421,33 @@ class RefreshParams(BaseModel):
417421
)
418422

419423

424+
class SummarizeDataParams(BaseModel):
    """
    Parameters for the SummarizeData method.
    """

    paths: List[List[StrictStr]] = Field(
        description="Array of paths to variables to summarize, each path is an array of access keys.",
    )
428+
429+
430+
class SummarizeDataRequest(BaseModel):
    """
    Request that the runtime summarize data in a variable.
    """

    params: SummarizeDataParams = Field(
        description="Parameters to the SummarizeData method",
    )

    method: Literal[VariablesBackendRequest.SummarizeData] = Field(
        description="The JSON-RPC method name (summarize_data)",
    )

    # The default must be the Field itself. Wrapping it in parentheses with a
    # trailing comma makes the default a one-element tuple rather than "2.0".
    jsonrpc: str = Field(
        default="2.0",
        description="The JSON-RPC version specifier",
    )
449+
450+
420451
VariableList.update_forward_refs()
421452

422453
InspectedVariable.update_forward_refs()
@@ -450,3 +481,9 @@ class RefreshParams(BaseModel):
450481
UpdateParams.update_forward_refs()
451482

452483
RefreshParams.update_forward_refs()
484+
485+
SummarizeDataParams.update_forward_refs()
486+
487+
SummarizeDataRequest.update_forward_refs()
488+
489+
VariablesBackendMessageContent.update_forward_refs()

src/positron-dts/positron.d.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1788,6 +1788,11 @@ declare module 'positron' {
17881788
accessKeys?: Array<Array<string>>):
17891789
Thenable<Array<Array<RuntimeVariable>>>;
17901790

1791+
/**
 * Get data summaries for variables in a runtime session.
 *
 * @param sessionId The ID of the session that holds the variables.
 * @param accessKeys The variables to summarize; each entry is a list of
 *   access keys forming the path to one variable.
 * @returns A Thenable that resolves to the summaries.
 *   NOTE(review): presumably one entry per path in `accessKeys`, in the
 *   same order -- confirm against the implementation.
 */
export function getSessionVariableDataSummaries(
1792+
sessionId: string,
1793+
accessKeys: Array<Array<string>>):
1794+
Thenable<Array<string>>;
1795+
17911796
/**
17921797
* Register a handler for runtime client instances. This handler will be called
17931798
* whenever a new client instance is created by a language runtime of the given

src/vs/workbench/api/browser/positron/mainThreadLanguageRuntime.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,6 +1583,32 @@ export class MainThreadLanguageRuntime
15831583
}
15841584
}
15851585

1586+
/**
 * Resolves the session for `handle`, finds the variables instance attached
 * to that session, and requests data summaries for the given access keys.
 *
 * This method is not `async`, so the "not found" error below is thrown
 * synchronously rather than delivered as a rejected promise.
 *
 * @param handle The handle of the session to query.
 * @param accessKeys The access-key paths of the variables to summarize.
 * @returns A promise for the summaries, one per access-key path.
 * @throws Error if no variables instance belongs to the session.
 */
$getSessionVariableDataSummaries(handle: number, accessKeys: Array<Array<string>>): Promise<Array<string>> {
	const { sessionId } = this.findSession(handle);
	for (const candidate of this._positronVariablesService.positronVariablesInstances) {
		if (candidate.session.sessionId !== sessionId) {
			continue;
		}
		// First instance bound to this session wins.
		return this.getSessionVariableDataSummaries(candidate, accessKeys);
	}
	throw new Error(`No variables provider found for session ${sessionId}`);
}
1596+
1597+
/**
 * Requests a data summary for each access-key path through the variables
 * client of `instance`.
 *
 * NOTE(review): each call passes a single path to `summarizeData`, while the
 * Python-side params declare `paths` as a list of paths -- confirm that the
 * comm contract matches.
 *
 * @param instance The variables instance whose client is queried.
 * @param accessKeys The access-key paths of the variables to summarize.
 * @returns The summaries, in the same order as `accessKeys`.
 * @throws Error if the instance has no client, or if `accessKeys` is empty.
 */
async getSessionVariableDataSummaries(instance: IPositronVariablesInstance, accessKeys: Array<Array<string>>): Promise<Array<string>> {
	const variablesClient = instance.getClientInstance();
	if (!variablesClient) {
		throw new Error(`No variables provider available for session ${instance.session.sessionId}`);
	}
	if (accessKeys.length === 0) {
		throw new Error('No access keys provided for variable data retrieval');
	}

	// One request per path, awaited in turn, so summaries line up with accessKeys.
	const summaries = [];
	for (const path of accessKeys) {
		const summary = await variablesClient.comm.summarizeData(path);
		summaries.push(summary);
	}
	return summaries;
}
1611+
15861612
// Signals that language runtime discovery is complete.
15871613
$completeLanguageRuntimeDiscovery(): void {
15881614
this._runtimeStartupService.completeDiscovery(this._id);

src/vs/workbench/api/common/positron/extHost.positron.api.impl.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ export function createPositronApiFactoryAndRegisterActors(accessor: ServicesAcce
141141
Thenable<Array<Array<positron.RuntimeVariable>>> {
142142
return extHostLanguageRuntime.getSessionVariables(sessionId, accessKeys);
143143
},
144+
// Delegates to extHostLanguageRuntime.getSessionVariableDataSummaries with the
// same session ID and access keys, returning its result unchanged.
getSessionVariableDataSummaries(sessionId: string, accessKeys: Array<Array<string>>):
145+
Thenable<Array<string>> {
146+
return extHostLanguageRuntime.getSessionVariableDataSummaries(sessionId, accessKeys);
147+
},
144148
registerClientHandler(handler: positron.RuntimeClientHandler): vscode.Disposable {
145149
return extHostLanguageRuntime.registerClientHandler(handler);
146150
},

src/vs/workbench/api/common/positron/extHost.positron.protocol.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export interface MainThreadLanguageRuntimeShape extends IDisposable {
5353
$interruptSession(handle: number): Promise<void>;
5454
$focusSession(handle: number): void;
5555
$getSessionVariables(handle: number, accessKeys?: Array<Array<string>>): Promise<Array<Array<Variable>>>;
56+
$getSessionVariableDataSummaries(handle: number, accessKeys: Array<Array<string>>): Promise<Array<string>>;
5657
$emitLanguageRuntimeMessage(handle: number, handled: boolean, message: SerializableObjectWithBuffers<ILanguageRuntimeMessage>): void;
5758
$emitLanguageRuntimeState(handle: number, clock: number, state: RuntimeState): void;
5859
$emitLanguageRuntimeExit(handle: number, exit: ILanguageRuntimeExit): void;

0 commit comments

Comments
 (0)