@@ -51,7 +51,7 @@ use crate::physical_plan::PyExecutionPlan;
51
51
use crate :: record_batch:: PyRecordBatchStream ;
52
52
use crate :: sql:: logical:: PyLogicalPlan ;
53
53
use crate :: utils:: {
54
- get_tokio_runtime, py_obj_to_scalar_value, validate_pycapsule, wait_for_future,
54
+ get_tokio_runtime, is_ipython_env , py_obj_to_scalar_value, validate_pycapsule, wait_for_future,
55
55
} ;
56
56
use crate :: {
57
57
errors:: PyDataFusionResult ,
@@ -289,21 +289,33 @@ impl PyParquetColumnOptions {
289
289
#[ derive( Clone ) ]
290
290
pub struct PyDataFrame {
291
291
df : Arc < DataFrame > ,
292
+
293
+ // In IPython environment cache batches between __repr__ and _repr_html_ calls.
294
+ batches : Option < ( Vec < RecordBatch > , bool ) > ,
292
295
}
293
296
294
297
impl PyDataFrame {
295
298
/// creates a new PyDataFrame
296
299
pub fn new ( df : DataFrame ) -> Self {
297
- Self { df : Arc :: new ( df) }
300
+ Self {
301
+ df : Arc :: new ( df) ,
302
+ batches : None ,
303
+ }
298
304
}
299
305
300
- fn prepare_repr_string ( & self , py : Python , as_html : bool ) -> PyDataFusionResult < String > {
306
+ fn prepare_repr_string ( & mut self , py : Python , as_html : bool ) -> PyDataFusionResult < String > {
301
307
// Get the Python formatter and config
302
308
let PythonFormatter { formatter, config } = get_python_formatter_with_config ( py) ?;
303
- let ( batches, has_more) = wait_for_future (
304
- py,
305
- collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
306
- ) ??;
309
+
310
+ let should_cache = * is_ipython_env ( py) && self . batches . is_none ( ) ;
311
+ let ( batches, has_more) = match self . batches . take ( ) {
312
+ Some ( b) => b,
313
+ None => wait_for_future (
314
+ py,
315
+ collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
316
+ ) ??,
317
+ } ;
318
+
307
319
if batches. is_empty ( ) {
308
320
// This should not be reached, but do it for safety since we index into the vector below
309
321
return Ok ( "No data to display" . to_string ( ) ) ;
@@ -313,7 +325,7 @@ impl PyDataFrame {
313
325
314
326
// Convert record batches to PyObject list
315
327
let py_batches = batches
316
- . into_iter ( )
328
+ . iter ( )
317
329
. map ( |rb| rb. to_pyarrow ( py) )
318
330
. collect :: < PyResult < Vec < PyObject > > > ( ) ?;
319
331
@@ -334,6 +346,10 @@ impl PyDataFrame {
334
346
let html_result = formatter. call_method ( method_name, ( ) , Some ( & kwargs) ) ?;
335
347
let html_str: String = html_result. extract ( ) ?;
336
348
349
+ if should_cache {
350
+ self . batches = Some ( ( batches, has_more) ) ;
351
+ }
352
+
337
353
Ok ( html_str)
338
354
}
339
355
}
@@ -361,7 +377,7 @@ impl PyDataFrame {
361
377
}
362
378
}
363
379
364
- fn __repr__ ( & self , py : Python ) -> PyDataFusionResult < String > {
380
+ fn __repr__ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
365
381
self . prepare_repr_string ( py, false )
366
382
}
367
383
@@ -396,7 +412,7 @@ impl PyDataFrame {
396
412
Ok ( format ! ( "DataFrame()\n {batches_as_displ}{additional_str}" ) )
397
413
}
398
414
399
- fn _repr_html_ ( & self , py : Python ) -> PyDataFusionResult < String > {
415
+ fn _repr_html_ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
400
416
self . prepare_repr_string ( py, true )
401
417
}
402
418
0 commit comments