@@ -24,6 +24,7 @@ use arrow::compute::can_cast_types;
24
24
use arrow:: error:: ArrowError ;
25
25
use arrow:: ffi:: FFI_ArrowSchema ;
26
26
use arrow:: ffi_stream:: FFI_ArrowArrayStream ;
27
+ use arrow:: pyarrow:: FromPyArrow ;
27
28
use datafusion:: arrow:: datatypes:: Schema ;
28
29
use datafusion:: arrow:: pyarrow:: { PyArrowType , ToPyArrow } ;
29
30
use datafusion:: arrow:: util:: pretty;
@@ -295,6 +296,46 @@ impl PyDataFrame {
295
296
pub fn new ( df : DataFrame ) -> Self {
296
297
Self { df : Arc :: new ( df) }
297
298
}
299
+
300
+ fn prepare_repr_string ( & self , py : Python , as_html : bool ) -> PyDataFusionResult < String > {
301
+ // Get the Python formatter and config
302
+ let PythonFormatter { formatter, config } = get_python_formatter_with_config ( py) ?;
303
+ let ( batches, has_more) = wait_for_future (
304
+ py,
305
+ collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
306
+ ) ??;
307
+ if batches. is_empty ( ) {
308
+ // This should not be reached, but do it for safety since we index into the vector below
309
+ return Ok ( "No data to display" . to_string ( ) ) ;
310
+ }
311
+
312
+ let table_uuid = uuid:: Uuid :: new_v4 ( ) . to_string ( ) ;
313
+
314
+ // Convert record batches to PyObject list
315
+ let py_batches = batches
316
+ . into_iter ( )
317
+ . map ( |rb| rb. to_pyarrow ( py) )
318
+ . collect :: < PyResult < Vec < PyObject > > > ( ) ?;
319
+
320
+ let py_schema = self . schema ( ) . into_pyobject ( py) ?;
321
+
322
+ let kwargs = pyo3:: types:: PyDict :: new ( py) ;
323
+ let py_batches_list = PyList :: new ( py, py_batches. as_slice ( ) ) ?;
324
+ kwargs. set_item ( "batches" , py_batches_list) ?;
325
+ kwargs. set_item ( "schema" , py_schema) ?;
326
+ kwargs. set_item ( "has_more" , has_more) ?;
327
+ kwargs. set_item ( "table_uuid" , table_uuid) ?;
328
+
329
+ let method_name = match as_html {
330
+ true => "format_html" ,
331
+ false => "format_str" ,
332
+ } ;
333
+
334
+ let html_result = formatter. call_method ( method_name, ( ) , Some ( & kwargs) ) ?;
335
+ let html_str: String = html_result. extract ( ) ?;
336
+
337
+ Ok ( html_str)
338
+ }
298
339
}
299
340
300
341
#[ pymethods]
@@ -321,18 +362,27 @@ impl PyDataFrame {
321
362
}
322
363
323
364
fn __repr__ ( & self , py : Python ) -> PyDataFusionResult < String > {
324
- // Get the Python formatter config
325
- let PythonFormatter {
326
- formatter : _,
327
- config,
328
- } = get_python_formatter_with_config ( py) ?;
329
- let ( batches, has_more) = wait_for_future (
330
- py,
331
- collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
332
- ) ??;
365
+ self . prepare_repr_string ( py, false )
366
+ }
367
+
368
+ #[ staticmethod]
369
+ #[ expect( unused_variables) ]
370
+ fn default_str_repr < ' py > (
371
+ batches : Vec < Bound < ' py , PyAny > > ,
372
+ schema : & Bound < ' py , PyAny > ,
373
+ has_more : bool ,
374
+ table_uuid : & str ,
375
+ ) -> PyResult < String > {
376
+ let batches = batches
377
+ . into_iter ( )
378
+ . map ( |batch| RecordBatch :: from_pyarrow_bound ( & batch) )
379
+ . collect :: < PyResult < Vec < RecordBatch > > > ( ) ?
380
+ . into_iter ( )
381
+ . filter ( |batch| batch. num_rows ( ) > 0 )
382
+ . collect :: < Vec < _ > > ( ) ;
383
+
333
384
if batches. is_empty ( ) {
334
- // This should not be reached, but do it for safety since we index into the vector below
335
- return Ok ( "No data to display" . to_string ( ) ) ;
385
+ return Ok ( "No data to display" . to_owned ( ) ) ;
336
386
}
337
387
338
388
let batches_as_displ =
@@ -347,38 +397,7 @@ impl PyDataFrame {
347
397
}
348
398
349
399
fn _repr_html_ ( & self , py : Python ) -> PyDataFusionResult < String > {
350
- // Get the Python formatter and config
351
- let PythonFormatter { formatter, config } = get_python_formatter_with_config ( py) ?;
352
- let ( batches, has_more) = wait_for_future (
353
- py,
354
- collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
355
- ) ??;
356
- if batches. is_empty ( ) {
357
- // This should not be reached, but do it for safety since we index into the vector below
358
- return Ok ( "No data to display" . to_string ( ) ) ;
359
- }
360
-
361
- let table_uuid = uuid:: Uuid :: new_v4 ( ) . to_string ( ) ;
362
-
363
- // Convert record batches to PyObject list
364
- let py_batches = batches
365
- . into_iter ( )
366
- . map ( |rb| rb. to_pyarrow ( py) )
367
- . collect :: < PyResult < Vec < PyObject > > > ( ) ?;
368
-
369
- let py_schema = self . schema ( ) . into_pyobject ( py) ?;
370
-
371
- let kwargs = pyo3:: types:: PyDict :: new ( py) ;
372
- let py_batches_list = PyList :: new ( py, py_batches. as_slice ( ) ) ?;
373
- kwargs. set_item ( "batches" , py_batches_list) ?;
374
- kwargs. set_item ( "schema" , py_schema) ?;
375
- kwargs. set_item ( "has_more" , has_more) ?;
376
- kwargs. set_item ( "table_uuid" , table_uuid) ?;
377
-
378
- let html_result = formatter. call_method ( "format_html" , ( ) , Some ( & kwargs) ) ?;
379
- let html_str: String = html_result. extract ( ) ?;
380
-
381
- Ok ( html_str)
400
+ self . prepare_repr_string ( py, true )
382
401
}
383
402
384
403
/// Calculate summary statistics for a DataFrame
0 commit comments