Skip to content

Commit ae65240

Browse files
committed
Instead of trying to detect notebook vs console, collect one time when we have any kind of IPython environment.
1 parent 14e8efa commit ae65240

File tree

2 files changed

+37
-10
lines changed

2 files changed

+37
-10
lines changed

src/dataframe.rs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ use crate::physical_plan::PyExecutionPlan;
5151
use crate::record_batch::PyRecordBatchStream;
5252
use crate::sql::logical::PyLogicalPlan;
5353
use crate::utils::{
54-
get_tokio_runtime, py_obj_to_scalar_value, validate_pycapsule, wait_for_future,
54+
get_tokio_runtime, is_ipython_env, py_obj_to_scalar_value, validate_pycapsule, wait_for_future,
5555
};
5656
use crate::{
5757
errors::PyDataFusionResult,
@@ -289,21 +289,33 @@ impl PyParquetColumnOptions {
289289
#[derive(Clone)]
290290
pub struct PyDataFrame {
291291
df: Arc<DataFrame>,
292+
293+
// In an IPython environment, cache batches between __repr__ and _repr_html_ calls.
294+
batches: Option<(Vec<RecordBatch>, bool)>,
292295
}
293296

294297
impl PyDataFrame {
295298
/// creates a new PyDataFrame
296299
pub fn new(df: DataFrame) -> Self {
297-
Self { df: Arc::new(df) }
300+
Self {
301+
df: Arc::new(df),
302+
batches: None,
303+
}
298304
}
299305

300-
fn prepare_repr_string(&self, py: Python, as_html: bool) -> PyDataFusionResult<String> {
306+
fn prepare_repr_string(&mut self, py: Python, as_html: bool) -> PyDataFusionResult<String> {
301307
// Get the Python formatter and config
302308
let PythonFormatter { formatter, config } = get_python_formatter_with_config(py)?;
303-
let (batches, has_more) = wait_for_future(
304-
py,
305-
collect_record_batches_to_display(self.df.as_ref().clone(), config),
306-
)??;
309+
310+
let should_cache = *is_ipython_env(py) && self.batches.is_none();
311+
let (batches, has_more) = match self.batches.take() {
312+
Some(b) => b,
313+
None => wait_for_future(
314+
py,
315+
collect_record_batches_to_display(self.df.as_ref().clone(), config),
316+
)??,
317+
};
318+
307319
if batches.is_empty() {
308320
// This should not be reached, but do it for safety since we index into the vector below
309321
return Ok("No data to display".to_string());
@@ -313,7 +325,7 @@ impl PyDataFrame {
313325

314326
// Convert record batches to PyObject list
315327
let py_batches = batches
316-
.into_iter()
328+
.iter()
317329
.map(|rb| rb.to_pyarrow(py))
318330
.collect::<PyResult<Vec<PyObject>>>()?;
319331

@@ -334,6 +346,10 @@ impl PyDataFrame {
334346
let html_result = formatter.call_method(method_name, (), Some(&kwargs))?;
335347
let html_str: String = html_result.extract()?;
336348

349+
if should_cache {
350+
self.batches = Some((batches, has_more));
351+
}
352+
337353
Ok(html_str)
338354
}
339355
}
@@ -361,7 +377,7 @@ impl PyDataFrame {
361377
}
362378
}
363379

364-
fn __repr__(&self, py: Python) -> PyDataFusionResult<String> {
380+
fn __repr__(&mut self, py: Python) -> PyDataFusionResult<String> {
365381
self.prepare_repr_string(py, false)
366382
}
367383

@@ -396,7 +412,7 @@ impl PyDataFrame {
396412
Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}"))
397413
}
398414

399-
fn _repr_html_(&self, py: Python) -> PyDataFusionResult<String> {
415+
fn _repr_html_(&mut self, py: Python) -> PyDataFusionResult<String> {
400416
self.prepare_repr_string(py, true)
401417
}
402418

src/utils.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,17 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime {
3939
RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap()))
4040
}
4141

42+
#[inline]
43+
pub(crate) fn is_ipython_env(py: Python) -> &'static bool {
44+
static IS_IPYTHON_ENV: OnceLock<bool> = OnceLock::new();
45+
IS_IPYTHON_ENV.get_or_init(|| {
46+
py.import("IPython")
47+
.and_then(|ipython| ipython.call_method0("get_ipython"))
48+
.map(|ipython| !ipython.is_none())
49+
.unwrap_or(false)
50+
})
51+
}
52+
4253
/// Utility to get the Global Datafussion CTX
4354
#[inline]
4455
pub(crate) fn get_global_ctx() -> &'static SessionContext {

0 commit comments

Comments
 (0)