Closed
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df_outer = pd.DataFrame({"a": [{"x": df}]})
print(df_outer)
Issue Description
The above code crashes with the following stack trace:
Stack trace
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
Cell In[1], line 5
3 df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
4 df_outer = pd.DataFrame({"a": [{"x": df}]})
----> 5 print(df_outer)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/core/frame.py:1214, in DataFrame.__repr__(self)
1211 return buf.getvalue()
1213 repr_params = fmt.get_dataframe_repr_params()
-> 1214 return self.to_string(**repr_params)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/util/_decorators.py:333, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
327 if len(args) > num_allow_args:
328 warnings.warn(
329 msg.format(arguments=_format_argument_list(allow_args)),
330 FutureWarning,
331 stacklevel=find_stack_level(),
332 )
--> 333 return func(*args, **kwargs)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/core/frame.py:1394, in DataFrame.to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, line_width, min_rows, max_colwidth, encoding)
1375 with option_context("display.max_colwidth", max_colwidth):
1376 formatter = fmt.DataFrameFormatter(
1377 self,
1378 columns=columns,
(...)
1392 decimal=decimal,
1393 )
-> 1394 return fmt.DataFrameRenderer(formatter).to_string(
1395 buf=buf,
1396 encoding=encoding,
1397 line_width=line_width,
1398 )
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:962, in DataFrameRenderer.to_string(self, buf, encoding, line_width)
959 from pandas.io.formats.string import StringFormatter
961 string_formatter = StringFormatter(self.fmt, line_width=line_width)
--> 962 string = string_formatter.to_string()
963 return save_to_buffer(string, buf=buf, encoding=encoding)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/string.py:29, in StringFormatter.to_string(self)
28 def to_string(self) -> str:
---> 29 text = self._get_string_representation()
30 if self.fmt.should_show_dimensions:
31 text = f"{text}{self.fmt.dimensions_info}"
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/string.py:44, in StringFormatter._get_string_representation(self)
41 if self.fmt.frame.empty:
42 return self._empty_info_line
---> 44 strcols = self._get_strcols()
46 if self.line_width is None:
47 # no need to wrap around just print the whole frame
48 return self.adj.adjoin(1, *strcols)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/string.py:35, in StringFormatter._get_strcols(self)
34 def _get_strcols(self) -> list[list[str]]:
---> 35 strcols = self.fmt.get_strcols()
36 if self.fmt.is_truncated:
37 strcols = self._insert_dot_separators(strcols)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:476, in DataFrameFormatter.get_strcols(self)
472 def get_strcols(self) -> list[list[str]]:
473 """
474 Render a DataFrame to a list of columns (as lists of strings).
475 """
--> 476 strcols = self._get_strcols_without_index()
478 if self.index:
479 str_index = self._get_formatted_index(self.tr_frame)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:740, in DataFrameFormatter._get_strcols_without_index(self)
736 cheader = str_columns[i]
737 header_colwidth = max(
738 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
739 )
--> 740 fmt_values = self.format_col(i)
741 fmt_values = _make_fixed_width(
742 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
743 )
745 max_len = max(*(self.adj.len(x) for x in fmt_values), header_colwidth)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:754, in DataFrameFormatter.format_col(self, i)
752 frame = self.tr_frame
753 formatter = self._get_formatter(i)
--> 754 return format_array(
755 frame.iloc[:, i]._values,
756 formatter,
757 float_format=self.float_format,
758 na_rep=self.na_rep,
759 space=self.col_space.get(frame.columns[i]),
760 decimal=self.decimal,
761 leading_space=self.index,
762 )
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:1161, in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting, fallback_formatter)
1145 digits = get_option("display.precision")
1147 fmt_obj = fmt_klass(
1148 values,
1149 digits=digits,
(...)
1158 fallback_formatter=fallback_formatter,
1159 )
-> 1161 return fmt_obj.get_result()
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:1194, in _GenericArrayFormatter.get_result(self)
1193 def get_result(self) -> list[str]:
-> 1194 fmt_values = self._format_strings()
1195 return _make_fixed_width(fmt_values, self.justify)
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:1259, in _GenericArrayFormatter._format_strings(self)
1257 for i, v in enumerate(vals):
1258 if (not is_float_type[i] or self.formatter is not None) and leading_space:
-> 1259 fmt_values.append(f" {_format(v)}")
1260 elif is_float_type[i]:
1261 fmt_values.append(float_format(v))
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/format.py:1239, in _GenericArrayFormatter._format_strings.<locals>._format(x)
1236 return repr(x)
1237 else:
1238 # object dtype
-> 1239 return str(formatter(x))
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/printing.py:219, in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items)
215 return str(thing)
216 elif isinstance(thing, dict) and _nest_lvl < get_option(
217 "display.pprint_nest_depth"
218 ):
--> 219 result = _pprint_dict(
220 thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
221 )
222 elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
223 result = _pprint_seq(
224 thing,
225 _nest_lvl,
(...)
228 max_seq_items=max_seq_items,
229 )
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/printing.py:155, in _pprint_dict(seq, _nest_lvl, max_seq_items, **kwds)
149 nitems = max_seq_items or get_option("max_seq_items") or len(seq)
151 for k, v in list(seq.items())[:nitems]:
152 pairs.append(
153 pfmt.format(
154 key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
--> 155 val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
156 )
157 )
159 if nitems < len(seq):
160 return fmt.format(things=", ".join(pairs) + ", ...")
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/printing.py:223, in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items)
219 result = _pprint_dict(
220 thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
221 )
222 elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
--> 223 result = _pprint_seq(
224 thing,
225 _nest_lvl,
226 escape_chars=escape_chars,
227 quote_strings=quote_strings,
228 max_seq_items=max_seq_items,
229 )
230 elif isinstance(thing, str) and quote_strings:
231 result = f"'{as_escaped_string(thing)}'"
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/printing.py:120, in _pprint_seq(seq, _nest_lvl, max_seq_items, **kwds)
118 s = iter(seq)
119 # handle sets, no slicing
--> 120 r = [
121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
122 for i in range(min(nitems, len(seq)))
123 ]
124 body = ", ".join(r)
126 if nitems < len(seq):
File ~/Library/Python/3.10/lib/python/site-packages/pandas/io/formats/printing.py:121, in <listcomp>(.0)
118 s = iter(seq)
119 # handle sets, no slicing
120 r = [
--> 121 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
122 for i in range(min(nitems, len(seq)))
123 ]
124 body = ", ".join(r)
126 if nitems < len(seq):
StopIteration:
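My interpretation is that this happens because pandas treats the nested DataFrame as a normal sequence and tries to iterate over it. For a DataFrame, however, len(df) != len(list(df)) in general, because the former is the number of rows and the latter is the number of columns.
In the example above the DataFrame has 3 rows but only 2 columns, so _pprint_seq calls next(s) len(seq) == 3 times on an iterator that yields only 2 items, and the third call raises StopIteration.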
This issue is essentially the same as #49195, but that issue was incorrectly triaged as being an issue with an external library.
Expected Behavior
Any reasonable repr output. Should not crash.
Installed Versions
INSTALLED VERSIONS
------------------
commit : d9cdd2ee5a58015ef6f4d15c7226110c9aab8140
python : 3.10.8.final.0
python-bits : 64
OS : Darwin
OS-release : 23.5.0
Version : Darwin Kernel Version 23.5.0: Wed May 1 20:12:58 PDT 2024; root:xnu-10063.121.3~5/RELEASE_ARM64_T6000
machine : arm64
processor : arm
byteorder : little
LC_ALL : en_US.UTF-8
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 2.2.2
numpy : 1.26.4
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 63.2.0
pip : 23.2.1
Cython : None
pytest : 7.4.2
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : None
IPython : 8.14.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : None
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : None
tables : None
tabulate : 0.9.0
xarray : None
xlrd : None
zstandard : None
tzdata : 2024.1
qtpy : None
pyqt5 : None