Skip to content

Commit f30ddb9

Browse files
committed
feature pandas-dev#49580: support new-style float_format string in to_csv
1 parent 51e9d08 commit f30ddb9

File tree

3 files changed

+173
-2
lines changed

3 files changed

+173
-2
lines changed

pandas/io/formats/format.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ def __init__(
455455
self.na_rep = na_rep
456456
self.formatters = self._initialize_formatters(formatters)
457457
self.justify = self._initialize_justify(justify)
458-
self.float_format = float_format
458+
self.float_format = self._validate_float_format(float_format)
459459
self.sparsify = self._initialize_sparsify(sparsify)
460460
self.show_index_names = index_names
461461
self.decimal = decimal
@@ -850,6 +850,35 @@ def _get_column_name_list(self) -> list[Hashable]:
850850
names.append("" if columns.name is None else columns.name)
851851
return names
852852

853+
def _validate_float_format(
854+
self, fmt: FloatFormatType | None
855+
) -> FloatFormatType | None:
856+
"""
857+
Validates and processes the float_format argument.
858+
Converts new-style format strings to callables.
859+
"""
860+
861+
if fmt is None:
862+
return None
863+
864+
if callable(fmt):
865+
return fmt
866+
867+
if isinstance(fmt, str):
868+
if "%" in fmt:
869+
# Keeps old-style format strings as they are (C code handles them)
870+
return fmt
871+
else:
872+
873+
try:
874+
_ = fmt.format(1.0) # Test with an arbitrary float
875+
return lambda x: fmt.format(x)
876+
except (ValueError, KeyError, IndexError) as e:
877+
878+
raise ValueError(f"Invalid new-style format string {repr(fmt)}") from e
879+
880+
# If fmt is neither None, nor callable, nor a successfully processed string,
881+
raise ValueError("float_format must be a string or callable")
853882

854883
class DataFrameRenderer:
855884
"""Class for creating dataframe output in multiple formats.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import pytest
2+
import numpy as np
3+
import pandas as pd
4+
5+
# Attach the ``benchmark`` fixture to every test collected from this module.
pytestmark = pytest.mark.usefixtures("benchmark")
6+
7+
def test_benchmark_old_style_format(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with ``"%.6f"``."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def write_csv():
        return frame.to_csv(float_format="%.6f")

    benchmark(write_csv)
10+
11+
def test_benchmark_new_style_format(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with ``"{:.6f}"``."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def write_csv():
        return frame.to_csv(float_format="{:.6f}")

    benchmark(write_csv)
14+
15+
def test_benchmark_new_style_thousands(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with ``"{:,.2f}"``."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def write_csv():
        return frame.to_csv(float_format="{:,.2f}")

    benchmark(write_csv)
18+
19+
def test_benchmark_callable_format(benchmark):
    """Benchmark ``to_csv`` on a 1000x1000 random frame with a callable formatter."""
    frame = pd.DataFrame(np.random.rand(1000, 1000))

    def six_decimals(x):
        return f"{x:.6f}"

    def write_csv():
        return frame.to_csv(float_format=six_decimals)

    benchmark(write_csv)

pandas/tests/io/formats/test_to_csv.py

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
compat,
1515
)
1616
import pandas._testing as tm
17-
17+
import warnings
1818

1919
class TestToCSV:
2020
def test_to_csv_with_single_column(self):
@@ -741,3 +741,124 @@ def test_to_csv_iterative_compression_buffer(compression):
741741
pd.read_csv(buffer, compression=compression, index_col=0), df
742742
)
743743
assert not buffer.closed
744+
745+
746+
def test_new_style_float_format_basic():
    """``"{:.2f}"`` writes each float with two decimal places."""
    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:.2f}")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"
751+
752+
def test_new_style_float_format_thousands():
    """``"{:,.2f}"`` adds thousands separators, so the fields get quoted."""
    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:,.2f}")
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
757+
758+
def test_new_style_scientific_format():
    """``"{:.2e}"`` writes floats in scientific notation."""
    frame = pd.DataFrame({"A": [0.000123, 0.000456]})
    csv_text = frame.to_csv(float_format="{:.2e}")
    assert csv_text == ",A\n0,1.23e-04\n1,4.56e-04\n"
763+
764+
def test_new_style_with_nan():
    """NaN entries are written as ``na_rep`` alongside formatted floats."""
    frame = pd.DataFrame({"A": [1.23, np.nan, 4.56]})
    csv_text = frame.to_csv(float_format="{:.2f}", na_rep="NA")
    assert csv_text == ",A\n0,1.23\n1,NA\n2,4.56\n"
769+
770+
def test_new_style_with_mixed_types():
    """A float column and a string column are written side by side."""
    frame = pd.DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
    csv_text = frame.to_csv(float_format="{:.2f}")
    assert csv_text == ",A,B\n0,1.23,x\n1,4.56,y\n"
775+
776+
def test_new_style_with_mixed_types_in_column():
    """A single column mixing floats and a string is written without error."""
    df = pd.DataFrame({"A": [1.23, "text", 4.56]})
    # Any warning raised while writing is deliberately suppressed.  The
    # earlier form recorded warnings into a list that was never inspected;
    # ignoring them has the same effect without the unused binding.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = df.to_csv(float_format="{:.2f}")

    expected = ",A\n0,1.23\n1,text\n2,4.56\n"
    assert result == expected
784+
785+
def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with ``ValueError``."""
    import re

    df = pd.DataFrame({"A": [1.23]})
    # ``match`` is a regular expression, so escape the braces and the dot
    # instead of relying on how the regex engine happens to read them.
    msg = re.escape("Invalid new-style format string '{:.2f'")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2f")
789+
790+
def test_invalid_new_style_format_specifier():
    """An unknown format code in the template is rejected with ``ValueError``."""
    import re

    df = pd.DataFrame({"A": [1.23]})
    # ``match`` is a regular expression, so escape the braces and the dot
    # instead of relying on how the regex engine happens to read them.
    msg = re.escape("Invalid new-style format string '{:.2z}'")
    with pytest.raises(ValueError, match=msg):
        df.to_csv(float_format="{:.2z}")
794+
795+
def test_old_style_format_compatibility():
    """Old-style ``"%.2f"`` strings keep producing two-decimal output."""
    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="%.2f")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"
800+
801+
def test_callable_float_format_compatibility():
    """A callable ``float_format`` is applied to every float value."""

    def with_thousands(x):
        return f"{x:,.2f}"

    frame = pd.DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format=with_thousands)
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
806+
807+
def test_no_float_format():
    """``float_format=None`` leaves the float representation untouched."""
    frame = pd.DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(float_format=None)
    assert csv_text == ",A\n0,1.23\n1,4.56\n"
812+
813+
def test_large_numbers():
    """Values at and beyond the float64 range are formatted with ``"{:.2e}"``."""
    # NOTE: the literal 2e308 exceeds the largest finite double, so it
    # evaluates to ``inf``; that is why the second row is expected as "inf".
    frame = pd.DataFrame({"A": [1e308, 2e308]})
    csv_text = frame.to_csv(float_format="{:.2e}")
    assert csv_text == ",A\n0,1.00e+308\n1,inf\n"
818+
819+
def test_zero_and_negative():
    """``"{:+.2f}"`` writes an explicit sign for zero and negative values."""
    frame = pd.DataFrame({"A": [0.0, -1.23456]})
    csv_text = frame.to_csv(float_format="{:+.2f}")
    assert csv_text == ",A\n0,+0.00\n1,-1.23\n"
824+
825+
def test_unicode_format():
    """Non-ASCII literal text in the template is carried into the output."""
    frame = pd.DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(float_format="{:.2f}€", encoding="utf-8")
    assert csv_text == ",A\n0,1.23€\n1,4.56€\n"
830+
831+
def test_empty_dataframe():
    """A frame with no rows yields only the header line."""
    frame = pd.DataFrame({"A": []})
    csv_text = frame.to_csv(float_format="{:.2f}")
    assert csv_text == ",A\n"
836+
837+
def test_multi_column_float():
    """The template is applied to every float column."""
    frame = pd.DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
    csv_text = frame.to_csv(float_format="{:.2f}")
    assert csv_text == ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
842+
843+
def test_invalid_float_format_type():
    """A ``float_format`` that is neither a string nor callable raises ``ValueError``."""
    frame = pd.DataFrame({"A": [1.23]})
    expected_msg = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=expected_msg):
        frame.to_csv(float_format=123)
847+
848+
def test_new_style_with_inf():
    """Infinite values are written as ``inf``/``-inf``, not as ``na_rep``."""
    frame = pd.DataFrame({"A": [1.23, np.inf, -np.inf]})
    csv_text = frame.to_csv(float_format="{:.2f}", na_rep="NA")
    assert csv_text == ",A\n0,1.23\n1,inf\n2,-inf\n"
853+
854+
def test_new_style_with_precision_edge():
    """``"{:.10f}"`` pads the value out to ten decimal places."""
    frame = pd.DataFrame({"A": [1.23456789]})
    csv_text = frame.to_csv(float_format="{:.10f}")
    assert csv_text == ",A\n0,1.2345678900\n"
859+
860+
def test_new_style_with_template():
    """Literal text around the replacement field is kept in each value."""
    frame = pd.DataFrame({"A": [1234.56789]})
    csv_text = frame.to_csv(float_format="Value: {:,.2f}")
    assert csv_text == ',A\n0,"Value: 1,234.57"\n'

0 commit comments

Comments
 (0)