Skip to content

Commit b5e441e

Browse files
authored
BUG: Decimal(NaN) incorrectly allowed in ArrowEA constructor with timestamp type (#61773)
* BUG: Decimal(NaN) incorrectly allowed in ArrowEA constructor with timestamp type * GH ref * BUG: ArrowEA constructor with timestamp type * mypy fixup * mypy fixup
1 parent 16fd208 commit b5e441e

File tree

4 files changed

+68
-3
lines changed

4 files changed

+68
-3
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,8 @@ Datetimelike
706706
- Bug in :meth:`to_datetime` reporting an incorrect index in case of any failure scenario. (:issue:`58298`)
707707
- Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`)
708708
- Bug in :meth:`to_datetime` wrongly converting when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
709+
- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
710+
- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
709711
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
710712

711713
Timedelta

pandas/core/arrays/arrow/array.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
from pandas.core.arrays.masked import BaseMaskedArray
6464
from pandas.core.arrays.string_ import StringDtype
6565
import pandas.core.common as com
66+
from pandas.core.construction import extract_array
6667
from pandas.core.indexers import (
6768
check_array_indexer,
6869
unpack_tuple_and_ellipses,
@@ -500,6 +501,33 @@ def _box_pa_array(
500501
value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
501502
value = value.to_numpy()
502503

504+
if pa_type is not None and pa.types.is_timestamp(pa_type):
505+
# Use DatetimeArray to exclude Decimal(NaN) (GH#61773) and
506+
# ensure constructor treats tznaive the same as non-pyarrow
507+
# dtypes (GH#61775)
508+
from pandas.core.arrays.datetimes import (
509+
DatetimeArray,
510+
tz_to_dtype,
511+
)
512+
513+
pass_dtype = tz_to_dtype(tz=pa_type.tz, unit=pa_type.unit)
514+
value = extract_array(value, extract_numpy=True)
515+
if isinstance(value, DatetimeArray):
516+
dta = value
517+
else:
518+
dta = DatetimeArray._from_sequence(
519+
value, copy=copy, dtype=pass_dtype
520+
)
521+
dta_mask = dta.isna()
522+
value_i8 = cast("npt.NDArray", dta.view("i8"))
523+
if not value_i8.flags["WRITEABLE"]:
524+
# e.g. test_setitem_frame_2d_values
525+
value_i8 = value_i8.copy()
526+
dta = DatetimeArray._from_sequence(value_i8, dtype=dta.dtype)
527+
value_i8[dta_mask] = 0 # GH#61776 avoid __sub__ overflow
528+
pa_array = pa.array(dta._ndarray, type=pa_type, mask=dta_mask)
529+
return pa_array
530+
503531
try:
504532
pa_array = pa.array(value, type=pa_type, from_pandas=True)
505533
except (pa.ArrowInvalid, pa.ArrowTypeError):

pandas/tests/extension/test_arrow.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2686,14 +2686,15 @@ def test_dt_tz_localize_unsupported_tz_options():
26862686
ser.dt.tz_localize("UTC", nonexistent="NaT")
26872687

26882688

2689+
@pytest.mark.xfail(reason="Converts to UTC before localizing GH#61780")
26892690
def test_dt_tz_localize_none():
26902691
ser = pd.Series(
26912692
[datetime(year=2023, month=1, day=2, hour=3), None],
26922693
dtype=ArrowDtype(pa.timestamp("ns", tz="US/Pacific")),
26932694
)
26942695
result = ser.dt.tz_localize(None)
26952696
expected = pd.Series(
2696-
[datetime(year=2023, month=1, day=2, hour=3), None],
2697+
[ser[0].tz_localize(None), None],
26972698
dtype=ArrowDtype(pa.timestamp("ns")),
26982699
)
26992700
tm.assert_series_equal(result, expected)
@@ -2753,7 +2754,7 @@ def test_dt_tz_convert_none():
27532754
)
27542755
result = ser.dt.tz_convert(None)
27552756
expected = pd.Series(
2756-
[datetime(year=2023, month=1, day=2, hour=3), None],
2757+
[ser[0].tz_convert(None), None],
27572758
dtype=ArrowDtype(pa.timestamp("ns")),
27582759
)
27592760
tm.assert_series_equal(result, expected)
@@ -2767,7 +2768,7 @@ def test_dt_tz_convert(unit):
27672768
)
27682769
result = ser.dt.tz_convert("US/Eastern")
27692770
expected = pd.Series(
2770-
[datetime(year=2023, month=1, day=2, hour=3), None],
2771+
[ser[0].tz_convert("US/Eastern"), None],
27712772
dtype=ArrowDtype(pa.timestamp(unit, "US/Eastern")),
27722773
)
27732774
tm.assert_series_equal(result, expected)
@@ -3548,3 +3549,30 @@ def test_arrow_json_type():
35483549
dtype = ArrowDtype(pa.json_(pa.string()))
35493550
result = dtype.type
35503551
assert result == str
3552+
3553+
3554+
def test_timestamp_dtype_disallows_decimal():
3555+
# GH#61773 constructing with pyarrow timestamp dtype should disallow
3556+
# Decimal NaN, just like pd.to_datetime
3557+
vals = [pd.Timestamp("2016-01-02 03:04:05"), Decimal("NaN")]
3558+
3559+
msg = "<class 'decimal.Decimal'> is not convertible to datetime"
3560+
with pytest.raises(TypeError, match=msg):
3561+
# Check that the non-pyarrow version raises as expected
3562+
pd.to_datetime(vals)
3563+
3564+
with pytest.raises(TypeError, match=msg):
3565+
pd.array(vals, dtype=ArrowDtype(pa.timestamp("us")))
3566+
3567+
3568+
def test_timestamp_dtype_matches_to_datetime():
3569+
# GH#61775
3570+
dtype1 = "datetime64[ns, US/Eastern]"
3571+
dtype2 = "timestamp[ns, US/Eastern][pyarrow]"
3572+
3573+
ts = pd.Timestamp("2025-07-03 18:10")
3574+
3575+
result = pd.Series([ts], dtype=dtype2)
3576+
expected = pd.Series([ts], dtype=dtype1).convert_dtypes(dtype_backend="pyarrow")
3577+
3578+
tm.assert_series_equal(result, expected)

pandas/tests/io/test_sql.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
time,
99
timedelta,
1010
)
11+
from decimal import Decimal
1112
from io import StringIO
1213
from pathlib import Path
1314
import sqlite3
@@ -1038,6 +1039,12 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request):
10381039
def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture):
10391040
# GH 52046
10401041
pytest.importorskip("pyarrow")
1042+
if isinstance(nulls_fixture, Decimal):
1043+
pytest.skip(
1044+
# GH#61773
1045+
reason="Decimal('NaN') not supported in constructor for timestamp dtype"
1046+
)
1047+
10411048
df = DataFrame(
10421049
{
10431050
"datetime": pd.array(

0 commit comments

Comments
 (0)