Skip to content

Commit 4f11dab

Browse files
committed
Removed Arrow support
1 parent e87e030 commit 4f11dab

File tree

2 files changed

+8
-77
lines changed

2 files changed

+8
-77
lines changed

pandas/core/arrays/arrow/array.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2215,9 +2215,7 @@ def _replace_with_mask(
22152215
def _to_masked(self):
22162216
pa_dtype = self._pa_array.type
22172217

2218-
if pa.types.is_floating(pa_dtype):
2219-
na_value = np.nan
2220-
elif pa.types.is_integer(pa_dtype):
2218+
if pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype):
22212219
na_value = 1
22222220
elif pa.types.is_boolean(pa_dtype):
22232221
na_value = True
@@ -2239,6 +2237,12 @@ def _groupby_op(
22392237
ids: npt.NDArray[np.intp],
22402238
**kwargs,
22412239
):
2240+
if how in ["sum", "prod", "mean", "median", "var", "sem", "std", "min", "max"]:
2241+
if "skipna" in kwargs and not kwargs["skipna"]:
2242+
raise NotImplementedError(
2243+
f"method '{how}' with skipna=False not implemented for Arrow dtypes"
2244+
)
2245+
22422246
if isinstance(self.dtype, StringDtype):
22432247
return super()._groupby_op(
22442248
how=how,

pandas/tests/groupby/test_reductions.py

Lines changed: 1 addition & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from string import ascii_lowercase
44

55
import numpy as np
6-
import pyarrow as pa
76
import pytest
87

98
from pandas._libs.tslibs import iNaT
@@ -1057,27 +1056,11 @@ def scipy_sem(*args, **kwargs):
10571056
"sum",
10581057
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
10591058
),
1060-
(
1061-
"sum",
1062-
Series(
1063-
pd.array(
1064-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1065-
)
1066-
),
1067-
),
10681059
("min", [-1.0, 1.2, -1.1, 1.5, np.nan, 1.0]),
10691060
(
10701061
"min",
10711062
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
10721063
),
1073-
(
1074-
"min",
1075-
Series(
1076-
pd.array(
1077-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1078-
)
1079-
),
1080-
),
10811064
(
10821065
"min",
10831066
[
@@ -1105,14 +1088,6 @@ def scipy_sem(*args, **kwargs):
11051088
"max",
11061089
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
11071090
),
1108-
(
1109-
"max",
1110-
Series(
1111-
pd.array(
1112-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1113-
)
1114-
),
1115-
),
11161091
(
11171092
"max",
11181093
[
@@ -1140,14 +1115,6 @@ def scipy_sem(*args, **kwargs):
11401115
"mean",
11411116
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
11421117
),
1143-
(
1144-
"mean",
1145-
Series(
1146-
pd.array(
1147-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1148-
)
1149-
),
1150-
),
11511118
(
11521119
"mean",
11531120
[
@@ -1175,14 +1142,6 @@ def scipy_sem(*args, **kwargs):
11751142
"median",
11761143
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
11771144
),
1178-
(
1179-
"median",
1180-
Series(
1181-
pd.array(
1182-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1183-
)
1184-
),
1185-
),
11861145
(
11871146
"median",
11881147
[
@@ -1210,53 +1169,21 @@ def scipy_sem(*args, **kwargs):
12101169
"prod",
12111170
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
12121171
),
1213-
(
1214-
"prod",
1215-
Series(
1216-
pd.array(
1217-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1218-
)
1219-
),
1220-
),
12211172
("sem", [-1.0, 1.2, -1.1, 1.5, np.nan, 1.0]),
12221173
(
12231174
"sem",
12241175
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
12251176
),
1226-
(
1227-
"sem",
1228-
Series(
1229-
pd.array(
1230-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1231-
)
1232-
),
1233-
),
12341177
("std", [-1.0, 1.2, -1.1, 1.5, np.nan, 1.0]),
12351178
(
12361179
"std",
12371180
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
12381181
),
1239-
(
1240-
"std",
1241-
Series(
1242-
pd.array(
1243-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1244-
)
1245-
),
1246-
),
12471182
("var", [-1.0, 1.2, -1.1, 1.5, np.nan, 1.0]),
12481183
(
12491184
"var",
12501185
Series(pd.array([-1.0, 1.2, -1.1, 1.5, np.nan, 1.0], dtype="Float64")),
12511186
),
1252-
(
1253-
"var",
1254-
Series(
1255-
pd.array(
1256-
[1.0, 2.0, 3.0, np.nan, 4.0, 5.0], dtype=pd.ArrowDtype(pa.float64())
1257-
)
1258-
),
1259-
),
12601187
("any", [-1.0, 1.2, -1.1, 1.5, np.nan, 1.0]),
12611188
("all", [-1.0, 1.2, -1.1, 1.5, np.nan, 1.0]),
12621189
("skew", [-1.0, 1.2, -1.1, 1.5, np.nan, 1.0]),
@@ -1419,4 +1346,4 @@ def test_groupby_std_datetimelike():
14191346
td4 = pd.Timedelta("2886 days 00:42:34.664668096")
14201347
exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5))
14211348
expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser})
1422-
tm.assert_frame_equal(result, expected)
1349+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)