Skip to content

Commit 2518696

Browse files
Reworked suggestions
Co-authored-by: Tiago Firmino <[email protected]>
1 parent 8f61fda commit 2518696

File tree

6 files changed

+38
-154
lines changed

6 files changed

+38
-154
lines changed

pandas/_libs/groupby.pyx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ def group_sum(
746746
else:
747747
sumx[lab, j] = val
748748
compensation[lab, j] = 0
749-
break
749+
continue
750750

751751
nobs[lab, j] += 1
752752

@@ -831,7 +831,7 @@ def group_prod(
831831
elif not skipna:
832832
prodx[lab, j] = val
833833
nobs[lab, j] = 0
834-
break
834+
continue
835835

836836
_check_below_mincount(
837837
out, uses_mask, result_mask, ncounts, K, nobs, min_count, prodx
@@ -900,7 +900,7 @@ def group_var(
900900
if not skipna and isna_entry:
901901
out[lab, j] = val
902902
nobs[lab, j] = 0
903-
break
903+
continue
904904

905905
elif not isna_entry:
906906
nobs[lab, j] += 1
@@ -1109,7 +1109,7 @@ def group_mean(
11091109
if not skipna and isna_entry:
11101110
sumx[lab, j] = nan_val
11111111
nobs[lab, j] = 0
1112-
break
1112+
continue
11131113

11141114
elif not isna_entry:
11151115
nobs[lab, j] += 1
@@ -1766,7 +1766,7 @@ cdef group_min_max(
17661766
if not skipna and isna_entry:
17671767
group_min_or_max[lab, j] = val
17681768
nobs[lab, j] = 0
1769-
break
1769+
continue
17701770

17711771
elif not isna_entry:
17721772
nobs[lab, j] += 1

pandas/core/_numba/executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ def column_looper(
8181
labels,
8282
ngroups,
8383
min_periods,
84-
*args,
8584
skipna,
85+
*args,
8686
)
8787
result[i] = output
8888
if len(na_pos) > 0:

pandas/core/_numba/kernels/min_max_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ def grouped_min_max(
8888
labels: npt.NDArray[np.intp],
8989
ngroups: int,
9090
min_periods: int,
91+
skipna: bool,
9192
is_max: bool,
92-
skipna: bool = True,
9393
) -> tuple[np.ndarray, list[int]]:
9494
N = len(labels)
9595
nobs = np.zeros(ngroups, dtype=np.int64)

pandas/core/_numba/kernels/var_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,8 @@ def grouped_var(
176176
labels: npt.NDArray[np.intp],
177177
ngroups: int,
178178
min_periods: int,
179-
ddof: int = 1,
180179
skipna: bool = True,
180+
ddof: int = 1,
181181
) -> tuple[np.ndarray, list[int]]:
182182
N = len(labels)
183183

pandas/tests/groupby/test_numba.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -81,48 +81,14 @@ def test_skipna_numba(self, numba_method):
8181
df = DataFrame(
8282
{
8383
"l": ["A", "A", "A", "B", "B", "B"],
84-
"int": [-1, 1, -1, 1, 1, np.nan],
8584
"float": [-1.0, 1.2, -1.1, 1.5, 1.0, np.nan],
8685
}
8786
)
8887

89-
result_numba = getattr(df.groupby("l").int, numba_method)(
90-
skipna=False, engine="numba"
91-
)
92-
expected = df.groupby("l").int.apply(
93-
lambda x: getattr(x, numba_method)(skipna=False)
94-
)
95-
tm.assert_series_equal(result_numba, expected, check_exact=False)
96-
9788
result_numba = getattr(df.groupby("l").float, numba_method)(
9889
skipna=False, engine="numba"
9990
)
10091
expected = df.groupby("l").float.apply(
10192
lambda x: getattr(x, numba_method)(skipna=False)
10293
)
10394
tm.assert_series_equal(result_numba, expected, check_exact=False)
104-
105-
@pytest.mark.parametrize(
106-
"numba_method", ["sum", "min", "max", "std", "var", "mean"]
107-
)
108-
def test_skipna_consistency_numba(self, numba_method):
109-
# GH15675
110-
df = DataFrame(
111-
{
112-
"l": ["A", "A", "A", "B", "B", "B"],
113-
"int": [-1, 1, -1, 1, 1, np.nan],
114-
"float": [-1.0, 1.2, -1.1, 1.5, 1.0, np.nan],
115-
}
116-
)
117-
118-
result_with_arg = getattr(df.groupby("l").int, numba_method)(
119-
skipna=True, engine="numba"
120-
)
121-
result_default = getattr(df.groupby("l").int, numba_method)(engine="numba")
122-
tm.assert_series_equal(result_with_arg, result_default, check_exact=False)
123-
124-
result_with_arg = getattr(df.groupby("l").float, numba_method)(
125-
skipna=True, engine="numba"
126-
)
127-
result_default = getattr(df.groupby("l").float, numba_method)(engine="numba")
128-
tm.assert_series_equal(result_with_arg, result_default, check_exact=False)

pandas/tests/groupby/test_reductions.py

Lines changed: 30 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,17 +1048,12 @@ def scipy_sem(*args, **kwargs):
10481048

10491049

10501050
@pytest.mark.parametrize(
1051-
"reduction_method",
1052-
["sum", "min", "max", "mean", "median", "prod", "sem", "std", "var"],
1053-
)
1054-
def test_skipna_reduction_ops_cython(reduction_method):
1055-
# GH15675
1056-
# Testing the skipna parameter against possible datatypes
1057-
df = DataFrame(
1051+
"data",
1052+
[
10581053
{
10591054
"l": ["A", "A", "A", "A", "B", "B", "B", "B"],
1060-
"int": [-1, 1, -1, 2, 1, 1, 1, np.nan],
1061-
"float": [-1.0, 1.2, -1.1, 1.5, -1.1, 1.5, np.nan, 1.0],
1055+
"f": [-1.0, 1.2, -1.1, 1.5, -1.1, 1.5, np.nan, 1.0],
1056+
"s": ["foo", "bar", "baz", "foo", "foo", "foo", pd.NA, "foo"],
10621057
"t": [
10631058
Timestamp("2024-01-01"),
10641059
Timestamp("2024-01-02"),
@@ -1080,85 +1075,36 @@ def test_skipna_reduction_ops_cython(reduction_method):
10801075
pd.NaT,
10811076
],
10821077
}
1083-
)
1084-
1085-
result_cython = getattr(df.groupby("l").int, reduction_method)(skipna=False)
1086-
expected = df.groupby("l").int.apply(
1087-
lambda x: getattr(x, reduction_method)(skipna=False)
1088-
)
1089-
tm.assert_series_equal(result_cython, expected, check_exact=False)
1090-
1091-
result_cython = getattr(df.groupby("l").float, reduction_method)(skipna=False)
1092-
expected = df.groupby("l").float.apply(
1093-
lambda x: getattr(x, reduction_method)(skipna=False)
1094-
)
1095-
tm.assert_series_equal(result_cython, expected, check_exact=False)
1096-
1097-
if reduction_method in ["min", "max", "mean", "median", "std"]:
1098-
result_ts = getattr(df.groupby("l").t, reduction_method)(skipna=False)
1099-
expected_ts = df.groupby("l").t.apply(
1100-
lambda x: getattr(x, reduction_method)(skipna=False)
1101-
)
1102-
tm.assert_series_equal(result_ts, expected_ts, check_exact=False)
1103-
1104-
result_td = getattr(df.groupby("l").td, reduction_method)(skipna=False)
1105-
expected_td = df.groupby("l").td.apply(
1106-
lambda x: getattr(x, reduction_method)(skipna=False)
1107-
)
1108-
tm.assert_series_equal(result_td, expected_td, check_exact=False)
1109-
1110-
1078+
],
1079+
)
11111080
@pytest.mark.parametrize(
1112-
"reduction_method",
1113-
["sum", "min", "max", "mean", "median", "prod", "sem", "std", "var"],
1081+
"reduction_method,columns",
1082+
[
1083+
("sum", ["f", "s"]),
1084+
("min", ["f", "t", "td"]),
1085+
("max", ["f", "t", "td"]),
1086+
("mean", ["f", "t", "td"]),
1087+
("median", ["f", "t", "td"]),
1088+
("prod", ["f"]),
1089+
("sem", ["f"]),
1090+
("std", ["f", "t", "td"]),
1091+
("var", ["f"]),
1092+
("any", ["f"]),
1093+
("all", ["f"]),
1094+
("skew", ["f"]),
1095+
],
11141096
)
1115-
def test_skipna_reduction_ops_consistency(reduction_method):
1097+
def test_skipna_reduction_ops_cython(reduction_method, columns, data):
11161098
# GH15675
1117-
# Testing if providing skipna=True maintains the default functionality
1118-
df = DataFrame(
1119-
{
1120-
"l": ["A", "A", "A", "A", "B", "B", "B", "B"],
1121-
"int": [-1, 1, -1, 2, 1, 1, 1, np.nan],
1122-
"float": [-1.0, 1.2, -1.1, 1.5, -1.1, 1.5, np.nan, 1.0],
1123-
"t": [
1124-
Timestamp("2024-01-01"),
1125-
Timestamp("2024-01-02"),
1126-
Timestamp("2024-01-03"),
1127-
Timestamp("2024-01-04"),
1128-
Timestamp("2024-01-05"),
1129-
Timestamp("2024-01-06"),
1130-
pd.NaT,
1131-
Timestamp("2024-01-07"),
1132-
],
1133-
"td": [
1134-
pd.Timedelta(days=1),
1135-
pd.Timedelta(days=2),
1136-
pd.Timedelta(days=3),
1137-
pd.Timedelta(days=4),
1138-
pd.Timedelta(days=5),
1139-
pd.Timedelta(days=6),
1140-
pd.NaT,
1141-
pd.Timedelta(days=7),
1142-
],
1143-
}
1144-
)
1145-
1146-
result_with_arg = getattr(df.groupby("l").int, reduction_method)(skipna=True)
1147-
result_default = getattr(df.groupby("l").int, reduction_method)()
1148-
tm.assert_series_equal(result_with_arg, result_default, check_exact=False)
1149-
1150-
result_with_arg = getattr(df.groupby("l").float, reduction_method)(skipna=True)
1151-
result_default = getattr(df.groupby("l").float, reduction_method)()
1152-
tm.assert_series_equal(result_with_arg, result_default, check_exact=False)
1153-
1154-
if reduction_method in ["min", "max", "mean", "median", "std"]:
1155-
result_ts_with_arg = getattr(df.groupby("l").t, reduction_method)(skipna=True)
1156-
result_ts_default = getattr(df.groupby("l").t, reduction_method)()
1157-
tm.assert_series_equal(result_ts_with_arg, result_ts_default, check_exact=False)
1099+
# Testing the skipna parameter against possible datatypes
1100+
df = DataFrame(data)
11581101

1159-
result_td_with_arg = getattr(df.groupby("l").td, reduction_method)(skipna=True)
1160-
result_td_default = getattr(df.groupby("l").td, reduction_method)()
1161-
tm.assert_series_equal(result_td_with_arg, result_td_default, check_exact=False)
1102+
for column in columns:
1103+
result_cython = getattr(df.groupby("l")[column], reduction_method)(skipna=False)
1104+
expected = df.groupby("l")[column].apply(
1105+
lambda x: getattr(x, reduction_method)(skipna=False)
1106+
)
1107+
tm.assert_series_equal(result_cython, expected, check_exact=False)
11621108

11631109

11641110
@pytest.mark.parametrize(
@@ -1306,31 +1252,3 @@ def test_groupby_std_datetimelike():
13061252
exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5))
13071253
expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser})
13081254
tm.assert_frame_equal(result, expected)
1309-
1310-
1311-
def test_skipna_string_sum():
1312-
# GH15675
1313-
df = DataFrame(
1314-
{
1315-
"l": ["A", "A", "A", "B", "B", "B"],
1316-
"v": ["foo", "bar", "baz", "foo", pd.NA, "foo"],
1317-
}
1318-
)
1319-
1320-
result_cython = df.groupby("l").v.sum(skipna=False)
1321-
expected = df.groupby("l").v.apply(lambda x: x.sum(skipna=False))
1322-
tm.assert_series_equal(result_cython, expected, check_exact=False)
1323-
1324-
1325-
def test_skipna_string_sum_consistency():
1326-
# GH15675
1327-
df = DataFrame(
1328-
{
1329-
"l": ["A", "A", "A", "B", "B", "B"],
1330-
"v": ["foo", "bar", "baz", "foo", pd.NA, "foo"],
1331-
}
1332-
)
1333-
1334-
result_cython = df.groupby("l").v.sum(skipna=True)
1335-
expected = df.groupby("l").v.sum()
1336-
tm.assert_series_equal(result_cython, expected, check_exact=False)

0 commit comments

Comments
 (0)