@@ -1048,17 +1048,12 @@ def scipy_sem(*args, **kwargs):
1048
1048
1049
1049
1050
1050
@pytest .mark .parametrize (
1051
- "reduction_method" ,
1052
- ["sum" , "min" , "max" , "mean" , "median" , "prod" , "sem" , "std" , "var" ],
1053
- )
1054
- def test_skipna_reduction_ops_cython (reduction_method ):
1055
- # GH15675
1056
- # Testing the skipna parameter against possible datatypes
1057
- df = DataFrame (
1051
+ "data" ,
1052
+ [
1058
1053
{
1059
1054
"l" : ["A" , "A" , "A" , "A" , "B" , "B" , "B" , "B" ],
1060
- "int " : [- 1 , 1 , - 1 , 2 , 1 , 1 , 1 , np .nan ],
1061
- "float " : [- 1.0 , 1.2 , - 1.1 , 1.5 , - 1.1 , 1.5 , np . nan , 1.0 ],
1055
+ "f " : [- 1.0 , 1.2 , - 1.1 , 1.5 , - 1.1 , 1.5 , np .nan , 1.0 ],
1056
+ "s " : ["foo" , "bar" , "baz" , "foo" , "foo" , "foo" , pd . NA , "foo" ],
1062
1057
"t" : [
1063
1058
Timestamp ("2024-01-01" ),
1064
1059
Timestamp ("2024-01-02" ),
@@ -1080,85 +1075,36 @@ def test_skipna_reduction_ops_cython(reduction_method):
1080
1075
pd .NaT ,
1081
1076
],
1082
1077
}
1083
- )
1084
-
1085
- result_cython = getattr (df .groupby ("l" ).int , reduction_method )(skipna = False )
1086
- expected = df .groupby ("l" ).int .apply (
1087
- lambda x : getattr (x , reduction_method )(skipna = False )
1088
- )
1089
- tm .assert_series_equal (result_cython , expected , check_exact = False )
1090
-
1091
- result_cython = getattr (df .groupby ("l" ).float , reduction_method )(skipna = False )
1092
- expected = df .groupby ("l" ).float .apply (
1093
- lambda x : getattr (x , reduction_method )(skipna = False )
1094
- )
1095
- tm .assert_series_equal (result_cython , expected , check_exact = False )
1096
-
1097
- if reduction_method in ["min" , "max" , "mean" , "median" , "std" ]:
1098
- result_ts = getattr (df .groupby ("l" ).t , reduction_method )(skipna = False )
1099
- expected_ts = df .groupby ("l" ).t .apply (
1100
- lambda x : getattr (x , reduction_method )(skipna = False )
1101
- )
1102
- tm .assert_series_equal (result_ts , expected_ts , check_exact = False )
1103
-
1104
- result_td = getattr (df .groupby ("l" ).td , reduction_method )(skipna = False )
1105
- expected_td = df .groupby ("l" ).td .apply (
1106
- lambda x : getattr (x , reduction_method )(skipna = False )
1107
- )
1108
- tm .assert_series_equal (result_td , expected_td , check_exact = False )
1109
-
1110
-
1078
+ ],
1079
+ )
1111
1080
@pytest .mark .parametrize (
1112
- "reduction_method" ,
1113
- ["sum" , "min" , "max" , "mean" , "median" , "prod" , "sem" , "std" , "var" ],
1081
+ "reduction_method,columns" ,
1082
+ [
1083
+ ("sum" , ["f" , "s" ]),
1084
+ ("min" , ["f" , "t" , "td" ]),
1085
+ ("max" , ["f" , "t" , "td" ]),
1086
+ ("mean" , ["f" , "t" , "td" ]),
1087
+ ("median" , ["f" , "t" , "td" ]),
1088
+ ("prod" , ["f" ]),
1089
+ ("sem" , ["f" ]),
1090
+ ("std" , ["f" , "t" , "td" ]),
1091
+ ("var" , ["f" ]),
1092
+ ("any" , ["f" ]),
1093
+ ("all" , ["f" ]),
1094
+ ("skew" , ["f" ]),
1095
+ ],
1114
1096
)
1115
- def test_skipna_reduction_ops_consistency (reduction_method ):
1097
+ def test_skipna_reduction_ops_cython (reduction_method , columns , data ):
1116
1098
# GH15675
1117
- # Testing if provinding skipna=True maintains the default functionality
1118
- df = DataFrame (
1119
- {
1120
- "l" : ["A" , "A" , "A" , "A" , "B" , "B" , "B" , "B" ],
1121
- "int" : [- 1 , 1 , - 1 , 2 , 1 , 1 , 1 , np .nan ],
1122
- "float" : [- 1.0 , 1.2 , - 1.1 , 1.5 , - 1.1 , 1.5 , np .nan , 1.0 ],
1123
- "t" : [
1124
- Timestamp ("2024-01-01" ),
1125
- Timestamp ("2024-01-02" ),
1126
- Timestamp ("2024-01-03" ),
1127
- Timestamp ("2024-01-04" ),
1128
- Timestamp ("2024-01-05" ),
1129
- Timestamp ("2024-01-06" ),
1130
- pd .NaT ,
1131
- Timestamp ("2024-01-07" ),
1132
- ],
1133
- "td" : [
1134
- pd .Timedelta (days = 1 ),
1135
- pd .Timedelta (days = 2 ),
1136
- pd .Timedelta (days = 3 ),
1137
- pd .Timedelta (days = 4 ),
1138
- pd .Timedelta (days = 5 ),
1139
- pd .Timedelta (days = 6 ),
1140
- pd .NaT ,
1141
- pd .Timedelta (days = 7 ),
1142
- ],
1143
- }
1144
- )
1145
-
1146
- result_with_arg = getattr (df .groupby ("l" ).int , reduction_method )(skipna = True )
1147
- result_default = getattr (df .groupby ("l" ).int , reduction_method )()
1148
- tm .assert_series_equal (result_with_arg , result_default , check_exact = False )
1149
-
1150
- result_with_arg = getattr (df .groupby ("l" ).float , reduction_method )(skipna = True )
1151
- result_default = getattr (df .groupby ("l" ).float , reduction_method )()
1152
- tm .assert_series_equal (result_with_arg , result_default , check_exact = False )
1153
-
1154
- if reduction_method in ["min" , "max" , "mean" , "median" , "std" ]:
1155
- result_ts_with_arg = getattr (df .groupby ("l" ).t , reduction_method )(skipna = True )
1156
- result_ts_default = getattr (df .groupby ("l" ).t , reduction_method )()
1157
- tm .assert_series_equal (result_ts_with_arg , result_ts_default , check_exact = False )
1099
+ # Testing the skipna parameter against possible datatypes
1100
+ df = DataFrame (data )
1158
1101
1159
- result_td_with_arg = getattr (df .groupby ("l" ).td , reduction_method )(skipna = True )
1160
- result_td_default = getattr (df .groupby ("l" ).td , reduction_method )()
1161
- tm .assert_series_equal (result_td_with_arg , result_td_default , check_exact = False )
1102
+ for column in columns :
1103
+ result_cython = getattr (df .groupby ("l" )[column ], reduction_method )(skipna = False )
1104
+ expected = df .groupby ("l" )[column ].apply (
1105
+ lambda x : getattr (x , reduction_method )(skipna = False )
1106
+ )
1107
+ tm .assert_series_equal (result_cython , expected , check_exact = False )
1162
1108
1163
1109
1164
1110
@pytest .mark .parametrize (
@@ -1306,31 +1252,3 @@ def test_groupby_std_datetimelike():
1306
1252
exp_ser = Series ([td1 * 2 , td1 , td1 , td1 , td4 ], index = np .arange (5 ))
1307
1253
expected = DataFrame ({"A" : exp_ser , "B" : exp_ser , "C" : exp_ser })
1308
1254
tm .assert_frame_equal (result , expected )
1309
-
1310
-
1311
- def test_skipna_string_sum ():
1312
- # GH15675
1313
- df = DataFrame (
1314
- {
1315
- "l" : ["A" , "A" , "A" , "B" , "B" , "B" ],
1316
- "v" : ["foo" , "bar" , "baz" , "foo" , pd .NA , "foo" ],
1317
- }
1318
- )
1319
-
1320
- result_cython = df .groupby ("l" ).v .sum (skipna = False )
1321
- expected = df .groupby ("l" ).v .apply (lambda x : x .sum (skipna = False ))
1322
- tm .assert_series_equal (result_cython , expected , check_exact = False )
1323
-
1324
-
1325
- def test_skipna_string_sum_consistency ():
1326
- # GH15675
1327
- df = DataFrame (
1328
- {
1329
- "l" : ["A" , "A" , "A" , "B" , "B" , "B" ],
1330
- "v" : ["foo" , "bar" , "baz" , "foo" , pd .NA , "foo" ],
1331
- }
1332
- )
1333
-
1334
- result_cython = df .groupby ("l" ).v .sum (skipna = True )
1335
- expected = df .groupby ("l" ).v .sum ()
1336
- tm .assert_series_equal (result_cython , expected , check_exact = False )
0 commit comments