@@ -389,6 +389,8 @@ def chunk_argreduce(
389
389
if not np .isnan (results ["groups" ]).all ():
390
390
# will not work for empty groups...
391
391
# glorious
392
+ # TODO: npg bug
393
+ results ["intermediates" ][1 ] = results ["intermediates" ][1 ].astype (int )
392
394
newidx = np .broadcast_to (idx , array .shape )[
393
395
np .unravel_index (results ["intermediates" ][1 ], array .shape )
394
396
]
@@ -992,6 +994,7 @@ def groupby_reduce(
992
994
isbin : bool = False ,
993
995
axis = None ,
994
996
fill_value = None ,
997
+ skipna : Optional [bool ] = None ,
995
998
min_count : Optional [int ] = None ,
996
999
split_out : int = 1 ,
997
1000
method : str = "mapreduce" ,
@@ -1020,6 +1023,16 @@ def groupby_reduce(
1020
1023
Negative integers are normalized using array.ndim
1021
1024
fill_value: Any
1022
1025
Value when a label in `expected_groups` is not present
1026
+ skipna : bool, default: None
1027
+ If True, skip missing values (as marked by NaN). By default, only
1028
+ skips missing values for float dtypes; other dtypes either do not
1029
+ have a sentinel missing value (int) or ``skipna=True`` has not been
1030
+ implemented (object, datetime64 or timedelta64).
1031
+ min_count : int, default: None
1032
+ The required number of valid values to perform the operation. If
1033
+ fewer than min_count non-NA values are present the result will be
1034
+ NA. Only used if skipna is set to True or defaults to True for the
1035
+ array's dtype.
1023
1036
split_out: int, optional
1024
1037
Number of chunks along group axis in output (last axis)
1025
1038
method: {"mapreduce", "blockwise", "cohorts"}, optional
@@ -1062,10 +1075,24 @@ def groupby_reduce(
1062
1075
f"Received array of shape { array .shape } and by of shape { by .shape } "
1063
1076
)
1064
1077
1065
- if min_count is not None and min_count > 1 and func not in ["nansum" , "nanprod" ]:
1066
- raise ValueError (
1067
- "min_count can be > 1 only for nansum, nanprod. This is an Xarray limitation."
1068
- )
1078
+ # Handle skipna here because I need to know dtype to make a good default choice.
1079
+ # We cannot handle this easily for xarray Datasets in xarray_reduce
1080
+ if skipna and func in ["all" , "any" , "count" ]:
1081
+ raise ValueError (f"skipna cannot be truthy for { func } reductions." )
1082
+
1083
+ if skipna or (skipna is None and array .dtype .kind in "cfO" ):
1084
+ if "nan" not in func and func not in ["all" , "any" , "count" ]:
1085
+ func = f"nan{ func } "
1086
+
1087
+ if min_count is not None and min_count > 1 :
1088
+ if func not in ["nansum" , "nanprod" ]:
1089
+ raise ValueError (
1090
+ "min_count can be > 1 only for nansum, nanprod."
1091
+ " or for sum, prod with skipna=True."
1092
+ " This is an Xarray limitation."
1093
+ )
1094
+ elif "nan" not in func and skipna :
1095
+ func = f"nan{ func } "
1069
1096
1070
1097
if axis is None :
1071
1098
axis = tuple (array .ndim + np .arange (- by .ndim , 0 ))
0 commit comments