Skip to content

TST (string dtype): fix groupby xfails with using_infer_string + update error message #59430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Nov 8, 2024
Merged
14 changes: 14 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
@@ -2303,6 +2303,20 @@ def _groupby_op(
**kwargs,
):
if isinstance(self.dtype, StringDtype):
if how in [
"prod",
"mean",
"median",
"cumsum",
"cumprod",
"std",
"sem",
"var",
"skew",
]:
raise TypeError(
f"dtype '{self.dtype}' does not support operation '{how}'"
)
return super()._groupby_op(
how=how,
has_dropped_na=has_dropped_na,
14 changes: 14 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
@@ -2608,6 +2608,20 @@ def _groupby_op(
# GH#43682
if isinstance(self.dtype, StringDtype):
# StringArray
if op.how in [
"prod",
"mean",
"median",
"cumsum",
"cumprod",
"std",
"sem",
"var",
"skew",
]:
raise TypeError(
f"dtype '{self.dtype}' does not support operation '{how}'"
)
if op.how not in ["any", "all"]:
# Fail early to avoid conversion to object
op._get_cython_function(op.kind, op.how, np.dtype(object), False)
4 changes: 2 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
@@ -4162,9 +4162,9 @@ def quantile(
starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups)

def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
if is_object_dtype(vals.dtype):
if isinstance(vals.dtype, StringDtype) or is_object_dtype(vals.dtype):
raise TypeError(
"'quantile' cannot be performed against 'object' dtypes!"
f"dtype '{vals.dtype}' does not support operation 'quantile'"
)

inference: DtypeObj | None = None
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
@@ -2113,7 +2113,7 @@ def test_unstack_period_frame(self):
@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
def test_stack_multiple_bug(self, future_stack):
def test_stack_multiple_bug(self, future_stack, using_infer_string):
# bug when some uniques are not present in the data GH#3170
id_col = ([1] * 3) + ([2] * 3)
name = (["a"] * 3) + (["b"] * 3)
@@ -2125,6 +2125,8 @@ def test_stack_multiple_bug(self, future_stack):
multi.columns.name = "Params"
unst = multi.unstack("ID")
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
unst.resample("W-THU").mean()
down = unst.resample("W-THU").mean(numeric_only=True)
4 changes: 2 additions & 2 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
@@ -148,11 +148,11 @@ def test_cython_agg_return_dict():

def test_cython_fail_agg():
dr = bdate_range("1/1/2000", periods=50)
ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr)
ts = Series(["A", "B", "C", "D", "E"] * 10, dtype=object, index=dr)

grouped = ts.groupby(lambda x: x.month)
summed = grouped.sum()
expected = grouped.agg(np.sum)
expected = grouped.agg(np.sum).astype(object)
Comment on lines 149 to +155
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was there a specific reason you added an explicit dtype=object here? It seems you only added it in the last commit, after updating for sum() being implemented, so I think it is actually no longer needed now.

tm.assert_series_equal(summed, expected)


9 changes: 4 additions & 5 deletions pandas/tests/groupby/methods/test_quantile.py
Original file line number Diff line number Diff line change
@@ -162,7 +162,8 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby,
def test_quantile_raises():
df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"])

with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
msg = "dtype 'object' does not support operation 'quantile'"
with pytest.raises(TypeError, match=msg):
df.groupby("key").quantile()


@@ -241,7 +242,6 @@ def test_groupby_quantile_nullable_array(values, q):
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
@pytest.mark.parametrize("numeric_only", [True, False])
def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
@@ -251,9 +251,8 @@ def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
expected = df.groupby("a")[["b"]].quantile(q)
tm.assert_frame_equal(result, expected)
else:
with pytest.raises(
TypeError, match="'quantile' cannot be performed against 'object' dtypes!"
):
msg = "dtype '.*' does not support operation 'quantile'"
with pytest.raises(TypeError, match=msg):
df.groupby("a").quantile(q, numeric_only=numeric_only)


56 changes: 42 additions & 14 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
@@ -425,7 +425,7 @@ def test_frame_multi_key_function_list():
tm.assert_frame_equal(agged, expected)


def test_frame_multi_key_function_list_partial_failure():
def test_frame_multi_key_function_list_partial_failure(using_infer_string):
data = DataFrame(
{
"A": [
@@ -476,6 +476,8 @@ def test_frame_multi_key_function_list_partial_failure():
grouped = data.groupby(["A", "B"])
funcs = ["mean", "std"]
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
grouped.agg(funcs)

@@ -662,9 +664,11 @@ def test_groupby_multi_corner(df):
tm.assert_frame_equal(agged, expected)


def test_raises_on_nuisance(df):
def test_raises_on_nuisance(df, using_infer_string):
grouped = df.groupby("A")
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
grouped.agg("mean")
with pytest.raises(TypeError, match=msg):
@@ -699,15 +703,18 @@ def test_keep_nuisance_agg(df, agg_function):
["sum", "mean", "prod", "std", "var", "sem", "median"],
)
@pytest.mark.parametrize("numeric_only", [True, False])
def test_omit_nuisance_agg(df, agg_function, numeric_only):
def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
# GH 38774, GH 38815
grouped = df.groupby("A")

no_drop_nuisance = ("var", "std", "sem", "mean", "prod", "median")
if agg_function in no_drop_nuisance and not numeric_only:
# Added numeric_only as part of GH#46560; these do not drop nuisance
# columns when numeric_only is False
if agg_function in ("std", "sem"):
if using_infer_string:
msg = f"dtype 'str' does not support operation '{agg_function}'"
klass = TypeError
elif agg_function in ("std", "sem"):
klass = ValueError
msg = "could not convert string to float: 'one'"
else:
@@ -728,16 +735,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only):
tm.assert_frame_equal(result, expected)


def test_raise_on_nuisance_python_single(df):
def test_raise_on_nuisance_python_single(df, using_infer_string):
# GH 38815
grouped = df.groupby("A")
with pytest.raises(ValueError, match="could not convert"):

err = ValueError
msg = "could not convert"
if using_infer_string:
err = TypeError
msg = "dtype 'str' does not support operation 'skew'"
with pytest.raises(err, match=msg):
grouped.skew()


def test_raise_on_nuisance_python_multiple(three_group):
def test_raise_on_nuisance_python_multiple(three_group, using_infer_string):
grouped = three_group.groupby(["A", "B"])
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
grouped.agg("mean")
with pytest.raises(TypeError, match=msg):
@@ -775,12 +790,16 @@ def test_nonsense_func():
df.groupby(lambda x: x + "foo")


def test_wrap_aggregated_output_multindex(multiindex_dataframe_random_data):
def test_wrap_aggregated_output_multindex(
multiindex_dataframe_random_data, using_infer_string
):
df = multiindex_dataframe_random_data.T
df["baz", "two"] = "peekaboo"

keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
df.groupby(keys).agg("mean")
agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean")
@@ -960,8 +979,10 @@ def test_groupby_with_hier_columns():

def test_grouping_ndarray(df):
grouped = df.groupby(df["A"].values)
grouped2 = df.groupby(df["A"].rename(None))

result = grouped.sum()
expected = df.groupby(df["A"].rename(None)).sum()
expected = grouped2.sum()
tm.assert_frame_equal(result, expected)


@@ -1457,8 +1478,8 @@ def test_no_dummy_key_names(df):
result = df.groupby(df["A"].values).sum()
assert result.index.name is None

result = df.groupby([df["A"].values, df["B"].values]).sum()
assert result.index.names == (None, None)
result2 = df.groupby([df["A"].values, df["B"].values]).sum()
assert result2.index.names == (None, None)


def test_groupby_sort_multiindex_series():
@@ -1761,6 +1782,7 @@ def get_categorical_invalid_expected():
is_per = isinstance(df.dtypes.iloc[0], pd.PeriodDtype)
is_dt64 = df.dtypes.iloc[0].kind == "M"
is_cat = isinstance(values, Categorical)
is_str = isinstance(df.dtypes.iloc[0], pd.StringDtype)

if (
isinstance(values, Categorical)
@@ -1785,13 +1807,15 @@ def get_categorical_invalid_expected():

if op in ["prod", "sum", "skew"]:
# ops that require more than just ordered-ness
if is_dt64 or is_cat or is_per:
if is_dt64 or is_cat or is_per or (is_str and op != "sum"):
# GH#41291
# datetime64 -> prod and sum are invalid
if is_dt64:
msg = "datetime64 type does not support"
elif is_per:
msg = "Period type does not support"
elif is_str:
msg = f"dtype 'str' does not support operation '{op}'"
else:
msg = "category type does not support"
if op == "skew":
@@ -2714,7 +2738,7 @@ def test_obj_with_exclusions_duplicate_columns():
def test_groupby_numeric_only_std_no_result(numeric_only):
# GH 51080
dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}]
df = DataFrame(dicts_non_numeric)
df = DataFrame(dicts_non_numeric, dtype=object)
dfgb = df.groupby("a", as_index=False, sort=False)

if numeric_only:
@@ -2773,10 +2797,14 @@ def test_grouping_with_categorical_interval_columns():
def test_groupby_sum_on_nan_should_return_nan(bug_var):
# GH 24196
df = DataFrame({"A": [bug_var, bug_var, bug_var, np.nan]})
if isinstance(bug_var, str):
df = df.astype(object)
dfgb = df.groupby(lambda x: x)
result = dfgb.sum(min_count=1)

expected_df = DataFrame([bug_var, bug_var, bug_var, None], columns=["A"])
expected_df = DataFrame(
[bug_var, bug_var, bug_var, None], columns=["A"], dtype=df["A"].dtype
)
tm.assert_frame_equal(result, expected_df)


2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_groupby_subclass.py
Original file line number Diff line number Diff line change
@@ -109,7 +109,7 @@ def test_groupby_resample_preserves_subclass(obj):

df = obj(
{
"Buyer": "Carl Carl Carl Carl Joe Carl".split(),
"Buyer": Series("Carl Carl Carl Carl Joe Carl".split(), dtype=object),
"Quantity": [18, 3, 5, 1, 9, 3],
"Date": [
datetime(2013, 9, 1, 13, 0),
20 changes: 14 additions & 6 deletions pandas/tests/groupby/test_numeric_only.py
Original file line number Diff line number Diff line change
@@ -28,7 +28,8 @@ def df(self):
"group": [1, 1, 2],
"int": [1, 2, 3],
"float": [4.0, 5.0, 6.0],
"string": list("abc"),
"string": Series(["a", "b", "c"], dtype="str"),
"object": Series(["a", "b", "c"], dtype=object),
"category_string": Series(list("abc")).astype("category"),
"category_int": [7, 8, 9],
"datetime": date_range("20130101", periods=3),
@@ -40,6 +41,7 @@ def df(self):
"int",
"float",
"string",
"object",
"category_string",
"category_int",
"datetime",
@@ -112,6 +114,7 @@ def test_first_last(self, df, method):
"int",
"float",
"string",
"object",
"category_string",
"category_int",
"datetime",
@@ -159,7 +162,9 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):

# object dtypes for transformations are not implemented in Cython and
# have no Python fallback
exception = NotImplementedError if method.startswith("cum") else TypeError
exception = (
(NotImplementedError, TypeError) if method.startswith("cum") else TypeError
)

if method in ("min", "max", "cummin", "cummax", "cumsum", "cumprod"):
# The methods default to numeric_only=False and raise TypeError
@@ -170,6 +175,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
re.escape(f"agg function failed [how->{method},dtype->object]"),
# cumsum/cummin/cummax/cumprod
"function is not implemented for this dtype",
f"dtype 'str' does not support operation '{method}'",
]
)
with pytest.raises(exception, match=msg):
@@ -180,7 +186,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
"category type does not support sum operations",
re.escape(f"agg function failed [how->{method},dtype->object]"),
re.escape(f"agg function failed [how->{method},dtype->string]"),
re.escape(f"agg function failed [how->{method},dtype->str]"),
f"dtype 'str' does not support operation '{method}'",
]
)
with pytest.raises(exception, match=msg):
@@ -198,7 +204,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
f"Cannot perform {method} with non-ordered Categorical",
re.escape(f"agg function failed [how->{method},dtype->object]"),
re.escape(f"agg function failed [how->{method},dtype->string]"),
re.escape(f"agg function failed [how->{method},dtype->str]"),
f"dtype 'str' does not support operation '{method}'",
]
)
with pytest.raises(exception, match=msg):
@@ -299,7 +305,9 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
re.escape(f"agg function failed [how->{kernel},dtype->object]"),
]
)
if kernel == "idxmin":
if kernel == "quantile":
msg = "dtype 'object' does not support operation 'quantile'"
elif kernel == "idxmin":
msg = "'<' not supported between instances of 'type' and 'type'"
elif kernel == "idxmax":
msg = "'>' not supported between instances of 'type' and 'type'"
@@ -379,7 +387,7 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
# that succeed should not be allowed to fail (without deprecation, at least)
if groupby_func in fails_on_numeric_object and dtype is object:
if groupby_func == "quantile":
msg = "cannot be performed against 'object' dtypes"
msg = "dtype 'object' does not support operation 'quantile'"
else:
msg = "is not supported for object dtype"
with pytest.raises(TypeError, match=msg):
54 changes: 48 additions & 6 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
@@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
Categorical,
DataFrame,
@@ -106,10 +104,9 @@ def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""):
gb.transform(groupby_func, *args)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_string(
how, by, groupby_series, groupby_func, df_with_string_col
how, by, groupby_series, groupby_func, df_with_string_col, using_infer_string
):
df = df_with_string_col
args = get_groupby_method_args(groupby_func, df)
@@ -169,7 +166,7 @@ def test_groupby_raises_string(
TypeError,
re.escape("agg function failed [how->prod,dtype->object]"),
),
"quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
"quantile": (TypeError, "dtype 'object' does not support operation 'quantile'"),
"rank": (None, ""),
"sem": (ValueError, "could not convert string to float"),
"shift": (None, ""),
@@ -183,6 +180,37 @@ def test_groupby_raises_string(
),
}[groupby_func]

if using_infer_string:
if groupby_func in [
"prod",
"mean",
"median",
"cumsum",
"cumprod",
"std",
"sem",
"var",
"skew",
"quantile",
]:
msg = f"dtype 'str' does not support operation '{groupby_func}'"
if groupby_func in ["sem", "std", "skew"]:
# The object-dtype raises ValueError when trying to convert to numeric.
klass = TypeError
elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow":
# This doesn't go through EA._groupby_op so the message isn't controlled
# there.
msg = "operation 'truediv' not supported for dtype 'str' with dtype 'str'"
elif groupby_func == "diff" and df["d"].dtype.storage == "pyarrow":
# This doesn't go through EA._groupby_op so the message isn't controlled
# there.
msg = "operation 'sub' not supported for dtype 'str' with dtype 'str'"

elif groupby_func in ["cummin", "cummax"]:
msg = msg.replace("object", "str")
elif groupby_func == "corrwith":
msg = "Cannot perform reduction 'mean' with string dtype"

if groupby_func == "fillna":
kind = "Series" if groupby_series else "DataFrame"
warn_msg = f"{kind}GroupBy.fillna is deprecated"
@@ -211,7 +239,12 @@ def func(x):
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_string_np(
how, by, groupby_series, groupby_func_np, df_with_string_col
how,
by,
groupby_series,
groupby_func_np,
df_with_string_col,
using_infer_string,
):
# GH#50749
df = df_with_string_col
@@ -228,6 +261,15 @@ def test_groupby_raises_string_np(
"Cannot perform reduction 'mean' with string dtype",
),
}[groupby_func_np]

if using_infer_string:
if groupby_func_np is np.mean:
klass = TypeError
msg = (
f"Cannot perform reduction '{groupby_func_np.__name__}' "
"with string dtype"
)

_call_and_check(klass, msg, how, gb, groupby_func_np, ())


20 changes: 17 additions & 3 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
@@ -187,7 +187,7 @@ def test_api_compat_before_use(attr):
getattr(rs, attr)


def tests_raises_on_nuisance(test_frame):
def tests_raises_on_nuisance(test_frame, using_infer_string):
df = test_frame
df["D"] = "foo"
r = df.resample("h")
@@ -197,6 +197,8 @@ def tests_raises_on_nuisance(test_frame):

expected = r[["A", "B", "C"]].mean()
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
r.mean()
result = r.mean(numeric_only=True)
@@ -881,7 +883,9 @@ def test_end_and_end_day_origin(
("sem", lib.no_default, "could not convert string to float"),
],
)
def test_frame_downsample_method(method, numeric_only, expected_data):
def test_frame_downsample_method(
method, numeric_only, expected_data, using_infer_string
):
# GH#46442 test if `numeric_only` behave as expected for DataFrameGroupBy

index = date_range("2018-01-01", periods=2, freq="D")
@@ -898,6 +902,11 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
if method in ("var", "mean", "median", "prod"):
klass = TypeError
msg = re.escape(f"agg function failed [how->{method},dtype->")
if using_infer_string:
msg = f"dtype 'str' does not support operation '{method}'"
elif method in ["sum", "std", "sem"] and using_infer_string:
klass = TypeError
msg = f"dtype 'str' does not support operation '{method}'"
else:
klass = ValueError
msg = expected_data
@@ -932,7 +941,9 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
("last", lib.no_default, ["cat_2"]),
],
)
def test_series_downsample_method(method, numeric_only, expected_data):
def test_series_downsample_method(
method, numeric_only, expected_data, using_infer_string
):
# GH#46442 test if `numeric_only` behave as expected for SeriesGroupBy

index = date_range("2018-01-01", periods=2, freq="D")
@@ -948,8 +959,11 @@ def test_series_downsample_method(method, numeric_only, expected_data):
func(**kwargs)
elif method == "prod":
msg = re.escape("agg function failed [how->prod,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'prod'"
with pytest.raises(TypeError, match=msg):
func(**kwargs)

else:
result = func(**kwargs)
expected = Series(expected_data, index=expected_index)
4 changes: 3 additions & 1 deletion pandas/tests/reshape/merge/test_join.py
Original file line number Diff line number Diff line change
@@ -620,7 +620,7 @@ def test_join_non_unique_period_index(self):
)
tm.assert_frame_equal(result, expected)

def test_mixed_type_join_with_suffix(self):
def test_mixed_type_join_with_suffix(self, using_infer_string):
# GH #916
df = DataFrame(
np.random.default_rng(2).standard_normal((20, 6)),
@@ -631,6 +631,8 @@ def test_mixed_type_join_with_suffix(self):

grouped = df.groupby("id")
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
grouped.mean()
mn = grouped.mean(numeric_only=True)
8 changes: 6 additions & 2 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
@@ -935,12 +935,14 @@ def test_margins(self, data):
for value_col in table.columns.levels[0]:
self._check_output(table[value_col], value_col, data)

def test_no_col(self, data):
def test_no_col(self, data, using_infer_string):
# no col

# to help with a buglet
data.columns = [k * 2 for k in data.columns]
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
table = data.drop(columns="CC").pivot_table(
@@ -990,7 +992,7 @@ def test_no_col(self, data):
],
)
def test_margin_with_only_columns_defined(
self, columns, aggfunc, values, expected_columns
self, columns, aggfunc, values, expected_columns, using_infer_string
):
# GH 31016
df = DataFrame(
@@ -1014,6 +1016,8 @@ def test_margin_with_only_columns_defined(
)
if aggfunc != "sum":
msg = re.escape("agg function failed [how->mean,dtype->")
if using_infer_string:
msg = "dtype 'str' does not support operation 'mean'"
with pytest.raises(TypeError, match=msg):
df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
if "B" not in columns: