From 46b398710707a295a53a43fb1e364e7c1ed3a205 Mon Sep 17 00:00:00 2001 From: Kirill Date: Mon, 26 Aug 2024 07:13:07 -0400 Subject: [PATCH 1/7] updating subset types --- pandas-stubs/core/frame.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 1ca7c568e..c4988f088 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -900,7 +900,7 @@ class DataFrame(NDFrame, OpsMixin): ) -> DataFrame | None: ... def drop_duplicates( self, - subset=..., + subset: Hashable | Iterable[Hashable] | None = ..., *, keep: NaPosition | _bool = ..., inplace: _bool = ..., @@ -908,7 +908,7 @@ class DataFrame(NDFrame, OpsMixin): ) -> DataFrame: ... def duplicated( self, - subset: Hashable | Sequence[Hashable] | None = ..., + subset: Hashable | Iterable[Hashable] | None = ..., keep: NaPosition | _bool = ..., ) -> Series: ... @overload From bcf38d6805c946a94fed3b85d10a68e31eb5a680 Mon Sep 17 00:00:00 2001 From: Kirill Date: Wed, 28 Aug 2024 18:14:00 -0400 Subject: [PATCH 2/7] adding pytest --- tests/test_frame.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 33aa1616e..53df355e9 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -29,7 +29,7 @@ import numpy as np import numpy.typing as npt import pandas as pd -from pandas._testing import ensure_clean +from pandas._testing import ensure_clean, assert_frame_equal from pandas.core.resample import ( DatetimeIndexResampler, Resampler, @@ -368,6 +368,33 @@ def test_types_dropna() -> None: res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) +@pytest.mark.parametrize( + "drop_arg", + [ + {"AAA"}, # set + ["AAA"], # list + ("AAA",), # tuple + {"AAA": None}, # dict + "AAA", # str + ] +) +def test_types_drop_duplicates(drop_arg) -> None: + + # GH#59237 + df = pd.DataFrame( + { + "AAA": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + expected = df[:2] + + result = df.drop_duplicates(drop_arg) + assert_frame_equal(result, expected) + + def test_types_fillna() -> None: df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) res: pd.DataFrame = df.fillna(0) From e91671b8d137396416a463620773d4a8073a3fcb Mon Sep 17 00:00:00 2001 From: Kirill Date: Wed, 28 Aug 2024 18:57:20 -0400 Subject: [PATCH 3/7] updating based on feedback --- tests/test_frame.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 53df355e9..7cd2157f0 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -29,7 +29,7 @@ import numpy as np import numpy.typing as npt import pandas as pd -from pandas._testing import ensure_clean, assert_frame_equal +from pandas._testing import ensure_clean from pandas.core.resample import ( DatetimeIndexResampler, Resampler, @@ -389,10 +389,12 @@ def test_types_drop_duplicates(drop_arg) -> None: "D": range(8), } ) - expected = df[:2] - result = df.drop_duplicates(drop_arg) - assert_frame_equal(result, expected) + check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.drop_duplicates(["AAA"]), pd.DataFrame), pd.DataFrame) + check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame) + check(assert_type(df.drop_duplicates({"AAA": None}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame) def test_types_fillna() -> None: From 7897b3141dd6a9c3d6645fadc22272f935b72949 Mon Sep 17 00:00:00 2001 From: Kirill Date: Wed, 28 Aug 2024 18:58:40 -0400 Subject: [PATCH 4/7] updating based on feedback --- tests/test_frame.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 7cd2157f0..6583bbd6b 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -368,18 +368,7 @@ def test_types_dropna() -> None: res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) -@pytest.mark.parametrize( - "drop_arg", - [ - {"AAA"}, # set - ["AAA"], # list - ("AAA",), # tuple - {"AAA": None}, # dict - "AAA", # str - ] -) -def test_types_drop_duplicates(drop_arg) -> None: - +def test_types_drop_duplicates() -> None: # GH#59237 df = pd.DataFrame( { From aaa109475c577e37bfc40c0d1b27bcff3f836c7f Mon Sep 17 00:00:00 2001 From: Kirill Date: Thu, 29 Aug 2024 19:58:05 -0400 Subject: [PATCH 5/7] updating based on feedback --- tests/test_frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 6583bbd6b..9e9e4d0d5 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -379,12 +379,14 @@ def test_types_drop_duplicates() -> None: } ) - check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates(["AAA"]), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame) - check(assert_type(df.drop_duplicates({"AAA": None}), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame) + if PD_LTE_22: + check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame) + check(assert_type(df.drop_duplicates({"AAA": None}), pd.DataFrame), pd.DataFrame) + def test_types_fillna() -> None: df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) From e2ab37264814658dde2d83ba3d397944f91b8585 Mon Sep 17 00:00:00 2001 From: Kirill Date: Tue, 3 Sep 2024 20:52:36 -0400 Subject: [PATCH 6/7] correcting conditional Co-authored-by: Irv Lustig --- tests/test_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 9e9e4d0d5..a1a66dc5c 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -383,7 +383,7 @@ def test_types_drop_duplicates() -> None: check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame) - if PD_LTE_22: + if not PD_LTE_22: check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates({"AAA": None}), pd.DataFrame), pd.DataFrame) From 253eada1ab0240c60a4c68d576cc356438763394 Mon Sep 17 00:00:00 2001 From: Kirill Date: Fri, 6 Sep 2024 16:21:39 -0400 Subject: [PATCH 7/7] complying with black formatting --- tests/test_frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index a1a66dc5c..43082143a 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -385,7 +385,9 @@ def test_types_drop_duplicates() -> None: if not PD_LTE_22: check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame) - check(assert_type(df.drop_duplicates({"AAA": None}), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.drop_duplicates({"AAA": None}), pd.DataFrame), pd.DataFrame + ) def test_types_fillna() -> None: