Skip to content

Commit b876c67

Browse files
authored
BUG: Fix Index.equals between object and string (#61541)
1 parent e635c3e commit b876c67

File tree

4 files changed

+32
-17
lines changed

4 files changed

+32
-17
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,7 @@ Indexing
758758
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
759759
- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` returning incorrect dtype when selecting from a :class:`DataFrame` with mixed data types. (:issue:`60600`)
760760
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
761+
- Bug in :meth:`Index.equals` when comparing an object-dtype :class:`Index` with a string-dtype :class:`Index`, which affected comparisons between :class:`Series` whose indexes have these dtypes (:issue:`61099`)
761762
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
762763
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
763764
- Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`)

pandas/core/indexes/base.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5481,11 +5481,7 @@ def equals(self, other: Any) -> bool:
54815481
# quickly return if the lengths are different
54825482
return False
54835483

5484-
if (
5485-
isinstance(self.dtype, StringDtype)
5486-
and self.dtype.na_value is np.nan
5487-
and other.dtype != self.dtype
5488-
):
5484+
if isinstance(self.dtype, StringDtype) and other.dtype != self.dtype:
54895485
# TODO(infer_string) can we avoid this special case?
54905486
# special case for object behavior
54915487
return other.equals(self.astype(object))

pandas/tests/frame/test_arithmetic.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
import numpy as np
1212
import pytest
1313

14-
from pandas.compat import HAS_PYARROW
15-
1614
import pandas as pd
1715
from pandas import (
1816
DataFrame,
@@ -2183,19 +2181,14 @@ def test_enum_column_equality():
21832181
tm.assert_series_equal(result, expected)
21842182

21852183

2186-
def test_mixed_col_index_dtype(using_infer_string):
2184+
def test_mixed_col_index_dtype(string_dtype_no_object):
21872185
# GH 47382
21882186
df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
21892187
df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
2190-
df1.columns = df2.columns.astype("string")
2188+
df1.columns = df2.columns.astype(string_dtype_no_object)
21912189
result = df1 + df2
21922190
expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
2193-
if using_infer_string:
2194-
# df2.columns.dtype will be "str" instead of object,
2195-
# so the aligned result will be "string", not object
2196-
if HAS_PYARROW:
2197-
dtype = "string[pyarrow]"
2198-
else:
2199-
dtype = "string"
2200-
expected.columns = expected.columns.astype(dtype)
2191+
2192+
expected.columns = expected.columns.astype(string_dtype_no_object)
2193+
22012194
tm.assert_frame_equal(result, expected)

pandas/tests/indexes/test_base.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
ensure_index,
4141
ensure_index_from_sequences,
4242
)
43+
from pandas.testing import assert_series_equal
4344

4445

4546
class TestIndex:
@@ -1717,3 +1718,27 @@ def test_is_monotonic_pyarrow_list_type():
17171718
idx = Index([[1], [2, 3]], dtype=pd.ArrowDtype(pa.list_(pa.int64())))
17181719
assert not idx.is_monotonic_increasing
17191720
assert not idx.is_monotonic_decreasing
1721+
1722+
1723+
def test_index_equals_different_string_dtype(string_dtype_no_object):
1724+
# GH 61099
1725+
idx_obj = Index(["a", "b", "c"])
1726+
idx_str = Index(["a", "b", "c"], dtype=string_dtype_no_object)
1727+
1728+
assert idx_obj.equals(idx_str)
1729+
assert idx_str.equals(idx_obj)
1730+
1731+
1732+
def test_index_comparison_different_string_dtype(string_dtype_no_object):
1733+
# GH 61099
1734+
idx = Index(["a", "b", "c"])
1735+
s_obj = Series([1, 2, 3], index=idx)
1736+
s_str = Series([4, 5, 6], index=idx.astype(string_dtype_no_object))
1737+
1738+
expected = Series([True, True, True], index=["a", "b", "c"])
1739+
result = s_obj < s_str
1740+
assert_series_equal(result, expected)
1741+
1742+
result = s_str > s_obj
1743+
expected.index = idx.astype(string_dtype_no_object)
1744+
assert_series_equal(result, expected)

0 commit comments

Comments
 (0)