BUG: Pandas concat raises RuntimeWarning: '<' not supported between i… #61608

Closed · wants to merge 1 commit
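For context, a minimal sketch of the kind of frame pair that triggers the report (hypothetical data; the exact reproducer from the issue is not shown here):

import pandas as pd

# Column labels mix ints and strings, so combining the two column indexes
# during concat can force a 0 < "a" style comparison.
df1 = pd.DataFrame([[1, 2]], columns=[0, "a"])
df2 = pd.DataFrame([[3, 4]], columns=["b", 1])

# Before this patch, sorting the unioned mixed-type labels could surface
# the reported "'<' not supported between instances ..." warning.
out = pd.concat([df1, df2])
print(out.columns)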
14 changes: 13 additions & 1 deletion pandas/core/indexes/base.py
@@ -3105,7 +3105,19 @@ def union(self, other, sort: bool | None = None):
                 return result.sort_values()
             return result
 
-        result = self._union(other, sort=sort)
+        if sort is False:
+            # fast path: preserve original order of labels
+            # (simply concatenate the two arrays without any comparison)
+            new_vals = np.concatenate([self._values, other._values])
+            result = Index(new_vals, name=self.name)
+        else:
+            # sort==True or sort==None: call into the subclass-specific union
+            # but guard against TypeError from mixed-type comparisons
+            try:
+                result = self._union(other, sort=sort)
+            except TypeError:
+                new_vals = np.concatenate([self._values, other._values])
+                result = Index(new_vals, name=self.name)
 
         return self._wrap_setop_result(other, result)

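As a rough sketch (illustrative, not a test from this PR), the patched Index.union is intended to behave like this on mixed-type indexes:

import pandas as pd

left = pd.Index([1, 2, 3])
right = pd.Index(["a", "b"])

# sort=False takes the new fast path: values are concatenated as-is, so no
# int-vs-str '<' comparison is attempted.
unordered = left.union(right, sort=False)

# sort=None (the default) still calls _union first; if sorting the mixed
# types raises TypeError, the same concatenation fallback is used.
fallback = left.union(right)
print(list(unordered), list(fallback))

One consequence of the fast path as written is that overlapping labels are not deduplicated (np.concatenate keeps duplicates), which differs from the usual union contract.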
13 changes: 4 additions & 9 deletions pandas/core/indexes/multi.py
@@ -3911,18 +3911,13 @@ def _union(self, other, sort) -> MultiIndex:
         else:
             result = self._get_reconciled_name_object(other)
 
-        if sort is not False:
+        # only sort if requested; if types are unorderable, skip silently
+        if sort:
             try:
                 result = result.sort_values()
             except TypeError:
-                if sort is True:
-                    raise
-                warnings.warn(
-                    "The values in the array are unorderable. "
-                    "Pass `sort=False` to suppress this warning.",
-                    RuntimeWarning,
-                    stacklevel=find_stack_level(),
-                )
+                # mixed-type tuples: bail out on sorting
+                pass
         return result

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
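Sketched effect on MultiIndex (hypothetical tuples; released pandas may still warn here, this only illustrates the patched intent):

import pandas as pd

mi1 = pd.MultiIndex.from_tuples([(1, "x"), (2, "y")])
mi2 = pd.MultiIndex.from_tuples([("a", 3), ("b", 4)])

# With `if sort:` replacing `if sort is not False:`, the default sort=None
# no longer tries to sort at all, and an explicit sort=True that hits a
# TypeError on unorderable tuples is now swallowed rather than re-raised
# or converted into the old RuntimeWarning.
combined = mi1.union(mi2, sort=None)
print(combined)

Note that this is a behavior change for sort=None, which previously did attempt the sort and only warned when it failed.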
7 changes: 5 additions & 2 deletions pandas/core/reshape/concat.py
@@ -823,8 +823,11 @@ def _get_sample_object(
     return objs[0], objs
 
 
-def _concat_indexes(indexes) -> Index:
-    return indexes[0].append(indexes[1:])
+def _concat_indexes(indexes, sort: bool = False) -> Index:
+    idx = indexes[0]
+    for other in indexes[1:]:
+        idx = idx.union(other, sort=sort)
+    return idx
 
 
def validate_unique_levels(levels: list[Index]) -> None:
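A standalone approximation of the patched helper, to show the switch from append to an iterative union (the function name here is illustrative, not an import from pandas.core.reshape.concat):

from pandas import Index

def concat_indexes(indexes, sort: bool = False) -> Index:
    # Fold the indexes together with union(sort=sort) instead of
    # indexes[0].append(indexes[1:]), so no cross-type '<' comparison
    # is forced while combining them.
    idx = indexes[0]
    for other in indexes[1:]:
        idx = idx.union(other, sort=sort)
    return idx

cols = concat_indexes([Index([0, 1]), Index(["a", "b"])])
print(list(cols))

Whether overlapping labels are deduplicated now depends on which union path runs (the new sort=False fast path keeps duplicates, the regular path drops them), which is worth keeping in mind when comparing against the old append-based behavior.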
24 changes: 7 additions & 17 deletions pandas/tests/frame/test_query_eval.py
@@ -160,21 +160,13 @@ def test_query_empty_string(self):
df.query("")

def test_query_duplicate_column_name(self, engine, parser):
df = DataFrame(
{
"A": range(3),
"B": range(3),
"C": range(3)
}
).rename(columns={"B": "A"})
df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename(
columns={"B": "A"}
)

res = df.query('C == 1', engine=engine, parser=parser)
res = df.query("C == 1", engine=engine, parser=parser)

expect = DataFrame(
[[1, 1, 1]],
columns=["A", "A", "C"],
index=[1]
)
expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1])

tm.assert_frame_equal(res, expect)

@@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine):
[">=", operator.ge],
],
)
def test_query_lex_compare_strings(
self, parser, engine, op, func
):
def test_query_lex_compare_strings(self, parser, engine, op, func):
a = Series(np.random.default_rng(2).choice(list("abcde"), 20))
b = Series(np.arange(a.size))
df = DataFrame({"X": a, "Y": b})
@@ -1411,7 +1401,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
def test_expr_with_column_name_with_backtick(self):
# GH 59285
df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
result = df.query("`a``b` < 2") # noqa
result = df.query("`a``b` < 2")
expected = df[df["a`b"] < 2]
tm.assert_frame_equal(result, expected)
3 changes: 1 addition & 2 deletions scripts/check_for_inconsistent_pandas_namespace.py
@@ -30,8 +30,7 @@
from typing import NamedTuple

ERROR_MESSAGE = (
"{path}:{lineno}:{col_offset}: "
"Found both '{prefix}.{name}' and '{name}' in {path}"
"{path}:{lineno}:{col_offset}: Found both '{prefix}.{name}' and '{name}' in {path}"
)


1 change: 1 addition & 0 deletions scripts/check_test_naming.py
@@ -8,6 +8,7 @@
NOTE: if this finds a false positive, you can add the comment `# not a test` to the
class or function definition. Though hopefully that shouldn't be necessary.
"""

from __future__ import annotations

import argparse
1 change: 1 addition & 0 deletions scripts/generate_pip_deps_from_conda.py
@@ -12,6 +12,7 @@
generated with this script:
$ python scripts/generate_pip_deps_from_conda.py --compare
"""

import argparse
import pathlib
import re
1 change: 1 addition & 0 deletions scripts/pandas_errors_documented.py
@@ -6,6 +6,7 @@
pre-commit run pandas-errors-documented --all-files
"""

from __future__ import annotations

import argparse
1 change: 1 addition & 0 deletions scripts/sort_whatsnew_note.py
@@ -23,6 +23,7 @@
pre-commit run sort-whatsnew-items --all-files
"""

from __future__ import annotations

import argparse
5 changes: 1 addition & 4 deletions scripts/tests/test_check_test_naming.py
@@ -24,10 +24,7 @@
0,
),
(
"class Foo: # not a test\n"
" pass\n"
"def test_foo():\n"
" Class.foo()\n",
"class Foo: # not a test\n pass\ndef test_foo():\n Class.foo()\n",
"",
0,
),
8 changes: 2 additions & 6 deletions scripts/tests/test_inconsistent_namespace_check.py
@@ -5,14 +5,10 @@
)

BAD_FILE_0 = (
"from pandas import Categorical\n"
"cat_0 = Categorical()\n"
"cat_1 = pd.Categorical()"
"from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = pd.Categorical()"
)
BAD_FILE_1 = (
"from pandas import Categorical\n"
"cat_0 = pd.Categorical()\n"
"cat_1 = Categorical()"
"from pandas import Categorical\ncat_0 = pd.Categorical()\ncat_1 = Categorical()"
)
BAD_FILE_2 = (
"from pandas import Categorical\n"
20 changes: 9 additions & 11 deletions scripts/tests/test_validate_docstrings.py
@@ -34,8 +34,7 @@ def redundant_import(self, paramx=None, paramy=None) -> None:
--------
>>> import numpy as np
>>> import pandas as pd
>>> df = pd.DataFrame(np.ones((3, 3)),
... columns=('a', 'b', 'c'))
>>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c"))
>>> df.all(axis=1)
0 True
1 True
@@ -50,14 +49,14 @@ def unused_import(self) -> None:
Examples
--------
>>> import pandas as pdf
>>> df = pd.DataFrame(np.ones((3, 3)), columns=('a', 'b', 'c'))
>>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c"))
"""

def missing_whitespace_around_arithmetic_operator(self) -> None:
"""
Examples
--------
>>> 2+5
>>> 2 + 5
7
"""

@@ -66,14 +65,14 @@ def indentation_is_not_a_multiple_of_four(self) -> None:
Examples
--------
>>> if 2 + 5:
... pass
... pass
"""

def missing_whitespace_after_comma(self) -> None:
"""
Examples
--------
>>> df = pd.DataFrame(np.ones((3,3)),columns=('a','b', 'c'))
>>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c"))
"""

def write_array_like_with_hyphen_not_underscore(self) -> None:
@@ -227,13 +226,13 @@ def test_validate_all_ignore_errors(self, monkeypatch):
"errors": [
("ER01", "err desc"),
("ER02", "err desc"),
("ER03", "err desc")
("ER03", "err desc"),
],
"warnings": [],
"examples_errors": "",
"deprecated": True,
"file": "file1",
"file_line": "file_line1"
"file_line": "file_line1",
},
)
monkeypatch.setattr(
@@ -272,14 +271,13 @@ def test_validate_all_ignore_errors(self, monkeypatch):
None: {"ER03"},
"pandas.DataFrame.align": {"ER01"},
# ignoring an error that is not requested should be of no effect
"pandas.Index.all": {"ER03"}
}
"pandas.Index.all": {"ER03"},
},
)
# two functions * two not global ignored errors - one function ignored error
assert exit_status == 2 * 2 - 1



class TestApiItems:
@property
def api_doc(self):
69 changes: 36 additions & 33 deletions scripts/validate_docstrings.py
@@ -13,6 +13,7 @@
$ ./validate_docstrings.py
$ ./validate_docstrings.py pandas.DataFrame.head
"""

from __future__ import annotations

import argparse
@@ -69,8 +70,10 @@
}
ALL_ERRORS = set(NUMPYDOC_ERROR_MSGS).union(set(ERROR_MSGS))
duplicated_errors = set(NUMPYDOC_ERROR_MSGS).intersection(set(ERROR_MSGS))
assert not duplicated_errors, (f"Errors {duplicated_errors} exist in both pandas "
"and numpydoc, should they be removed from pandas?")
assert not duplicated_errors, (
f"Errors {duplicated_errors} exist in both pandas "
"and numpydoc, should they be removed from pandas?"
)


def pandas_error(code, **kwargs):
@@ -257,7 +260,7 @@ def pandas_validate(func_name: str):
pandas_error(
"SA05",
reference_name=rel_name,
right_reference=rel_name[len("pandas."):],
right_reference=rel_name[len("pandas.") :],
)
for rel_name in doc.see_also
if rel_name.startswith("pandas.")
@@ -365,17 +368,18 @@ def print_validate_all_results(
for func_name, res in result.items():
error_messages = dict(res["errors"])
actual_failures = set(error_messages)
expected_failures = (ignore_errors.get(func_name, set())
| ignore_errors.get(None, set()))
expected_failures = ignore_errors.get(func_name, set()) | ignore_errors.get(
None, set()
)
for err_code in actual_failures - expected_failures:
sys.stdout.write(
f'{prefix}{res["file"]}:{res["file_line"]}:'
f'{err_code}:{func_name}:{error_messages[err_code]}\n'
f"{prefix}{res['file']}:{res['file_line']}:"
f"{err_code}:{func_name}:{error_messages[err_code]}\n"
)
exit_status += 1
for err_code in ignore_errors.get(func_name, set()) - actual_failures:
sys.stdout.write(
f'{prefix}{res["file"]}:{res["file_line"]}:'
f"{prefix}{res['file']}:{res['file_line']}:"
f"{err_code}:{func_name}:"
"EXPECTED TO FAIL, BUT NOT FAILING\n"
)
@@ -384,8 +388,9 @@ def print_validate_all_results(
return exit_status


def print_validate_one_results(func_name: str,
ignore_errors: dict[str, set[str]]) -> int:
def print_validate_one_results(
func_name: str, ignore_errors: dict[str, set[str]]
) -> int:
def header(title, width=80, char="#") -> str:
full_line = char * width
side_len = (width - len(title) - 2) // 2
@@ -396,15 +401,18 @@ def header(title, width=80, char="#") -> str:

result = pandas_validate(func_name)

result["errors"] = [(code, message) for code, message in result["errors"]
if code not in ignore_errors.get(None, set())]
result["errors"] = [
(code, message)
for code, message in result["errors"]
if code not in ignore_errors.get(None, set())
]

sys.stderr.write(header(f"Docstring ({func_name})"))
sys.stderr.write(f"{result['docstring']}\n")

sys.stderr.write(header("Validation"))
if result["errors"]:
sys.stderr.write(f'{len(result["errors"])} Errors found for `{func_name}`:\n')
sys.stderr.write(f"{len(result['errors'])} Errors found for `{func_name}`:\n")
for err_code, err_desc in result["errors"]:
sys.stderr.write(f"\t{err_code}\t{err_desc}\n")
else:
@@ -431,14 +439,16 @@ def _format_ignore_errors(raw_ignore_errors):
raise ValueError(
f"Object `{obj_name}` is present in more than one "
"--ignore_errors argument. Please use it once and specify "
"the errors separated by commas.")
"the errors separated by commas."
)
ignore_errors[obj_name] = set(error_codes.split(","))

unknown_errors = ignore_errors[obj_name] - ALL_ERRORS
if unknown_errors:
raise ValueError(
f"Object `{obj_name}` is ignoring errors {unknown_errors} "
f"which are not known. Known errors are: {ALL_ERRORS}")
f"which are not known. Known errors are: {ALL_ERRORS}"
)

# global errors "PR02,ES01"
else:
@@ -448,27 +458,19 @@ def _format_ignore_errors(raw_ignore_errors):
if unknown_errors:
raise ValueError(
f"Unknown errors {unknown_errors} specified using --ignore_errors "
"Known errors are: {ALL_ERRORS}")
"Known errors are: {ALL_ERRORS}"
)

return ignore_errors


def main(
func_name,
output_format,
prefix,
ignore_deprecated,
ignore_errors
):
def main(func_name, output_format, prefix, ignore_deprecated, ignore_errors):
"""
Main entry point. Call the validation for one or for all docstrings.
"""
if func_name is None:
return print_validate_all_results(
output_format,
prefix,
ignore_deprecated,
ignore_errors
output_format, prefix, ignore_deprecated, ignore_errors
)
else:
return print_validate_one_results(func_name, ignore_errors)
@@ -524,10 +526,11 @@ def main(
args = argparser.parse_args(sys.argv[1:])

sys.exit(
main(args.function,
args.format,
args.prefix,
args.ignore_deprecated,
_format_ignore_errors(args.ignore_errors),
)
main(
args.function,
args.format,
args.prefix,
args.ignore_deprecated,
_format_ignore_errors(args.ignore_errors),
)
)
1 change: 1 addition & 0 deletions scripts/validate_exception_location.py
@@ -18,6 +18,7 @@
As a pre-commit hook:
pre-commit run validate-errors-locations --all-files
"""

from __future__ import annotations

import argparse
3 changes: 2 additions & 1 deletion scripts/validate_min_versions_in_sync.py
@@ -12,6 +12,7 @@
pre-commit run validate-min-versions-in-sync --all-files
"""

from __future__ import annotations

import pathlib
@@ -105,7 +106,7 @@ def get_operator_from(dependency: str) -> str | None:


def get_yaml_map_from(
yaml_dic: list[str | dict[str, list[str]]]
yaml_dic: list[str | dict[str, list[str]]],
) -> dict[str, list[str] | None]:
yaml_map: dict[str, list[str] | None] = {}
for dependency in yaml_dic:
4 changes: 3 additions & 1 deletion scripts/validate_rst_title_capitalization.py
@@ -11,6 +11,7 @@
From the command-line:
python scripts/validate_rst_title_capitalization.py <rst file>
"""

from __future__ import annotations

import argparse
@@ -271,7 +272,8 @@ def main(source_paths: list[str]) -> int:
if title != correct_title_capitalization(title):
print(
f"""{filename}:{line_number}:{err_msg} "{title}" to "{
correct_title_capitalization(title)}" """
correct_title_capitalization(title)
}" """
)
number_of_errors += 1

44 changes: 18 additions & 26 deletions scripts/validate_unwanted_patterns.py
@@ -179,17 +179,11 @@ def strings_with_wrong_placed_whitespace(
For example:
>>> rule = (
... "We want the space at the end of the line, "
... "not at the beginning"
... )
>>> rule = "We want the space at the end of the line, not at the beginning"
Instead of:
>>> rule = (
... "We want the space at the end of the line,"
... " not at the beginning"
... )
>>> rule = "We want the space at the end of the line, not at the beginning"
Parameters
----------
@@ -229,35 +223,29 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool:
For example, this is bad:
>>> rule = (
... "We want the space at the end of the line,"
... " not at the beginning"
... )
>>> rule = "We want the space at the end of the line, not at the beginning"
And what we want is:
>>> rule = (
... "We want the space at the end of the line, "
... "not at the beginning"
... )
>>> rule = "We want the space at the end of the line, not at the beginning"
And if the string is ending with a new line character (\n) we
do not want any trailing whitespaces after it.
For example, this is bad:
>>> rule = (
... "We want the space at the begging of "
... "the line if the previous line is ending with a \n "
... "not at the end, like always"
... "We want the space at the begging of "
... "the line if the previous line is ending with a \n "
... "not at the end, like always"
... )
And what we do want is:
>>> rule = (
... "We want the space at the begging of "
... "the line if the previous line is ending with a \n"
... " not at the end, like always"
... "We want the space at the begging of "
... "the line if the previous line is ending with a \n"
... " not at the end, like always"
... )
"""
if first_line.endswith(r"\n"):
@@ -319,10 +307,14 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int,
while nodes:
in_annotation, node = nodes.pop()
if not in_annotation and (
(isinstance(node, ast.Name) # Case `NoDefault`
and node.id == "NoDefault")
or (isinstance(node, ast.Attribute) # Cases e.g. `lib.NoDefault`
and node.attr == "NoDefault")
(
isinstance(node, ast.Name) # Case `NoDefault`
and node.id == "NoDefault"
)
or (
isinstance(node, ast.Attribute) # Cases e.g. `lib.NoDefault`
and node.attr == "NoDefault"
)
):
yield (node.lineno, "NoDefault is used not only for typing")