From f85fb6d4ec8e240480d866d67c0db1c4bac78fe0 Mon Sep 17 00:00:00 2001 From: Rahul Pai Date: Sat, 24 May 2025 12:43:45 +0100 Subject: [PATCH 1/5] fix: Guard against dictionaries being passed to with_columns --- py-polars/polars/lazyframe/frame.py | 10 ++++++++++ py-polars/tests/unit/dataframe/test_df.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index c599f3eb5b88..0652b218ca94 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5851,6 +5851,16 @@ def with_columns( │ 4 ┆ 13.0 ┆ {1,3.0} │ └─────┴──────┴─────────────┘ """ + # Ensures that Dictionaries (as an iterable) cannot be passed in + for expr in exprs: + if isinstance(expr, dict): + raise TypeError( + "Cannot pass a Dictionary as an argument to `with_columns`.\n" + "To utilise key-value information from a dictionary, use either:\n" + " - `with_columns(**your_dict)`\n" + " - `with_columns(expr.alias(name) for name, expr in your_dict.items())`" + ) + structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0))) pyexprs = parse_into_list_of_expressions( diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 055e3ab32def..a9d3f724c9f5 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -3224,3 +3224,26 @@ def test_nan_to_null() -> None: ) assert_frame_equal(df1, df2) + + +# Below 3 tests for https://github.com/pola-rs/polars/issues/17879 + +def test_with_columns_dict_direct_typeerror(): + data = {"a": pl.col("a") * 2} + df = pl.select(a=1) + with pytest.raises(TypeError, match="Cannot pass a Dictionary as an argument to `with_columns`"): + df.with_columns(data) + + +def test_with_columns_dict_unpacking(): + data = {"a": pl.col("a") * 2} + df = pl.select(a=1).with_columns(**data) + expected = pl.DataFrame({"a": [2]}) + assert df.equals(expected) + + +def test_with_columns_generator_alias(): + data = {"a": pl.col("a") * 2} + df = pl.select(a=1).with_columns(expr.alias(name) for name, expr in data.items()) + expected = pl.DataFrame({"a": [2]}) + assert df.equals(expected) \ No newline at end of file From 59ccda1bb7a7906cb74848f766a517aef9b9d544 Mon Sep 17 00:00:00 2001 From: Rahul Pai Date: Sat, 24 May 2025 12:52:43 +0100 Subject: [PATCH 2/5] chore: linting corrections --- py-polars/polars/lazyframe/frame.py | 5 +++-- py-polars/tests/unit/dataframe/test_df.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 0652b218ca94..505762b1f9d0 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5854,12 +5854,13 @@ def with_columns( # Ensures that Dictionaries (as an iterable) cannot be passed in for expr in exprs: if isinstance(expr, dict): - raise TypeError( + msg = ( "Cannot pass a Dictionary as an argument to `with_columns`.\n" - "To utilise key-value information from a dictionary, use either:\n" + "To utilize key-value information from a dictionary, use either:\n" " - `with_columns(**your_dict)`\n" " - `with_columns(expr.alias(name) for name, expr in your_dict.items())`" ) + raise TypeError(msg) structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0))) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index a9d3f724c9f5..c334d13d2d38 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -3228,22 +3228,25 @@ def test_nan_to_null() -> None: # Below 3 tests for https://github.com/pola-rs/polars/issues/17879 -def test_with_columns_dict_direct_typeerror(): + +def test_with_columns_dict_direct_typeerror() -> None: data = {"a": pl.col("a") * 2} df = pl.select(a=1) - with pytest.raises(TypeError, match="Cannot pass a Dictionary as an argument to `with_columns`"): + with pytest.raises( + TypeError, match="Cannot pass a Dictionary as an argument to `with_columns`" + ): df.with_columns(data) -def test_with_columns_dict_unpacking(): +def test_with_columns_dict_unpacking() -> None: data = {"a": pl.col("a") * 2} df = pl.select(a=1).with_columns(**data) expected = pl.DataFrame({"a": [2]}) assert df.equals(expected) -def test_with_columns_generator_alias(): +def test_with_columns_generator_alias() -> None: data = {"a": pl.col("a") * 2} df = pl.select(a=1).with_columns(expr.alias(name) for name, expr in data.items()) expected = pl.DataFrame({"a": [2]}) - assert df.equals(expected) \ No newline at end of file + assert df.equals(expected) From 6f06619e64270562caf74aa0e46d8bb2652fc05b Mon Sep 17 00:00:00 2001 From: Rahul Pai Date: Sun, 25 May 2025 09:14:02 +0100 Subject: [PATCH 3/5] chore: modify df with_columns check to only check the outermost iterable is not a dictionary like object --- py-polars/polars/lazyframe/frame.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 505762b1f9d0..9c8983ded9a6 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5851,16 +5851,15 @@ def with_columns( │ 4 ┆ 13.0 ┆ {1,3.0} │ └─────┴──────┴─────────────┘ """ - # Ensures that Dictionaries (as an iterable) cannot be passed in - for expr in exprs: - if isinstance(expr, dict): - msg = ( - "Cannot pass a Dictionary as an argument to `with_columns`.\n" - "To utilize key-value information from a dictionary, use either:\n" - " - `with_columns(**your_dict)`\n" - " - `with_columns(expr.alias(name) for name, expr in your_dict.items())`" - ) - raise TypeError(msg) + # Ensures that the outermost element cannot be a Dictionary (as an iterable) + if len(exprs) == 1 and isinstance(exprs[0], Mapping): + msg = ( + "Cannot pass a Dictionary as an argument to `with_columns`.\n" + "To utilize key-value information from a dictionary, use either:\n" + " - `with_columns(**your_dict)`\n" + " - `with_columns(expr.alias(name) for name, expr in your_dict.items())`" + ) + raise TypeError(msg) structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0))) From da62d7a6fe83b397c9bba29c34be9407fb70aaf4 Mon Sep 17 00:00:00 2001 From: Rahul Pai Date: Mon, 26 May 2025 17:47:45 +0100 Subject: [PATCH 4/5] chore: update dictionary guard to operate at the generic level --- py-polars/polars/_utils/parse/expr.py | 14 +++++++++++++- py-polars/polars/lazyframe/frame.py | 10 ---------- py-polars/tests/unit/dataframe/test_df.py | 2 +- py-polars/tests/unit/test_projections.py | 2 +- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/py-polars/polars/_utils/parse/expr.py b/py-polars/polars/_utils/parse/expr.py index 2213f20a1b6f..d6899c2b53c9 100644 --- a/py-polars/polars/_utils/parse/expr.py +++ b/py-polars/polars/_utils/parse/expr.py @@ -1,7 +1,7 @@ from __future__ import annotations import contextlib -from collections.abc import Iterable +from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING, Any import polars._reexport as pl @@ -120,6 +120,18 @@ def _parse_inputs_as_iterable( if not inputs: return [] + # Ensures that the outermost element cannot be a Dictionary (as an iterable) + if len(inputs) == 1 and isinstance(inputs[0], Mapping): + msg = ( + "Cannot pass a dictionary as a single positional argument.\n" + "If you merely want the *keys*, use:\n" + " • df.method(*your_dict.keys())\n" + "If you need the key–value pairs, use one of:\n" + " • unpack as keywords: df.method(**your_dict)\n" + " • build expressions: df.method(expr.alias(k) for k, expr in your_dict.items())" + ) + raise TypeError(msg) + # Treat elements of a single iterable as separate inputs if len(inputs) == 1 and _is_iterable(inputs[0]): return inputs[0] diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 9c8983ded9a6..c599f3eb5b88 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5851,16 +5851,6 @@ def with_columns( │ 4 ┆ 13.0 ┆ {1,3.0} │ └─────┴──────┴─────────────┘ """ - # Ensures that the outermost element cannot be a Dictionary (as an iterable) - if len(exprs) == 1 and isinstance(exprs[0], Mapping): - msg = ( - "Cannot pass a Dictionary as an argument to `with_columns`.\n" - "To utilize key-value information from a dictionary, use either:\n" - " - `with_columns(**your_dict)`\n" - " - `with_columns(expr.alias(name) for name, expr in your_dict.items())`" - ) - raise TypeError(msg) - structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0))) pyexprs = parse_into_list_of_expressions( diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index c334d13d2d38..2afcb3958bb1 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -3233,7 +3233,7 @@ def test_with_columns_dict_direct_typeerror() -> None: data = {"a": pl.col("a") * 2} df = pl.select(a=1) with pytest.raises( - TypeError, match="Cannot pass a Dictionary as an argument to `with_columns`" + TypeError, match="Cannot pass a dictionary as a single positional argument" ): df.with_columns(data) diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py index fbab58275c85..fbf355ac48c7 100644 --- a/py-polars/tests/unit/test_projections.py +++ b/py-polars/tests/unit/test_projections.py @@ -356,7 +356,7 @@ def test_projection_join_names_9955() -> None: how="inner", ) - q = q.select(batting.collect_schema()) + q = q.select(*batting.collect_schema().keys()) assert q.collect().schema == { "playerID": pl.String, From 35e6b3cdc39a9c82e65c9242cbf59b0c245412c5 Mon Sep 17 00:00:00 2001 From: Rahul Pai Date: Mon, 26 May 2025 17:49:04 +0100 Subject: [PATCH 5/5] chore: correct linting error --- py-polars/polars/_utils/parse/expr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/_utils/parse/expr.py b/py-polars/polars/_utils/parse/expr.py index d6899c2b53c9..d5b72e08cce9 100644 --- a/py-polars/polars/_utils/parse/expr.py +++ b/py-polars/polars/_utils/parse/expr.py @@ -126,7 +126,7 @@ def _parse_inputs_as_iterable( "Cannot pass a dictionary as a single positional argument.\n" "If you merely want the *keys*, use:\n" " • df.method(*your_dict.keys())\n" - "If you need the key–value pairs, use one of:\n" + "If you need the key value pairs, use one of:\n" " • unpack as keywords: df.method(**your_dict)\n" " • build expressions: df.method(expr.alias(k) for k, expr in your_dict.items())" )