Skip to content

Commit b194a87

Browse files
authored
feat/improve ruff test coverage (#1055)
* Run python tests on all currently supported python versions * Update ruff checks to select all * Ruff auto fix * Applying ruff suggestions * noqa rules updates per ruff checks * Working through more ruff suggestions * Working through more ruff suggestions * update timestamps on tests * More ruff updates * More ruff updates * Instead of importing udf static functions as variables, import * More ruff formatting suggestions * more ruff formatting suggestions * More ruff formatting * More ruff formatting * Cut off lint errors for this PR * Working through more ruff checks and disabling a bunch for now * Address CI difference from local ruff * UDWF isn't a proper abstract base class right now since users can opt in to all methods * Update pre-commit to match the version of ruff used in CI * To enable testing in python 3.9 we need numpy. Also going to the current minimal supported version * Update min required version of python to 3.9 in pyproject.toml. The other changes will come in #1043 that is soon to be merged. * Suppress UP035 * ruff format
1 parent 9d634de commit b194a87

40 files changed

+697
-599
lines changed

.github/workflows/test.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@ jobs:
3333
fail-fast: false
3434
matrix:
3535
python-version:
36+
- "3.9"
3637
- "3.10"
3738
- "3.11"
3839
- "3.12"
40+
- "3.13"
3941
toolchain:
4042
- "stable"
4143

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ repos:
2222
- id: actionlint-docker
2323
- repo: https://github.com/astral-sh/ruff-pre-commit
2424
# Ruff version.
25-
rev: v0.3.0
25+
rev: v0.9.10
2626
hooks:
2727
# Run the linter.
2828
- id: ruff

benchmarks/tpch/tpch.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@ def bench(data_path, query_path):
5959
end = time.time()
6060
time_millis = (end - start) * 1000
6161
total_time_millis += time_millis
62-
print("setup,{}".format(round(time_millis, 1)))
63-
results.write("setup,{}\n".format(round(time_millis, 1)))
62+
print(f"setup,{round(time_millis, 1)}")
63+
results.write(f"setup,{round(time_millis, 1)}\n")
6464
results.flush()
6565

6666
# run queries
6767
for query in range(1, 23):
68-
with open("{}/q{}.sql".format(query_path, query)) as f:
68+
with open(f"{query_path}/q{query}.sql") as f:
6969
text = f.read()
7070
tmp = text.split(";")
7171
queries = []
@@ -83,14 +83,14 @@ def bench(data_path, query_path):
8383
end = time.time()
8484
time_millis = (end - start) * 1000
8585
total_time_millis += time_millis
86-
print("q{},{}".format(query, round(time_millis, 1)))
87-
results.write("q{},{}\n".format(query, round(time_millis, 1)))
86+
print(f"q{query},{round(time_millis, 1)}")
87+
results.write(f"q{query},{round(time_millis, 1)}\n")
8888
results.flush()
8989
except Exception as e:
9090
print("query", query, "failed", e)
9191

92-
print("total,{}".format(round(total_time_millis, 1)))
93-
results.write("total,{}\n".format(round(total_time_millis, 1)))
92+
print(f"total,{round(total_time_millis, 1)}")
93+
results.write(f"total,{round(total_time_millis, 1)}\n")
9494

9595

9696
if __name__ == "__main__":

dev/release/check-rat-report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
exclude_globs_filename = sys.argv[1]
3030
xml_filename = sys.argv[2]
3131

32-
globs = [line.strip() for line in open(exclude_globs_filename, "r")]
32+
globs = [line.strip() for line in open(exclude_globs_filename)]
3333

3434
tree = ET.parse(xml_filename)
3535
root = tree.getroot()

dev/release/generate-changelog.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,11 @@
2626

2727
def print_pulls(repo_name, title, pulls):
2828
if len(pulls) > 0:
29-
print("**{}:**".format(title))
29+
print(f"**{title}:**")
3030
print()
3131
for pull, commit in pulls:
32-
url = "https://github.com/{}/pull/{}".format(repo_name, pull.number)
33-
print(
34-
"- {} [#{}]({}) ({})".format(
35-
pull.title, pull.number, url, commit.author.login
36-
)
37-
)
32+
url = f"https://github.com/{repo_name}/pull/{pull.number}"
33+
print(f"- {pull.title} [#{pull.number}]({url}) ({commit.author.login})")
3834
print()
3935

4036

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
autoapi_python_class_content = "both"
7474

7575

76-
def autoapi_skip_member_fn(app, what, name, obj, skip, options):
76+
def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001
7777
skip_contents = [
7878
# Re-exports
7979
("class", "datafusion.DataFrame"),

examples/python-udwf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def __init__(self, alpha: float) -> None:
5959
def supports_bounded_execution(self) -> bool:
6060
return True
6161

62-
def get_range(self, idx: int, num_rows: int) -> tuple[int, int]:
62+
def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002
6363
# Override the default range of current row since uses_window_frame is False
6464
# So for the purpose of this test we just smooth from the previous row to
6565
# current.

examples/tpch/_tests.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,28 +27,25 @@
2727
def df_selection(col_name, col_type):
2828
if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type):
2929
return F.round(col(col_name), lit(2)).alias(col_name)
30-
elif col_type == pa.string() or col_type == pa.string_view():
30+
if col_type == pa.string() or col_type == pa.string_view():
3131
return F.trim(col(col_name)).alias(col_name)
32-
else:
33-
return col(col_name)
32+
return col(col_name)
3433

3534

3635
def load_schema(col_name, col_type):
3736
if col_type == pa.int64() or col_type == pa.int32():
3837
return col_name, pa.string()
39-
elif isinstance(col_type, pa.Decimal128Type):
38+
if isinstance(col_type, pa.Decimal128Type):
4039
return col_name, pa.float64()
41-
else:
42-
return col_name, col_type
40+
return col_name, col_type
4341

4442

4543
def expected_selection(col_name, col_type):
4644
if col_type == pa.int64() or col_type == pa.int32():
4745
return F.trim(col(col_name)).cast(col_type).alias(col_name)
48-
elif col_type == pa.string() or col_type == pa.string_view():
46+
if col_type == pa.string() or col_type == pa.string_view():
4947
return F.trim(col(col_name)).alias(col_name)
50-
else:
51-
return col(col_name)
48+
return col(col_name)
5249

5350

5451
def selections_and_schema(original_schema):

pyproject.toml

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,57 @@ features = ["substrait"]
6565

6666
# Enable docstring linting using the google style guide
6767
[tool.ruff.lint]
68-
select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"]
68+
select = ["ALL" ]
69+
ignore = [
70+
"A001", # Allow using words like min as variable names
71+
"A002", # Allow using words like filter as variable names
72+
"ANN401", # Allow Any for wrapper classes
73+
"COM812", # Recommended to ignore these rules when using with ruff-format
74+
"FIX002", # Allow TODO lines - consider removing at some point
75+
"FBT001", # Allow boolean positional args
76+
"FBT002", # Allow boolean positional args
77+
"ISC001", # Recommended to ignore these rules when using with ruff-format
78+
"SLF001", # Allow accessing private members
79+
"TD002",
80+
"TD003", # Allow TODO lines
81+
"UP007", # Disallowing Union is pedantic
82+
# TODO: Enable all of the following, but this PR is getting too large already
83+
"PT001",
84+
"ANN204",
85+
"B008",
86+
"EM101",
87+
"PLR0913",
88+
"PLR1714",
89+
"ANN201",
90+
"C400",
91+
"TRY003",
92+
"B904",
93+
"UP006",
94+
"RUF012",
95+
"FBT003",
96+
"C416",
97+
"SIM102",
98+
"PGH003",
99+
"PLR2004",
100+
"PERF401",
101+
"PD901",
102+
"EM102",
103+
"ERA001",
104+
"SIM108",
105+
"ICN001",
106+
"ANN001",
107+
"ANN202",
108+
"PTH",
109+
"N812",
110+
"INP001",
111+
"DTZ007",
112+
"PLW2901",
113+
"RET503",
114+
"RUF015",
115+
"A005",
116+
"TC001",
117+
"UP035",
118+
]
69119

70120
[tool.ruff.lint.pydocstyle]
71121
convention = "google"
@@ -75,16 +125,30 @@ max-doc-length = 88
75125

76126
# Disable docstring checking for these directories
77127
[tool.ruff.lint.per-file-ignores]
78-
"python/tests/*" = ["D"]
79-
"examples/*" = ["D", "W505"]
80-
"dev/*" = ["D"]
81-
"benchmarks/*" = ["D", "F"]
128+
"python/tests/*" = [
129+
"ANN",
130+
"ARG",
131+
"BLE001",
132+
"D",
133+
"S101",
134+
"SLF",
135+
"PD",
136+
"PLR2004",
137+
"PT011",
138+
"RUF015",
139+
"S608",
140+
"PLR0913",
141+
"PT004",
142+
]
143+
"examples/*" = ["D", "W505", "E501", "T201", "S101"]
144+
"dev/*" = ["D", "E", "T", "S", "PLR", "C", "SIM", "UP", "EXE", "N817"]
145+
"benchmarks/*" = ["D", "F", "T", "BLE", "FURB", "PLR", "E", "TD", "TRY", "S", "SIM", "EXE", "UP"]
82146
"docs/*" = ["D"]
83147

84148
[dependency-groups]
85149
dev = [
86150
"maturin>=1.8.1",
87-
"numpy>1.24.4 ; python_full_version >= '3.10'",
151+
"numpy>1.25.0",
88152
"pytest>=7.4.4",
89153
"ruff>=0.9.1",
90154
"toml>=0.10.2",

python/datafusion/__init__.py

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -48,44 +48,47 @@
4848
from .io import read_avro, read_csv, read_json, read_parquet
4949
from .plan import ExecutionPlan, LogicalPlan
5050
from .record_batch import RecordBatch, RecordBatchStream
51-
from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF
51+
from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF, udaf, udf, udwf
5252

5353
__version__ = importlib_metadata.version(__name__)
5454

5555
__all__ = [
5656
"Accumulator",
57+
"AggregateUDF",
58+
"Catalog",
5759
"Config",
58-
"DataFrame",
59-
"SessionContext",
60-
"SessionConfig",
61-
"SQLOptions",
62-
"RuntimeEnvBuilder",
63-
"Expr",
64-
"ScalarUDF",
65-
"WindowFrame",
66-
"column",
67-
"col",
68-
"literal",
69-
"lit",
7060
"DFSchema",
71-
"Catalog",
61+
"DataFrame",
7262
"Database",
73-
"Table",
74-
"AggregateUDF",
75-
"WindowUDF",
76-
"LogicalPlan",
7763
"ExecutionPlan",
64+
"Expr",
65+
"LogicalPlan",
7866
"RecordBatch",
7967
"RecordBatchStream",
68+
"RuntimeEnvBuilder",
69+
"SQLOptions",
70+
"ScalarUDF",
71+
"SessionConfig",
72+
"SessionContext",
73+
"Table",
74+
"WindowFrame",
75+
"WindowUDF",
76+
"col",
77+
"column",
8078
"common",
8179
"expr",
8280
"functions",
81+
"lit",
82+
"literal",
8383
"object_store",
84-
"substrait",
85-
"read_parquet",
8684
"read_avro",
8785
"read_csv",
8886
"read_json",
87+
"read_parquet",
88+
"substrait",
89+
"udaf",
90+
"udf",
91+
"udwf",
8992
]
9093

9194

@@ -120,10 +123,3 @@ def str_lit(value):
120123
def lit(value):
121124
"""Create a literal expression."""
122125
return Expr.literal(value)
123-
124-
125-
udf = ScalarUDF.udf
126-
127-
udaf = AggregateUDF.udaf
128-
129-
udwf = WindowUDF.udwf

0 commit comments

Comments
 (0)