Skip to content

Commit ebbe69f

Browse files
committed
Minimum part cost must be identified per part, not across all parts that match the filters
1 parent 99aac68 commit ebbe69f

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

examples/tpch/q02_minimum_cost_supplier.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@
8888
# as matching the request
8989

9090
df = df_partsupp.join(df_supplier, (["ps_suppkey"], ["s_suppkey"]), how="inner")
91-
df = df.join(df_part, (["ps_partkey"], ["p_partkey"]), how="inner")
9291

9392
# Locate the minimum cost across all suppliers. There are multiple ways you could do this,
9493
# but one way is to create a window function across all suppliers, find the minimum, and
@@ -99,11 +98,13 @@
9998
# We want to evaluate every row in the window (with partitioning, every row for a given part), so we specify an unbounded frame.
10099
window_frame = datafusion.WindowFrame("rows", None, None)
101100
df = df.with_column(
102-
"min_cost", F.window("min", [col("ps_supplycost")], window_frame=window_frame)
101+
"min_cost", F.window("min", [col("ps_supplycost")], partition_by=[col("ps_partkey")], window_frame=window_frame)
103102
)
104103

105104
df = df.filter(col("min_cost") == col("ps_supplycost"))
106105

106+
df = df.join(df_part, (["ps_partkey"], ["p_partkey"]), how="inner")
107+
107108
# From the problem statement, these are the values we wish to output
108109

109110
df = df.select_columns(
@@ -119,7 +120,7 @@
119120

120121
# Sort and display 100 entries
121122
df = df.sort(
122-
col("s_acctbal").sort(),
123+
col("s_acctbal").sort(ascending=False),
123124
col("n_name").sort(),
124125
col("s_name").sort(),
125126
col("p_partkey").sort(),

0 commit comments

Comments
 (0)