|
88 | 88 | # as matching the request
|
89 | 89 |
|
90 | 90 | df = df_partsupp.join(df_supplier, (["ps_suppkey"], ["s_suppkey"]), how="inner")
|
91 |
| -df = df.join(df_part, (["ps_partkey"], ["p_partkey"]), how="inner") |
92 | 91 |
|
93 | 92 | # Locate the minimum cost across all suppliers. There are multiple ways you could do this,
|
94 | 93 | # but one way is to create a window function across all suppliers, find the minimum, and
|
|
99 | 98 | # We want to evaluate the entire data frame, so we specify this.
|
100 | 99 | window_frame = datafusion.WindowFrame("rows", None, None)
|
101 | 100 | df = df.with_column(
|
102 |
| - "min_cost", F.window("min", [col("ps_supplycost")], window_frame=window_frame) |
| 101 | + "min_cost", F.window("min", [col("ps_supplycost")], partition_by=[col("ps_partkey")], window_frame=window_frame) |
103 | 102 | )
|
104 | 103 |
|
105 | 104 | df = df.filter(col("min_cost") == col("ps_supplycost"))
|
106 | 105 |
|
| 106 | +df = df.join(df_part, (["ps_partkey"], ["p_partkey"]), how="inner") |
| 107 | + |
107 | 108 | # From the problem statement, these are the values we wish to output
|
108 | 109 |
|
109 | 110 | df = df.select_columns(
|
|
119 | 120 |
|
120 | 121 | # Sort and display 100 entries
|
121 | 122 | df = df.sort(
|
122 |
| - col("s_acctbal").sort(), |
| 123 | + col("s_acctbal").sort(ascending=False), |
123 | 124 | col("n_name").sort(),
|
124 | 125 | col("s_name").sort(),
|
125 | 126 | col("p_partkey").sort(),
|
|
0 commit comments