Skip to content

Commit 435b686

Browse files
committed
Adding retrieval time and peak RAM
1 parent 43309fb commit 435b686

File tree

1 file changed

+24
-2
lines changed

1 file changed

+24
-2
lines changed

prepare_results.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,19 @@ def fix_duplicate_runs(df):
2323
df = pl.read_parquet("results/master_list.parquet")
2424
df = fix_duplicate_runs(df)
2525
df = df.filter(~pl.col("estimator").is_in(["nojoin", "top_k_full_join"]))
26-
#%%
27-
df = df.with_columns(base_table=pl.col("base_table").str.split("-").list.first())
26+
27+
df_ram = pl.read_csv("stats/dummy_peak_ram.csv")
28+
df_query_time_retrieval = pl.read_csv(
29+
"stats/avg_query_time_for_pareto_plot_retrieval.csv"
30+
)
31+
df_query_time_all_datalakes = pl.read_csv(
32+
"stats/avg_query_time_for_pareto_plot_all_datalakes.csv"
33+
)
34+
35+
# %%
36+
df = df.with_columns(base_table=pl.col("base_table").str.split("-").list.first()).join(
37+
df_ram, on="jd_method"
38+
).with_columns(peak_ram=pl.max_horizontal("peak_ram", "peak_fit", "peak_predict", "peak_test"))
2839
# %%
2940
# General configuration (all data lakes, no Starmie)
3041
config_general = json.load(
@@ -33,6 +44,9 @@ def fix_duplicate_runs(df):
3344
df_config = prepare_config(config_general)
3445
group_keys = df_config.columns
3546
df_test = df_config.join(df, on=group_keys, how="inner")
47+
# Join the all-data-lakes query-time table on "jd_method" and derive
# total_runtime = time_run + time_query (time_query presumably comes from
# the joined CSV -- confirm against its schema).
# The result must be assigned back to df_test: join/with_columns return a
# new frame, so without the assignment the parquet file written below
# would not contain the "total_runtime" column.
df_test = df_test.join(df_query_time_all_datalakes, on="jd_method").with_columns(
    total_runtime=pl.col("time_run") + pl.col("time_query")
)
3650
df_test.write_parquet("results/results_general.parquet")
3751
# %%
3852
# Retrieval method configuration (Starmie, no 50k/open data)
@@ -42,6 +56,10 @@ def fix_duplicate_runs(df):
4256
df_config = prepare_config(config_general)
4357
group_keys = df_config.columns
4458
df_test = df_config.join(df, on=group_keys, how="inner")
59+
df_test = df_test.join(df_query_time_retrieval, on="jd_method").with_columns(
60+
total_runtime=pl.col("time_run") + pl.col("time_query")
61+
)
62+
4563
df_test.write_parquet("results/results_retrieval.parquet")
4664

4765
# %%
@@ -52,6 +70,10 @@ def fix_duplicate_runs(df):
5270
df_config = prepare_config(config_general)
5371
group_keys = df_config.columns
5472
df_test = df_config.join(df, on=group_keys, how="inner")
73+
df_test = df_test.join(df_query_time_all_datalakes, on="jd_method").with_columns(
74+
total_runtime=pl.col("time_run") + pl.col("time_query")
75+
)
76+
5577
df_test.write_parquet("results/results_aggregation.parquet")
5678

5779
# %%

0 commit comments

Comments
 (0)