Skip to content

Commit cf1c9f7

Browse files
committed
merge with master. fix duckdb group by and enums
1 parent 3baab25 commit cf1c9f7

File tree

9 files changed

+18
-50
lines changed

9 files changed

+18
-50
lines changed

_control/data.csv

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,2 @@
11
task,data,nrow,k,na,sort,active
2-
groupby,G1_1e7_1e2_0_0,1e7,1e2,0,0,1
3-
groupby,G1_1e7_1e1_0_0,1e7,1e1,0,0,1
4-
groupby,G1_1e7_2e0_0_0,1e7,2e0,0,0,1
5-
groupby,G1_1e7_1e2_0_1,1e7,1e2,0,1,1
6-
groupby,G1_1e7_1e2_5_0,1e7,1e2,5,0,1
7-
groupby,G1_1e8_1e2_0_0,1e8,1e2,0,0,1
8-
groupby,G1_1e8_1e1_0_0,1e8,1e1,0,0,1
9-
groupby,G1_1e8_2e0_0_0,1e8,2e0,0,0,1
10-
groupby,G1_1e8_1e2_0_1,1e8,1e2,0,1,1
11-
groupby,G1_1e8_1e2_5_0,1e8,1e2,5,0,1
122
groupby,G1_1e9_1e2_0_0,1e9,1e2,0,0,1
13-
groupby,G1_1e9_1e1_0_0,1e9,1e1,0,0,1
14-
groupby,G1_1e9_2e0_0_0,1e9,2e0,0,0,1
15-
groupby,G1_1e9_1e2_0_1,1e9,1e2,0,1,1
16-
groupby,G1_1e9_1e2_5_0,1e9,1e2,5,0,1
17-
join,J1_1e7_NA_0_0,1e7,NA,0,0,1
18-
join,J1_1e7_NA_5_0,1e7,NA,5,0,1
19-
join,J1_1e7_NA_0_1,1e7,NA,0,1,1
20-
join,J1_1e8_NA_0_0,1e8,NA,0,0,1
21-
join,J1_1e8_NA_5_0,1e8,NA,5,0,1
22-
join,J1_1e8_NA_0_1,1e8,NA,0,1,1
23-
join,J1_1e9_NA_0_0,1e9,NA,0,0,1
24-
join,J1_1e9_NA_5_0,1e9,NA,5,0,1
25-
join,J1_1e9_NA_0_1,1e9,NA,0,1,1

duckdb/groupby-duckdb.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ invisible(dbExecute(con, sprintf("COPY y FROM '%s' (AUTO_DETECT TRUE)", src_grp)
4040

4141
# if there are no nulls (which our enums can't handle), make enums
4242
if (!uses_NAs) {
43-
invisible(dbExecute(con, sprintf("CREATE TYPE id1ENUM AS ENUM (SELECT id1 FROM y)", src_grp)))
44-
invisible(dbExecute(con, sprintf("CREATE TYPE id2ENUM AS ENUM (SELECT id2 FROM y)", src_grp)))
45-
invisible(dbExecute(con, sprintf("CREATE TYPE id3ENUM AS ENUM (SELECT id3 FROM y)", src_grp)))
43+
invisible(dbExecute(con, "CREATE TYPE id1ENUM AS ENUM (SELECT id1 FROM y)"))
44+
invisible(dbExecute(con, "CREATE TYPE id2ENUM AS ENUM (SELECT id2 FROM y)"))
45+
invisible(dbExecute(con, "CREATE TYPE id3ENUM AS ENUM (SELECT id3 FROM y)"))
4646

4747
invisible(dbExecute(con, "CREATE TABLE x(id1 id1ENUM, id2 id2ENUM, id3 id3ENUM, id4 INT, id5 INT, id6 INT, v1 INT, v2 INT, v3 FLOAT)"))
4848
invisible(dbExecute(con, sprintf("INSERT INTO x (SELECT * FROM y)"))
49-
invisible(dbExecute(con, "DROP TABLE IF EXISTS y"))
49+
invisible(dbExecute(con, "DROP TABLE IF EXISTS y"))
5050
} else {
5151
# otherwise rename y
5252
invisible(dbExecute(con, "ALTER TABLE y RENAME TO x"))

pandas/groupby-pandas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22

33
print("# groupby-pandas.py", flush=True)
44

pandas/join-pandas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22

33
print("# join-pandas.py", flush=True)
44

pandas/read-pandas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22

33
print("# read-pandas.py")
44

pandas/sort-pandas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22

33
print("# sort-pandas.py")
44

path.env

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
11
export JULIA_HOME=/opt/julia-1.6.1
2-
export PATH=$PATH:$JULIA_HOME/bin
32
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
4-
export JULIA_HOME=/opt/julia-1.6.1
53
export PATH=$PATH:$JULIA_HOME/bin
6-
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
7-
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

polars/join-polars.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,14 @@
2727
print("loading datasets " + data_name + ", " + y_data_name[0] + ", " + y_data_name[2] + ", " + y_data_name[2], flush=True)
2828

2929
with pl.StringCache():
30-
x = pl.read_csv(src_jn_x, dtype={"id1":pl.Int32, "id2":pl.Int32, "id3":pl.Int32, "v1":pl.Float64})
31-
x["id4"] = x["id4"].cast(pl.Categorical)
32-
x["id5"] = x["id5"].cast(pl.Categorical)
33-
x["id6"] = x["id6"].cast(pl.Categorical)
34-
small = pl.read_csv(src_jn_y[0], dtype={"id1":pl.Int32, "v2":pl.Float64})
35-
small["id4"] = small["id4"].cast(pl.Categorical)
36-
medium = pl.read_csv(src_jn_y[1], dtype={"id1":pl.Int32, "id2":pl.Int32, "v2":pl.Float64})
37-
medium["id4"] = medium["id4"].cast(pl.Categorical)
38-
medium["id5"] = medium["id5"].cast(pl.Categorical)
39-
big = pl.read_csv(src_jn_y[2], dtype={"id1":pl.Int32, "id2":pl.Int32, "id3":pl.Int32, "v2":pl.Float64})
40-
big["id4"] = big["id4"].cast(pl.Categorical)
41-
big["id5"] = big["id5"].cast(pl.Categorical)
42-
big["id6"] = big["id6"].cast(pl.Categorical)
30+
x = pl.read_csv(src_jn_x, dtypes={"id1":pl.Int32, "id2":pl.Int32, "id3":pl.Int32, "v1":pl.Float64})
31+
x = x.with_columns([x["id4"].cast(pl.Categorical),x["id5"].cast(pl.Categorical),x["id6"].cast(pl.Categorical)])
32+
small = pl.read_csv(src_jn_y[0], dtypes={"id1":pl.Int32, "v2":pl.Float64})
33+
small = small.with_columns([small["id4"].cast(pl.Categorical)])
34+
medium = pl.read_csv(src_jn_y[1], dtypes={"id1":pl.Int32, "id2":pl.Int32, "v2":pl.Float64})
35+
medium = medium.with_columns([medium["id4"].cast(pl.Categorical), medium["id5"].cast(pl.Categorical)])
36+
big = pl.read_csv(src_jn_y[2], dtypes={"id1":pl.Int32, "id2":pl.Int32, "id3":pl.Int32, "v2":pl.Float64})
37+
big = big.with_columns([big["id4"].cast(pl.Categorical), big["id5"].cast(pl.Categorical), big["id6"].cast(pl.Categorical)])
4338

4439
print(len(x), flush=True)
4540
print(len(small), flush=True)

run.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# task, used in init-setup-iteration.R
2-
export RUN_TASKS="groupby join"
2+
export RUN_TASKS="groupby"
33
# solution, used in init-setup-iteration.R
4-
export RUN_SOLUTIONS="data.table dplyr pandas pydatatable spark dask polars duckdb"
4+
export RUN_SOLUTIONS="duckdb"
55

66
# juliadf cudf clickhouse"
77

0 commit comments

Comments
 (0)