@@ -17,6 +17,7 @@ cache = TRUE
17
17
on_disk = FALSE
18
18
19
19
data_name = Sys.getenv(" SRC_DATANAME" )
20
+ machine_type = Sys.getenv(" MACHINE_TYPE" )
20
21
src_jn_x = file.path(" data" , paste(data_name , " csv" , sep = " ." ))
21
22
y_data_name = join_to_tbls(data_name )
22
23
src_jn_y = setNames(file.path(" data" , paste(y_data_name , " csv" , sep = " ." )), names(y_data_name ))
@@ -46,15 +47,15 @@ t = system.time({
46
47
})[[" elapsed" ]]
47
48
m = memory_usage()
48
49
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
49
- write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
50
+ write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
50
51
rm(ans )
51
52
t = system.time({
52
53
ans <- collect(inner_join(x , small , by = " id1" ))
53
54
print(dim(ans ))
54
55
})[[" elapsed" ]]
55
56
m = memory_usage()
56
57
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
57
- write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
58
+ write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
58
59
ans <- collect(ans )
59
60
print(head(ans , 3 ))
60
61
print(tail(ans , 3 ))
@@ -68,15 +69,15 @@ t = system.time({
68
69
})[[" elapsed" ]]
69
70
m = memory_usage()
70
71
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
71
- write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
72
+ write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
72
73
rm(ans )
73
74
t = system.time({
74
75
ans <- collect(inner_join(x , medium , by = " id2" ))
75
76
print(dim(ans ))
76
77
})[[" elapsed" ]]
77
78
m = memory_usage()
78
79
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
79
- write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
80
+ write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
80
81
ans <- collect(ans )
81
82
print(head(ans , 3 ))
82
83
print(tail(ans , 3 ))
@@ -90,15 +91,15 @@ t = system.time({
90
91
})[[" elapsed" ]]
91
92
m = memory_usage()
92
93
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
93
- write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
94
+ write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
94
95
rm(ans )
95
96
t = system.time({
96
97
ans <- collect(left_join(x , medium , by = " id2" ))
97
98
print(dim(ans ))
98
99
})[[" elapsed" ]]
99
100
m = memory_usage()
100
101
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
101
- write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
102
+ write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
102
103
ans <- collect(ans )
103
104
print(head(ans , 3 ))
104
105
print(tail(ans , 3 ))
@@ -112,15 +113,15 @@ t = system.time({
112
113
})[[" elapsed" ]]
113
114
m = memory_usage()
114
115
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
115
- write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
116
+ write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
116
117
rm(ans )
117
118
t = system.time({
118
119
ans <- collect(inner_join(x , medium , by = " id5" ))
119
120
print(dim(ans ))
120
121
})[[" elapsed" ]]
121
122
m = memory_usage()
122
123
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
123
- write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
124
+ write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
124
125
ans <- collect(ans )
125
126
print(head(ans , 3 ))
126
127
print(tail(ans , 3 ))
@@ -134,15 +135,15 @@ t = system.time({
134
135
})[[" elapsed" ]]
135
136
m = memory_usage()
136
137
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
137
- write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
138
+ write.log(run = 1L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
138
139
rm(ans )
139
140
t = system.time({
140
141
ans <- collect(inner_join(x , big , by = " id3" ))
141
142
print(dim(ans ))
142
143
})[[" elapsed" ]]
143
144
m = memory_usage()
144
145
chkt = system.time(chk <- collect(summarise(ans , sum(v1 , na.rm = TRUE ), sum(v2 , na.rm = TRUE ))))[[" elapsed" ]]
145
- write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk )
146
+ write.log(run = 2L , task = task , data = data_name , in_rows = nrow(x ), question = question , out_rows = nrow(ans ), out_cols = ncol(ans ), solution = solution , version = ver , git = git , fun = fun , time_sec = t , mem_gb = m , cache = cache , chk = make_chk(chk ), chk_time_sec = chkt , on_disk = on_disk , machine_type = machine_type )
146
147
ans <- collect(ans )
147
148
print(head(ans , 3 ))
148
149
print(tail(ans , 3 ))
0 commit comments