forked from duckdblabs/db-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread-datatable.R
executable file
·58 lines (49 loc) · 2.94 KB
/
read-datatable.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env Rscript

# Benchmark driver for the "read" task using data.table::fread.
# This section loads the shared helpers, records solution metadata, resolves
# the input file from the SRC_GRP_LOCAL environment variable and counts the
# file's lines so the timing sections further down can log `in_rows`.
cat("# read-datatable.R\n")

source("./helpers.R")
source("./datatable/helpers-datatable.R")

suppressPackageStartupMessages(library(data.table))
ver = packageVersion("data.table")
# datatable.git() is not defined in this script; presumably it comes from the
# helper files sourced above.
git = datatable.git()
task = "read"
solution = "data.table"
fun = "fread"
cache = TRUE

src_grp = Sys.getenv("SRC_GRP_LOCAL")
# Fail fast: with an empty name the `wc -l` call below would get no file
# operand and read from stdin instead, which can hang the benchmark.
if (!nzchar(src_grp)) stop("environment variable SRC_GRP_LOCAL is not set", call.=FALSE)
# Only the file name is kept, so the file is looked up relative to the current
# working directory.
# NOTE(review): the directory part of SRC_GRP_LOCAL is discarded - confirm the
# data file is expected to be present in the working directory.
data_name = basename(src_grp)
if (!file.exists(data_name)) stop(sprintf("data file '%s' not found in working directory '%s'", data_name, getwd()), call.=FALSE)
options("datatable.showProgress"=FALSE)

# Count lines with `wc -l`. The name is shell-quoted so that spaces or shell
# metacharacters in it cannot break (or alter) the command, and the output is
# trimmed before splitting because some `wc` implementations pad the count
# with leading blanks. One line is subtracted, presumably for the CSV header.
wc_out = system(sprintf("wc -l %s", shQuote(data_name)), intern=TRUE)
in_rows = as.numeric(strsplit(trimws(wc_out[1L]), "[[:space:]]+")[[1L]][1L])-1
if (is.na(in_rows)) stop(sprintf("could not parse line count from wc output: '%s'", paste(wc_out, collapse=" ")), call.=FALSE)

cat("reading...\n")
# Question 1: read the whole file, three consecutive runs.
# Each run times fread(), records memory_usage(), computes a checksum
# (sum of column v3) with its own timing, logs everything through write.log()
# and then drops the result so the next run starts without it.
question = "all rows" #1
for (run_no in seq_len(3L)) {
  t = system.time(print(dim(ans<-fread(data_name))))[["elapsed"]]
  m = memory_usage()
  chkt = system.time(chk<-ans[, .(sum(v3))])[["elapsed"]]
  write.log(
    run=run_no, task=task, data=data_name, in_rows=in_rows, question=question,
    out_rows=nrow(ans), out_cols=ncol(ans),
    solution=solution, version=ver, git=git, fun=fun,
    time_sec=t, mem_gb=m, cache=cache,
    chk=make_chk(chk), chk_time_sec=chkt
  )
  rm(ans)
}
# Question 2: read only the first 100 rows (fread's nrows=100), three
# consecutive runs. Each run is timed, followed by a memory_usage() reading
# and a timed checksum (sum of column v3); results go to write.log() and the
# loaded table is removed before the next run.
question = "top 100 rows" #2
for (run_no in seq_len(3L)) {
  t = system.time(print(dim(ans<-fread(data_name, nrows=100))))[["elapsed"]]
  m = memory_usage()
  chkt = system.time(chk<-ans[, .(sum(v3))])[["elapsed"]]
  write.log(
    run=run_no, task=task, data=data_name, in_rows=in_rows, question=question,
    out_rows=nrow(ans), out_cols=ncol(ans),
    solution=solution, version=ver, git=git, fun=fun,
    time_sec=t, mem_gb=m, cache=cache,
    chk=make_chk(chk), chk_time_sec=chkt
  )
  rm(ans)
}
# When not running interactively, end the R session explicitly with exit
# status 0 and without saving the workspace.
if (!interactive()) {
  quit(save="no", status=0)
}