Skip to content

Commit

Permalink
join datagen and helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
jangorecki committed Feb 19, 2019
1 parent 312822b commit a58b010
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
2 changes: 1 addition & 1 deletion datatable/join-datatable.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ cache = TRUE

# Name of the LHS join dataset, read from the SRC_JN_LOCAL environment variable.
data_name = Sys.getenv("SRC_JN_LOCAL") # e.g. "J1_1e6_NA_0_0"
# CSV path of the LHS dataset under data/.
src_jn_x = file.path("data", paste(data_name, "csv", sep="."))
# Inline derivation of the three RHS table names ("big","medium","small"):
# takes the 2nd "_"-separated field of data_name as a number n, formats
# n, n/1e3 and n/1e6 with pretty_sci, and substitutes each for "NA" in data_name.
# NOTE(review): the statement right after this one reassigns y_data_name via
# join_to_tbls(); this inline form looks like the pre-refactor line kept by the
# diff view -- confirm that only one of the two should remain.
y_data_name = sapply((function(x) sapply(setNames(c(x, x/1e3, x/1e6), c("big","medium","small")), pretty_sci))(as.numeric(strsplit(data_name, "_", fixed=TRUE)[[1L]][2L])), gsub, pattern="NA", x=data_name)
y_data_name = join_to_tbls(data_name)
# CSV paths of the RHS tables under data/, carrying over the names of y_data_name.
src_jn_y = setNames(file.path("data", paste(y_data_name, "csv", sep=".")), names(y_data_name))
# Exactly three RHS tables are expected.
stopifnot(length(src_jn_y)==3L)
# Report the LHS and RHS dataset names about to be loaded.
cat(sprintf("loading datasets %s\n", paste(c(data_name, y_data_name), collapse=", ")))
Expand Down
6 changes: 6 additions & 0 deletions helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,9 @@ getenv = function(x) {
} else v = character(0)
v
}

# Derive the names of the three join (RHS) tables from the LHS data name.
#
# `data_name` is expected to look like "J1_1e6_NA_0_0": "_"-separated, with the
# row count in the second field and a literal "NA" that is replaced by each
# join table's row count.
#
# Returns a character vector named "big", "medium", "small", holding
# `data_name` with every "NA" replaced by pretty_sci() of n, n/1e3 and n/1e6.
#
# Stops with an error when `data_name` is not a single string or its second
# field is missing / not numeric (e.g. the environment variable feeding it is
# unset), instead of letting NA leak into the generated names.
join_to_tbls = function(data_name) {
  stopifnot(is.character(data_name), length(data_name) == 1L)
  fields = strsplit(data_name, "_", fixed=TRUE)[[1L]]
  # as.numeric() only warns on non-numeric text; silence that and raise one
  # clear error below instead
  x_n = suppressWarnings(as.numeric(fields[2L]))
  if (is.na(x_n)) {
    stop("data_name must carry a numeric row count as its second '_'-separated field, got: '",
         data_name, "'", call.=FALSE)
  }
  y_n = setNames(c(x_n, x_n/1e3, x_n/1e6), c("big","medium","small"))
  # vapply pins the result type; assumes pretty_sci() returns a single string
  # per input -- TODO confirm against its definition
  y_sci = vapply(y_n, pretty_sci, character(1L))
  # each element is passed positionally and therefore fills gsub's
  # `replacement` argument, because `pattern` and `x` are given by name
  vapply(y_sci, gsub, character(1L), pattern="NA", x=data_name)
}
10 changes: 8 additions & 2 deletions join-datagen.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,15 @@ y_N = setNames(c(N, N/1e3, N/1e6), c("big","medium","small"))
# Report the row counts (formatted by pretty_sci) of the join tables about to be produced.
cat(sprintf("Producing join tables of %s rows\n", paste(collapse=", ", sapply(y_N, pretty_sci))))
# create join tables
# For each size n in y_N, index DT by sample(n), which for a single number
# n >= 1 is a random permutation of 1:n. Presumably DT is a data.table, so this
# selects rows -- TODO confirm. The result keeps the names of y_N.
y_DT = lapply(y_N, function(n) DT[sample(n)])

y_data_name = sapply(sapply(y_N, pretty_sci), gsub, pattern="NA", x=data_name)
# Derive the names of the three join (RHS) tables from the LHS data name.
#
# `data_name` is expected to look like "J1_1e6_NA_0_0": "_"-separated, with the
# row count in the second field and a literal "NA" that is replaced by each
# join table's row count.
#
# Returns a character vector named "big", "medium", "small", holding
# `data_name` with every "NA" replaced by pretty_sci() of n, n/1e3 and n/1e6.
#
# Stops with an error when `data_name` is not a single string or its second
# field is missing / not numeric, instead of letting NA leak into the
# generated names.
#
# NOTE(review): the same commit adds an identical definition to helpers.R;
# consider keeping a single copy and sourcing it here.
join_to_tbls = function(data_name) {
  stopifnot(is.character(data_name), length(data_name) == 1L)
  fields = strsplit(data_name, "_", fixed=TRUE)[[1L]]
  # as.numeric() only warns on non-numeric text; silence that and raise one
  # clear error below instead
  x_n = suppressWarnings(as.numeric(fields[2L]))
  if (is.na(x_n)) {
    stop("data_name must carry a numeric row count as its second '_'-separated field, got: '",
         data_name, "'", call.=FALSE)
  }
  y_n = setNames(c(x_n, x_n/1e3, x_n/1e6), c("big","medium","small"))
  # vapply pins the result type; assumes pretty_sci() returns a single string
  # per input -- TODO confirm against its definition
  y_sci = vapply(y_n, pretty_sci, character(1L))
  # each element is passed positionally and therefore fills gsub's
  # `replacement` argument, because `pattern` and `x` are given by name
  vapply(y_sci, gsub, character(1L), pattern="NA", x=data_name)
}
y_data_name = join_to_tbls(data_name)

if (nas>0L) {
stop("not yet implemented")
real_nas = nas/100
cat(sprintf("Turning %s of data in each column to NAs\n", real_nas))
N_nas = as.integer(N*real_nas)
Expand All @@ -52,6 +57,7 @@ if (nas>0L) {
}
}
# Sorted-data branch. Currently disabled: stop() aborts the script whenever
# sort==1L, so the cat()/setkeyv() lines after it are unreachable until the
# stop() call is removed.
if (sort==1L) {
stop("not yet implemented")
cat(sprintf("Sorting data\n"))
# Key DT on columns id1..id6 (presumably data.table::setkeyv -- TODO confirm).
setkeyv(DT, paste0("id", 1:6))
}
Expand Down

0 comments on commit a58b010

Please sign in to comment.