forked from vaexio/db-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
helpers.py
57 lines (51 loc) · 2.21 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import time
import csv
import math
import psutil
import os
import platform
def write_log(task, data, in_rows, question, out_rows, out_cols, solution, version, git, fun, run, time_sec, mem_gb, cache, chk, chk_time_sec):
    """Append one timing record to the CSV log and return True.

    The log path is read from the ``CSV_TIME_FILE`` environment variable
    (default ``"time.csv"``) and the batch id from ``BATCH`` (default ``""``).
    Unless ``CSV_VERBOSE`` is set to something other than ``"true"``
    (case-insensitive), the row is also echoed to stdout prefixed with ``"# "``.

    All arguments are written as-is to the column of the same name, except:

    * ``time_sec``, ``mem_gb`` and ``chk_time_sec`` are rounded to 3 decimals;
    * a NaN ``time_sec`` or ``mem_gb`` is written as an empty cell
      (``chk_time_sec`` is rounded only, it is not NaN-checked).

    The columns ``nodename`` (``platform.node()``), ``batch``, ``timestamp``
    (``time.time()``) and an empty ``comment`` are filled in here.

    The header row is written whenever the log file is empty at the time it
    is opened, which covers both a brand-new file and a pre-existing
    zero-length one.
    """
    batch = os.getenv('BATCH', "")
    timestamp = time.time()
    csv_file = os.getenv('CSV_TIME_FILE', "time.csv")
    nodename = platform.node()
    comment = ""  # placeholder for updates to timing data
    time_sec = round(time_sec, 3)
    chk_time_sec = round(chk_time_sec, 3)
    mem_gb = round(mem_gb, 3)
    # NaN measurements are logged as empty cells rather than the text "nan".
    if math.isnan(time_sec):
        time_sec = ""
    if math.isnan(mem_gb):
        mem_gb = ""
    log_row = [nodename, batch, timestamp, task, data, in_rows, question, out_rows, out_cols, solution, version, git, fun, run, time_sec, mem_gb, cache, chk, chk_time_sec, comment]
    log_header = ["nodename","batch","timestamp","task","data","in_rows","question","out_rows","out_cols","solution","version","git","fun","run","time_sec","mem_gb","cache","chk","chk_time_sec","comment"]
    if os.getenv('CSV_VERBOSE', "true").lower() == "true":
        print('# ' + ','.join(str(x) for x in log_row))
    # Always open in append mode: it creates the file when missing and can
    # never truncate a log that appeared after an existence check.
    # newline='' lets the csv writer control line endings itself.
    with open(csv_file, 'a', newline='') as f:
        w = csv.writer(f, lineterminator='\n')
        # Decide on the header from the size of the file we actually opened.
        if os.fstat(f.fileno()).st_size == 0:
            w.writerow(log_header)
        w.writerow(log_row)
    return True
def str_round(x):
    """Return ``str(x)``, rounding to 3 decimals first when *x* is a float.

    A value counts as a float when its type is *named* ``float`` or
    ``float64``; the check is by type name, not by ``isinstance``
    (presumably so float64 scalars from array libraries are matched without
    importing them -- confirm against callers).
    """
    type_name = type(x).__name__
    if type_name == "float" or type_name == "float64":
        return str(round(x, 3))
    return str(x)
def flatten(l):
    """Flatten one level of nesting: return the items of each element of *l*.

    *l* is any iterable of iterables. Only the outermost level is removed;
    deeper nesting is preserved. The result is always a new list.
    """
    return [item for sublist in l for item in sublist]
def make_chk(values):
    """Build a checksum string from *values*.

    Each value is converted with ``str_round`` and the pieces are joined
    with ``';'``. Any comma in the result is replaced by ``'_'``.
    """
    joined = ';'.join(map(str_round, values))
    # comma is reserved for csv separator
    return joined.replace(",", "_")
def memory_usage():
    """Return the current process's resident set size (RSS) in GB units.

    The byte count reported by psutil is divided by 1024**3.
    """
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / (1024 ** 3)
def join_to_tbls(data_name):
    """Return three table names derived from *data_name*.

    The second ``'_'``-separated field of *data_name* is parsed as a number
    ``x_n``. Every ``'NA'`` in *data_name* is then replaced by the compact
    scientific notation of ``x_n/1e6``, ``x_n/1e3`` and ``x_n`` in turn, e.g.
    ``"J1_1e7_NA_0_0"`` -> ``["J1_1e7_1e1_0_0", "J1_1e7_1e4_0_0",
    "J1_1e7_1e7_0_0"]``.

    Raises ``IndexError`` if *data_name* contains no ``'_'`` and
    ``ValueError`` if the second field is not numeric.
    """
    x_n = int(float(data_name.split("_")[1]))

    def _sci(value):
        # Compact form of "{:.0e}": "1e+07" -> "1e7", "1e+10" -> "1e10".
        mantissa, plus, exponent = "{:.0e}".format(value).partition("e+")
        if not plus:
            # Negative exponent (e.g. "1e-01"): left exactly as formatted.
            return mantissa
        # int() drops the zero padding for any exponent width, not only
        # for single-digit exponents.
        return "{}e{}".format(mantissa, int(exponent))

    return [data_name.replace('NA', _sci(v)) for v in (x_n / 1e6, x_n / 1e3, x_n)]