This repository has been archived by the owner on Jul 13, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsim_data_load.py
63 lines (53 loc) · 2.45 KB
/
sim_data_load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# -*- coding: utf-8 -*-
"""
Main module for simulation data processing. Traverses a folder structure and
loads the results found in its leaf folders (folders with no subfolders) into
a dictionary of lists. Each dictionary key is formed by concatenating the
folder names and the final file name, without its extension. For each key,
and therefore each file, a list of numpy arrays containing the individual
results in said file is created.
The dictionary keys themselves are held in a separate list. The metadata, i.e.
the column labels, is saved into a second dictionary, using the same keys.
@author: elvd
"""
import os
import csv
import numpy as np
start_dir = '/var/host/media/removable/UNTITLED/projects/phd_helper/sim_data/'


def _parse_rows(rows):
    """Split parsed rows of one file into datasets and label rows.

    A row whose first two fields are numeric is a datapoint; the literal
    '<invalid>' in the second field is stored as 0.0. Any other non-empty
    row is treated as a label row and also closes the dataset collected so
    far. Empty rows are ignored.

    Args:
        rows: iterable of lists of strings, e.g. a ``csv.reader``.

    Returns:
        Tuple ``(datasets, labels)``: a list of numpy arrays, one per
        dataset, and a list of the label rows in the order encountered.
    """
    datasets = []
    labels = []
    current = []
    for row in rows:
        if not row:  # skip empty lines
            continue
        try:
            # Parse both fields before storing anything, so a rejected row
            # is kept as a label exactly as it was read from the file.
            x_val = float(row[0])
            # TODO: need better way to handle '<invalid>' than mapping to 0.0
            y_val = 0.0 if row[1] == '<invalid>' else float(row[1])
        except (ValueError, IndexError):
            # Non-numeric or too-short row: a data label (both x and y).
            labels.append(row)
            if current:  # reached the start of a new dataset
                datasets.append(np.array(current))
                current = []
        else:
            # One datapoint; any fields past the second are kept unconverted.
            current.append([x_val, y_val] + row[2:])
    if current:  # handle last dataset in a file
        datasets.append(np.array(current))
    return datasets, labels


def load_sim_results(root_dir):
    """Load every tab-delimited file found in the leaf folders of root_dir.

    Only folders that contain no subfolders are read. The key for a file is
    its folder path relative to ``root_dir`` with path separators replaced
    by '_', followed by '_' and the file name without its extension.

    Args:
        root_dir: path of the top-level folder to traverse.

    Returns:
        Tuple ``(results, labels, keys)``: ``results`` maps each key to a
        list of numpy arrays (one per dataset in the file), ``labels`` maps
        each key to the list of label rows of that file, and ``keys`` lists
        the keys in the order the files were visited.
    """
    results = {}
    labels = {}
    keys = []
    for dirname, subdirlist, filelist in os.walk(root_dir):
        if subdirlist:  # only folders without subfolders are processed
            continue
        # The folder part of the key is the same for every file in dirname.
        key_base = os.path.relpath(dirname, root_dir)
        key_base = key_base.replace(os.path.sep, '_')
        for filename in filelist:
            # key defined by filename plus path to it
            dict_key = '_'.join([key_base, os.path.splitext(filename)[0]])
            fname = os.path.join(dirname, filename)
            with open(fname, 'rt') as file_in:
                reader = csv.reader(file_in, delimiter='\t')
                datasets, file_labels = _parse_rows(reader)
            # individual datasets stored as elements in a list
            results[dict_key] = datasets
            labels[dict_key] = file_labels
            keys.append(dict_key)
    return results, labels, keys


# use same key to refer to data and data labels; dict_key_list holds the keys
sim_results, sim_results_labels, dict_key_list = load_sim_results(start_dir)