forked from emanuele/kaggle_pbr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_data.py
38 lines (31 loc) · 925 Bytes
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""
Functions to load the dataset.
"""
import numpy as np
def read_data(file_name):
    """Read a comma-separated file of numbers, skipping its header line.

    This function is adapted from:
    https://github.com/benhamner/BioResponse/blob/master/Benchmarks/csv_io.py

    The first line of the file is discarded as a header. Every following
    non-blank line is stripped, split on "," and each field is converted
    with ``float``.

    Parameters
    ----------
    file_name : str
        Path of the file to read.

    Returns
    -------
    list of list of float
        One inner list per data line, in file order. Empty when the file
        holds nothing besides the header.

    Raises
    ------
    IOError
        If the file cannot be opened.
    ValueError
        If a field of a non-blank line cannot be converted to ``float``.
    """
    samples = []
    # The context manager guarantees the handle is closed, even when a
    # malformed field makes float() raise half-way through the file.
    with open(file_name) as f:
        # Ignore the header.
        f.readline()
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (typically a trailing newline at the end of
                # the file) carries no sample; float('') would raise.
                continue
            samples.append([float(x) for x in line.split(",")])
    return samples
def load(filename_train='data/train.csv', filename_test='data/test.csv'):
    """Convenience function to load all data as numpy arrays.

    Both files are parsed with ``read_data``. For the training file the
    first value of every row is split off from the remaining values; the
    rows of the test file are kept whole.

    Parameters
    ----------
    filename_train : str, optional
        Path of the training file. Defaults to ``'data/train.csv'``.
    filename_test : str, optional
        Path of the test file. Defaults to ``'data/test.csv'``.

    Returns
    -------
    X_train : numpy.ndarray
        Every training row without its first value.
    y_train : numpy.ndarray
        The first value of every training row.
    X_test : numpy.ndarray
        The rows of the test file.
    """
    # The parenthesised single-argument form prints the same text under
    # both Python 2 and Python 3.
    print("Loading data...")
    train = read_data(filename_train)
    y_train = np.array([x[0] for x in train])
    X_train = np.array([x[1:] for x in train])
    X_test = np.array(read_data(filename_test))
    return X_train, y_train, X_test
if __name__ == '__main__':
    # Script entry point: load the data once and bind the three arrays to
    # module-level names. Nothing here runs when the module is imported.
    X_train, y_train, X_test = load()