forked from SheffieldML/GPy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtasks.py
86 lines (67 loc) · 2.78 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Copyright (c) 2015, Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import abc
import os
import numpy as np
class RegressionTask(abc.ABCMeta('_RegressionTaskBase', (object,), {})):
    """Abstract interface for a regression benchmark task.

    Concrete tasks must implement :meth:`load_data`,
    :meth:`get_training_data` and :meth:`get_test_data`.

    The base class is created by calling ``abc.ABCMeta`` directly so that
    the abstract-method check is enforced on both Python 2 and Python 3.
    A ``__metaclass__`` class attribute is silently ignored by Python 3,
    which would allow this incomplete class to be instantiated.
    """

    def __init__(self, datapath='./'):
        """Remember where the dataset files of this task are located.

        :param datapath: directory of the dataset files, stored unchanged
            as ``self.datapath``.
        """
        self.datapath = datapath

    @abc.abstractmethod
    def load_data(self):
        """Download the dataset if it does not exist. Return True if successful."""
        return True

    @abc.abstractmethod
    def get_training_data(self):
        """Return the training data: training inputs and labels."""
        return None

    @abc.abstractmethod
    def get_test_data(self):
        """Return the test data: test inputs and labels."""
        return None
class Housing(RegressionTask):
    """Regression task built from the UCI ``housing.data`` file.

    The first 250 rows are used for training and the remaining rows for
    testing. In every row the last column is the label and all preceding
    columns are the inputs.
    """

    name = 'Housing'
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
    filename = 'housing.data'

    def load_data(self):
        """Download the data file if it is missing, then load and split it.

        Sets ``self.data`` (the full array), ``self.train`` and
        ``self.test`` (each an ``(inputs, labels)`` tuple).

        :returns: True on success, False if the file is still missing
            after the download attempt.
        """
        # Imported lazily so that importing this module does not pull in GPy.
        from GPy.util.datasets import download_url, data_path

        # Build the local path once instead of re-joining it for every check.
        local_file = os.path.join(data_path, self.datapath, self.filename)
        if not os.path.exists(local_file):
            # Use self.url rather than Housing.url so that a subclass
            # overriding `url` is honoured (same convention as WineQuality).
            download_url(self.url, self.datapath, messages=True)
            if not os.path.exists(local_file):
                return False

        data = np.loadtxt(local_file)
        self.data = data

        # Column slices use -1: so labels stay two-dimensional (N x 1).
        data_train = data[:250, :-1]
        label_train = data[:250, -1:]
        self.train = (data_train, label_train)

        data_test = data[250:, :-1]
        label_test = data[250:, -1:]
        self.test = (data_test, label_test)
        return True

    def get_training_data(self):
        """Return the ``(inputs, labels)`` training tuple set by load_data."""
        return self.train

    def get_test_data(self):
        """Return the ``(inputs, labels)`` test tuple set by load_data."""
        return self.test
class WineQuality(RegressionTask):
    """Regression task built from the UCI ``winequality-red.csv`` file.

    Rows before index 1000 form the training set and the rest the test
    set. The last column of each row is the label; the other columns are
    the inputs.
    """

    name = 'WineQuality'
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    filename = 'winequality-red.csv'

    def load_data(self):
        """Fetch the CSV file when absent, parse it and build the split.

        Populates ``self.data``, ``self.train`` and ``self.test``; the
        latter two are ``(inputs, labels)`` tuples.

        :returns: True when the data was loaded, False when the file
            could not be found even after attempting the download.
        """
        from GPy.util.datasets import download_url, data_path

        csv_path = os.path.join(data_path, self.datapath, self.filename)
        if not os.path.exists(csv_path):
            download_url(self.url, self.datapath, messages=True)
        if not os.path.exists(csv_path):
            return False

        # The file starts with one header line and is semicolon-separated.
        table = np.loadtxt(csv_path, skiprows=1, delimiter=';')
        self.data = table

        n_train = 1000
        self.train = (table[:n_train, :-1], table[:n_train, -1:])
        self.test = (table[n_train:, :-1], table[n_train:, -1:])
        return True

    def get_training_data(self):
        """Return the training ``(inputs, labels)`` pair."""
        return self.train

    def get_test_data(self):
        """Return the test ``(inputs, labels)`` pair."""
        return self.test