test_create_searchspace_util_classification.py
from collections import OrderedDict

import numpy
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

import autosklearn.pipeline.create_searchspace_util
from autosklearn.pipeline.components.classification.lda import LDA
from autosklearn.pipeline.components.classification.liblinear_svc import LibLinear_SVC
from autosklearn.pipeline.components.feature_preprocessing.no_preprocessing import (
    NoPreprocessing,
)
from autosklearn.pipeline.components.feature_preprocessing.pca import PCA
from autosklearn.pipeline.components.feature_preprocessing.random_trees_embedding import (  # noqa: E501
    RandomTreesEmbedding,
)
from autosklearn.pipeline.components.feature_preprocessing.truncatedSVD import (
    TruncatedSVD,
)

import unittest
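
# The tests below exercise two helpers from
# autosklearn.pipeline.create_searchspace_util: get_match_array, which builds
# a 0/1 compatibility matrix over the components available at each pipeline
# step for the given dataset properties, and add_forbidden, which turns the
# zero entries of such a matrix into forbidden clauses of a ConfigurationSpace.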


class TestCreateClassificationSearchspace(unittest.TestCase):

    _multiprocess_can_split_ = True

    def test_get_match_array_sparse_and_dense(self):
        # preproc is empty
        preprocessors = OrderedDict()
        preprocessors["pca"] = PCA
        classifiers = OrderedDict()
        classifiers["lda"] = LDA
        # Sparse + dense

        class Preprocessors(object):
            @classmethod
            def get_available_components(self, *args, **kwargs):
                return preprocessors

        class Classifiers(object):
            @classmethod
            def get_available_components(self, *args, **kwargs):
                return classifiers
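
        # The two stub classes above mimic the component-choice interface that
        # get_match_array relies on (get_available_components), so a single
        # pipeline step can offer several candidate components at once.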
        # Dense
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, PCA), (1, LDA)), dataset_properties={"sparse": True}
        )
        self.assertEqual(numpy.sum(m), 0)
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, PCA), (1, LDA)), dataset_properties={"sparse": False}
        )
        self.assertEqual(m, [[1]])
        # Sparse
        preprocessors["tSVD"] = TruncatedSVD
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)), dataset_properties={"sparse": True}
        )
        self.assertEqual(m[0], [0])  # pca
        self.assertEqual(m[1], [1])  # svd
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)),
            dataset_properties={"sparse": False},
        )
        self.assertEqual(m[0], [1])  # pca
        self.assertEqual(m[1], [0])  # svd
        preprocessors["none"] = NoPreprocessing
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)), dataset_properties={"sparse": True}
        )
        self.assertEqual(m[0, :], [0])  # pca
        self.assertEqual(m[1, :], [1])  # tsvd
        self.assertEqual(m[2, :], [0])  # none
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, LDA)),
            dataset_properties={"sparse": False},
        )
        self.assertEqual(m[0, :], [1])  # pca
        self.assertEqual(m[1, :], [0])  # tsvd
        self.assertEqual(m[2, :], [1])  # none
        classifiers["libsvm"] = LibLinear_SVC
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, Classifiers)),
            dataset_properties={"sparse": False},
        )
        self.assertListEqual(list(m[0, :]), [1, 1])  # pca
        self.assertListEqual(list(m[1, :]), [0, 0])  # tsvd
        self.assertListEqual(list(m[2, :]), [1, 1])  # none
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, Classifiers)),
            dataset_properties={"sparse": True},
        )
        self.assertListEqual(list(m[0, :]), [0, 0])  # pca
        self.assertListEqual(list(m[1, :]), [1, 1])  # tsvd
        self.assertListEqual(list(m[2, :]), [0, 1])  # none
        # Do fancy 3d stuff
        preprocessors["random_trees"] = RandomTreesEmbedding
        m = autosklearn.pipeline.create_searchspace_util.get_match_array(
            pipeline=((0, Preprocessors), (1, Preprocessors), (2, Classifiers)),
            dataset_properties={"sparse": False},
        )
        # PCA followed by truncated SVD is forbidden
        self.assertEqual(list(m[0].flatten()), [1, 1, 0, 0, 1, 1, 0, 1])
        # Truncated SVD is forbidden
        self.assertEqual(list(m[1].flatten()), [0, 0, 0, 0, 0, 0, 0, 0])
        # Truncated SVD is forbidden after no_preprocessing
        self.assertEqual(list(m[2].flatten()), [1, 1, 0, 0, 1, 1, 0, 1])
        # PCA is forbidden, truncatedSVD allowed after random trees embedding
        # lda only allowed after truncatedSVD
        self.assertEqual(list(m[3].flatten()), [0, 0, 1, 1, 0, 1, 0, 1])

    def test_get_match_array_signed_unsigned_and_binary(self):
        pass
    @unittest.skip("Not currently working.")
    def test_add_forbidden(self):
        m = numpy.ones([2, 3])
        preprocessors_list = ["pa", "pb"]
        classifier_list = ["ca", "cb", "cc"]
        cs = ConfigurationSpace()
        preprocessor = CategoricalHyperparameter(
            name="feature_preprocessor", choices=preprocessors_list
        )
        classifier = CategoricalHyperparameter(
            name="classifier", choices=classifier_list
        )
        cs.add_hyperparameter(preprocessor)
        cs.add_hyperparameter(classifier)
        new_cs = autosklearn.pipeline.create_searchspace_util.add_forbidden(
            conf_space=cs,
            node_0_list=preprocessors_list,
            node_1_list=classifier_list,
            matches=m,
            node_0_name="feature_preprocessor",
            node_1_name="classifier",
        )
        self.assertEqual(len(new_cs.forbidden_clauses), 0)
        self.assertIsInstance(new_cs, ConfigurationSpace)

        m[1, 1] = 0
        new_cs = autosklearn.pipeline.create_searchspace_util.add_forbidden(
            conf_space=cs,
            node_0_list=preprocessors_list,
            node_1_list=classifier_list,
            matches=m,
            node_0_name="feature_preprocessor",
            node_1_name="classifier",
        )
        self.assertEqual(len(new_cs.forbidden_clauses), 1)
        self.assertEqual(new_cs.forbidden_clauses[0].components[0].value, "cb")
        self.assertEqual(new_cs.forbidden_clauses[0].components[1].value, "pb")
        self.assertIsInstance(new_cs, ConfigurationSpace)
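

# ---------------------------------------------------------------------------
# Illustrative sketches, not part of the original test module. They only
# reuse the imports above plus ConfigSpace's own forbidden-clause classes
# (ForbiddenEqualsClause, ForbiddenAndConjunction); the expected values are
# taken from the assertions in the tests, not from the implementation of
# create_searchspace_util.
# ---------------------------------------------------------------------------
def _print_dense_match_array():
    """Print the match matrix for a PCA -> LDA pipeline on dense data.

    According to test_get_match_array_sparse_and_dense this should be [[1]],
    i.e. PCA and LDA are compatible when the dataset is not sparse.
    """
    m = autosklearn.pipeline.create_searchspace_util.get_match_array(
        pipeline=((0, PCA), (1, LDA)), dataset_properties={"sparse": False}
    )
    print(m)


def _forbid_incompatible_pair():
    """Sketch of the clause that add_forbidden is expected to create.

    The (skipped) test_add_forbidden asserts that zeroing matches[1, 1]
    yields one clause pairing classifier "cb" with preprocessor "pb"; this
    builds the same clause directly with ConfigSpace, as an assumption about
    the intended behaviour rather than a copy of add_forbidden itself.
    """
    from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause

    cs = ConfigurationSpace()
    preprocessor = CategoricalHyperparameter("feature_preprocessor", ["pa", "pb"])
    classifier = CategoricalHyperparameter("classifier", ["ca", "cb", "cc"])
    cs.add_hyperparameter(preprocessor)
    cs.add_hyperparameter(classifier)
    # Forbid the combination classifier == "cb" AND preprocessor == "pb",
    # in the component order checked by the test's assertions.
    cs.add_forbidden_clause(
        ForbiddenAndConjunction(
            ForbiddenEqualsClause(classifier, "cb"),
            ForbiddenEqualsClause(preprocessor, "pb"),
        )
    )
    return cs


if __name__ == "__main__":
    unittest.main()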