Commit: Fixes

Memory fixes, code errors.

Qwinpin committed Jan 24, 2019
1 parent 1cf97ce commit fa0b2c2
Showing 13 changed files with 82 additions and 71 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -45,3 +45,7 @@ viz_data2\.json
 viz_data\(7\)\.json
 
 viz_data_imdb_2\.json
+
+*.png
+
+*.json
28 changes: 16 additions & 12 deletions main.py
@@ -29,29 +29,33 @@ def main():
     evaluator = neuvol.Evaluator(x_train, y_train, kfold_number=1)
     mutator = neuvol.Mutator()
     crosser = neuvol.Crosser()
 
     # imdb dataset contains processed texts in form of indexes
     evaluator.create_tokens = False
     evaluator.fitness_measure = 'f1'
-    options = {'classes': 2, 'shape': (100,), 'depth': 4}
+    # evaluator.device = 'gpu'
+
+    options = {'classes': 2, 'shape': (100,), 'depth': 10}
 
     wop = neuvol.evolution.Evolution(
         stages=10,
         population_size=10,
         evaluator=evaluator,
         mutator=mutator,
         crosser=crosser,
         data_type='text',
         task_type='classification',
         active_distribution=True,
         freeze=None,
         **options)
     wop.cultivate()
 
     for individ in wop.population:
         print('Architecture: \n')
         print(individ.schema)
         print('\nScore: ', individ.result)
 
+    wop.dump('population.json')


if __name__ == "__main__":
    main()
9 changes: 8 additions & 1 deletion neuvol/architecture/individ_base.py
@@ -62,6 +62,10 @@ def __init__(self, stage, task_type='classification', parents=None, freeze=None,
             self._random_init()
             self._history.append(EVENT('Init', stage))
         else:
+            # okay, we need some hack to avoid memory leak
+            self._parents[0]._parents = None
+            self._parents[1]._parents = None
+
             self._task_type = parents[0].task_type
             self._data_processing_type = parents[0].data_type
             self._history.append(EVENT('Birth', self._stage))
@@ -82,6 +86,8 @@ def _random_init_branch(self, ):
         # choose number of layers
         self._layers_number = Distribution.layers_number()
+        if self._layers_number > self.options['depth']:
+            self._layers_number = self.options['depth']
 
         # layers around current one
         previous_layer = None
@@ -162,6 +168,7 @@ def _check_compatibility(self):
         """
         Check shapes compatibilities of different layers, modify layer if it is necessary
         """
+        # TODO: REWRITE AT ALL
         previous_shape = []
         shape_structure = []
         tmp = deepcopy(self._architecture)
@@ -176,6 +183,7 @@
             if block.type == 'input':
                 output_shape = block.config['shape']
+
             if block.type == 'embedding':
                 output_shape = (2, block.config['sentences_length'], block.config['embedding_dim'])
@@ -295,7 +303,6 @@ def init_tf_graph(self):
             else:
                 # we need just to add new layer
-
                 network_graph = init_layer(block)(network_graph)
             # except ValueError:
             # in some cases shape of previous output could be less than kernel size of cnn
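
Note on the parent-pruning hack above: every offspring keeps references to its parents, so without pruning the entire genealogy stays reachable and memory grows with every generation. A minimal sketch of the effect, with an illustrative Individ class rather than the library's:

# Without the hack, grandchild -> child -> parents -> ... keeps every
# ancestor alive; severing links one generation back bounds the chain.
class Individ:
    def __init__(self, parents=None):
        self._parents = parents
        if parents is not None:
            # cut the grandparents loose, as the commit does
            parents[0]._parents = None
            parents[1]._parents = None

def depth(ind):
    """Length of the ancestor chain still reachable from ind."""
    d = 0
    while ind._parents is not None:
        ind = ind._parents[0]
        d += 1
    return d

a, b = Individ(), Individ()
child = Individ(parents=(a, b))
grandchild = Individ(parents=(child, Individ()))
print(depth(grandchild))  # 1 -- child's own parent links were cut
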
14 changes: 7 additions & 7 deletions neuvol/constants.py
@@ -28,9 +28,9 @@
 
 # Training parameters
 TRAINING = {
-    'batchs': [i for i in range(8, 512, 32)],
-    'epochs': [i for i in range(1, 100) if i % 2],
-    'optimizer': ['adam', 'RMSprop'],
+    'batchs': [4],  # [i for i in range(8, 512, 32)],
+    'epochs': [200],  # [i for i in range(1, 100) if i % 2],
+    'optimizer': ['adam'],  # ['adam', 'RMSprop'],
     'optimizer_decay': [FLOAT32(i / 10000) for i in range(1, 500, 1)],
     'optimizer_lr': [FLOAT32(i / 10000) for i in range(1, 500, 1)]}
 
@@ -68,15 +68,15 @@
         'strides': [1, 2, 3],
         'padding': ['valid', 'same', 'causal'],
         'activation': ['tanh', 'relu'],
-        'dilation_rate': [1, 2, 3]},
+        'dilation_rate': [0, 1, 2, 3]},
 
     'cnn2': {
-        'filters': [i for i in range(4, 128, 2)],
-        'kernel_size': [i for i in range(1, 9, 1)],
+        'filters': [i for i in range(1, 128, 1)],
+        'kernel_size': [i for i in range(1, 11, 1)],
         'strides': [1, 2, 3],
         'padding': ['valid', 'same'],
         'activation': ['tanh', 'relu'],
-        'dilation_rate': [1, 2, 3]},
+        'dilation_rate': [0, 1, 2, 3]},
 
     'max_pool': {
         'pool_size': [i for i in range(2, 16)],
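
The pools above are plain value lists that the generator samples layer and training parameters from, so narrowing 'batchs', 'epochs' and 'optimizer' to single values pins training to one fixed setting. A rough sketch of uniform sampling from such a pool; the repo's Distribution class does the probability-weighted version of this:

import random

CNN2_POOL = {
    'filters': [i for i in range(1, 128, 1)],
    'kernel_size': [i for i in range(1, 11, 1)],
    'strides': [1, 2, 3],
    'padding': ['valid', 'same'],
    'activation': ['tanh', 'relu'],
    'dilation_rate': [0, 1, 2, 3],
}

# one uniformly sampled layer config, e.g. {'filters': 37, 'kernel_size': 4, ...}
config = {key: random.choice(values) for key, values in CNN2_POOL.items()}
print(config)
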
4 changes: 3 additions & 1 deletion neuvol/evaluation/evaluation.py
@@ -178,7 +178,9 @@ def fit(self, network):
         # create list of indexes
         # to work without cross-validation and avoid code duplication
         # we imitate kfold behaviour and return two lists of indexes
-        kfold_generator = [[list(range(self._x.shape[0]))] * 2]
+        tmp = list(range(self._x.shape[0]))
+        np.random.shuffle(tmp)
+        kfold_generator = [[tmp] * 2]
 
         global graph
         graph = tf.get_default_graph()
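
The replacement above also shuffles the indexes before duplicating them. What [[tmp] * 2] produces is a single pseudo-fold whose train and test halves are the same shuffled list, so the downstream kfold loop runs unchanged; a standalone sketch (numpy only, sizes illustrative):

import numpy as np

n_samples = 10  # stands in for self._x.shape[0]
tmp = list(range(n_samples))
np.random.shuffle(tmp)
kfold_generator = [[tmp] * 2]  # one pseudo-fold: train == test

for train_index, test_index in kfold_generator:
    assert train_index is test_index  # same shuffled list on both sides
    print(len(train_index), len(test_index))  # 10 10
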
18 changes: 11 additions & 7 deletions neuvol/evolution/evolution.py
@@ -92,13 +92,14 @@ def mutation_step(self):
         """
         Mutate randomly chosen individs
         """
-        for network in self._population:
+        # mutate only previous elements in the population
+        for network in range(round(self._population_size * (1 - self._mortality_rate))):
             if np.random.randint(0, 1) > 0.3:
                 # TODO: more accurate error handling
-                network = self._mutator.mutate(network, self._current_stage)
+                self._population[network] = self._mutator.mutate(self._population[network], self._current_stage + 1)
 
                 # set result as -1 to retrain net
-                network.result = -1.0
+                self._population[network].result = -1.0
 
     def step(self):
         """
@@ -151,12 +152,12 @@ def cultivate(self):
         """
         Perform all evolutional steps
         """
-        tmp = self._current_stage + self._stages - 1
+        tmp = self._stages + self._current_stage
 
-        for i in range(1, self._stages + 1):
-            print('\nStage #{} of {}\n'.format(self._current_stage, tmp))
+        for i in range(self._stages):
+            print('\nStage #{} of {}\n'.format(self._current_stage, tmp - 1))
 
-            if i == 1:
+            if self._current_stage == 0:
                 self.step()
                 if self._active_distribution:
                     self._population_probability()
@@ -204,6 +205,9 @@ def viz(self):
         """
         if os.path.isfile('./viz_data.json'):
            self._viz_data = load('viz_data.json')
+        else:
+            self._viz_data = {}
+            self._viz_data['population'] = []
 
         for network in self._population:
             tmp = deepcopy(network)
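
One observation on the mutation gate kept as context above (unchanged by this commit): numpy's randint excludes the high end, so np.random.randint(0, 1) can only return 0 and the > 0.3 comparison never passes. A probability gate is usually written with random(); shown side by side for comparison:

import numpy as np

print(all(np.random.randint(0, 1) == 0 for _ in range(1000)))  # True: high end is exclusive

mutation_probability = 0.7  # illustrative rate, not a value from the repo
if np.random.random() < mutation_probability:
    print('mutate this individ')
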
10 changes: 4 additions & 6 deletions neuvol/layer/block.py
@@ -51,18 +51,16 @@ def _check_compatibility(self):
             pass
 
         elif self.type == 'cnn' or self.type == 'cnn2':
-            strides = self.layers[0].config['strides']
             dilation_rate = self.layers[0].config['dilation_rate']
+            # note that same padding and strides != 1 is inconsistent in keras
             for layer in self.layers:
                 layer.config['padding'] = 'same'
-                layer.config['strides'] = strides
                 layer.config['dilation_rate'] = dilation_rate
+                layer.config['strides'] = 1
 
         elif self.type == 'max_pool' or self.type == 'max_pool2':
-            strides = self.layers[0].config['strides']
+            # note that same padding and strides != 1 is inconsistent in keras
             for layer in self.layers:
                 layer.config['padding'] = 'same'
-                layer.config['strides'] = strides
+                layer.config['strides'] = 1
 
         else:
             output = self.layers[0].config['units']
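
Pinning strides to 1 in both branches also sidesteps a real Keras restriction: conv layers refuse strides > 1 combined with dilation_rate > 1. A small demonstration against tf.keras (a sketch; the error surfaces at layer construction or first call depending on version):

import numpy as np
from tensorflow import keras

x = np.zeros((1, 100, 8), dtype='float32')

try:
    bad = keras.layers.Conv1D(4, 3, strides=2, dilation_rate=2, padding='same')
    bad(x)
except ValueError as err:
    print('rejected:', err)  # strides > 1 with dilation_rate > 1 is refused

# strides = 1, as the block fix enforces, accepts any sampled dilation_rate
ok = keras.layers.Conv1D(4, 3, strides=1, dilation_rate=2, padding='same')
print(ok(x).shape)  # (1, 100, 4) with 'same' padding
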
7 changes: 2 additions & 5 deletions neuvol/layer/layer.py
@@ -82,12 +82,9 @@ def _check_compatibility(self):
             self.config['units'] = self.options['classes']
 
         elif self.type == 'cnn' or self.type == 'cnn2':
             if self.config['padding'] == 'causal':
                 # control dilation constraints
                 if self.config['dilation_rate'] != 1:
                     self.config['strides'] = 1
-                if self.config['dilation_rate'] == 1:
-                    self.config['padding'] = 'same'
+            else:
+                self.config['dilation_rate'] = 1
 
     def save(self):
         """
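
The surviving logic keeps dilation only under 'causal' padding (the WaveNet-style case where it is useful) and resets it to 1 otherwise. Restated as a standalone function over plain config dicts — it mirrors the committed branch but is not the library's Layer class:

def check_cnn_config(config):
    if config['padding'] == 'causal':
        if config['dilation_rate'] != 1:
            config['strides'] = 1  # Keras forbids strides > 1 with dilation > 1
    else:
        config['dilation_rate'] = 1
    return config

print(check_cnn_config({'padding': 'causal', 'dilation_rate': 3, 'strides': 2}))
# -> {'padding': 'causal', 'dilation_rate': 3, 'strides': 1}
print(check_cnn_config({'padding': 'same', 'dilation_rate': 3, 'strides': 2}))
# -> {'padding': 'same', 'dilation_rate': 1, 'strides': 2}
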
3 changes: 1 addition & 2 deletions neuvol/mutation/base_mutation.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ..constants import EVENT
 
 
 class MutatorBase:
@@ -24,4 +23,4 @@ def mutate(individ):
         """
         Mutate individ
         """
-        individ.history(EVENT('Mutation', individ.stage))
+        pass
8 changes: 4 additions & 4 deletions neuvol/mutation/image_mutation.py
@@ -27,15 +27,15 @@ def mutate(individ):
         """
         Mutate individ
         """
-        super().mutate()
+        super(MutatorImage, MutatorImage).mutate(individ)
 
         mutation_type = np.random.choice([
            'architecture_part',
            'architecture_parameters',
-            'training_all',
-            'training_part',
            'architecture_add',
-            'architecture_remove'
+            'architecture_remove',
+            'training_all',
+            'training_part'
        ])
 
        individ = perform_mutation(individ, mutation_type)
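
The super() change above matters because mutate is a staticmethod: the zero-argument super() form needs an instance or class argument bound in the calling frame and raises a RuntimeError here, while the explicit two-argument form super(Cls, Cls) resolves the parent's staticmethod. Minimal stand-ins, not the real mutator classes:

class MutatorBase:
    @staticmethod
    def mutate(individ):
        print('base mutate:', individ)

class MutatorImage(MutatorBase):
    @staticmethod
    def mutate(individ):
        # super().mutate(individ) here would raise RuntimeError
        super(MutatorImage, MutatorImage).mutate(individ)
        print('image-specific mutation:', individ)

MutatorImage.mutate('individ-42')
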
19 changes: 12 additions & 7 deletions neuvol/mutation/mutation_modules/mutation_modules_interface.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import numpy as np
 
+from ...constants import EVENT
 from ...constants import TRAINING
 from ...layer.block import Block
 from ...probabilty_pool import Distribution
@@ -30,25 +31,29 @@ def perform_mutation(individ, mutation_type):
     limitations = [1, 1]
 
     if mutation_type == 'architecture_part':
-        return architecture_part(individ, limitations)
+        individ = architecture_part(individ, limitations)
 
     elif mutation_type == 'architecture_parameters':
-        return architecture_parameters(individ, limitations)
+        individ = architecture_parameters(individ, limitations)
 
     elif mutation_type == 'training_all':
-        return training_all(individ)
+        individ = training_all(individ)
 
     elif mutation_type == 'training_part':
-        return training_part(individ)
+        individ = training_part(individ)
 
     elif mutation_type == 'architecture_add':
-        return architecture_add_layer(individ, limitations)
+        individ = architecture_add_layer(individ, limitations)
 
     elif mutation_type == 'architecture_remove':
-        return architecture_remove_layer(individ, limitations)
+        individ = architecture_remove_layer(individ, limitations)
 
     else:
-        return architecture_part(individ, limitations)
+        individ = architecture_part(individ, limitations)
 
+    individ.history = EVENT('Mutation: {}'.format(mutation_type), individ.stage)
+
+    return individ
 
 
 def architecture_part(individ, limitations):
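
With every branch now funnelling into one EVENT record and a single return, perform_mutation is a plain dispatch on mutation_type; the same control flow can be written as a lookup table. A self-contained sketch with stub handlers (names mirror the module's functions, but the dict itself is illustrative, not part of the commit):

def architecture_part(individ, limitations):
    return individ + ':architecture_part'

def training_all(individ):
    return individ + ':training_all'

def perform_mutation(individ, mutation_type, limitations=(1, 1)):
    handlers = {
        'architecture_part': lambda: architecture_part(individ, limitations),
        'training_all': lambda: training_all(individ),
    }
    # unknown mutation types fall back to architecture_part, as above
    individ = handlers.get(mutation_type, handlers['architecture_part'])()
    return individ

print(perform_mutation('net-1', 'training_all'))   # net-1:training_all
print(perform_mutation('net-1', 'unknown-type'))   # net-1:architecture_part
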
2 changes: 1 addition & 1 deletion neuvol/mutation/text_mutation.py
@@ -27,7 +27,7 @@ def mutate(individ):
         """
         Mutate individ
         """
-        super().mutate()
+        super(MutatorText, MutatorText).mutate(individ)
 
         mutation_type = np.random.choice([
             'architecture_part',
27 changes: 9 additions & 18 deletions neuvol/probabilty_pool/generating_distribution.py
@@ -86,19 +86,20 @@ class Distribution():
     _layers_parameters_probability = parse_layer_parameter_const()
     _layers_number_probability = parse_layers_number()
     _training_parameters_probability = parse_training_const()
-    # True value of this parameter leads to convergence
+    # True value of this parameter leads to fast convergence
+    # TODO: options
     _appeareance_increases_probability = False
     _diactivated_layers = []
 
     def _increase_layer_probability(self, layer):
-        self._layers_probability[layer] += 1
+        self._layers_probability[layer] += 0.1
 
     def _increase_layer_parameters_probability(self, layer, parameter, value):
         a = list(self._layers_parameters_probability[layer][parameter])
 
         a.sort()
         index_of_selected_value = a.index(value)
-        kernel = lambda x: 2.71 ** (1 / (1 + abs(index_of_selected_value - x)))
+        kernel = lambda x: 0.423 ** (1 / (1 + abs(index_of_selected_value - x)))
 
         for i, value in enumerate(a):
             self._layers_parameters_probability[layer][parameter][value] += kernel(i)
@@ -108,7 +109,7 @@ def _increase_training_parameters(self, parameter, value):
 
         a.sort()
         index_of_selected_value = a.index(value)
-        kernel = lambda x: 2.71 ** (1 / (1 + abs(index_of_selected_value - x)))
+        kernel = lambda x: 0.423 ** (1 / (1 + abs(index_of_selected_value - x)))
 
         for i, value in enumerate(a):
             self._training_parameters_probability[parameter][value] += kernel(i)
@@ -131,7 +132,7 @@ def layer(cls):
 
         if cls._appeareance_increases_probability:
             # now we increase the probability of this layer to be appear
-            cls._layers_probability[choice] += 1
+            cls._increase_layer_probability(cls, choice)
 
         return choice
 
@@ -152,12 +153,7 @@ def layer_parameters(cls, layer, parameter):
         # now one important thing - imagine parameters as a field of values
         # we chose one value, and now we want to increase the probability of this value
         # but we also should increase probabilities of near values
-        a.sort()
-        index_of_selected_value = a.index(choice)
-        kernel = lambda x: 2.71 ** (1 / (1 + abs(index_of_selected_value - x)))
-
-        for i, value in enumerate(a):
-            cls._layers_parameters_probability[layer][parameter][value] += kernel(i)
+        cls._increase_layer_parameters_probability(cls, layer, parameter, choice)
 
         return choice
 
@@ -180,7 +176,7 @@ def layers_number(cls):
         # but we also should increase probabilities of near values
         a.sort()
         index_of_selected_value = a.index(choice)
-        kernel = lambda x: 2.71 ** (1 / (1 + abs(index_of_selected_value - x)))
+        kernel = lambda x: 0.423 ** (1 / (1 + abs(index_of_selected_value - x)))
 
         for i, value in enumerate(a):
             cls._layers_number_probability[value] += kernel(i)
@@ -204,12 +200,7 @@ def training_parameters(cls, parameter):
         # now one important thing - imagine parameters as a field of values
         # we chose one value, and now we want to increase the probability of this value
         # but we also should increase probabilities of near values
-        a.sort()
-        index_of_selected_value = a.index(choice)
-        kernel = lambda x: 2.71 ** (1 / (1 + abs(index_of_selected_value - x)))
-
-        for i, value in enumerate(a):
-            cls._training_parameters_probability[parameter][value] += kernel(i)
+        cls._increase_training_parameters(cls, parameter, choice)
 
         return choice
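
Worked numbers for the kernel base change (2.71 -> 0.423). The exponent 1 / (1 + distance) equals 1 at the chosen index and tends to 0 far away, so base 2.71 boosts the chosen value most (2.71, decaying toward 1), while base 0.423 inverts the shape: the chosen value gets the smallest increment (0.423) and distant values approach 1, damping the selected region instead of reinforcing it. Plain arithmetic, no library assumptions:

index_of_selected_value = 3

for base in (2.71, 0.423):
    kernel = lambda x: base ** (1 / (1 + abs(index_of_selected_value - x)))
    print(base, [round(kernel(i), 3) for i in range(7)])

# 2.71  [1.283, 1.394, 1.646, 2.71, 1.646, 1.394, 1.283]
# 0.423 [0.806, 0.751, 0.65, 0.423, 0.65, 0.751, 0.806]
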
