Fix extension of pz files from pckl to pz

hgascon · Jan 13, 2014 · 71928b5
1 parent eb255c4
commit 71928b5
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 18 deletions.
diff --git a/analysis.py b/analysis.py
@@ -60,7 +60,7 @@ def __init__(self, dataset_dir, families, split, precomputed_matrix="", y="", fn
             print "[*] file names loaded"
 
         else:            
-            files = self.read_files(dataset_dir, "fcgnx.pckl")
+            files = self.read_files(dataset_dir, "fcgnx.pz")
             if len(files) > 0:
                 print "Loading {0} samples".format(len(files))
                 widgets = ['Unpickling... : ', Percentage(), ' ', Bar(marker='#',left='[',right=']'),
@@ -245,7 +245,7 @@ def __init__(self, dirs, labels, split, max_files=0, max_node_size=0,
             split: The percentage of samples used for training (value between 0 and 1)
             precomputed_matrix: name of file if a data or kernel matrix has already
                 been computed.
-            y: If precomputed_matrix is True, a pcklzip list of labels must
+            y: If precomputed_matrix is True, a pickled and gzipped list of labels must
                 be provided.
 
         Returns:
@@ -281,7 +281,7 @@ def __init__(self, dirs, labels, split, max_files=0, max_node_size=0,
         else:
             # loop over dirs
             for d in zip(dirs, labels):
-                files = self.read_files(d[0], "fcgnx.pckl", max_files)
+                files = self.read_files(d[0], "fcgnx.pz", max_files)
                 print "Loading samples in dir {0} with label {1}".format(d[0], d[1])
                 widgets = ['Unpickling... : ', Percentage(), ' ', Bar(marker='#',left='[',right=']'),
                                    ' ', ETA(), ' ']

diff --git a/featureAnalysis.py b/featureAnalysis.py
@@ -70,7 +70,7 @@ def compute_neighborhoods_per_weights(d, w, n_weights, n_files=300):
         Outputs the file feature_analysis.txt
     """
 
-    files = read_files(d, "fcgnx.pckl", n_files)
+    files = read_files(d, "fcgnx.pz", n_files)
     sorted_weights_idx = w.argsort()[::-1]
 
     f_out = "feature_analysis.txt".format(n_weights)
@@ -93,9 +93,7 @@ def compute_neighborhoods_per_weights(d, w, n_weights, n_files=300):
     fd.close()
     print "[*] File written."
 
-
 def get_high_ranked_neighborhoods(fcgnx_file, w, sorted_weights_idx, show_small=False, weights=1):
-
     # g = FCGextractor.build_cfgnx(fcgnx_file)
     g = pz.load(fcgnx_file)
     g_hash = ml.neighborhood_hash(g)
@@ -129,7 +127,6 @@ def get_high_ranked_neighborhoods(fcgnx_file, w, sorted_weights_idx, show_small=
             return neighborhoods, n_nodes
 
 def add_weights_to_nodes(g, w, show_labels=True):
-
     g_hash = ml.neighborhood_hash(g)    
 
     #initialize the weight for every node in g_hash
@@ -176,13 +173,11 @@ def add_weights_to_nodes(g, w, show_labels=True):
 
     return g_hash_weighted
 
-
 def normalize_weights(a, imin=0.0, imax=1.0):
     dmin = a.min()
     dmax = a.max()
     return imin + (imax - imin) * (a - dmin) / (dmax - dmin)
 
-
 def read_files(d, file_extension, max_files=0):
     files = []
     for fn in  os.listdir(d):

diff --git a/instructionSet.py b/instructionSet.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 # ADAGIO Android Application Graph-based Classification
-# instructionSet.py >> Dalvik intstruction set and corresponding classes 
+# instructionSet.py >> Dalvik intstruction set and corresponding categories
 # Copyright (c) 2013 Hugo Gascon <[email protected]>
 
 INSTRUCTION_CLASS_COLOR = {

diff --git a/ml.py b/ml.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python # ADAGIO Android Application Graph-based Classification
 # ml >> functions for computation of kernel matrices and feature vectors
-# Copyright (c) 2013 Hugo Gascon <hgascon@gmail.com>
+# Copyright (c) 2013 Hugo Gascon <hgascon@uni-goettingen.de>
 
 import pz
 import numpy as np
@@ -74,7 +74,7 @@ def nh_kernel_matrix(graph_set, R=1):
         pbar.finish()
         #build lower triangle
         K = K + K.transpose() - np.identity(len(K))
-        pz.save(K, "K_{0}.pckl".format(r))
+        pz.save(K, "K_{0}.pz".format(r))
         K_set.append(K)
 
     #normalization of K
@@ -506,9 +506,3 @@ def array_to_str(a):
 def str_to_array(s):
     return np.array(list(s), dtype=np.int64)
 
-
-
-
-
-
-