tower expertise clustering

shism2 · Feb 20, 2019 · abc7e7a · abc7e7a
1 parent ba0e2f2
commit abc7e7a
Show file tree

Hide file tree

Showing 5 changed files with 92 additions and 16 deletions.
diff --git a/official_experiments b/official_experiments
@@ -5,6 +5,7 @@ Google cloud experiments - repeated many times.
 
 Text:
 for SEED in `seq 1 5`; do python launch.py -k  -c -z n1-highmem-64 text "python text.py  -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
+for SEED in `seq 1 2`; do python launch.py -k  -c -z n1-highmem-64 text_no_length_no_map "python text.py --noLength --noMap  -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
 for SEED in `seq 1 3`; do python launch.py -k  -c -z n1-highmem-64 text_mask "python text.py  -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --mask --ensembleSize 1 -RS 5000 --seed $SEED" ; done
 for SEED in `seq 1 3`; do python launch.py -k  -c -z n1-highmem-64 text_mask "python text.py  -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --mask --ensembleSize 1 -RS 5000 --seed $SEED" ; done
 
@@ -16,6 +17,7 @@ for SEED in `seq 4 5`; do python launch.py -k  -c -z n1-highmem-64 text_no_recog
 
 List:
 for SEED in `seq 1 5`; do python launch.py -k  -c -z n1-highmem-64 list "python list.py --split 0.5 -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
+for SEED in `seq 1 2`; do python launch.py -k  -c -z n1-highmem-64 list_no_length_no_map "python list.py --noLength --noMap --split 0.5 -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
 for SEED in `seq 1 2`; do python launch.py -k  -c -z n1-highmem-64 list_small_train "python list.py --split 0.25 -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 10 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
 for SEED in `seq 3 5`; do python launch.py -k  -c -z n1-highmem-64 list_tiny_train "python list.py --split 0.15 -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 10 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
 for SEED in `seq 1 2`; do python launch.py -k  -c -z n1-highmem-64 list_small_train_no_dsl "python list.py --no-dsl --split 0.25 -t 720  --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 10 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10  --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done

diff --git a/pinn b/pinn
diff --git a/pregex b/pregex
diff --git a/taskRankGraphs.py b/taskRankGraphs.py
@@ -94,6 +94,17 @@ def __getitem__(self, key):
 "Misc" : "Other"
 }
 
+towerTasks=[
+        ("on top of", "Other"),
+        ("bridge", "Bridge"),
+        ("brick", "Brick wall"),
+        ("aqueduct", "Aqueduct"),
+        ("staircase", "Staircase"),
+        ("pyramid", "Pyramid"),
+        ("arch stack", "Other"),
+        ("arch", "Arch"),
+        ("Other", "Other"),
+]
 logoTasks=[
         ("next to", "Other"),
         ("row of squares", "Square"),
@@ -395,22 +406,31 @@ def plotLabeledImages(embeddings, images, labels, title, exportPath, xlabel=None
         plot.tick_params(axis='both', left='off', top='off', right='off', bottom='off', labelleft='off', labeltop='off', labelright='off', labelbottom='off')
         plot.grid(False)
 
-        print("warning: only works for logo")
+        print("warning: only works for logo/towers")
+        if "tower" in title:
+                plot.title("Tower Building", fontsize=15)
+                nameMapping = towerTasks
+                zoom = 0.5
+        else:
+                plot.title("LOGO/Turtle Graphics", fontsize=15)
+                nameMapping = logoTasks
+                zoom = 0.5
+
         cmap = matplotlib.cm.get_cmap('tab10')
-        prettyNames = sorted(list({pretty for _,pretty in logoTasks}))
-        colorLabels = logoNamesToLabels(labels)
+        prettyNames = sorted(list({pretty for _,pretty in nameMapping}))
+        colorLabels = logo_tower_NamesToLabels(labels, nameMapping)
         import matplotlib.patches as mpatches
         patches = [mpatches.Patch(color=cmap(i), label=prettyNames[i]) for i in range(len(prettyNames))]
         legend=plot.legend(handles=patches, frameon=True, loc='upper center', bbox_to_anchor=(0.5, -0.02),
   ncol=1, fontsize=15)
-        plot.title("LOGO/Turtle Graphics", fontsize=15)
 
         def trimImage(image):
                 image = 255. - image[:,:,0]
                 while image[0,:].sum() == 0.: image = image[1:,:]
                 while image[-1,:].sum() == 0.: image = image[:-1,:]
                 while image[:,0].sum() == 0.: image = image[:,1:]
                 while image[:,-1].sum() == 0.: image = image[:,:-1]
+
                 Alpha = 255*(image > 0)
                 return np.dstack([255. - image]*3 + [Alpha])
         def projectColor(c,i):
@@ -425,19 +445,14 @@ def projectColor(c,i):
         imageLabels = [] # [(x,y,image)]
         initialDisplacements = []
         for i, label in enumerate(labels):
-                print(label)
                 x, y = embeddings[i, 0], embeddings[i, 1]
-
-
                 plot.scatter(x,y, color=cmap(colorLabels[i]), s=150, alpha=0.00001)
-
-
                 name = labels[i]
                 c = cmap(colorLabels[i])
 
                 imageLabels.append((x,y,projectColor(c,trimImage(images[i]))))
         for index, (x,y,i) in enumerate(imageLabels):
-                ab = AnnotationBbox(OffsetImage(i, zoom=0.5),
+                ab = AnnotationBbox(OffsetImage(i, zoom=zoom),
                                     (x,y),
                                     xycoords='data',
                                     frameon=False)
@@ -498,12 +513,33 @@ def makeLogoImage(im):
         im = np.dstack([im, im, im, alpha])
         return im
 
-def makeTowerImage(im):
+def makeTowerImage(im, labelsAndImages):
+        """Add an alpha channel to a tower image, making black pixels transparent.
+
+        im: image array indexed as im[row, column, channel].
+            NOTE(review): presumably an RGB render of a tower - confirm against caller.
+        labelsAndImages: when truthy, the image is additionally made monochromatic,
+            grown by one pixel (growImage), cropped to its non-transparent bounding
+            box, and has its colour channels inverted.
+
+        Returns the image with one extra, final channel holding alpha (0 or 255).
+        When labelsAndImages is truthy, `im` is modified in place along the way.
+        """
+        # remove the floor
+        im = im[:-2,:,:]
+        if labelsAndImages:
+                # Make monochromatic: clear every pixel with a non-zero channel 1,
+                # then set every pixel whose channel 0 is still non-zero to 1.
+                im[im[:,:,1] > 0] = 0
+                im[im[:,:,0] > 0] = 1
+
         # Set the black pixels to transparent.
         black_mask = im[:, :, 0] == 0
-        alpha = np.ones((256,256)) * 255
+        # Size the alpha plane from the image itself (the floor rows were just removed).
+        alpha = np.ones(im.shape[:2]) * 255
         alpha[black_mask] = 0
+        if labelsAndImages: im *= 255
         im = np.dstack([im, alpha])
+        if labelsAndImages: # trim
+                im = growImage(im, iterations=1)
+                # Strip fully transparent rows/columns from each edge (alpha is the last channel).
+                while im[0,:,-1].sum() == 0.: im = im[1:,:,:]
+                while im[-1,:,-1].sum() == 0.: im = im[:-1,:,:]
+                while im[:,0,-1].sum() == 0.: im = im[:,1:,:]
+                while im[:,-1,-1].sum() == 0.: im = im[:,:-1,:]
+                # Invert the colour channels, leaving alpha untouched.
+                im[:,:,:-1] = 255 - im[:,:,:-1]
+
         return im
 
 def makeRationalImage(im):
@@ -615,14 +651,15 @@ def plotTSNE(resultPaths,
                         if labelWithImages or labelsAndImages:
                                 images = {}
                                 for i, task in enumerate(sorted(filter(lambda mt: isinstance(mt, Task), recognitionTaskMetrics.keys()), key=lambda task : task.name)): # Enumerate in same order as sorted tasks.
-                                        if 'taskImages' not in recognitionTaskMetrics[task] and domain == 'tower': recognitionTaskMetrics[task]['taskImages'] = task.getImage(pretty=True) # BUG: this should not be necessaryd
+                                        if domain == 'tower':
+                                                recognitionTaskMetrics[task]['taskImages'] = task.getImage(pretty=not labelsAndImages)
                                         if 'taskImages' not in recognitionTaskMetrics[task] and domain == 'rational': recognitionTaskMetrics[task]['taskImages'] = task.features
                                         if 'taskImages' not in recognitionTaskMetrics[task] and domain == 'logo': recognitionTaskMetrics[task]['taskImages'] = task.highresolution
                                         im = np.array(recognitionTaskMetrics[task]['taskImages'])
                                         if domain == 'logo':
                                                 im = makeLogoImage(im)
                                         elif domain == 'tower':
-                                                im = makeTowerImage(im)
+                                                im = makeTowerImage(im, labelsAndImages)
                                         elif domain == 'rational':
                                                 im = makeRationalImage(im)
                                         images[task.name] = im
@@ -982,6 +1019,24 @@ def logoNamesToLabels(listNames):
         labels = [prettyNames.index(label) for label in labels]
         return labels
 
+def logo_tower_NamesToLabels(listNames, nameMapping):
+        """Map task names to integer category labels by substring matching.
+
+        listNames: iterable of task-name strings.
+        nameMapping: ordered list of (substring, prettyName) pairs. The first pair
+            whose substring occurs in a name wins, so more specific substrings must
+            be listed before the shorter ones they contain.
+
+        Names matching no substring are categorised as "Other". Each (name, category)
+        pair is printed for inspection.
+
+        Returns a list of indices into the alphabetically sorted set of pretty names
+        in nameMapping. Raises ValueError if the "Other" fallback is needed but
+        nameMapping contains no "Other" category.
+        """
+        labels = []
+        for name in listNames:
+                category = next((pretty for label, pretty in nameMapping if label in name),
+                                "Other")
+                labels.append(category)
+                # Pad the name to 20 columns; the previous "%s{0:20}%s" mixed %-style
+                # and str.format syntax and printed a literal "{0:20}".
+                print("%-20s %s" % (name, category))
+        # Assign to list
+        prettyNames = sorted({pretty for _, pretty in nameMapping})
+
+        return [prettyNames.index(label) for label in labels]
+
 def getTopNMostSimilar(names, sims, topN):
         sortedSims=np.dstack(np.unravel_index(np.argsort(-sims.ravel()), sims.shape)).squeeze()
         for n in range(topN):

diff --git a/utilities.py b/utilities.py
@@ -65,6 +65,25 @@ def flatten(x, abort=lambda x: False):
     except TypeError:  # not iterable
         yield x
 
+def growImage(i, iterations=2):
+    """Grow the foreground of an RGBA image by one pixel per iteration.
+
+    i: array indexed as i[row, column, channel] with at least 4 channels;
+       channel 3 is treated as alpha, and alpha > 0 marks the foreground.
+    iterations: number of growth passes. With iterations <= 0 the input is
+       returned unchanged.
+
+    Each pass builds a fresh zero-filled array of the same shape and paints every
+    pixel that is foreground, or has a foreground neighbour directly above, below,
+    left or right, with the per-channel maximum of the current image.
+    """
+    import numpy as np
+    for _ in range(iterations):
+        ip = np.zeros(i.shape)
+        # assume it is monochromatic and get the color
+        c = np.array([i[:,:,j].max()
+                      for j in range(4) ])
+        # assume that the alpha channel indicates where the foreground is
+        foreground = i[:,:,3] > 0
+        # OR the mask with copies of itself shifted one pixel in each of the four directions
+        foreground = foreground | \
+                     np.pad(foreground, ((0,1),(0,0)), mode='constant')[1:,:] | \
+                     np.pad(foreground, ((0,0),(0,1)), mode='constant')[:,1:] | \
+                     np.pad(foreground, ((0,0),(1,0)), mode='constant')[:,:-1] | \
+                     np.pad(foreground, ((1,0),(0,0)), mode='constant')[:-1,:]
+        ip[foreground] = c
+        i = ip
+    # Return `i` rather than `ip`: `ip` is never bound when the loop runs zero times.
+    return i
+
+
 
 def summaryStatistics(n, times):
     if len(times) == 0: