Skip to content

Commit

Permalink
tower expertise clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
ellisk42 committed Feb 20, 2019
1 parent ba0e2f2 commit abc7e7a
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 16 deletions.
2 changes: 2 additions & 0 deletions official_experiments
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Google cloud experiments - repeated many times.

Text:
for SEED in `seq 1 5`; do python launch.py -k -c -z n1-highmem-64 text "python text.py -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
for SEED in `seq 1 2`; do python launch.py -k -c -z n1-highmem-64 text_no_length_no_map "python text.py --noLength --noMap -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
for SEED in `seq 1 3`; do python launch.py -k -c -z n1-highmem-64 text_mask "python text.py -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --mask --ensembleSize 1 -RS 5000 --seed $SEED" ; done
for SEED in `seq 1 3`; do python launch.py -k -c -z n1-highmem-64 text_mask "python text.py -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --mask --ensembleSize 1 -RS 5000 --seed $SEED" ; done

Expand All @@ -16,6 +17,7 @@ for SEED in `seq 4 5`; do python launch.py -k -c -z n1-highmem-64 text_no_recog

List:
for SEED in `seq 1 5`; do python launch.py -k -c -z n1-highmem-64 list "python list.py --split 0.5 -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
for SEED in `seq 1 2`; do python launch.py -k -c -z n1-highmem-64 list_no_length_no_map "python list.py --noLength --noMap --split 0.5 -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 20 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
for SEED in `seq 1 2`; do python launch.py -k -c -z n1-highmem-64 list_small_train "python list.py --split 0.25 -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 10 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
for SEED in `seq 3 5`; do python launch.py -k -c -z n1-highmem-64 list_tiny_train "python list.py --split 0.15 -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 10 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
for SEED in `seq 1 2`; do python launch.py -k -c -z n1-highmem-64 list_small_train_no_dsl "python list.py --no-dsl --split 0.25 -t 720 --pseudoCounts 30 --aic 1.0 --structurePenalty 1.5 --topK 2 --arity 3 --maximumFrontier 5 -i 10 -R 3600 --storeTaskMetrics --testingTimeout 600 --biasOptimal --contextual --taskReranker randomShuffle --taskBatchSize 10 --auxiliary --ensembleSize 1 -RS 5000 --seed $SEED" ; done
Expand Down
2 changes: 1 addition & 1 deletion pinn
Submodule pinn updated 1 files
+2 −2 robustfill.py
2 changes: 1 addition & 1 deletion pregex
Submodule pregex updated 2 files
+17 −50 pregex.py
+0 −8 test_pregex.py
83 changes: 69 additions & 14 deletions taskRankGraphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ def __getitem__(self, key):
"Misc" : "Other"
}

# (substring, display category) pairs used to group tower task names.
# logo_tower_NamesToLabels walks these pairs in order and stops at the first
# substring found in a task name, so the order of the entries is significant.
towerTasks=[
("on top of", "Other"),
("bridge", "Bridge"),
("brick", "Brick wall"),
("aqueduct", "Aqueduct"),
("staircase", "Staircase"),
("pyramid", "Pyramid"),
# "arch stack" must stay ahead of "arch": a name containing "arch stack"
# also contains "arch", and only the first matching entry is used.
("arch stack", "Other"),
("arch", "Arch"),
("Other", "Other"),
]
logoTasks=[
("next to", "Other"),
("row of squares", "Square"),
Expand Down Expand Up @@ -395,22 +406,31 @@ def plotLabeledImages(embeddings, images, labels, title, exportPath, xlabel=None
plot.tick_params(axis='both', left='off', top='off', right='off', bottom='off', labelleft='off', labeltop='off', labelright='off', labelbottom='off')
plot.grid(False)

print("warning: only works for logo")
print("warning: only works for logo/towers")
if "tower" in title:
plot.title("Tower Building", fontsize=15)
nameMapping = towerTasks
zoom = 0.5
else:
plot.title("LOGO/Turtle Graphics", fontsize=15)
nameMapping = logoTasks
zoom = 0.5

cmap = matplotlib.cm.get_cmap('tab10')
prettyNames = sorted(list({pretty for _,pretty in logoTasks}))
colorLabels = logoNamesToLabels(labels)
prettyNames = sorted(list({pretty for _,pretty in nameMapping}))
colorLabels = logo_tower_NamesToLabels(labels, nameMapping)
import matplotlib.patches as mpatches
patches = [mpatches.Patch(color=cmap(i), label=prettyNames[i]) for i in range(len(prettyNames))]
legend=plot.legend(handles=patches, frameon=True, loc='upper center', bbox_to_anchor=(0.5, -0.02),
ncol=1, fontsize=15)
plot.title("LOGO/Turtle Graphics", fontsize=15)

def trimImage(image):
    """Crop an image to its drawn content and attach an alpha channel.

    Only channel 0 of ``image`` (indexed as [row, column, channel]) is read.
    It is inverted as ``255 - value``, so a value of 255 counts as empty.
    Border rows and columns whose inverted values sum to zero are removed.

    Returns an array with four channels on the last axis: three copies of
    the (re-inverted) cropped channel, followed by an alpha channel that is
    255 where the pixel is non-empty and 0 elsewhere.

    An image with no non-empty pixel is returned uncropped and fully
    transparent; without this guard the trimming loops would strip every row
    and then fail with IndexError on the empty array.
    """
    # Work on the inverted first channel so that "empty" pixels are exactly 0.
    image = 255. - image[:,:,0]

    # Only trim when something is drawn; otherwise the loops never terminate
    # cleanly because there is no non-empty border to stop at.
    if image.any():
        while image[0,:].sum() == 0.: image = image[1:,:]
        while image[-1,:].sum() == 0.: image = image[:-1,:]
        while image[:,0].sum() == 0.: image = image[:,1:]
        while image[:,-1].sum() == 0.: image = image[:,:-1]

    Alpha = 255*(image > 0)
    return np.dstack([255. - image]*3 + [Alpha])
def projectColor(c,i):
Expand All @@ -425,19 +445,14 @@ def projectColor(c,i):
imageLabels = [] # [(x,y,image)]
initialDisplacements = []
for i, label in enumerate(labels):
print(label)
x, y = embeddings[i, 0], embeddings[i, 1]


plot.scatter(x,y, color=cmap(colorLabels[i]), s=150, alpha=0.00001)


name = labels[i]
c = cmap(colorLabels[i])

imageLabels.append((x,y,projectColor(c,trimImage(images[i]))))
for index, (x,y,i) in enumerate(imageLabels):
ab = AnnotationBbox(OffsetImage(i, zoom=0.5),
ab = AnnotationBbox(OffsetImage(i, zoom=zoom),
(x,y),
xycoords='data',
frameon=False)
Expand Down Expand Up @@ -498,12 +513,33 @@ def makeLogoImage(im):
im = np.dstack([im, im, im, alpha])
return im

# NOTE(review): this span reads like a rendered diff with removed and added
# lines interleaved -- the one-argument signature and the fixed 256x256 alpha
# buffer look like the superseded versions of the lines that directly follow
# them. Confirm against the repository before relying on either variant.
#
# Builds a tower image with an extra alpha channel stacked on. When
# labelsAndImages is truthy the picture is additionally made monochrome, passed
# through growImage for one iteration, trimmed to the extent of its non-zero
# alpha, and has its non-alpha channels inverted.
def makeTowerImage(im):
def makeTowerImage(im, labelsAndImages):
# remove the floor
im = im[:-2,:,:]
if labelsAndImages:
# Make monochromatic
# Pixels with a positive channel 1 are zeroed first; pixels that still have a
# positive channel 0 afterwards are set to 1.
im[im[:,:,1] > 0] = 0
im[im[:,:,0] > 0] = 1
w = im.shape[0]
h = im.shape[1]

# Set the black pixels to transparent.
black_mask = im[:, :, 0] == 0
alpha = np.ones((256,256)) * 255
alpha = np.ones((w,h)) * 255
alpha[black_mask] = 0
# Scale the values written by the monochrome step by 255 (in place).
if labelsAndImages: im *= 255
im = np.dstack([im, alpha])
if labelsAndImages: # trim
# NOTE(review): scipy, w (rebound to 3 here) and k are not read again in this
# function -- they look like leftovers; confirm before removing.
import scipy
w = 3
k = np.ones((w,w,w))/4.
im = growImage(im, iterations=1)
# Peel off border rows and columns whose last (alpha) channel sums to zero.
while im[0,:,-1].sum() == 0.: im = im[1:,:,:]
while im[-1,:,-1].sum() == 0.: im = im[:-1,:,:]
while im[:,0,-1].sum() == 0.: im = im[:,1:,:]
while im[:,-1,-1].sum() == 0.: im = im[:,:-1,:]
# Invert every channel except the last one, which holds alpha.
im[:,:,:-1] = 255 - im[:,:,:-1]

return im

def makeRationalImage(im):
Expand Down Expand Up @@ -615,14 +651,15 @@ def plotTSNE(resultPaths,
if labelWithImages or labelsAndImages:
images = {}
for i, task in enumerate(sorted(filter(lambda mt: isinstance(mt, Task), recognitionTaskMetrics.keys()), key=lambda task : task.name)): # Enumerate in same order as sorted tasks.
if 'taskImages' not in recognitionTaskMetrics[task] and domain == 'tower': recognitionTaskMetrics[task]['taskImages'] = task.getImage(pretty=True) # BUG: this should not be necessaryd
if domain == 'tower':
recognitionTaskMetrics[task]['taskImages'] = task.getImage(pretty=not labelsAndImages)
if 'taskImages' not in recognitionTaskMetrics[task] and domain == 'rational': recognitionTaskMetrics[task]['taskImages'] = task.features
if 'taskImages' not in recognitionTaskMetrics[task] and domain == 'logo': recognitionTaskMetrics[task]['taskImages'] = task.highresolution
im = np.array(recognitionTaskMetrics[task]['taskImages'])
if domain == 'logo':
im = makeLogoImage(im)
elif domain == 'tower':
im = makeTowerImage(im)
im = makeTowerImage(im, labelsAndImages)
elif domain == 'rational':
im = makeRationalImage(im)
images[task.name] = im
Expand Down Expand Up @@ -982,6 +1019,24 @@ def logoNamesToLabels(listNames):
labels = [prettyNames.index(label) for label in labels]
return labels

def logo_tower_NamesToLabels(listNames, nameMapping):
    """Translate task names into integer category indices.

    Args:
        listNames: iterable of task-name strings.
        nameMapping: re-iterable sequence of ``(substring, pretty)`` pairs.
            The pairs are tried in order and the first ``substring``
            contained in a name decides its category, so more specific
            substrings have to come before more general ones.

    Returns:
        A list with one integer per name: the position of the name's pretty
        category within the alphabetically sorted set of all pretty
        categories found in ``nameMapping``.

    Raises:
        ValueError: if a name matches no substring (it then falls back to
            the category "Other") and "Other" is not one of the pretty
            categories in ``nameMapping``.

    Each name is also printed next to its category, with the name padded to
    20 columns.
    """
    categories = sorted({pretty for _, pretty in nameMapping})

    assigned = []
    for name in listNames:
        # First matching substring wins; unmatched names fall back to "Other".
        category = next(
            (pretty for label, pretty in nameMapping if label in name),
            "Other",
        )
        assigned.append(category)
        # The previous "%s{0:20}%s" mixed %-formatting with a str.format field
        # and therefore printed a literal "{0:20}" with no padding at all.
        print("{0:20}{1}".format(name, category))

    return [categories.index(category) for category in assigned]

def getTopNMostSimilar(names, sims, topN):
sortedSims=np.dstack(np.unravel_index(np.argsort(-sims.ravel()), sims.shape)).squeeze()
for n in range(topN):
Expand Down
19 changes: 19 additions & 0 deletions utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,25 @@ def flatten(x, abort=lambda x: False):
except TypeError: # not iterable
yield x

def growImage(i, iterations=2):
    """Thicken the foreground of an RGBA-style image by repeated dilation.

    Args:
        i: array indexed as [row, column, channel] with at least four
            channels; channel 3 is treated as alpha and marks the foreground
            wherever it is greater than zero.
        iterations: number of dilation passes. Each pass extends the
            foreground by one pixel up, down, left and right (no diagonals).

    Returns:
        After at least one pass, a new float array of the same shape in which
        every foreground pixel carries the per-channel maximum of the
        previous image and every other pixel is zero. With ``iterations=0``
        the input is returned unchanged (this used to raise
        UnboundLocalError because the result was only bound inside the loop).
    """
    import numpy as np
    for _ in range(iterations):
        # assume it is monochromatic and get the color
        colour = np.array([i[:, :, channel].max() for channel in range(4)])
        # assume that the alpha channel indicates where the foreground is
        foreground = i[:, :, 3] > 0
        # Union of the mask with copies of itself shifted by one pixel in each
        # of the four axis directions; padding keeps the shape and fills the
        # vacated edge with False.
        grown = (
            foreground
            | np.pad(foreground, ((0, 1), (0, 0)), mode='constant')[1:, :]
            | np.pad(foreground, ((0, 0), (0, 1)), mode='constant')[:, 1:]
            | np.pad(foreground, ((0, 0), (1, 0)), mode='constant')[:, :-1]
            | np.pad(foreground, ((1, 0), (0, 0)), mode='constant')[:-1, :]
        )
        painted = np.zeros(i.shape)
        painted[grown] = colour
        i = painted
    return i



def summaryStatistics(n, times):
if len(times) == 0:
Expand Down

0 comments on commit abc7e7a

Please sign in to comment.