forked from ellisk42/ec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
taskBatcher.py
200 lines (150 loc) · 9.48 KB
/
taskBatcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
from dreamcoder.utilities import eprint
import random
class DefaultTaskBatcher:
    """Iterates through task batches of the specified size.

    Defaults to all tasks if taskBatchSize is None.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the batch for ``currIteration``, wrapping around the task list.

        Args:
            ec_result: Not used by this strategy.
            tasks: Sequence of tasks to iterate over.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Zero-based iteration index selecting the batch window.

        Returns:
            A list of ``taskBatchSize`` consecutive tasks starting at offset
            ``(taskBatchSize * currIteration) % len(tasks)``; an empty list
            when there are no tasks.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(message)
        if numTasks == 0:
            # Nothing to batch; also avoids a modulo-by-zero below.
            return []
        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        # Handle wraparound: append the overflow taken from the front of the list.
        return tasks[start:end] + tasks[:max(0, end - numTasks)]
class RandomTaskBatcher:
    """Draws each batch as a uniform random sample (without replacement) of the tasks.

    When taskBatchSize is None the sample covers every task.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Sample a batch from ``tasks`` using the module-level ``random`` state.

        ``ec_result`` and ``currIteration`` are not used by this strategy.
        """
        totalTasks = len(tasks)
        batchSize = totalTasks if taskBatchSize is None else taskBatchSize
        if batchSize > totalTasks:
            eprint("Task batch size is greater than total number of tasks, aborting.")
            assert False
        return random.sample(tasks, batchSize)
class RandomShuffleTaskBatcher:
    """Shuffles the tasks, then iterates through batches like DefaultTaskBatcher.

    The shuffle is re-seeded per epoch (``baseSeed + epoch``), so the ordering
    changes across epochs but is reproducible for a given ``baseSeed``.
    Intended as a benchmark comparison to test the task ordering.
    """

    def __init__(self, baseSeed=0):
        # Seed offset; epoch e is shuffled with seed baseSeed + e.
        self.baseSeed = baseSeed

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the batch for ``currIteration`` from the epoch's shuffled order.

        Args:
            ec_result: Not used by this strategy.
            tasks: Sequence of hashable tasks; it is not modified.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Zero-based iteration index.

        Returns:
            A list of tasks in shuffled order. When a batch wraps into the next
            epoch, the overflow is taken from the next epoch's shuffle and any
            task already in the batch is dropped, so a wrapped batch may hold
            fewer than ``taskBatchSize`` tasks. Empty when there are no tasks.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(message)
        if numTasks == 0:
            # Nothing to batch; also avoids dividing by zero below.
            return []

        # Reshuffles tasks in a fixed way across epochs for reproducibility.
        currEpoch = int(currIteration * taskBatchSize) // numTasks
        shuffledTasks = list(tasks)  # Copy, since shuffle works in place.
        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        taskBatch = shuffledTasks[start:end]
        if end > numTasks:
            # Wraparound: continue with the beginning of the next epoch's order.
            nextEpochTasks = list(tasks)
            random.Random(self.baseSeed + currEpoch + 1).shuffle(nextEpochTasks)
            taskBatch += nextEpochTasks[:end - numTasks]

        # De-duplicate while keeping the shuffled order; going through a set
        # would reorder the batch by hash and discard the seeded ordering.
        return list(dict.fromkeys(taskBatch))
class UnsolvedTaskBatcher:
    """Returns, at each call, the tasks whose solved-count is still below two."""

    def __init__(self):
        # Maps each task to the number of calls at which its frontier was non-empty.
        self.timesSolved = {}
        self.start = 0

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Update the per-task solve counts and return the under-solved tasks.

        Every task in ``ec_result.allFrontiers`` with a non-empty frontier has
        its count incremented; tasks with an empty frontier are recorded with
        their current count (0 if unseen). ``taskBatchSize`` must be None.
        """
        assert taskBatchSize is None, "This batching strategy does not support batch sizes"

        solveCounts = self.timesSolved
        for task, frontier in ec_result.allFrontiers.items():
            soFar = solveCounts.get(task, 0)
            solveCounts[task] = max(0, soFar) if frontier.empty else 1 + soFar

        return list(filter(lambda task: solveCounts.get(task, 0) < 2, tasks))
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    """Build a batch mixing the lowest-entropy tasks with randomly drawn ones.

    Args:
        ec_result: Object whose ``recognitionModel.taskGrammarEntropies(tasks)``
            returns a mapping from task to entropy (``.items()`` is called on it).
        tasks: Candidate tasks to select from.
        taskBatchSize: Target number of tasks in the batch.
        randomRatio: Fraction of the batch to draw at random; the rest is
            filled with the lowest-entropy tasks.

    Returns:
        The lowest-entropy tasks followed by the random picks. The random picks
        are sampled from the tasks not already chosen by entropy; if fewer
        candidates remain than requested, all remaining ones are used, so the
        batch can be smaller than ``taskBatchSize``.
    """
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom
    eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." % (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." % (numEntropy, numRandom))

    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])
    rankedTasks = [task for (task, _entropy) in sortedEntropies]

    entropyBatch = rankedTasks[:numEntropy]
    remainingTasks = rankedTasks[numEntropy:]
    # random.sample raises ValueError when asked for more items than exist,
    # which happens when the batch size exceeds the number of tasks passed in.
    randomBatch = random.sample(remainingTasks, min(numRandom, len(remainingTasks)))
    return entropyBatch + randomBatch
def kNearestNeighbors(ec_result, tasks, k, task):
    """Return the k entries of ``tasks`` ranked closest to ``task``.

    Scores come from
    ``ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)``;
    higher scores are treated as more similar.

    NOTE(review): the returned indices are applied directly to ``tasks``, so
    this assumes the score array is aligned index-for-index with ``tasks`` --
    confirm against the recognition model.
    """
    import numpy as np

    similarity = ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)
    # Negating makes argsort's ascending order put the greatest similarity first.
    nearestIndices = np.argsort(-similarity)[:k]
    candidateArray = np.array(tasks)
    return list(candidateArray[nearestIndices])
class RandomkNNTaskBatcher:
    """Chooses a random task and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a random seed task followed by its nearest neighbours.

        Args:
            ec_result: Object exposing ``recognitionModel`` (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Not used by this strategy.

        Returns:
            Without a recognition model, a uniform random sample of
            ``taskBatchSize`` tasks. Otherwise the seed task followed by up to
            ``taskBatchSize - 1`` neighbours chosen among the *other* tasks.
            Empty when there are no tasks or the batch size is zero.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(message)

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d" % taskBatchSize)
            return random.sample(tasks, taskBatchSize)

        if not tasks or taskBatchSize <= 0:
            # random.choice cannot pick from an empty sequence, and a batch
            # size of zero would turn into k=-1 ("all but the last neighbour").
            return []

        randomTask = random.choice(tasks)
        numNeighbors = taskBatchSize - 1
        # Search only among the other tasks so the seed cannot come back as its
        # own nearest neighbour and appear twice in the batch.
        otherTasks = [t for t in tasks if t is not randomTask]
        if numNeighbors == 0 or not otherTasks:
            return [randomTask]
        kNN = kNearestNeighbors(ec_result, otherTasks, numNeighbors, randomTask)
        return [randomTask] + kNN
class RandomLowEntropykNNTaskBatcher:
    """Chooses a random seed among the lowest-entropy unsolved tasks and adds its nearest neighbors.

    The seed is drawn from the ``taskBatchSize`` unsolved tasks with the lowest
    entropy; the (taskBatchSize - 1) nearest neighbors are then found among all
    other tasks using the recognition model logits.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a low-entropy unsolved seed task followed by its neighbours.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier with
                an ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch; None returns every
                unsolved task.
            currIteration: Not used by this strategy.

        Returns:
            With ``taskBatchSize`` None, all unsolved tasks. Without a
            recognition model, a random sample of at most ``taskBatchSize``
            unsolved tasks. Otherwise the seed followed by up to
            ``taskBatchSize - 1`` neighbours; empty when nothing is unsolved.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]
        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(message)

        if ec_result.recognitionModel is None:
            # The size check above is against all tasks, so fewer unsolved tasks
            # than the batch size may remain; cap the sample to what exists.
            sampleSize = min(taskBatchSize, len(unsolvedTasks))
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" % (sampleSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, sampleSize)

        if not unsolvedTasks or taskBatchSize <= 0:
            # No seed can be chosen (random.choice would fail on an empty list).
            return []

        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
        randomTask = random.choice(lowEntropyUnsolved)
        numNeighbors = taskBatchSize - 1
        # Search only among the other tasks so the seed cannot come back as its
        # own nearest neighbour and appear twice in the batch.
        otherTasks = [t for t in tasks if t is not randomTask]
        if numNeighbors == 0 or not otherTasks:
            return [randomTask]
        kNN = kNearestNeighbors(ec_result, otherTasks, numNeighbors, randomTask)
        return [randomTask] + kNN
class UnsolvedEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.

    Given a task batch size, returns the unsolved tasks with the lowest entropy.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a batch of unsolved tasks, preferring low entropy.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier with
                an ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch; None returns every
                unsolved task.
            currIteration: Not used by this strategy.

        Returns:
            With ``taskBatchSize`` None, all unsolved tasks. Without a
            recognition model, a random sample of at most ``taskBatchSize``
            unsolved tasks. Otherwise the result of ``entropyRandomBatch`` with
            ``randomRatio=0``; empty when nothing is unsolved.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]
        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(message)

        if ec_result.recognitionModel is None:
            # The size check above is against all tasks, so fewer unsolved tasks
            # than the batch size may remain; cap the sample to what exists.
            sampleSize = min(taskBatchSize, len(unsolvedTasks))
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" % (sampleSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, sampleSize)

        if not unsolvedTasks:
            # Nothing left to rank, so skip the recognition model entirely.
            return []
        return entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
class UnsolvedRandomEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.

    Given a task batch size, returns a mix of unsolved tasks with a fraction
    ``randomRatio`` selected randomly and the remainder selected by lowest
    entropy.
    """

    def __init__(self, randomRatio=.5):
        # Fraction of each batch drawn at random rather than by entropy.
        self.randomRatio = randomRatio

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a batch of unsolved tasks mixing low-entropy and random picks.

        Args:
            ec_result: Object exposing ``allFrontiers`` (task -> frontier with
                an ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: Sequence of candidate tasks.
            taskBatchSize: Number of tasks per batch; None returns every
                unsolved task.
            currIteration: Not used by this strategy.

        Returns:
            With ``taskBatchSize`` None, all unsolved tasks. Otherwise at most
            ``taskBatchSize`` unsolved tasks: a random sample when there is no
            recognition model, else the result of ``entropyRandomBatch`` with
            this batcher's ``randomRatio``. Empty when nothing is unsolved.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]
        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(message)

        # The size check above is against all tasks, so fewer unsolved tasks
        # than the batch size may remain; cap the request to what exists so
        # neither sampling path asks for more items than are available.
        effectiveBatchSize = min(taskBatchSize, len(unsolvedTasks))

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" % (effectiveBatchSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, effectiveBatchSize)

        if not unsolvedTasks:
            # Nothing left to rank, so skip the recognition model entirely.
            return []
        return entropyRandomBatch(ec_result, unsolvedTasks, effectiveBatchSize, randomRatio=self.randomRatio)