Skip to content

Commit

Permalink
marshall's stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
georgewu2 committed May 4, 2013
2 parents 9b201cf + c9aa5cd commit d225187
Showing 1 changed file with 109 additions and 43 deletions.
152 changes: 109 additions & 43 deletions adaboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,30 @@ def loadData(self,positiveDir="sameface/",negativeDir="randombg2/"):
n,m = np.shape(self.featuresMatrix)
print n,m

def loadDataFromMatrices(self,positives,negatives):

bigSet = []
bigSet.extend(positives)
bigSet.extend(negatives)
self.data = bigSet

self.labels = ([1 for x in range(len(positives))])
self.labels.extend([-1 for x in range(len(negatives))])

self.featuresMatrix = np.zeros((1,8628))

x = 0
for img in self.data:
print x
x += 1
integralImageWithFeatures = Features(img)
self.featuresMatrix = np.vstack([self.featuresMatrix,integralImageWithFeatures.f])
self.featuresMatrix = np.delete(self.featuresMatrix,0,0)
n,m = np.shape(self.featuresMatrix)
print n,m



def guessClass(self,featuresMatrix,feature,threshold,inequality):

# get dimensions and make a return vector
Expand Down Expand Up @@ -99,7 +123,7 @@ def guessClass(self,featuresMatrix,feature,threshold,inequality):
classed[img] = -1
return classed

def trainClassifier(self,data,labels,weights,steps):
def trainClassifier(self,data,labels,weights,steps,weakClassGuessers):

# setup
labelMatrix = np.matrix(labels).T
Expand All @@ -123,7 +147,7 @@ def trainClassifier(self,data,labels,weights,steps):
# setup complete -- begin the search for the best weak classifier

# the work
for feature in range(0,100):
for feature in range(0,500):
# find min and max of x or y coordinates
rangeMin = self.featuresMatrix[:,feature].min()
rangeMax = self.featuresMatrix[:,feature].max()
Expand All @@ -150,22 +174,29 @@ def trainClassifier(self,data,labels,weights,steps):
errorArray[img] = 0

weightedError = np.matrix(weights).T * np.matrix(errorArray)

# if weighted error is smallest, then put all our current
# stuff in a dictionary
if weightedError < minError:
# print "CLASSGUESS", classGuess
minError = weightedError
bestClassGuess = classGuess.copy()
bestClassifier['feature'] = feature
bestClassifier['threshold'] = threshold
bestClassifier['inequality'] = inequality

print bestClassifier
if not weakClassGuessers:
# print "CLASSGUESS", classGuess
minError = weightedError
bestClassGuess = classGuess.copy()
bestClassifier['feature'] = feature
bestClassifier['threshold'] = threshold
bestClassifier['inequality'] = inequality
else:
if feature not in [weakClassGuessers[x]['feature'] for x in range(0,len(weakClassGuessers))]:
minError = weightedError
bestClassGuess = classGuess.copy()
bestClassifier['feature'] = feature
bestClassifier['threshold'] = threshold
bestClassifier['inequality'] = inequality

return bestClassifier,minError,bestClassGuess

def boost(self,maxFeatures):

self.classifierArray = []
weakClassGuessers = []

try:
Expand All @@ -178,7 +209,6 @@ def boost(self,maxFeatures):
# setup weight vector
weights = np.ones((n,1))
weights = weights * (1. / n)
print weights

aggregateClassGuess = np.zeros((n,1))

Expand All @@ -188,7 +218,7 @@ def boost(self,maxFeatures):
# print("ITERATION", i)

# train best classifier for these weights
bestClassifier,error,classGuess = self.trainClassifier(self.data,self.labels,weights,10)
bestClassifier,error,classGuess = self.trainClassifier(self.data,self.labels,weights,10,weakClassGuessers)

print bestClassifier

Expand All @@ -205,7 +235,7 @@ def boost(self,maxFeatures):
weakClassGuessers.append(bestClassifier)

# calculate new weights
exponent = np.multiply(1 * alpha * np.matrix(self.labels), classGuess)
exponent = np.multiply(alpha * np.matrix(self.labels), classGuess)

# print "EXPONENT", exponent

Expand Down Expand Up @@ -251,7 +281,7 @@ def classify(self,i):
else:
return False

class cascade:
class Cascade:

def __init__(self):
self.subwindow = []
Expand Down Expand Up @@ -294,6 +324,8 @@ def cascadedClassifierGuess(self,data,adabooster):

# for every data point
for i in data:
# print "DATA WE ARE GUESSING",i
classifiedDict[i] = 1

features = Features(i)
featuresMatrix = features.f
Expand All @@ -305,10 +337,11 @@ def cascadedClassifierGuess(self,data,adabooster):
for layer,classifier in self.cascadedClassifier.items():

# get a classguess
for i in range (0,len(classifier)):
classGuess = adabooster.guessClass(featuresMatrix,classifier[i]['feature'],classifier[i]['threshold'],classifier[i]['inequality'])
for x in range (0,len(classifier)):
classGuess = adabooster.guessClass(featuresMatrix,classifier[x]['feature'],classifier[x]['threshold'],classifier[x]['inequality'])
# print "CLASS GUESS", classGuess
aggregateClassGuess = aggregateClassGuess + (-1 * classifier[i]['alpha'] * classGuess)
aggregateClassGuess = aggregateClassGuess + (-1 * classifier[x]['alpha'] * classGuess)
# print "AGGREGATE GUESS", aggregateClassGuess

# if a layer returns a negative result, automatically return negative
# print "AGG GUESS",aggregateClassGuess
Expand All @@ -317,12 +350,16 @@ def cascadedClassifierGuess(self,data,adabooster):
break

# else, if every classifier says it's good, then return 1
classifiedDict[i] = 1

# print "CLASSIFIED DICT", classifiedDict
# print classifiedDict
return classifiedDict

def adjustThreshold(self,classifier):
classifier['threshold'] -= 1
def adjustThreshold(self, classifier, n):
    """Shift the thresholds of cascade layer n in place.

    For each of the first n weak classifiers in classifier[n], the
    threshold is moved by 2: upward for "<=" inequalities, downward
    otherwise.  (Presumably this relaxes the layer so it accepts more
    detections -- confirm against guessClass's comparison direction.)
    """
    layer = classifier[n]
    for idx in range(n):
        weak = layer[idx]
        delta = 2 if weak['inequality'] == "<=" else -2
        weak['threshold'] += delta

def trainCascadedClassifier(self,f,d,Ftarget):

Expand All @@ -334,14 +371,17 @@ def trainCascadedClassifier(self,f,d,Ftarget):

n = 0
newFalsePositiveRate = self.falsePositiveRate

print "BIG LOOP FALSE POSITIVE", newFalsePositiveRate

# we're trying to get our false positive rate down
while newFalsePositiveRate > (f * self.falsePositiveRate):
print "FALSE POSITIVE RATE", self.falsePositiveRate
print "CURRENT FALSE POSITIVE RATE", newFalsePositiveRate

n += 1

if n > 1:
adabooster.loadDataFromMatrices(self.positiveSet,self.negativeSet)

# make a new adabooster and boost to get a classifier with n features
adabooster.boost(n)

Expand All @@ -353,50 +393,76 @@ def trainCascadedClassifier(self,f,d,Ftarget):
ncnt = Counter()
for k,v in negativeSetGuesses.items():
ncnt[v] += 1
newFalsePositiveRate = ncnt[1] / len(negativeSetGuesses)
newNewFalsePositiveRate = float(ncnt[1]) / float(len(negativeSetGuesses.items()))

# print "NEGATIVE SET GUESSES", negativeSetGuesses

positiveSetGuesses = self.cascadedClassifierGuess(self.positiveSet,adabooster)
pcnt = Counter()
for k,v in positiveSetGuesses.items():
pcnt[v] += 1
newDetectionRate = pcnt[1] / len(positiveSetGuesses)
newDetectionRate = float(pcnt[1]) / float(len(positiveSetGuesses.items()))

# print "POSITIVE SET GUESSES", positiveSetGuesses

# adjust the most recently added classifier
while newDetectionRate < d * self.detectionRate:

# IMPLEMENT THIS
self.adjustThreshold(self.cascadedClassifier[n])
print "CASCADED CLASSIFIER", self.cascadedClassifier
self.adjustThreshold(self.cascadedClassifier,n)

# re-test and see if we have a good detection rate
positiveSetGuesses = self.cascadedClassifierGuess(self.positiveSet)
positiveSetGuesses = self.cascadedClassifierGuess(self.positiveSet,adabooster)
# print "POSITIVE SET GUESSES", positiveSetGuesses
cnt = Counter()
for k,v in positiveSetGuesses.items():
cnt[v] += 1
newDetectionRate = cnt[1] / len(positiveSetGuesses)
newDetectionRate = float(cnt[1]) / float(len(positiveSetGuesses.items()))
print "DIVIDIED", newDetectionRate, d * self.detectionRate

# replace our current negative set with only false detections
self.negativeSet = []
# replace our current negative set with only false detections
tempNegativeSet = []

print "NEW FALSE POSITIVE RATE", newFalsePositiveRate, f, self.falsePositiveRate, newNewFalsePositiveRate
if newFalsePositiveRate > f:
negativeSetGuesses = self.cascadedClassifierGuess(self.negativeSet,adabooster)
print negativeSetGuesses
for (k,v) in negativeSetGuesses.items():
if v == 1:
tempNegativeSet.append(k)
self.negativeSet = tempNegativeSet
print self.negativeSet

newFalsePositiveRate = newFalsePositiveRate * newNewFalsePositiveRate
print "NEW FALSE POSITIVE RATE TO CHECK", newFalsePositiveRate


if newFalsePositiveRate > self.falsePositiveRate:
negativeSetGuesses = self.cascadedClassifierGuess(self.negativeSet)
self.negativeSet = [k for (k,v) in negativeSetGuesses.iteritems() if v == 1]
self.falsePositiveRate = newFalsePositiveRate
print "NEW SELF FALSE POSITIVE RATE", self.falsePositiveRate

# cascader = cascade()
# cascader.trainCascadedClassifier(.1,.9,.1)
def cascadedClassify(self, i):
    """Run a single data point through the trained cascade.

    Returns False when the cascade's verdict for i is -1 (rejected),
    True otherwise.
    """
    booster = adaBoost()
    guesses = self.cascadedClassifierGuess([i], booster)
    first_verdict = list(guesses.values())[0]
    return first_verdict != -1

adabooster = adaBoost()
adabooster.loadData()
adabooster.boost(10)
cascader = Cascade()
cascader.trainCascadedClassifier(.2,.25,.7)

positiveImages = os.listdir(os.getcwd() + "/testconfirmedpos")
positiveImages.pop(0)

positiveSet = [v for k,v in (adabooster.classify(map(lambda x : get_frame_vector("testconfirmedpos/" + x,False),positiveImages))).items()]
for i in positiveImages:
print "POSITIVES"
print cascader.cascadedClassify(get_frame_vector("testconfirmedpos/" + i,False))

negativeImages = os.listdir(os.getcwd() + "/testconfirmedneg")
negativeImages.pop(0)

negativeSet = [v for k,v in (adabooster.classify(map(lambda x : get_frame_vector("testconfirmedneg/" + x,False),negativeImages))).items()]
for i in negativeImages:
print "NEGATIVES"
print cascader.cascadedClassify(get_frame_vector("testconfirmedneg/" + i,False))

print "POSITIVE",positiveSet
print "NEGATIVE",negativeSet

0 comments on commit d225187

Please sign in to comment.