Neural net predictor for Googlenet labels
83% F1 score on test set!
kelrit committed Apr 15, 2016
1 parent 8ffd779 commit 500f383
Showing 2 changed files with 87 additions and 0 deletions.
73 changes: 73 additions & 0 deletions net2.py
@@ -0,0 +1,73 @@
from __future__ import print_function
import numpy as np
import sys
np.random.seed(1337) # for reproducibility

from keras import models
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
import json
import score
import read_data

if __name__ == '__main__':
    dropout = 0.5
    batch_size = 64
    nb_classes = 9
    nb_epoch = 200

    # Simple fully connected net on top of the GoogLeNet prediction vectors.
    model = Sequential()

    model.add(Flatten(input_shape=(3, 1000)))

    model.add(Dropout(dropout))
    model.add(Dense(1024))
    model.add(Activation('relu'))

    model.add(Dropout(dropout))
    model.add(Dense(512))
    model.add(Activation('relu'))

    # Linear output layer; the hinge loss expects targets encoded as -1/+1.
    model.add(Dense(nb_classes))

    model.compile(loss='hinge', optimizer='adadelta')

    # image_labels: shape (num_images, 3, 1000)
    image_labels = np.load('data/googlenet_predictions.npy')
    with open('data/googlenet_predictions_order.json', 'r') as jfile:
        image_label_order = json.load(jfile)
    biz_csv = read_data.read_biz_csv()

    num_biz = len(biz_csv)
    x_whole = np.zeros((num_biz, image_labels.shape[1], image_labels.shape[2]))
    y_whole = np.zeros((num_biz, 9))
    # Aggregate data by business: average the predictions over its photos.
    for idx, (biz_id, (label, photo_ids)) in enumerate(biz_csv.items()):
        for photo_id in photo_ids:
            image_idx = image_label_order[str(photo_id)]
            x_whole[idx] += image_labels[image_idx]
        x_whole[idx] /= len(photo_ids)
        y_whole[idx] = label

    train_frac = 0.9
    x_train, x_test = np.vsplit(x_whole, [int(num_biz * train_frac)])
    y_train, y_test = np.vsplit(y_whole, [int(num_biz * train_frac)])

    print('X_train shape:', x_train.shape)
    print('Y_train shape:', y_train.shape)
    print('Training on %s biz, testing on %s biz' %
          (x_train.shape[0], x_test.shape[0]))

    model.fit(x_train, y_train, batch_size=batch_size,
              nb_epoch=nb_epoch, verbose=1, validation_split=0.1)

    # Threshold the real-valued outputs with np.sign to get -1/+1 label predictions.
    test_pred = np.sign(model.predict(x_test))
    test_loss = model.evaluate(x_test, y_test)
    np.savetxt('pred.csv', test_pred, delimiter=',')

    print('Test loss: ', test_loss)
    print('Test accuracy: ', score.accuracy(test_pred, y_test))
    print('F1 score: ', score.f1score(test_pred, y_test))
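
Note: read_data.read_biz_csv() is not part of this commit. For the hinge loss and the np.sign thresholding above to make sense, net2.py assumes it returns a dict mapping each business id to (label_vector, photo_ids), with the 9 attribute labels encoded as -1/+1. A minimal sketch of that assumed encoding (hypothetical helper, not committed code):

import numpy as np

def encode_labels(label_str, nb_classes=9):
    # Hypothetical helper mirroring what read_data.read_biz_csv() is assumed
    # to produce: a Yelp-style label string like '1 2 5' becomes a -1/+1 vector.
    vec = -np.ones(nb_classes)
    for token in label_str.split():
        vec[int(token)] = 1
    return vec

# Example: a business tagged with attributes 1, 2 and 5.
print(encode_labels('1 2 5'))  # [-1.  1.  1. -1. -1.  1. -1. -1. -1.]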
14 changes: 14 additions & 0 deletions score.py
@@ -0,0 +1,14 @@
from __future__ import division  # true division; avoids metrics truncating to 0 under Python 2
import numpy as np

def accuracy(y_pred, y_test):
    # Element-wise accuracy over the -1/+1 label matrices.
    total = y_test.size
    correct = np.sum(y_pred == y_test)
    return correct / total

def f1score(y_pred, y_test):
    # Micro-averaged F1 over all (business, label) pairs, labels encoded as -1/+1.
    tp = np.sum(np.logical_and(y_pred == 1, y_test == 1))
    fp = np.sum(np.logical_and(y_pred == 1, y_test == -1))
    fn = np.sum(np.logical_and(y_pred == -1, y_test == 1))
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)
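
For reference, a quick sanity check of the two metrics on a tiny -1/+1 label matrix (illustrative only, not part of the commit):

import numpy as np
import score

# Two businesses, three labels, predictions vs. ground truth in -1/+1 encoding.
y_test = np.array([[ 1, -1,  1],
                   [-1,  1,  1]])
y_pred = np.array([[ 1, -1, -1],
                   [-1,  1,  1]])

print(score.accuracy(y_pred, y_test))  # 5 of 6 entries match -> ~0.833
print(score.f1score(y_pred, y_test))   # tp=3, fp=0, fn=1 -> precision 1.0, recall 0.75, F1 ~0.857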
