Skip to content

Commit

Permalink
Added changes after testing
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilmaram committed Jun 8, 2018
1 parent 3b5f6d0 commit ce1471d
Show file tree
Hide file tree
Showing 8 changed files with 322 additions and 77 deletions.
32 changes: 20 additions & 12 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
class Config(object):
def __init__(self):
## Questions and Annotations JSON files
self.DATA_DIR ='../datasets/'
# self.DATA_DIR ='./datasets/'
self.DATA_DIR = '/Users/sainikhilmaram/Desktop/datasets/'

self.TRAIN_QUESTIONS_FILE='v2_OpenEnded_mscoco_train2014_questions.json'
self.TRAIN_ANNOTATIONS_FILE='v2_mscoco_train2014_annotations.json'
self.TRAIN_IMAGE_DIR = self.DATA_DIR + '/train2014/'
#self.TRAIN_IMAGE_DIR = '/Users/sainikhilmaram/Desktop/train2014'
self.EVAL_IMAGE_DIR = self.DATA_DIR + '/val2014/'



self.VAL_QUESTIONS_FILE='v2_OpenEnded_mscoco_val2014_questions.json'
self.VAL_ANNOTATIONS_FILE='v2_mscoco_val2014_annotations.json'
self.EVAL_QUESTIONS_FILE='v2_OpenEnded_mscoco_val2014_questions.json'
self.EVAL_ANNOTATIONS_FILE='v2_mscoco_val2014_annotations.json'

self.GLOVE_EMBEDDING_FILE='./datasets/glove.6B.100d.txt'

Expand All @@ -22,8 +25,12 @@ def __init__(self):
self.IMAGE_DIMENSION = [224,224]
self.IMAGE_SHAPE = self.IMAGE_DIMENSION + [3]
self.IMAGE_FEATURES = 14
self.CONV_DATA_SET = 'conv_dict.npy'
self.FC_DATA_SET = 'fc2_dict.npy'
self.IMAGE_FEATURES_MAP = 512
self.CONV_DATA_SET_TRAIN = 'conv_dict_train.npy'
self.FC_DATA_SET_TRAIN = 'fc2_dict_train.npy'

self.CONV_DATA_SET_EVAL = 'conv_dict_eval.npy'
self.FC_DATA_SET_EVAL = 'fc2_dict_eval.npy'

# self.CNN = 'resnet50'
# self.CNN_PRETRAINED_FILE = './resnet50_no_fc.npy'
Expand All @@ -33,8 +40,6 @@ def __init__(self):
self.EMBEDDING_DIMENSION = 512
self.VOCAB_SIZE = 13764



## Decoder Parameters
self.TOP_ANSWERS = 1000
self.OUTPUT_SIZE = self.TOP_ANSWERS
Expand All @@ -45,7 +50,10 @@ def __init__(self):


## Model Parameters
self.PHASE = 'cnn_features'
# self.PHASE = 'cnn_features'
# self.PHASE = 'train'
self.PHASE = 'test'
self.EVALUATION_PRESENT = True
self.POINT_WISE_FEATURES = 1024
self.INTERMEDIATE_DIMENSION = 30

Expand All @@ -55,9 +63,9 @@ def __init__(self):
self.NUM_BATCHES = 2 ## Just a place holder, so it doesn't loop through all the data.
self.SAVE_DIR = './models/'
self.SAVE_PERIOD = 370000/(self.BATCH_SIZE*4)
self.LOAD_MODEL = False
self.MODEL_FILE_NAME= self.SAVE_DIR + '/step_722.npy'
self.EPOCH_COUNT = 0
self.LOAD_MODEL = True
self.MODEL_FILE_NAME= self.SAVE_DIR + '/epoch_14.npy'
self.EPOCH_COUNT = 5


## Testing Parameters
Expand Down
8 changes: 5 additions & 3 deletions vqa_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,19 @@ def build(self,attend_image_word,attend_question_word,attend_image_phrase,
fcb = tf.get_variable(initializer=tf.constant(1.0, shape=[self.config.OUTPUT_SIZE], dtype=tf.float32),
trainable=True, name='fc_b')
fcl = tf.nn.bias_add(tf.matmul(attend_vector_sentence, fcw), fcb)
logits = tf.nn.relu(fcl)
self.logits = tf.nn.relu(fcl)

if config.PHASE == 'train':
# Compute the loss for this step, if necessary
cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=self.answers[:,0], ##[:,0] because answers is array of arrays
logits=logits)
logits=self.logits)

self.optimizer = tf.train.AdamOptimizer(config.INITIAL_LEARNING_RATE).minimize(cross_entropy_loss)

self.predictions = tf.argmax(logits, 1,output_type=tf.int32)

self.predictions = tf.argmax(self.logits, 1,output_type=tf.int32)
self.softmax_logits = tf.nn.softmax(self.logits)
if config.PHASE == 'train':
## Number of correct predictions in each run
self.predictions_correct = tf.reduce_sum(tf.cast(tf.equal(self.predictions, self.answers[:, 0]),tf.float32))
Expand Down
25 changes: 17 additions & 8 deletions vqa_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ def __init__(self,config):
self.phrase_level = vqa_phrase_level(config)
self.sentence_level = vqa_lstm(config)

def build(self, images, questions, question_masks, embedding_matrix):
## Build the CNN model
self.cnn.build(images)
# def build(self, images, questions, question_masks, embedding_matrix):
def build(self, image_features, questions, question_masks, embedding_matrix):
# ## Build the CNN model
if self.config.PHASE == "test":
images = image_features
self.cnn.build(images)

self.image_features = image_features
## Build the word level
self.word_level.build(questions, question_masks, embedding_matrix)
## Build the Phrase level
Expand All @@ -37,15 +42,20 @@ def build(self, images, questions, question_masks, embedding_matrix):
def build_encoder(self):

config = self.config
config.IMAGE_FEATURES = self.cnn.num_ctx
# config.IMAGE_FEATURES = self.cnn.num_ctx

## d = 512, N = 14, T = 25, k = 25
## Building Word Level features

print("CNN feature size {}".format(self.cnn.conv_feats.get_shape())) ## [BATCH_SIZE,14,512]
if self.config.PHASE == "test":
print("CNN feature size {}".format(self.cnn.conv_feats.get_shape())) ## [BATCH_SIZE,14,512]
self.V = tf.transpose(self.cnn.conv_feats, [0, 2, 1]) ##[BATCH_SIZE,512,14] (V) [?,d,N]
print("V_word shape : {}".format(self.V.get_shape()))
else:
print("CNN feature size {}".format(self.image_features.get_shape())) ## [BATCH_SIZE,14,512]
self.V = tf.transpose(self.image_features, [0, 2, 1]) ##[BATCH_SIZE,512,14] (V) [?,d,N]
print("V_word shape : {}".format(self.V.get_shape()))

self.V = tf.transpose(self.cnn.conv_feats, [0, 2, 1]) ##[BATCH_SIZE,512,14] (V) [?,d,N]
print("V_word shape : {}".format(self.V.get_shape()))

print("Word Level feature size {}".format(self.word_level.word_embed.get_shape())) ## [BATCH_SIZE,25,512]

Expand All @@ -66,7 +76,6 @@ def build_encoder(self):




def parallel_co_attention(self,V,Q,name_scope="word"):
config = self.config

Expand Down
29 changes: 15 additions & 14 deletions vqa_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,40 +111,39 @@ def assign_args(args):
vocabulary = Vocabulary(config)
## Build the vocabulary to get the indexes
vocabulary.build(config.DATA_DIR+config.TRAIN_QUESTIONS_FILE)

vocabulary.save_file()

config.VOCAB_SIZE = vocabulary.num_words
## Create the data set
data_set = prepare_train_data(config,vocabulary)
## Create the evaluation data set
data_set_eval = prepare_eval_data(config, vocabulary)
# Create the model object
model = vqa_model(config)
# Build the model
model.build()
sess.run(tf.global_variables_initializer())
## Load the Pre-trained CNN file
model.encoder.cnn.load_cnn(sess,config.CNN_PRETRAINED_FILE)

if (config.LOAD_MODEL):
model.load(sess,config.MODEL_FILE_NAME)
# Train the data with the data set and embedding matrix
model.train(sess,data_set)

elif config.PHASE=="cnn_features":

# ## Create Vocabulary object
# vocabulary = Vocabulary(config)
# ## Build the vocabulary to get the indexes
# vocabulary.build(config.DATA_DIR + config.TRAIN_QUESTIONS_FILE)
model.train(sess,data_set,data_set_eval)

elif config.PHASE == "cnn_features":
## Create the data set
data_set = prepare_cnn_data(config)

model = vqa_model_static_cnn(config)
model.build()
sess.run(tf.global_variables_initializer())
## Load Pre-trained CNN file
model.cnn.load_cnn(sess, config.CNN_PRETRAINED_FILE)
model.train(sess,data_set)

# fc_file_name = config.DATA_DIR + config.FC_DATA_SET_TRAIN
# conv_file_name = config.DATA_DIR + config.CONV_DATA_SET_TRAIN

fc_file_name = config.DATA_DIR + config.FC_DATA_SET_EVAL
conv_file_name = config.DATA_DIR + config.CONV_DATA_SET_EVAL

model.train(sess, data_set, fc_file_name, conv_file_name)


elif config.PHASE == 'test':
Expand All @@ -163,4 +162,6 @@ def assign_args(args):
sess.run(tf.global_variables_initializer())

model.load(sess, config.MODEL_FILE_NAME)
## Load the Pre-trained CNN file
model.encoder.cnn.load_cnn(sess, config.CNN_PRETRAINED_FILE)
model.test(sess,data_set,top_answers)
88 changes: 67 additions & 21 deletions vqa_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@

class vqa_model:
def __init__(self,config):
# Store `config` and create the model's components: the encoder, the decoder,
# the image loader and the two image-feature loaders; global_step starts at 0.
# NOTE(review): the two print calls and the two image_loader assignments below differ
# only in spelling/spacing. They look like the before/after lines of a diff rendered
# without +/- markers -- confirm against the real file before treating both as live code.
print("Crearing the Model")
print("Creating the Model")
self.config = config
self.encoder = vqa_encoder(self.config)
self.decoder = vqa_decoder(self.config)
self.image_loader = ImageLoader('./ilsvrc_2012_mean.npy',self.config)
self.image_loader = ImageLoader('./ilsvrc_2012_mean.npy', self.config)
# Feature loaders for training and evaluation; build() is invoked on both in this class's build().
self.image_feature_loader = image_feature_loader(self.config)
self.image_feature_loader_eval = image_feature_loader_eval(self.config)
# Incremented once per training batch; train() compares it against SAVE_PERIOD to decide when to log.
self.global_step = 0

def build(self):
Expand All @@ -27,6 +29,9 @@ def build(self):
self.images = tf.placeholder(
dtype=tf.float32,
shape=[self.config.BATCH_SIZE] + self.config.IMAGE_SHAPE)

self.image_features = tf.placeholder(dtype=tf.float32,
shape=[self.config.BATCH_SIZE]+[self.config.IMAGE_FEATURES]+[self.config.IMAGE_FEATURES_MAP])
self.questions =tf.placeholder(
dtype=tf.int32,
shape=[self.config.BATCH_SIZE] + [self.config.MAX_QUESTION_LENGTH])
Expand All @@ -43,51 +48,57 @@ def build(self):
regularizer=self.encoder.cnn.nn.fc_kernel_regularizer,
trainable=True)

## pass the images, questions and embedding matrix to the encoder
self.encoder.build(self.images,self.questions,self.question_masks, self.embedding_matrix)
if self.config.PHASE == 'test':
## pass the images, questions and embedding matrix to the encoder
self.encoder.build(self.images,self.questions,self.question_masks, self.embedding_matrix)
else:
## pass the image features, questions and embedding matrix to the encoder
self.encoder.build(self.image_features, self.questions, self.question_masks, self.embedding_matrix)

# ## pass the outputs of encoder to decoder model
self.decoder.build(self.encoder.v_attend_word,self.encoder.q_attend_word,
self.encoder.v_attend_phrase,self.encoder.q_attend_phrase,
self.encoder.v_attend_sentence,self.encoder.q_attend_sentence)
## Load the pre-computed image features
self.image_feature_loader.build()
self.image_feature_loader_eval.build()
#
# self.build_model()

def build_model(self):
    """Placeholder for assigning encoder/decoder variables onto the model.

    Currently a no-op: nothing is assigned and ``None`` is returned.
    """
    return None

def train(self,sess,train_data):
def train(self,sess,train_data,eval_data):
print("Training the model")
epoch_count = self.config.EPOCH_COUNT

for _ in tqdm(list(range(self.config.NUM_EPOCHS)), desc='epoch'):
total_predictions_correct = 0
for _ in tqdm(list(range(train_data.num_batches)), desc='batch'):
#for _ in tqdm(list(range(self.config.NUM_BATCHES)), desc='batch'):
# for _ in tqdm(list(range(self.config.NUM_BATCHES)), desc='batch'):
batch = train_data.next_batch()
image_files, image_idxs, question_idxs, question_masks, answer_idxs, answer_masks = batch
images = self.image_loader.load_images(image_files)
#images = self.image_loader.load_images(image_files)
image_features = self.image_feature_loader.load_images(image_idxs)

feed_dict = {self.images:images,
feed_dict = {self.image_features:image_features,
# self.images:images,
self.questions:question_idxs,
self.question_masks:question_masks,
self.decoder.answers:answer_idxs,
self.decoder.answer_masks:answer_masks}

_, predictions_correct = sess.run([self.decoder.optimizer,self.decoder.predictions_correct],feed_dict=feed_dict)

# _ = sess.run(self.encoder.v_attend_phrase,
# feed_dict=feed_dict)
#
# predictions_correct = 0

## Global step count in order to store the model between batches
self.global_step += 1
total_predictions_correct += predictions_correct


if(self.global_step % int(self.config.SAVE_PERIOD) == 0):
self.save("step_"+ str(self.global_step))
# self.save("step_"+ str(self.global_step))
print("Total Predictions correct : {0} at time step {1}".format(total_predictions_correct,self.global_step))
f = open("results.txt", "a")
f.write("Total Predictions correct : {0} at time step {1} \n".format(total_predictions_correct,self.global_step))
Expand All @@ -99,31 +110,66 @@ def train(self,sess,train_data):
self.save("epoch_"+str(epoch_count))
f = open("results.txt", "a")
f.write("Total Predictions correct : {0} at epoch {1} \n".format(total_predictions_correct, epoch_count))
f.write("------------------------------------------------------------------------------\n")
f.close()
train_data.reset()

if (self.config.EVALUATION_PRESENT and (epoch_count % 2 == 0)):
self.eval(sess, eval_data)

def eval(self, sess, eval_data):
    """Run one pass over ``eval_data`` and log the number of correct predictions.

    For every batch the pre-computed image features are fetched through
    ``self.image_feature_loader_eval`` and fed to the graph together with the
    questions and answers. ``self.decoder.predictions_correct`` is summed over
    all batches, the total is printed and appended to ``results.txt``, and
    finally ``eval_data.reset()`` is called.

    Args:
        sess: session used to run ``self.decoder.predictions_correct``.
        eval_data: data set exposing ``num_batches``, ``next_batch()`` and
            ``reset()``.
    """
    print("Evaluating the model")
    total_predictions_correct = 0
    # tqdm sizes its progress bar from the range itself; no list copy is needed.
    for _ in tqdm(range(eval_data.num_batches), desc='batch'):
        (_image_files, image_idxs, question_idxs, question_masks,
         answer_idxs, answer_masks) = eval_data.next_batch()
        # Image features are looked up by image index; raw images are not loaded here.
        image_features = self.image_feature_loader_eval.load_images(image_idxs)

        feed_dict = {self.image_features: image_features,
                     self.questions: question_idxs,
                     self.question_masks: question_masks,
                     self.decoder.answers: answer_idxs,
                     self.decoder.answer_masks: answer_masks}

        # Only the correct-prediction count is fetched; no optimizer op is run.
        total_predictions_correct += sess.run(self.decoder.predictions_correct,
                                              feed_dict=feed_dict)

    print("Total Predictions correct : {0} in Validation".format(total_predictions_correct))
    separator = "------------------------------------------------------------------------------\n"
    # The context manager closes results.txt even if one of the writes raises.
    with open("results.txt", 'a') as f:
        f.write(separator)
        f.write("Total Predictions correct : {0} in Validation\n".format(total_predictions_correct))
        f.write(separator)
    eval_data.reset()

def test(self,sess,test_data,top_answers):
# Run the model on a single batch taken from `test_data` and print the five
# highest-scoring answers (looked up in `top_answers`) for the first example of that batch.
# NOTE(review): the two batch fetches and the two sess.run/print sequences below look like
# the before/after lines of a diff rendered without +/- markers -- confirm against the
# real file which of them are live code.

batch = test_data.batch()
image_files, question_idxs, question_masks = batch
batch = test_data.next_batch()
image_files, image_idxs, question_idxs, question_masks = batch
# Images are loaded from their file names; image_idxs is not used in this method.
images = self.image_loader.load_images(image_files)

# No answers are fed: only the images and the questions with their masks.
feed_dict = {self.images: images,
self.questions: question_idxs,
self.question_masks: question_masks
}

predictions = sess.run(self.decoder.predictions,feed_dict = feed_dict)
print("Answer is {}".format(top_answers[predictions]))



# Fetch the argmax predictions together with the decoder's softmax output.
predictions, logits = sess.run([self.decoder.predictions, self.decoder.softmax_logits], feed_dict=feed_dict)

## Get top 5 elements
# Despite its name, `logits` holds the values of decoder.softmax_logits; only row 0 is kept.
logits = np.array(logits[0]) ## logits obtained are two dimensional array
# Indices ordered by descending score, truncated to the first five.
idxs = sorted(range(len(logits)), key=lambda i: logits[i], reverse=True)[:5]

print("Answers ......")
# Indexes idxs[0..4] unconditionally, so fewer than five scores would raise IndexError.
for i in range(5):
print("Answer : {0:10} probability : {1:10}".format(top_answers[idxs[i]], logits[idxs[i]]))

def save(self,file_name):
""" Save the model. """
print(tf.global_variables())
config = self.config
data = {v.name: v.eval() for v in tf.global_variables()}
save_path = os.path.join(config.SAVE_DIR, file_name)
Expand Down
Loading

0 comments on commit ce1471d

Please sign in to comment.