Added changes after testing

Heronalps · Jun 8, 2018 · ce1471d · ce1471d
1 parent 3b5f6d0
commit ce1471d
Show file tree

Hide file tree

Showing 8 changed files with 322 additions and 77 deletions.
diff --git a/config.py b/config.py
@@ -1,15 +1,18 @@
 class Config(object):
     def __init__(self):
         ## Questions and Annotations JSON files
-        self.DATA_DIR ='../datasets/'
+        # self.DATA_DIR ='./datasets/'
+        self.DATA_DIR = '/Users/sainikhilmaram/Desktop/datasets/'
+
         self.TRAIN_QUESTIONS_FILE='v2_OpenEnded_mscoco_train2014_questions.json'
         self.TRAIN_ANNOTATIONS_FILE='v2_mscoco_train2014_annotations.json'
         self.TRAIN_IMAGE_DIR = self.DATA_DIR + '/train2014/'
-        #self.TRAIN_IMAGE_DIR = '/Users/sainikhilmaram/Desktop/train2014'
+        self.EVAL_IMAGE_DIR = self.DATA_DIR + '/val2014/'
+
 
 
-        self.VAL_QUESTIONS_FILE='v2_OpenEnded_mscoco_val2014_questions.json'
-        self.VAL_ANNOTATIONS_FILE='v2_mscoco_val2014_annotations.json'
+        self.EVAL_QUESTIONS_FILE='v2_OpenEnded_mscoco_val2014_questions.json'
+        self.EVAL_ANNOTATIONS_FILE='v2_mscoco_val2014_annotations.json'
 
         self.GLOVE_EMBEDDING_FILE='./datasets/glove.6B.100d.txt'
 
@@ -22,8 +25,12 @@ def __init__(self):
         self.IMAGE_DIMENSION = [224,224]
         self.IMAGE_SHAPE = self.IMAGE_DIMENSION + [3]
         self.IMAGE_FEATURES = 14
-        self.CONV_DATA_SET = 'conv_dict.npy'
-        self.FC_DATA_SET = 'fc2_dict.npy'
+        self.IMAGE_FEATURES_MAP = 512
+        self.CONV_DATA_SET_TRAIN = 'conv_dict_train.npy'
+        self.FC_DATA_SET_TRAIN = 'fc2_dict_train.npy'
+
+        self.CONV_DATA_SET_EVAL = 'conv_dict_eval.npy'
+        self.FC_DATA_SET_EVAL = 'fc2_dict_eval.npy'
 
         # self.CNN = 'resnet50'
         # self.CNN_PRETRAINED_FILE = './resnet50_no_fc.npy'
@@ -33,8 +40,6 @@ def __init__(self):
         self.EMBEDDING_DIMENSION = 512
         self.VOCAB_SIZE = 13764
 
-
-
         ## Decoder Parameters
         self.TOP_ANSWERS = 1000
         self.OUTPUT_SIZE = self.TOP_ANSWERS
@@ -45,7 +50,10 @@ def __init__(self):
 
 
         ## Model Parameters
-        self.PHASE = 'cnn_features'
+        # self.PHASE = 'cnn_features'
+        # self.PHASE = 'train'
+        self.PHASE = 'test'
+        self.EVALUATION_PRESENT = True
         self.POINT_WISE_FEATURES = 1024
         self.INTERMEDIATE_DIMENSION = 30
 
@@ -55,9 +63,9 @@ def __init__(self):
         self.NUM_BATCHES = 2 ## Just a placeholder, so it doesn't loop through all the data.
         self.SAVE_DIR = './models/'
         self.SAVE_PERIOD = 370000/(self.BATCH_SIZE*4)
-        self.LOAD_MODEL = False
-        self.MODEL_FILE_NAME= self.SAVE_DIR + '/step_722.npy'
-        self.EPOCH_COUNT = 0
+        self.LOAD_MODEL = True
+        self.MODEL_FILE_NAME= self.SAVE_DIR + '/epoch_14.npy'
+        self.EPOCH_COUNT = 5
 
 
         ## Testing Parameters

diff --git a/vqa_decoder.py b/vqa_decoder.py
@@ -70,17 +70,19 @@ def build(self,attend_image_word,attend_question_word,attend_image_phrase,
             fcb = tf.get_variable(initializer=tf.constant(1.0, shape=[self.config.OUTPUT_SIZE], dtype=tf.float32),
                                trainable=True, name='fc_b')
             fcl = tf.nn.bias_add(tf.matmul(attend_vector_sentence, fcw), fcb)
-            logits = tf.nn.relu(fcl)
+            self.logits = tf.nn.relu(fcl)
 
         if config.PHASE == 'train':
             # Compute the loss for this step, if necessary
             cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                 labels=self.answers[:,0],  ##[:,0] because answers is array of arrays
-                logits=logits)
+                logits=self.logits)
 
             self.optimizer = tf.train.AdamOptimizer(config.INITIAL_LEARNING_RATE).minimize(cross_entropy_loss)
 
-        self.predictions = tf.argmax(logits, 1,output_type=tf.int32)
+
+        self.predictions = tf.argmax(self.logits, 1,output_type=tf.int32)
+        self.softmax_logits = tf.nn.softmax(self.logits)
         if config.PHASE == 'train':
             ## Number of correct predictions in each run
             self.predictions_correct = tf.reduce_sum(tf.cast(tf.equal(self.predictions, self.answers[:, 0]),tf.float32))

diff --git a/vqa_encoder.py b/vqa_encoder.py
@@ -19,9 +19,14 @@ def __init__(self,config):
         self.phrase_level = vqa_phrase_level(config)
         self.sentence_level = vqa_lstm(config)
 
-    def build(self, images, questions, question_masks, embedding_matrix):
-        ## Build the CNN model
-        self.cnn.build(images)
+    # def build(self, images, questions, question_masks, embedding_matrix):
+    def build(self, image_features, questions, question_masks, embedding_matrix):
+        # ## Build the CNN model
+        if self.config.PHASE == "test":
+            images = image_features
+            self.cnn.build(images)
+
+        self.image_features = image_features
         ## Build the word level
         self.word_level.build(questions, question_masks, embedding_matrix)
         ## Build the Phrase level
@@ -37,15 +42,20 @@ def build(self, images, questions, question_masks, embedding_matrix):
     def build_encoder(self):
 
         config = self.config
-        config.IMAGE_FEATURES = self.cnn.num_ctx
+        # config.IMAGE_FEATURES = self.cnn.num_ctx
 
         ## d = 512, N = 14, T = 25, k = 25
         ## Building Word Level features
 
-        print("CNN feature size {}".format(self.cnn.conv_feats.get_shape())) ## [BATCH_SIZE,14,512]
+        if self.config.PHASE == "test":
+            print("CNN feature size {}".format(self.cnn.conv_feats.get_shape())) ## [BATCH_SIZE,14,512]
+            self.V = tf.transpose(self.cnn.conv_feats, [0, 2, 1]) ##[BATCH_SIZE,512,14] (V) [?,d,N]
+            print("V_word shape : {}".format(self.V.get_shape()))
+        else:
+            print("CNN feature size {}".format(self.image_features.get_shape()))  ## [BATCH_SIZE,14,512]
+            self.V = tf.transpose(self.image_features, [0, 2, 1])  ##[BATCH_SIZE,512,14] (V) [?,d,N]
+            print("V_word shape : {}".format(self.V.get_shape()))
 
-        self.V = tf.transpose(self.cnn.conv_feats, [0, 2, 1]) ##[BATCH_SIZE,512,14] (V) [?,d,N]
-        print("V_word shape : {}".format(self.V.get_shape()))
 
         print("Word Level feature size {}".format(self.word_level.word_embed.get_shape())) ## [BATCH_SIZE,25,512]
 
@@ -66,7 +76,6 @@ def build_encoder(self):
 
 
 
-
     def parallel_co_attention(self,V,Q,name_scope="word"):
         config = self.config
 

diff --git a/vqa_main.py b/vqa_main.py
@@ -111,40 +111,39 @@ def assign_args(args):
             vocabulary = Vocabulary(config)
             ## Build the vocabulary to get the indexes
             vocabulary.build(config.DATA_DIR+config.TRAIN_QUESTIONS_FILE)
-
             vocabulary.save_file()
-
             config.VOCAB_SIZE = vocabulary.num_words
             ## Create the data set
             data_set = prepare_train_data(config,vocabulary)
+            ## Create the evaluation data set
+            data_set_eval = prepare_eval_data(config, vocabulary)
             # Create the model object
             model = vqa_model(config)
             # Build the model
             model.build()
             sess.run(tf.global_variables_initializer())
-            ## Load the Pre-trained CNN file
-            model.encoder.cnn.load_cnn(sess,config.CNN_PRETRAINED_FILE)
+
             if (config.LOAD_MODEL):
                 model.load(sess,config.MODEL_FILE_NAME)
             # Train the data with the data set and embedding matrix
-            model.train(sess,data_set)
-
-        elif config.PHASE=="cnn_features":
-
-            # ## Create Vocabulary object
-            # vocabulary = Vocabulary(config)
-            # ## Build the vocabulary to get the indexes
-            # vocabulary.build(config.DATA_DIR + config.TRAIN_QUESTIONS_FILE)
+            model.train(sess,data_set,data_set_eval)
 
+        elif config.PHASE == "cnn_features":
             ## Create the data set
             data_set = prepare_cnn_data(config)
-
             model = vqa_model_static_cnn(config)
             model.build()
             sess.run(tf.global_variables_initializer())
             ## Load Pre-trained CNN file
             model.cnn.load_cnn(sess, config.CNN_PRETRAINED_FILE)
-            model.train(sess,data_set)
+
+            # fc_file_name = config.DATA_DIR + config.FC_DATA_SET_TRAIN
+            # conv_file_name = config.DATA_DIR + config.CONV_DATA_SET_TRAIN
+
+            fc_file_name = config.DATA_DIR + config.FC_DATA_SET_EVAL
+            conv_file_name = config.DATA_DIR + config.CONV_DATA_SET_EVAL
+
+            model.train(sess, data_set, fc_file_name, conv_file_name)
 
 
         elif config.PHASE == 'test':
@@ -163,4 +162,6 @@ def assign_args(args):
             sess.run(tf.global_variables_initializer())
 
             model.load(sess, config.MODEL_FILE_NAME)
+            ## Load the Pre-trained CNN file
+            model.encoder.cnn.load_cnn(sess, config.CNN_PRETRAINED_FILE)
             model.test(sess,data_set,top_answers)
diff --git a/vqa_model.py b/vqa_model.py
@@ -13,11 +13,13 @@
 
 class vqa_model:
     def __init__(self,config):
-        print("Crearing the Model")
+        print("Creating the Model")
         self.config = config
         self.encoder = vqa_encoder(self.config)
         self.decoder = vqa_decoder(self.config)
-        self.image_loader = ImageLoader('./ilsvrc_2012_mean.npy',self.config)
+        self.image_loader = ImageLoader('./ilsvrc_2012_mean.npy', self.config)
+        self.image_feature_loader = image_feature_loader(self.config)
+        self.image_feature_loader_eval = image_feature_loader_eval(self.config)
         self.global_step = 0
 
     def build(self):
@@ -27,6 +29,9 @@ def build(self):
         self.images = tf.placeholder(
             dtype=tf.float32,
             shape=[self.config.BATCH_SIZE] + self.config.IMAGE_SHAPE)
+
+        self.image_features = tf.placeholder(dtype=tf.float32,
+                                             shape=[self.config.BATCH_SIZE]+[self.config.IMAGE_FEATURES]+[self.config.IMAGE_FEATURES_MAP])
         self.questions =tf.placeholder(
             dtype=tf.int32,
             shape=[self.config.BATCH_SIZE] + [self.config.MAX_QUESTION_LENGTH])
@@ -43,51 +48,57 @@ def build(self):
             regularizer=self.encoder.cnn.nn.fc_kernel_regularizer,
             trainable=True)
 
-        ## pass the images, questions and embedding matrix to the encoder
-        self.encoder.build(self.images,self.questions,self.question_masks, self.embedding_matrix)
+        if self.config.PHASE == 'test':
+            ## pass the images, questions and embedding matrix to the encoder
+            self.encoder.build(self.images,self.questions,self.question_masks, self.embedding_matrix)
+        else:
+            ## pass the image features, questions and embedding matrix to the encoder
+            self.encoder.build(self.image_features, self.questions, self.question_masks, self.embedding_matrix)
+
         # ## pass the outputs of encoder to decoder model
         self.decoder.build(self.encoder.v_attend_word,self.encoder.q_attend_word,
                            self.encoder.v_attend_phrase,self.encoder.q_attend_phrase,
                            self.encoder.v_attend_sentence,self.encoder.q_attend_sentence)
+        ## Load the pre-computed image features
+        self.image_feature_loader.build()
+        self.image_feature_loader_eval.build()
         #
         # self.build_model()
 
     def build_model(self):
         ## Assign variables that need to be passed to variables from encoder and decoder
         pass
 
-    def train(self,sess,train_data):
+    def train(self,sess,train_data,eval_data):
         print("Training the model")
         epoch_count = self.config.EPOCH_COUNT
 
         for _ in tqdm(list(range(self.config.NUM_EPOCHS)), desc='epoch'):
             total_predictions_correct = 0
             for _ in tqdm(list(range(train_data.num_batches)), desc='batch'):
-            #for _ in tqdm(list(range(self.config.NUM_BATCHES)), desc='batch'):
+            # for _ in tqdm(list(range(self.config.NUM_BATCHES)), desc='batch'):
                 batch = train_data.next_batch()
                 image_files, image_idxs, question_idxs, question_masks, answer_idxs, answer_masks = batch
-                images = self.image_loader.load_images(image_files)
+                #images = self.image_loader.load_images(image_files)
+                image_features = self.image_feature_loader.load_images(image_idxs)
 
-                feed_dict = {self.images:images,
+                feed_dict = {self.image_features:image_features,
+                             # self.images:images,
                              self.questions:question_idxs,
                              self.question_masks:question_masks,
                              self.decoder.answers:answer_idxs,
                              self.decoder.answer_masks:answer_masks}
 
                 _, predictions_correct = sess.run([self.decoder.optimizer,self.decoder.predictions_correct],feed_dict=feed_dict)
 
-                # _ = sess.run(self.encoder.v_attend_phrase,
-                #                               feed_dict=feed_dict)
-                #
-                # predictions_correct = 0
 
                 ## Global step count in order to store the model between batches
                 self.global_step += 1
                 total_predictions_correct += predictions_correct
 
 
                 if(self.global_step % int(self.config.SAVE_PERIOD) == 0):
-                    self.save("step_"+ str(self.global_step))
+                    # self.save("step_"+ str(self.global_step))
                     print("Total Predictions correct : {0} at time step {1}".format(total_predictions_correct,self.global_step))
                     f = open("results.txt", "a")
                     f.write("Total Predictions correct : {0} at time step {1} \n".format(total_predictions_correct,self.global_step))
@@ -99,31 +110,66 @@ def train(self,sess,train_data):
             self.save("epoch_"+str(epoch_count))
             f = open("results.txt", "a")
             f.write("Total Predictions correct : {0} at epoch {1} \n".format(total_predictions_correct, epoch_count))
+            f.write("------------------------------------------------------------------------------\n")
             f.close()
             train_data.reset()
 
+            if (self.config.EVALUATION_PRESENT and (epoch_count % 2 == 0)):
+                self.eval(sess, eval_data)
+
+    def eval(self, sess, eval_data):
+        print("Evaluating the model")
+        total_predictions_correct = 0
+        for _ in tqdm(list(range(eval_data.num_batches)), desc='batch'):
+            # for _ in tqdm(list(range(self.config.NUM_BATCHES)), desc='batch'):
+            batch = eval_data.next_batch()
+            image_files, image_idxs, question_idxs, question_masks, answer_idxs, answer_masks = batch
+            # images = self.image_loader.load_images(image_files)
+            image_features = self.image_feature_loader_eval.load_images(image_idxs)
+
+            feed_dict = {self.image_features: image_features,
+                         # self.images:images,
+                         self.questions: question_idxs,
+                         self.question_masks: question_masks,
+                         self.decoder.answers: answer_idxs,
+                         self.decoder.answer_masks: answer_masks}
+
+            predictions_correct = sess.run(self.decoder.predictions_correct,
+                                           feed_dict=feed_dict)
+
+            total_predictions_correct += predictions_correct
+
+        print("Total Predictions correct : {0} in Validation".format(total_predictions_correct))
+        f = open("results.txt", 'a')
+        f.write("------------------------------------------------------------------------------\n")
+        f.write("Total Predictions correct : {0} in Validation\n".format(total_predictions_correct))
+        f.write("------------------------------------------------------------------------------\n")
+        f.close()
+        eval_data.reset()
+
     def test(self,sess,test_data,top_answers):
 
-        batch = test_data.batch()
-        image_files, question_idxs, question_masks = batch
+        batch = test_data.next_batch()
+        image_files, image_idxs, question_idxs, question_masks = batch
         images = self.image_loader.load_images(image_files)
 
         feed_dict = {self.images: images,
                      self.questions: question_idxs,
                      self.question_masks: question_masks
                      }
 
-        predictions = sess.run(self.decoder.predictions,feed_dict = feed_dict)
-        print("Answer is {}".format(top_answers[predictions]))
-
-
-
+        predictions, logits = sess.run([self.decoder.predictions, self.decoder.softmax_logits], feed_dict=feed_dict)
 
+        ## Get top 5 elements
+        logits = np.array(logits[0])  ## logits obtained are a two-dimensional array
+        idxs = sorted(range(len(logits)), key=lambda i: logits[i], reverse=True)[:5]
 
+        print("Answers ......")
+        for i in range(5):
+            print("Answer : {0:10} probability : {1:10}".format(top_answers[idxs[i]], logits[idxs[i]]))
 
     def save(self,file_name):
         """ Save the model. """
-        print(tf.global_variables())
         config = self.config
         data = {v.name: v.eval() for v in tf.global_variables()}
         save_path = os.path.join(config.SAVE_DIR, file_name)