Configure project via root_dir/config.ini; copy source code into the save directory

eight-corner · Jan 26, 2017 · 7a4300b · 7a4300b
1 parent 8ff7f04
commit 7a4300b
Show file tree

Hide file tree

Showing 8 changed files with 128 additions and 49 deletions.
diff --git a/.gitignore b/.gitignore
@@ -13,7 +13,7 @@ data/*
 !data/cornell
 
 # train 
-/deepqa2/config.ini
+config.ini
 
 # serve
 /deepqa2/serve/db.sqlite3

diff --git a/README.md b/README.md
@@ -18,9 +18,8 @@ pip install -r requirements.txt
 # Pre-process data
 Process data, build vocabulary, word embedding, conversations, etc.
 ```
-cd deepqa2
 cp config.sample.ini config.ini
-python dataset/preprocesser.py
+python deepqa2/dataset/preprocesser.py
 ```
 
 Sample Corpus http://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html
@@ -29,7 +28,7 @@ Sample Corpus http://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.h
 Train language model with Seq2seq.
 ```
 cp config.sample.ini config.ini # modify keys
-python train.py
+python deepqa2/train.py
 ```
 
 # Serve Model

diff --git a/deepqa2/config.sample.ini → config.sample.ini b/deepqa2/config.sample.ini → config.sample.ini
@@ -33,4 +33,11 @@ train_softmax_samples = 0
 # Batch Size
 train_num_batch_size = 10
 # Save model every N steps
-train_save_every = 10
+train_save_every = 10
+
+################################
+# Parameters for serving the model
+################################
+[serve]
+hostname = 127.0.0.1
+secret_key = foobar789
diff --git a/deepqa2/config.py b/deepqa2/config.py
@@ -25,8 +25,9 @@
 from time import localtime, strftime
 from utils.helper import singleton
 
-CONF_DIR = os.path.dirname(os.path.abspath(__file__))
-print(CONF_DIR)
+CONF_DIR = os.path.join(os.path.dirname(
+    os.path.realpath(__file__)), os.pardir)
+
 
 def get_cfg_dir():
     '''
@@ -37,18 +38,18 @@ def get_cfg_dir():
     return CONF_DIR
 
 
-def get_cfg_path():
+def get_cfg_path(filename):
     '''
     Get cfg path
     '''
-    return os.path.join(get_cfg_dir(), 'config.ini')
+    return os.path.join(get_cfg_dir(), filename)
 
 
-def load_config():
+def load_config(filename):
     '''
     Load configurations
     '''
-    cf = get_cfg_path()
+    cf = get_cfg_path(filename)
     if not os.path.exists(cf):
         f = open(cf, 'w')
         f.close()
@@ -58,34 +59,40 @@ def load_config():
     return config
 
 
-def read_properties():
+def read_properties(filename='config.ini'):
     '''
     Read Properties from Config File.
     '''
-    config = load_config()
+    config = load_config(filename)
     secs = config.sections()
     conf = {}
     for x in secs:
         conf[x] = {y: config.get(x, y) for y in config.options(x)}
-    conf['data'] = {'rootDir': CONF_DIR + '/..',
-                    'save': CONF_DIR + '/../save',
-                    'dataset': CONF_DIR + '/../data/' + conf['data']['dataset_name']}
-    conf['log']['log_path'] = CONF_DIR + '/../logs'
+    conf['data'] = {
+        'rootDir': CONF_DIR,
+        'save': os.path.join(CONF_DIR, 'save'),
+        'dataset': os.path.join(CONF_DIR, 'data', conf['data']['dataset_name'])}
+    conf['log']['log_path'] = os.path.join(CONF_DIR, 'logs')
     return conf
 
+
 @singleton
 class Config:
     '''
     Load All Parameters in one place.
     '''
+
     def __init__(self):
         self.ini = read_properties()
         '''
         Define project params
         '''
-        self.config_ini_path = get_cfg_path()
-        self.model_save_tag = "deeplearning.cobra.%s.%s" % (socket.gethostname(), strftime("%Y%m%d.%H%M%S", localtime()))
-        self.model_save_dir = os.path.join(CONF_DIR, '../save/' + self.model_save_tag)
+        self.base_dir = CONF_DIR
+        self.config_ini_path = get_cfg_path(filename='config.ini')
+        self.model_save_tag = "deeplearning.cobra.%s.%s" % (
+            socket.gethostname(), strftime("%Y%m%d.%H%M%S", localtime()))
+        self.model_save_dir = os.path.join(
+            CONF_DIR, 'save', self.model_save_tag)
         self.model_save_ckpt = os.path.join(self.model_save_dir, 'model.ckpt')
 
         '''
@@ -111,43 +118,50 @@ def __init__(self):
         self.dataset_padToken = self.dataset_word2id["<pad>"]
         self.dataset_goToken = self.dataset_word2id["<go>"]
         self.dataset_eosToken = self.dataset_word2id["<eos>"]
-        self.dataset_unknownToken = self.dataset_word2id["<unknown>"]  # Restore special words
+        self.dataset_unknownToken = self.dataset_word2id[
+            "<unknown>"]  # Restore special words
 
         print('>> dataset word2id size: %d' % len(self.dataset_word2id.keys()))
         print('>> dataset id2word size: %d' % len(self.dataset_id2word.keys()))
-        print('>> dataset training samples size: %d' % len(self.dataset_trainingSamples))
+        print('>> dataset training samples size: %d' %
+              len(self.dataset_trainingSamples))
         print('>> dataset training max length: %d' % self.dataset["maxLength"])
 
         '''
         Define hyper parameters for model training.
         '''
-        # Epoch training runs    
+        # Epoch training runs
         self.train_num_epoch = int(self.ini['hyparams']['train_num_epoch'])
         # number of rnn layers
-        self.train_num_layers = int(self.ini['hyparams']['train_num_layers']) 
+        self.train_num_layers = int(self.ini['hyparams']['train_num_layers'])
         # batch size
-        self.train_num_batch_size = int(self.ini['hyparams']['train_num_batch_size'])
+        self.train_num_batch_size = int(
+            self.ini['hyparams']['train_num_batch_size'])
         # embedding size
-        self.train_num_embedding = int(self.ini['hyparams']['train_num_embedding'])
+        self.train_num_embedding = int(
+            self.ini['hyparams']['train_num_embedding'])
         # number of hidden units of RNN Cell
         self.train_hidden_size = int(self.ini['hyparams']['train_hidden_size'])
         # softmax samples
-        self.train_softmax_samples = int(self.ini['hyparams']['train_softmax_samples'])
-        # TODO is watson mode, what is it, config from config.ini 
+        self.train_softmax_samples = int(
+            self.ini['hyparams']['train_softmax_samples'])
+        # TODO: document what watson mode is and read this flag from config.ini
         self.train_is_watson_mode = False
-        # Save every N steps 
+        # Save every N steps
         self.train_save_every = int(self.ini['hyparams']['train_save_every'])
         # Trained Max Length
         self.train_max_length = self.dataset["maxLength"]
-        # For now, not arbitrary  independent maxLength between encoder and decoder
+        # For now the encoder and decoder do not get arbitrary, independent
+        # maxLength values; both are derived from the dataset maxLength
         self.train_max_length_enco = self.dataset["maxLength"]
         self.train_max_length_deco = self.dataset["maxLength"] + 2
-        self.train_learning_rate = float(self.ini['hyparams']['train_learning_rate'])
+        self.train_learning_rate = float(
+            self.ini['hyparams']['train_learning_rate'])
 
 config = Config()
 
 if __name__ == "__main__":
     # conf = read_properties()
     # for x in conf['rule']['blacklist']:
     #     print x
-    print(CONF_DIR)
+    print(CONF_DIR)
diff --git a/deepqa2/serve/api/chatbotmanager.py b/deepqa2/serve/api/chatbotmanager.py
@@ -14,20 +14,66 @@
 # limitations under the License.
 # ============================================================================
 import sys
+import os
 import logging
 import configparser
+import tensorflow as tf
 from django.conf import settings
 from django.apps import AppConfig
+# Add the deepqa2 directory (two levels up) to sys.path so config can be imported
+sys.path.append(os.path.join(os.path.dirname(
+    os.path.realpath(__file__)), os.pardir, os.pardir))
+from config import config
+from dataset.textdata import TextData
+from munch import munchify
+from models.rnn import Model
 
 logger = logging.getLogger(__name__)
 
+# load model config file
+logger.info("get model path %s" % config.ini['serve']['model_dir'])
+model_config = configparser.ConfigParser()
+model_config.read(os.path.join(config.ini['serve']['model_dir'], 'config.ini'))
+
+
+def _initBot():
+    '''
+    Init Bot Service
+
+    Builds the TextData from the values held by the shared `config`
+    object, constructs the Model, and restores its variables from
+    `<model_dir>/model.ckpt`, where `model_dir` is read from the
+    `[serve]` section of config.ini (the same key this module reads at
+    import time to locate the model's config.ini).
+
+    Raises:
+        KeyError: if `[serve]` or its `model_dir` key is missing from
+            config.ini.
+    '''
+    # load text data
+    td = TextData(munchify({
+        'rootDir': config.dataset_root_dir,
+        'corpus': config.corpus_name,
+        'maxLength': config.train_max_length,
+        'maxLengthEnco': config.train_max_length_enco,
+        'maxLengthDeco': config.train_max_length_deco,
+        'datasetTag': '',
+        'test': False,
+        'watsonMode': False,
+        'batchSize': config.train_num_batch_size
+    }))
+    # restore model
+    with tf.device(None):
+        tf_model = Model(config, config.dataset)
+    tf_saver = tf.train.Saver(max_to_keep=200)
+    tf_session = tf.Session()
+    tf_session.run(tf.initialize_all_variables())
+    # Resolve the checkpoint from the configured model directory instead of
+    # a machine-specific absolute path, so the service runs on any host.
+    model_ckpt = os.path.join(config.ini['serve']['model_dir'], 'model.ckpt')
+    logger.info("restore model from %s" % model_ckpt)
+    tf_saver.restore(tf_session, model_ckpt)
+    # NOTE(review): td, tf_model and tf_session are locals and are dropped
+    # when this function returns; predict() will need them kept somewhere.
+    # enable predict method
+
+
+def predict(sentence):
+    pass
+
+
 class ChatbotManager(AppConfig):
     """ Manage a single instance of the chatbot shared over the website
     """
     name = 'api'
     verbose_name = 'DeepQA2 RESt API'
 
-    bot = None
+    inited = False
 
     def ready(self):
         """ Called by Django only once during startup
@@ -42,9 +88,10 @@ def initBot():
         """ Instantiate the chatbot for later use
         Should be called only once
         """
-        if not ChatbotManager.bot:
-            logger.info('Initializing bot...')
-            ChatbotManager.bot = {'foo': 'bar'}
+        if not ChatbotManager.inited:
+            logger.info('Initializing bot ...')
+            _initBot()
+            ChatbotManager.inited = True
         else:
             logger.info('Bot already initialized.')
 
@@ -56,7 +103,7 @@ def callBot(sentence):
         Return:
             str: the answer
         """
-        if ChatbotManager.bot:
+        if ChatbotManager.inited:
             return 'ChatbotManager.bot.daemonPredict(sentence)'
         else:
             logger.error('Error: Bot not initialized!')
diff --git a/deepqa2/serve/logs/.gitignore b/deepqa2/serve/logs/.gitignore
diff --git a/deepqa2/serve/server/settings.py b/deepqa2/serve/server/settings.py
@@ -25,21 +25,25 @@
 https://docs.djangoproject.com/en/1.10/ref/settings/
 """
 import os
+import sys
+# Add the deepqa2 directory (two levels up) to sys.path so config can be imported
+sys.path.append(os.path.join(os.path.dirname(
+    os.path.realpath(__file__)), os.pardir, os.pardir))
+from config import config
 
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
-
 # Quick-start development settings - unsuitable for production
 # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
 
 # SECURITY WARNING: keep the secret key used in production secret!
-SECRET_KEY = os.environ['CHATBOT_SECRET_KEY']
+SECRET_KEY = config.ini['serve']['secret_key']
 
 # SECURITY WARNING: don't run with debug turned on in production!
 DEBUG = True
 
-ALLOWED_HOSTS = ['philly-nnm.snaplingo.net', '127.0.0.1']
+ALLOWED_HOSTS = [config.ini['serve']['hostname'], '127.0.0.1']
 
 # Application definition
 
@@ -127,12 +131,12 @@
         'file_django': {
             'level': 'DEBUG',
             'class': 'logging.FileHandler',
-            'filename': 'logs/debug_django.log',
+            'filename': config.ini['log']['log_path'] + '/debug_django.log',
         },
         'file_chatbot': {
             'level': 'DEBUG',
             'class': 'logging.FileHandler',
-            'filename': 'logs/debug_chatbot.log',
+            'filename': config.ini['log']['log_path'] + '/debug_chatbot.log',
         },
         'console': {
             'level': 'DEBUG',
@@ -166,4 +170,4 @@
 
 USE_TZ = True
 
-STATIC_URL = '/static/'
+STATIC_URL = '/static/'
diff --git a/deepqa2/train.py b/deepqa2/train.py
@@ -31,6 +31,7 @@
 
 logger = log.getLogger(__name__)
 
+
 def main(unused_argv):
     batch_data = TextData(munchify({
         'rootDir': config.dataset_root_dir,
@@ -59,8 +60,19 @@ def save_session():
         # Save the model parameters and the variables
         logger.info('Save tf session ... %s' % config.model_save_ckpt)
         tf_saver.save(tf_sess, config.model_save_ckpt)
-        logger.info('Copy dataset ... %s' % config.model_save_dir)
-        shutil.copy(config.dataset_pkl_path, config.model_save_dir)
+
+        logger.info('Copy source code ... %s' % config.model_save_dir)
+        sourcecode_path = os.path.join(config.model_save_dir, 'deepqa2')
+        if not os.path.exists(sourcecode_path):
+            shutil.copytree(os.path.join(config.base_dir,
+                                     'deepqa2'), sourcecode_path)
+
+        dataset_path = os.path.join(config.model_save_dir, 'data')
+        logger.info('Copy dataset ... %s' % dataset_path)
+        if not os.path.exists(dataset_path):
+            os.makedirs(dataset_path)
+        shutil.copy(config.dataset_pkl_path, dataset_path)
+
         logger.info('Save config.ini ... %s' % config.model_save_dir)
         shutil.copy(config.config_ini_path, config.model_save_dir)
         logger.info('Done.')