Skip to content

Commit

Permalink
config project by root_dir/config.ini, copy source code into save
Browse files Browse the repository at this point in the history
  • Loading branch information
hailiang-wang committed Jan 26, 2017
1 parent 8ff7f04 commit 7a4300b
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ data/*
!data/cornell

# train
/deepqa2/config.ini
config.ini

# serve
/deepqa2/serve/db.sqlite3
Expand Down
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ pip install -r requirements.txt
# Pre-process data
Process the raw corpus: build the vocabulary, word embeddings, conversations, etc.
```
cd deepqa2
cp config.sample.ini config.ini
python dataset/preprocesser.py
python deepqa2/dataset/preprocesser.py
```

Sample Corpus http://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html
Expand All @@ -29,7 +28,7 @@ Sample Corpus http://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.h
Train language model with Seq2seq.
```
cp config.sample.ini config.ini # modify keys
python train.py
python deepqa2/train.py
```

# Serve Model
Expand Down
9 changes: 8 additions & 1 deletion deepqa2/config.sample.ini → config.sample.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,11 @@ train_softmax_samples = 0
# Batch Size
train_num_batch_size = 10
# Save model every N steps
train_save_every = 10
train_save_every = 10

################################
# Parameters for serving model
################################
[serve]
hostname = 127.0.0.1
secret_key = foobar789
68 changes: 41 additions & 27 deletions deepqa2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@
from time import localtime, strftime
from utils.helper import singleton

CONF_DIR = os.path.dirname(os.path.abspath(__file__))
print(CONF_DIR)
CONF_DIR = os.path.join(os.path.dirname(
os.path.realpath(__file__)), os.pardir)


def get_cfg_dir():
'''
Expand All @@ -37,18 +38,18 @@ def get_cfg_dir():
return CONF_DIR


def get_cfg_path():
def get_cfg_path(filename):
'''
Get cfg path
'''
return os.path.join(get_cfg_dir(), 'config.ini')
return os.path.join(get_cfg_dir(), filename)


def load_config():
def load_config(filename):
'''
Load configurations
'''
cf = get_cfg_path()
cf = get_cfg_path(filename)
if not os.path.exists(cf):
f = open(cf, 'w')
f.close()
Expand All @@ -58,34 +59,40 @@ def load_config():
return config


def read_properties():
def read_properties(filename='config.ini'):
'''
Read Properties from Config File.
'''
config = load_config()
config = load_config(filename)
secs = config.sections()
conf = {}
for x in secs:
conf[x] = {y: config.get(x, y) for y in config.options(x)}
conf['data'] = {'rootDir': CONF_DIR + '/..',
'save': CONF_DIR + '/../save',
'dataset': CONF_DIR + '/../data/' + conf['data']['dataset_name']}
conf['log']['log_path'] = CONF_DIR + '/../logs'
conf['data'] = {
'rootDir': CONF_DIR,
'save': os.path.join(CONF_DIR, 'save'),
'dataset': os.path.join(CONF_DIR, 'data', conf['data']['dataset_name'])}
conf['log']['log_path'] = os.path.join(CONF_DIR, 'logs')
return conf


@singleton
class Config:
'''
Load All Parameters in one place.
'''

def __init__(self):
self.ini = read_properties()
'''
Define project params
'''
self.config_ini_path = get_cfg_path()
self.model_save_tag = "deeplearning.cobra.%s.%s" % (socket.gethostname(), strftime("%Y%m%d.%H%M%S", localtime()))
self.model_save_dir = os.path.join(CONF_DIR, '../save/' + self.model_save_tag)
self.base_dir = CONF_DIR
self.config_ini_path = get_cfg_path(filename='config.ini')
self.model_save_tag = "deeplearning.cobra.%s.%s" % (
socket.gethostname(), strftime("%Y%m%d.%H%M%S", localtime()))
self.model_save_dir = os.path.join(
CONF_DIR, 'save', self.model_save_tag)
self.model_save_ckpt = os.path.join(self.model_save_dir, 'model.ckpt')

'''
Expand All @@ -111,43 +118,50 @@ def __init__(self):
self.dataset_padToken = self.dataset_word2id["<pad>"]
self.dataset_goToken = self.dataset_word2id["<go>"]
self.dataset_eosToken = self.dataset_word2id["<eos>"]
self.dataset_unknownToken = self.dataset_word2id["<unknown>"] # Restore special words
self.dataset_unknownToken = self.dataset_word2id[
"<unknown>"] # Restore special words

print('>> dataset word2id size: %d' % len(self.dataset_word2id.keys()))
print('>> dataset id2word size: %d' % len(self.dataset_id2word.keys()))
print('>> dataset training samples size: %d' % len(self.dataset_trainingSamples))
print('>> dataset training samples size: %d' %
len(self.dataset_trainingSamples))
print('>> dataset training max length: %d' % self.dataset["maxLength"])

'''
Define hyper parameters for model training.
'''
# Epoch training runs
# Epoch training runs
self.train_num_epoch = int(self.ini['hyparams']['train_num_epoch'])
# number of rnn layers
self.train_num_layers = int(self.ini['hyparams']['train_num_layers'])
self.train_num_layers = int(self.ini['hyparams']['train_num_layers'])
# batch size
self.train_num_batch_size = int(self.ini['hyparams']['train_num_batch_size'])
self.train_num_batch_size = int(
self.ini['hyparams']['train_num_batch_size'])
# embedding size
self.train_num_embedding = int(self.ini['hyparams']['train_num_embedding'])
self.train_num_embedding = int(
self.ini['hyparams']['train_num_embedding'])
# number of hidden units of RNN Cell
self.train_hidden_size = int(self.ini['hyparams']['train_hidden_size'])
# softmax samples
self.train_softmax_samples = int(self.ini['hyparams']['train_softmax_samples'])
# TODO is watson mode, what is it, config from config.ini
self.train_softmax_samples = int(
self.ini['hyparams']['train_softmax_samples'])
# TODO is watson mode, what is it, config from config.ini
self.train_is_watson_mode = False
# Save every N steps
# Save every N steps
self.train_save_every = int(self.ini['hyparams']['train_save_every'])
# Trained Max Length
self.train_max_length = self.dataset["maxLength"]
# For now, not arbitrary independent maxLength between encoder and decoder
# For now, not arbitrary independent maxLength between encoder and
# decoder
self.train_max_length_enco = self.dataset["maxLength"]
self.train_max_length_deco = self.dataset["maxLength"] + 2
self.train_learning_rate = float(self.ini['hyparams']['train_learning_rate'])
self.train_learning_rate = float(
self.ini['hyparams']['train_learning_rate'])

config = Config()

if __name__ == "__main__":
# conf = read_properties()
# for x in conf['rule']['blacklist']:
# print x
print(CONF_DIR)
print(CONF_DIR)
57 changes: 52 additions & 5 deletions deepqa2/serve/api/chatbotmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,66 @@
# limitations under the License.
# ============================================================================
import sys
import os
import logging
import configparser
import tensorflow as tf
from django.conf import settings
from django.apps import AppConfig
# import config
sys.path.append(os.path.join(os.path.dirname(
os.path.realpath(__file__)), os.pardir, os.pardir))
from config import config
from dataset.textdata import TextData
from munch import munchify
from models.rnn import Model

logger = logging.getLogger(__name__)

# load model config file
logger.info("get model path %s" % config.ini['serve']['model_dir'])
model_config = configparser.ConfigParser()
model_config.read(os.path.join(config.ini['serve']['model_dir'], 'config.ini'))


def _initBot():
    '''
    Init Bot Service

    Builds the text dataset wrapper and the seq2seq model from the global
    `config`, opens a TensorFlow session and restores the trained weights
    from `model.ckpt` inside the directory configured as
    `config.ini['serve']['model_dir']`.

    Nothing is returned or stored yet; wiring the restored session into
    `predict` is still to be done.
    '''
    # load text data
    # NOTE: `td` is not used after construction yet; it is kept because
    # building TextData may load the dataset (TODO confirm).
    td = TextData(munchify({
        'rootDir': config.dataset_root_dir,
        'corpus': config.corpus_name,
        'maxLength': config.train_max_length,
        'maxLengthEnco': config.train_max_length_enco,
        'maxLengthDeco': config.train_max_length_deco,
        'datasetTag': '',
        'test': False,
        'watsonMode': False,
        'batchSize': config.train_num_batch_size
    }))
    # restore model
    with tf.device(None):
        tf_model = Model(config, config.dataset)
    tf_saver = tf.train.Saver(max_to_keep=200)
    tf_session = tf.Session()
    tf_session.run(tf.initialize_all_variables())
    # Resolve the checkpoint from the configured model directory instead of a
    # path that only exists on one developer machine. Training writes the
    # checkpoint as 'model.ckpt' inside the model save directory.
    model_ckpt = os.path.join(config.ini['serve']['model_dir'], 'model.ckpt')
    tf_saver.restore(tf_session, model_ckpt)
    # enable predict method


def predict(sentence):
    '''
    Placeholder for answering `sentence`.

    Not implemented yet: the argument is ignored and None is returned.
    '''
    return None


class ChatbotManager(AppConfig):
""" Manage a single instance of the chatbot shared over the website
"""
name = 'api'
verbose_name = 'DeepQA2 RESt API'

bot = None
inited = False

def ready(self):
""" Called by Django only once during startup
Expand All @@ -42,9 +88,10 @@ def initBot():
""" Instantiate the chatbot for later use
Should be called only once
"""
if not ChatbotManager.bot:
logger.info('Initializing bot...')
ChatbotManager.bot = {'foo': 'bar'}
if not ChatbotManager.inited:
logger.info('Initializing bot ...')
_initBot()
ChatbotManager.inited = True
else:
logger.info('Bot already initialized.')

Expand All @@ -56,7 +103,7 @@ def callBot(sentence):
Return:
str: the answer
"""
if ChatbotManager.bot:
if ChatbotManager.inited:
return 'ChatbotManager.bot.daemonPredict(sentence)'
else:
logger.error('Error: Bot not initialized!')
4 changes: 0 additions & 4 deletions deepqa2/serve/logs/.gitignore

This file was deleted.

16 changes: 10 additions & 6 deletions deepqa2/serve/server/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,25 @@
https://docs.djangoproject.com/en/1.10/ref/settings/
"""
import os
import sys
# import config
sys.path.append(os.path.join(os.path.dirname(
os.path.realpath(__file__)), os.pardir, os.pardir))
from config import config

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.environ['CHATBOT_SECRET_KEY']
SECRET_KEY = config.ini['serve']['secret_key']

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = ['philly-nnm.snaplingo.net', '127.0.0.1']
ALLOWED_HOSTS = [config.ini['serve']['hostname'], '127.0.0.1']

# Application definition

Expand Down Expand Up @@ -127,12 +131,12 @@
'file_django': {
'level': 'DEBUG',
'class': 'logging.FileHandler',
'filename': 'logs/debug_django.log',
'filename': config.ini['log']['log_path'] + '/debug_django.log',
},
'file_chatbot': {
'level': 'DEBUG',
'class': 'logging.FileHandler',
'filename': 'logs/debug_chatbot.log',
'filename': config.ini['log']['log_path'] + '/debug_chatbot.log',
},
'console': {
'level': 'DEBUG',
Expand Down Expand Up @@ -166,4 +170,4 @@

USE_TZ = True

STATIC_URL = '/static/'
STATIC_URL = '/static/'
16 changes: 14 additions & 2 deletions deepqa2/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

logger = log.getLogger(__name__)


def main(unused_argv):
batch_data = TextData(munchify({
'rootDir': config.dataset_root_dir,
Expand Down Expand Up @@ -59,8 +60,19 @@ def save_session():
# Save the model parameters and the variables
logger.info('Save tf session ... %s' % config.model_save_ckpt)
tf_saver.save(tf_sess, config.model_save_ckpt)
logger.info('Copy dataset ... %s' % config.model_save_dir)
shutil.copy(config.dataset_pkl_path, config.model_save_dir)

logger.info('Copy source code ... %s' % config.model_save_dir)
sourcecode_path = os.path.join(config.model_save_dir, 'deepqa2')
if not os.path.exists(sourcecode_path):
shutil.copytree(os.path.join(config.base_dir,
'deepqa2'), sourcecode_path)

dataset_path = os.path.join(config.model_save_dir, 'data')
logger.info('Copy dataset ... %s' % dataset_path)
if not os.path.exists(dataset_path):
os.makedirs(dataset_path)
shutil.copy(config.dataset_pkl_path, dataset_path)

logger.info('Save config.ini ... %s' % config.model_save_dir)
shutil.copy(config.config_ini_path, config.model_save_dir)
logger.info('Done.')
Expand Down

0 comments on commit 7a4300b

Please sign in to comment.