Skip to content

Commit

Permalink
Finished TensorFlow intent classifier; still needs cleanup and fine-tuning.
Browse files Browse the repository at this point in the history
  • Loading branch information
alfredfrancis committed May 13, 2018
1 parent 354e51a commit a7d0890
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 25 deletions.
26 changes: 9 additions & 17 deletions app/endpoint/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,11 @@
endpoint = Blueprint('api', __name__, url_prefix='/api')

# Loading ML Models at app startup
from app.nlu.intent_classifer import IntentClassifier

with app.app_context():
PATH = "{}/{}".format(app.config["MODELS_DIR"],
app.config["INTENT_MODEL_NAME"])

sentence_classifier = IntentClassifier()
sentence_classifier.load(PATH)

synonyms = get_synonyms()
entity_extraction = EntityExtractor(synonyms)
app.logger.info("Intent Model loaded.")
from app.nlu.classifiers.tf_intent_classifer import TfIntentClassifier

sentence_classifier = None
synonyms = None
entity_extraction = None

# Request Handler
@endpoint.route('/v1', methods=['POST'])
Expand Down Expand Up @@ -202,7 +194,6 @@ def api():
else:
return abort(400)


def update_model(app, message, **extra):
"""
Signal hook to be called after training is completed.
Expand All @@ -212,20 +203,21 @@ def update_model(app, message, **extra):
:param extra:
:return:
"""
sentence_classifier.load(PATH)
global sentence_classifier

sentence_classifier = TfIntentClassifier()
sentence_classifier.load(app.config["MODELS_DIR"])
synonyms = get_synonyms()
global entity_extraction
entity_extraction = EntityExtractor(synonyms)
app.logger.info("Intent Model updated")

update_model(app,"Modles updated")

from app.nlu.tasks import model_updated_signal

model_updated_signal.connect(update_model, app)

from app.agents.models import Bot


def predict(sentence):
"""
Predict Intent using Intent classifier
Expand Down
Empty file added app/nlu/classifiers/__init__.py
Empty file.
136 changes: 136 additions & 0 deletions app/nlu/classifiers/tf_intent_classifer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import numpy as np
import tensorflow as tf
import spacy
from sklearn.preprocessing import LabelBinarizer
import os
import cloudpickle
import time

class TfIntentClassifier:
    """Intent classifier that feeds spaCy document vectors to a tf.keras network.

    Training fits a ``LabelBinarizer`` on the intent labels and a small dense
    network on the spaCy vectors of the example sentences.  Both artefacts are
    written to / read from a models directory under fixed file names.
    """

    # File names used inside the models directory by ``train`` and ``load``.
    MODEL_FILE_NAME = "tf_intent_model.hd5"
    LABELS_FILE_NAME = "labels.pkl"

    # Fallback input width, used only when the width cannot be read from the
    # training matrix (e.g. no training sentences were supplied).
    DEFAULT_VECTOR_SIZE = 384

    def __init__(self):
        """Load the spaCy pipeline and start with no trained model."""
        self.model = None
        self.nlp = spacy.load('en')
        self.label_encoder = LabelBinarizer()
        self.graph = None

    def train(self, X, y, models_dir=None, verbose=True):
        """
        Train the intent classifier on the given examples.

        :param X: iterable of raw training sentences
        :param y: iterable of labels, one per sentence in ``X``
        :param models_dir: directory to write the model and label encoder to;
            nothing is persisted when this is falsy
        :param verbose: when true, print where the artefacts were written
        :return: None
        """

        def create_model(input_size, num_labels):
            """
            Define, compile and return the tensorflow model.
            """
            model = tf.keras.Sequential()
            model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu, input_shape=(input_size,)))
            model.add(tf.keras.layers.Dense(num_labels, activation=tf.nn.relu))
            model.add(tf.keras.layers.Dense(num_labels, activation=tf.nn.softmax))

            model.compile(loss='categorical_crossentropy',
                          optimizer='rmsprop',
                          metrics=['accuracy'])

            model.summary()

            return model

        # create spacy doc vector matrix
        x_train = np.array([list(self.nlp(x).vector) for x in X])

        # Take the input width from the vectors actually produced so the
        # network matches whichever spaCy model is installed.
        if x_train.ndim == 2:
            vocab_size = x_train.shape[1]
        else:
            vocab_size = self.DEFAULT_VECTOR_SIZE

        num_labels = len(set(y))
        self.label_encoder.fit(y)
        y_train = self.label_encoder.transform(y)

        # LabelBinarizer encodes a two-class problem as a single 0/1 column,
        # which does not match the two-unit softmax output.  Expand it to one
        # column per class, ordered like ``label_encoder.classes_``.
        if num_labels == 2 and y_train.shape[1] == 1:
            y_train = np.hstack([1 - y_train, y_train])

        # Drop the reference instead of ``del`` so the attribute always
        # exists, even if building the new model fails below.
        self.model = None
        tf.keras.backend.clear_session()
        # NOTE(review): fixed pause kept from the original implementation;
        # its purpose is not visible here -- confirm whether it is required.
        time.sleep(3)

        self.model = create_model(vocab_size, num_labels)
        # Remember the graph the model lives in; predict_proba() runs inside it.
        self.graph = tf.get_default_graph()
        # start training
        self.model.fit(x_train, y_train, shuffle=True, epochs=50, verbose=1)

        if models_dir:
            model_path = os.path.join(models_dir, self.MODEL_FILE_NAME)
            tf.keras.models.save_model(self.model, model_path)
            if verbose:
                print("TF Model written out to {}".format(model_path))

            labels_path = os.path.join(models_dir, self.LABELS_FILE_NAME)
            # Context manager guarantees the pickle is flushed and closed.
            with open(labels_path, 'wb') as f:
                cloudpickle.dump(self.label_encoder, f)

            if verbose:
                print("Labels written out to {}".format(labels_path))

    def load(self, models_dir):
        """
        Load a previously trained model and its label encoder.

        :param models_dir: directory containing the files written by ``train``
        :return: True when both artefacts were loaded, False when reading
            them raised ``IOError`` (the classifier is then left without a
            model, so ``process`` reports no intent)
        """
        self.model = None
        tf.keras.backend.clear_session()
        try:
            self.model = tf.keras.models.load_model(
                os.path.join(models_dir, self.MODEL_FILE_NAME), compile=True)
            self.graph = tf.get_default_graph()
            print("Tf model loaded")
            # SECURITY: unpickling executes arbitrary code; only point this at
            # a models directory whose contents are trusted.
            with open(os.path.join(models_dir, self.LABELS_FILE_NAME), 'rb') as f:
                self.label_encoder = cloudpickle.load(f)
            print("Labels model loaded")
        except IOError:
            # Never keep a model without its matching label encoder.
            self.model = None
            return False
        return True

    def predict(self, text):
        """
        Predict the intent for the given text.

        :param text: raw input text
        :return: whatever ``process`` returns for its default ``return_type``
        """
        return self.process(text)

    def predict_proba(self, x):
        """
        Score every known label for a raw input text.

        :param x: raw input text
        :return: tuple ``(sorted_indices, probabilities)``; ``sorted_indices``
            holds the label indices ordered from most to least probable and
            ``probabilities`` is the prediction array indexed by them
        """
        doc_vector = self.nlp(x).vector
        # Run inside the graph the model was built/loaded in.
        with self.graph.as_default():
            pred_result = self.model.predict(np.array([doc_vector]))
        sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
        return sorted_indices, pred_result[:, sorted_indices]

    def process(self, x, return_type="intent", INTENT_RANKING_LENGTH=5):
        """
        Return the most likely intent, or a ranking of intents, for the text.

        :param x: raw input text
        :param return_type: ``"intent"`` for the single best intent, anything
            else for the ranking list
        :param INTENT_RANKING_LENGTH: maximum number of entries in the ranking
        :return: for ``"intent"``: a dict with ``intent`` and ``confidence``
            keys, or None when no model is available; otherwise a list of
            such dicts (empty when no model is available)
        """
        # getattr guards against instances on which ``model`` was never set.
        if getattr(self, "model", None) is None:
            print("no class")
            intent = None
            intent_ranking = []
        else:
            indices, scores = self.predict_proba(x)
            intents = [self.label_encoder.classes_[index] for index in indices.flatten()]
            probabilities = scores.flatten()

            if len(intents) > 0 and len(probabilities) > 0:
                ranking = list(zip(intents, probabilities))[:INTENT_RANKING_LENGTH]

                intent = {"intent": intents[0], "confidence": float("%.2f" % probabilities[0])}
                intent_ranking = [{"intent": intent_name, "confidence": float("%.2f" % score)}
                                  for intent_name, score in ranking]
            else:
                # Same key as the normal result so callers can always read "intent".
                intent = {"intent": None, "confidence": 0.0}
                intent_ranking = []

        if return_type == "intent":
            return intent
        return intent_ranking
1 change: 1 addition & 0 deletions app/nlu/intent_classifer.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def predict_proba(self, X):
import numpy as np

pred_result = self.model.predict_proba(X)
print(pred_result)
# sort the probabilities retrieving the indices of the elements in sorted order
sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
return sorted_indices, pred_result[:, sorted_indices]
Expand Down
8 changes: 3 additions & 5 deletions app/nlu/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from app.intents.models import Intent

from app import app
from app.nlu.intent_classifer import IntentClassifier
from app.nlu.classifiers.tf_intent_classifer import TfIntentClassifier

from app import my_signals
model_updated_signal = my_signals.signal('model-updated')
Expand Down Expand Up @@ -44,12 +44,10 @@ def train_intent_classifier(intents):
X.append(example.get("text"))
y.append(str(intent.id))

PATH = "{}/{}".format(app.config["MODELS_DIR"],
app.config["INTENT_MODEL_NAME"])
intent_classifier = IntentClassifier()
intent_classifier = TfIntentClassifier()
intent_classifier.train(X,
y,
outpath=PATH, verbose=False)
models_dir=app.config["MODELS_DIR"], verbose=True)


def train_all_ner(story_id, training_data):
Expand Down
1 change: 1 addition & 0 deletions dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ RUN python -m nltk.downloader "averaged_perceptron_tagger"; python
RUN python -m nltk.downloader "punkt"; python
RUN python -m nltk.downloader "stopwords"; python
RUN python -m nltk.downloader "wordnet"; python
RUN python -m spacy download en; python

EXPOSE 8080

Expand Down
4 changes: 3 additions & 1 deletion model_files/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
*.model
*.model
*.pkl
*.hd5
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,7 @@ gevent
pytest
cloudpickle
flask_script
blinker
blinker
h5py
spacy
tensorflow
2 changes: 1 addition & 1 deletion run.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from app import app

if __name__ == '__main__':
app.run(host='0.0.0.0', port=8080, debug=True, threaded=True)
app.run(host='0.0.0.0', port=8080, debug=False, threaded=False)

0 comments on commit a7d0890

Please sign in to comment.