-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintentIdentifier.py
124 lines (107 loc) · 4.08 KB
/
intentIdentifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import time
import os
import numpy as np
import tensorflow as tf
class IntentIdentifier:
    """Matches a user utterance against a fixed list of intent sentences.

    Loads GloVe 100-d word vectors from 'glove.6B.100d.txt', builds a frozen
    embedding layer over the GloVe vocabulary, and scores sentence-pair
    similarity with a pre-trained Keras model ('sentence_matching_model').
    """

    def __init__(self, intents: list, thresh: float = 0.5):
        """
        :param intents: canonical intent sentences to match requests against.
        :param thresh: minimum similarity probability to accept a match.
        """
        self.__intents = intents
        self.__thresh = thresh
        path_to_glove_file = os.path.join(
            'glove.6B.100d.txt'
        )
        embeddings_index = {}
        # GloVe distribution files are UTF-8; be explicit so loading does not
        # depend on the platform's locale default encoding.
        with open(path_to_glove_file, encoding='utf-8') as f:
            for line in f:
                word, coefs = line.split(maxsplit=1)
                # np.fromstring(..., sep=' ') is deprecated for text parsing;
                # split-and-convert is the supported equivalent.
                coefs = np.array(coefs.split(), dtype='f')
                embeddings_index[word] = coefs
        print("Found %s word vectors." % len(embeddings_index))
        # Every sentence is padded/truncated to 20 tokens; predict() below
        # relies on this exact length.
        self.vectorizer = tf.keras.layers.TextVectorization(
            output_sequence_length=20)
        self.vectorizer.set_vocabulary(list(embeddings_index.keys()))
        print(
            f'Stored {len(self.vectorizer.get_vocabulary())} words in vocabulary.')
        voc = self.vectorizer.get_vocabulary()
        word_index = dict(zip(voc, range(len(voc))))
        num_tokens = len(voc) + 2  # +2 reserves rows for padding and OOV
        embedding_dim = 100  # We are using 100 dim embeddings
        hits = 0
        misses = 0
        # Prepare embedding matrix
        embedding_matrix = np.zeros((num_tokens, embedding_dim))
        for word, i in word_index.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                # Words not found in embedding index will be all-zeros.
                # This includes the representation for "padding" and "OOV"
                embedding_matrix[i] = embedding_vector
                hits += 1
            else:
                misses += 1
        print("Converted %d words (%d misses)" % (hits, misses))
        # Frozen layer: GloVe vectors are used as-is, never fine-tuned.
        # NOTE(review): `input_length` is deprecated (and removed in Keras 3);
        # confirm the installed Keras version still accepts it.
        self.embedding_layer = tf.keras.layers.Embedding(
            num_tokens,
            embedding_dim,
            embeddings_initializer=tf.keras.initializers.Constant(
                embedding_matrix
            ),
            trainable=False,
            input_length=20
        )
        self.model_path = 'sentence_matching_model'
        self.model = tf.keras.models.load_model(self.model_path)

    def getSimilarityProbability(self, s1: str, s2: str):
        """Return the model's probability (float in [0, 1]) that *s1* and
        *s2* express the same intent. Comparison is case-insensitive."""
        s1 = s1.lower()
        s2 = s2.lower()
        v1 = self.vectorizer(s1)
        v2 = self.vectorizer(s2)
        e1 = self.embedding_layer(v1)
        e2 = self.embedding_layer(v2)
        # The (1, 20, 100) shape must match the vectorizer's sequence length
        # and the GloVe embedding dimension configured in __init__.
        return self.model.predict(
            {"sentence1": np.array(e1).reshape((1, 20, 100)),
             "sentence2": np.array(e2).reshape((1, 20, 100))},
            verbose=0)[0][0]

    def getMatchingIntentIndex(self, userRequest: str):
        """Return the index of the best-matching intent, or -1 when no
        intent reaches the similarity threshold."""
        mxProb = 0
        mxI = -1
        for i, intent in enumerate(self.__intents):
            matchingProb = self.__getMatchingProb(userRequest, intent)
            if matchingProb > mxProb:
                mxI = i
                mxProb = matchingProb
        return mxI if mxProb >= self.__thresh else -1

    def __getMatchingProb(self, s1: str, s2: str):
        """Indirection point so the matching strategy can be swapped out."""
        return self.getSimilarityProbability(s1, s2)
        # return 0 if s1 != s2 else 1
def turnLights(params: list):
    """Demo action: switch the lights on or off.

    params[0] truthy means on, falsy means off; only prints, returns None.
    """
    print('Turning lights ON' if params[0] else 'Turning lights OFF')
def getTime(params: list):
    """Demo action: print the current local time.

    *params* is accepted only to fit the common action signature; it is
    ignored.
    """
    now = time.ctime()
    print('The time is', now)
def runAction(s: str, actions: list, defaultAction, intentIdentifier: IntentIdentifier):
    '''
    Dispatches a user request string to the action whose intent it matches.
    Each entry of *actions* is a [callable, params] pair aligned by index
    with the identifier's intent list; when nothing matches (index -1),
    *defaultAction* is invoked instead.
    '''
    idx = intentIdentifier.getMatchingIntentIndex(s)
    if idx == -1:
        defaultAction()
        return
    handler = actions[idx][0]
    handler(actions[idx][1])
if __name__ == '__main__':
    # Demo: canonical intent sentences and their actions, aligned by index.
    intents = [
        'Turn on the lights',
        'Turn off the lights',
        'What is the time?'
    ]
    actions = [
        [turnLights, [True]],
        [turnLights, [False]],
        [getTime, []]
    ]

    def defaultAction():
        print('Not a supported intent')

    intentIdentifier = IntentIdentifier(intents)
    # BUG FIX: runAction takes four arguments; the original calls omitted
    # intentIdentifier and raised TypeError on every invocation.
    for s in intents:
        runAction(s, actions, defaultAction, intentIdentifier)
    runAction('Start the timer.', actions, defaultAction, intentIdentifier)