Add interact.py to reload estimator and predict on sample text

songyaheng · Nov 6, 2018 · d806fd1 · d806fd1
1 parent c3284f0
commit d806fd1
Show file tree

Hide file tree

Showing 7 changed files with 351 additions and 1 deletion.
diff --git a/models/chars_conv_lstm_crf/interact.py b/models/chars_conv_lstm_crf/interact.py
@@ -0,0 +1,61 @@
+"""Interact with a model"""
+
+__author__ = "Guillaume Genthial"
+
+from pathlib import Path
+import functools
+import json
+
+import tensorflow as tf
+
+from main import model_fn
+
+LINE = 'John lives in New York'  # sample sentence that the script tags and prints
+DATADIR = '../../data/example'  # directory holding the vocab.*.txt files and glove.npz
+PARAMS = './results/params.json'  # JSON file loaded into the `params` dict
+MODELDIR = './results/model'  # passed to tf.estimator.Estimator as its second argument
+
+
+def pretty_print(line, preds):  # print the words of `line` with one predicted tag aligned under each word
+    words = line.strip().split()  # whitespace tokenization, same split as predict_input_fn
+    lengths = [max(len(w), len(p)) for w, p in zip(words, preds)]  # column width per (word, tag) pair; zip stops at the shorter input
+    padded_words = [w + (l - len(w)) * ' ' for w, l in zip(words, lengths)]  # right-pad each word to its column width
+    padded_preds = [p.decode() + (l - len(p)) * ' ' for p, l in zip(preds, lengths)]  # tags are .decode()d (presumably bytes); len(p) is taken before decoding
+    print('words: {}'.format(' '.join(padded_words)))
+    print('preds: {}'.format(' '.join(padded_preds)))  # both rows share `lengths` and a 7-character prefix
+
+
+def predict_input_fn(line):  # build a batch-of-one (features, labels) pair from the raw sentence `line`
+    # Words: whitespace tokens, each encoded to bytes with str.encode()
+    words = [w.encode() for w in line.strip().split()]
+    nwords = len(words)  # number of tokens in the sentence
+
+    # Chars: one list of single encoded characters per word, padded to the longest word
+    chars = [[c.encode() for c in w] for w in line.strip().split()]
+    lengths = [len(c) for c in chars]  # unpadded character count of each word
+    max_len = max(lengths)  # NOTE(review): max() raises ValueError when `line` contains no tokens
+    chars = [c + [b'<pad>'] * (max_len - l) for c, l in zip(chars, lengths)]  # append b'<pad>' until every word has max_len entries
+
+    # Wrapping in Tensors; the extra [] adds a leading dimension of size 1
+    words = tf.constant([words], dtype=tf.string)
+    nwords = tf.constant([nwords], dtype=tf.int32)
+    chars = tf.constant([chars], dtype=tf.string)
+    nchars = tf.constant([lengths], dtype=tf.int32)  # holds the unpadded lengths, not max_len
+
+    return ((words, nwords), (chars, nchars)), None  # second element (the labels position of an input_fn) is None
+
+
+if __name__ == '__main__':  # script entry: build the estimator on MODELDIR and tag LINE
+    with Path(PARAMS).open() as f:
+        params = json.load(f)  # params dict read from the JSON file at PARAMS
+
+    params['words'] = str(Path(DATADIR, 'vocab.words.txt'))  # set the resource paths relative to DATADIR
+    params['chars'] = str(Path(DATADIR, 'vocab.chars.txt'))
+    params['tags'] = str(Path(DATADIR, 'vocab.tags.txt'))
+    params['glove'] = str(Path(DATADIR, 'glove.npz'))
+
+    estimator = tf.estimator.Estimator(model_fn, MODELDIR, params=params)  # MODELDIR fills the model_dir position
+    predict_inpf = functools.partial(predict_input_fn, LINE)  # zero-argument input_fn bound to LINE
+    for pred in estimator.predict(predict_inpf):
+        pretty_print(LINE, pred['tags'])  # this variant reads the 'tags' entry of the prediction
+        break  # only the first yielded prediction is printed
diff --git a/models/chars_conv_lstm_crf/main.py b/models/chars_conv_lstm_crf/main.py
@@ -93,7 +93,7 @@ def model_fn(features, labels, mode, params):
     char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                         training=training)
 
-    # Char LSTM
+    # Char 1d convolution
     weights = tf.sequence_mask(nchars)
     char_embeddings = masked_conv1d_and_max(
         char_embeddings, weights, params['filters'], params['kernel_size'])

diff --git a/models/chars_conv_lstm_crf_ema/interact.py b/models/chars_conv_lstm_crf_ema/interact.py
@@ -0,0 +1,61 @@
+"""Interact with a model"""
+
+__author__ = "Guillaume Genthial"
+
+from pathlib import Path
+import functools
+import json
+
+import tensorflow as tf
+
+from main import model_fn
+
+LINE = 'John lives in New York'  # sample sentence that the script tags and prints
+DATADIR = '../../data/example'  # directory holding the vocab.*.txt files and glove.npz
+PARAMS = './results/params.json'  # JSON file loaded into the `params` dict
+MODELDIR = './results/model'  # passed to tf.estimator.Estimator as its second argument
+
+
+def pretty_print(line, preds):  # print the words of `line` with one predicted tag aligned under each word
+    words = line.strip().split()  # whitespace tokenization, same split as predict_input_fn
+    lengths = [max(len(w), len(p)) for w, p in zip(words, preds)]  # column width per (word, tag) pair; zip stops at the shorter input
+    padded_words = [w + (l - len(w)) * ' ' for w, l in zip(words, lengths)]  # right-pad each word to its column width
+    padded_preds = [p.decode() + (l - len(p)) * ' ' for p, l in zip(preds, lengths)]  # tags are .decode()d (presumably bytes); len(p) is taken before decoding
+    print('words: {}'.format(' '.join(padded_words)))
+    print('preds: {}'.format(' '.join(padded_preds)))  # both rows share `lengths` and a 7-character prefix
+
+
+def predict_input_fn(line):  # build a batch-of-one (features, labels) pair from the raw sentence `line`
+    # Words: whitespace tokens, each encoded to bytes with str.encode()
+    words = [w.encode() for w in line.strip().split()]
+    nwords = len(words)  # number of tokens in the sentence
+
+    # Chars: one list of single encoded characters per word, padded to the longest word
+    chars = [[c.encode() for c in w] for w in line.strip().split()]
+    lengths = [len(c) for c in chars]  # unpadded character count of each word
+    max_len = max(lengths)  # NOTE(review): max() raises ValueError when `line` contains no tokens
+    chars = [c + [b'<pad>'] * (max_len - l) for c, l in zip(chars, lengths)]  # append b'<pad>' until every word has max_len entries
+
+    # Wrapping in Tensors; the extra [] adds a leading dimension of size 1
+    words = tf.constant([words], dtype=tf.string)
+    nwords = tf.constant([nwords], dtype=tf.int32)
+    chars = tf.constant([chars], dtype=tf.string)
+    nchars = tf.constant([lengths], dtype=tf.int32)  # holds the unpadded lengths, not max_len
+
+    return ((words, nwords), (chars, nchars)), None  # second element (the labels position of an input_fn) is None
+
+
+if __name__ == '__main__':  # script entry: build the estimator on MODELDIR and tag LINE
+    with Path(PARAMS).open() as f:
+        params = json.load(f)  # params dict read from the JSON file at PARAMS
+
+    params['words'] = str(Path(DATADIR, 'vocab.words.txt'))  # set the resource paths relative to DATADIR
+    params['chars'] = str(Path(DATADIR, 'vocab.chars.txt'))
+    params['tags'] = str(Path(DATADIR, 'vocab.tags.txt'))
+    params['glove'] = str(Path(DATADIR, 'glove.npz'))
+
+    estimator = tf.estimator.Estimator(model_fn, MODELDIR, params=params)  # MODELDIR fills the model_dir position
+    predict_inpf = functools.partial(predict_input_fn, LINE)  # zero-argument input_fn bound to LINE
+    for pred in estimator.predict(predict_inpf):
+        pretty_print(LINE, pred['tags_ema'])  # this variant reads the 'tags_ema' entry of the prediction
+        break  # only the first yielded prediction is printed
diff --git a/models/chars_lstm_lstm_crf/interact.py b/models/chars_lstm_lstm_crf/interact.py
@@ -0,0 +1,61 @@
+"""Interact with a model"""
+
+__author__ = "Guillaume Genthial"
+
+from pathlib import Path
+import functools
+import json
+
+import tensorflow as tf
+
+from main import model_fn
+
+LINE = 'John lives in New York'  # sample sentence that the script tags and prints
+DATADIR = '../../data/example'  # directory holding the vocab.*.txt files and glove.npz
+PARAMS = './results/params.json'  # JSON file loaded into the `params` dict
+MODELDIR = './results/model'  # passed to tf.estimator.Estimator as its second argument
+
+
+def pretty_print(line, preds):  # print the words of `line` with one predicted tag aligned under each word
+    words = line.strip().split()  # whitespace tokenization, same split as predict_input_fn
+    lengths = [max(len(w), len(p)) for w, p in zip(words, preds)]  # column width per (word, tag) pair; zip stops at the shorter input
+    padded_words = [w + (l - len(w)) * ' ' for w, l in zip(words, lengths)]  # right-pad each word to its column width
+    padded_preds = [p.decode() + (l - len(p)) * ' ' for p, l in zip(preds, lengths)]  # tags are .decode()d (presumably bytes); len(p) is taken before decoding
+    print('words: {}'.format(' '.join(padded_words)))
+    print('preds: {}'.format(' '.join(padded_preds)))  # both rows share `lengths` and a 7-character prefix
+
+
+def predict_input_fn(line):  # build a batch-of-one (features, labels) pair from the raw sentence `line`
+    # Words: whitespace tokens, each encoded to bytes with str.encode()
+    words = [w.encode() for w in line.strip().split()]
+    nwords = len(words)  # number of tokens in the sentence
+
+    # Chars: one list of single encoded characters per word, padded to the longest word
+    chars = [[c.encode() for c in w] for w in line.strip().split()]
+    lengths = [len(c) for c in chars]  # unpadded character count of each word
+    max_len = max(lengths)  # NOTE(review): max() raises ValueError when `line` contains no tokens
+    chars = [c + [b'<pad>'] * (max_len - l) for c, l in zip(chars, lengths)]  # append b'<pad>' until every word has max_len entries
+
+    # Wrapping in Tensors; the extra [] adds a leading dimension of size 1
+    words = tf.constant([words], dtype=tf.string)
+    nwords = tf.constant([nwords], dtype=tf.int32)
+    chars = tf.constant([chars], dtype=tf.string)
+    nchars = tf.constant([lengths], dtype=tf.int32)  # holds the unpadded lengths, not max_len
+
+    return ((words, nwords), (chars, nchars)), None  # second element (the labels position of an input_fn) is None
+
+
+if __name__ == '__main__':  # script entry: build the estimator on MODELDIR and tag LINE
+    with Path(PARAMS).open() as f:
+        params = json.load(f)  # params dict read from the JSON file at PARAMS
+
+    params['words'] = str(Path(DATADIR, 'vocab.words.txt'))  # set the resource paths relative to DATADIR
+    params['chars'] = str(Path(DATADIR, 'vocab.chars.txt'))
+    params['tags'] = str(Path(DATADIR, 'vocab.tags.txt'))
+    params['glove'] = str(Path(DATADIR, 'glove.npz'))
+
+    estimator = tf.estimator.Estimator(model_fn, MODELDIR, params=params)  # MODELDIR fills the model_dir position
+    predict_inpf = functools.partial(predict_input_fn, LINE)  # zero-argument input_fn bound to LINE
+    for pred in estimator.predict(predict_inpf):
+        pretty_print(LINE, pred['tags'])  # this variant reads the 'tags' entry of the prediction
+        break  # only the first yielded prediction is printed
diff --git a/models/chars_lstm_lstm_crf_ema/interact.py b/models/chars_lstm_lstm_crf_ema/interact.py
@@ -0,0 +1,61 @@
+"""Interact with a model"""
+
+__author__ = "Guillaume Genthial"
+
+from pathlib import Path
+import functools
+import json
+
+import tensorflow as tf
+
+from main import model_fn
+
+LINE = 'John lives in New York'  # sample sentence that the script tags and prints
+DATADIR = '../../data/example'  # directory holding the vocab.*.txt files and glove.npz
+PARAMS = './results/params.json'  # JSON file loaded into the `params` dict
+MODELDIR = './results/model'  # passed to tf.estimator.Estimator as its second argument
+
+
+def pretty_print(line, preds):  # print the words of `line` with one predicted tag aligned under each word
+    words = line.strip().split()  # whitespace tokenization, same split as predict_input_fn
+    lengths = [max(len(w), len(p)) for w, p in zip(words, preds)]  # column width per (word, tag) pair; zip stops at the shorter input
+    padded_words = [w + (l - len(w)) * ' ' for w, l in zip(words, lengths)]  # right-pad each word to its column width
+    padded_preds = [p.decode() + (l - len(p)) * ' ' for p, l in zip(preds, lengths)]  # tags are .decode()d (presumably bytes); len(p) is taken before decoding
+    print('words: {}'.format(' '.join(padded_words)))
+    print('preds: {}'.format(' '.join(padded_preds)))  # both rows share `lengths` and a 7-character prefix
+
+
+def predict_input_fn(line):  # build a batch-of-one (features, labels) pair from the raw sentence `line`
+    # Words: whitespace tokens, each encoded to bytes with str.encode()
+    words = [w.encode() for w in line.strip().split()]
+    nwords = len(words)  # number of tokens in the sentence
+
+    # Chars: one list of single encoded characters per word, padded to the longest word
+    chars = [[c.encode() for c in w] for w in line.strip().split()]
+    lengths = [len(c) for c in chars]  # unpadded character count of each word
+    max_len = max(lengths)  # NOTE(review): max() raises ValueError when `line` contains no tokens
+    chars = [c + [b'<pad>'] * (max_len - l) for c, l in zip(chars, lengths)]  # append b'<pad>' until every word has max_len entries
+
+    # Wrapping in Tensors; the extra [] adds a leading dimension of size 1
+    words = tf.constant([words], dtype=tf.string)
+    nwords = tf.constant([nwords], dtype=tf.int32)
+    chars = tf.constant([chars], dtype=tf.string)
+    nchars = tf.constant([lengths], dtype=tf.int32)  # holds the unpadded lengths, not max_len
+
+    return ((words, nwords), (chars, nchars)), None  # second element (the labels position of an input_fn) is None
+
+
+if __name__ == '__main__':  # script entry: build the estimator on MODELDIR and tag LINE
+    with Path(PARAMS).open() as f:
+        params = json.load(f)  # params dict read from the JSON file at PARAMS
+
+    params['words'] = str(Path(DATADIR, 'vocab.words.txt'))  # set the resource paths relative to DATADIR
+    params['chars'] = str(Path(DATADIR, 'vocab.chars.txt'))
+    params['tags'] = str(Path(DATADIR, 'vocab.tags.txt'))
+    params['glove'] = str(Path(DATADIR, 'glove.npz'))
+
+    estimator = tf.estimator.Estimator(model_fn, MODELDIR, params=params)  # MODELDIR fills the model_dir position
+    predict_inpf = functools.partial(predict_input_fn, LINE)  # zero-argument input_fn bound to LINE
+    for pred in estimator.predict(predict_inpf):
+        pretty_print(LINE, pred['tags_ema'])  # this variant reads the 'tags_ema' entry of the prediction
+        break  # only the first yielded prediction is printed
diff --git a/models/lstm_crf/interact.py b/models/lstm_crf/interact.py
@@ -0,0 +1,53 @@
+"""Interact with a model"""
+
+__author__ = "Guillaume Genthial"
+
+from pathlib import Path
+import functools
+import json
+
+import tensorflow as tf
+
+from main import model_fn
+
+LINE = 'John lives in New York'  # sample sentence that the script tags and prints
+DATADIR = '../../data/example'  # directory holding the vocab.*.txt files and glove.npz
+PARAMS = './results/params.json'  # JSON file loaded into the `params` dict
+MODELDIR = './results/model'  # passed to tf.estimator.Estimator as its second argument
+
+
+def pretty_print(line, preds):  # print the words of `line` with one predicted tag aligned under each word
+    words = line.strip().split()  # whitespace tokenization, same split as predict_input_fn
+    lengths = [max(len(w), len(p)) for w, p in zip(words, preds)]  # column width per (word, tag) pair; zip stops at the shorter input
+    padded_words = [w + (l - len(w)) * ' ' for w, l in zip(words, lengths)]  # right-pad each word to its column width
+    padded_preds = [p.decode() + (l - len(p)) * ' ' for p, l in zip(preds, lengths)]  # tags are .decode()d (presumably bytes); len(p) is taken before decoding
+    print('words: {}'.format(' '.join(padded_words)))
+    print('preds: {}'.format(' '.join(padded_preds)))  # both rows share `lengths` and a 7-character prefix
+
+
+def predict_input_fn(line):  # build a batch-of-one (features, labels) pair from the raw sentence `line`
+    # Words: whitespace tokens, each encoded to bytes with str.encode()
+    words = [w.encode() for w in line.strip().split()]
+    nwords = len(words)  # number of tokens in the sentence
+
+    # Wrapping in Tensors; the extra [] adds a leading dimension of size 1
+    words = tf.constant([words], dtype=tf.string)
+    nwords = tf.constant([nwords], dtype=tf.int32)
+
+    return (words, nwords), None  # second element (the labels position of an input_fn) is None
+
+
+if __name__ == '__main__':  # script entry: build the estimator on MODELDIR and tag LINE
+    with Path(PARAMS).open() as f:
+        params = json.load(f)  # params dict read from the JSON file at PARAMS
+
+    params['words'] = str(Path(DATADIR, 'vocab.words.txt'))  # set the resource paths relative to DATADIR
+    params['chars'] = str(Path(DATADIR, 'vocab.chars.txt'))  # NOTE(review): predict_input_fn here builds no char features; confirm model_fn reads this key
+    params['tags'] = str(Path(DATADIR, 'vocab.tags.txt'))
+    params['glove'] = str(Path(DATADIR, 'glove.npz'))
+
+    estimator = tf.estimator.Estimator(model_fn, MODELDIR, params=params)  # MODELDIR fills the model_dir position
+    predict_inpf = functools.partial(predict_input_fn, LINE)  # zero-argument input_fn bound to LINE
+    for pred in estimator.predict(predict_inpf):
+        pretty_print(LINE, pred['tags'])  # this variant reads the 'tags' entry of the prediction
+        break  # only the first yielded prediction is printed
diff --git a/models/lstm_crf_ema/interact.py b/models/lstm_crf_ema/interact.py
@@ -0,0 +1,53 @@
+"""Interact with a model"""
+
+__author__ = "Guillaume Genthial"
+
+from pathlib import Path
+import functools
+import json
+
+import tensorflow as tf
+
+from main import model_fn
+
+LINE = 'John lives in New York'  # sample sentence that the script tags and prints
+DATADIR = '../../data/example'  # directory holding the vocab.*.txt files and glove.npz
+PARAMS = './results/params.json'  # JSON file loaded into the `params` dict
+MODELDIR = './results/model'  # passed to tf.estimator.Estimator as its second argument
+
+
+def pretty_print(line, preds):  # print the words of `line` with one predicted tag aligned under each word
+    words = line.strip().split()  # whitespace tokenization, same split as predict_input_fn
+    lengths = [max(len(w), len(p)) for w, p in zip(words, preds)]  # column width per (word, tag) pair; zip stops at the shorter input
+    padded_words = [w + (l - len(w)) * ' ' for w, l in zip(words, lengths)]  # right-pad each word to its column width
+    padded_preds = [p.decode() + (l - len(p)) * ' ' for p, l in zip(preds, lengths)]  # tags are .decode()d (presumably bytes); len(p) is taken before decoding
+    print('words: {}'.format(' '.join(padded_words)))
+    print('preds: {}'.format(' '.join(padded_preds)))  # both rows share `lengths` and a 7-character prefix
+
+
+def predict_input_fn(line):  # build a batch-of-one (features, labels) pair from the raw sentence `line`
+    # Words: whitespace tokens, each encoded to bytes with str.encode()
+    words = [w.encode() for w in line.strip().split()]
+    nwords = len(words)  # number of tokens in the sentence
+
+    # Wrapping in Tensors; the extra [] adds a leading dimension of size 1
+    words = tf.constant([words], dtype=tf.string)
+    nwords = tf.constant([nwords], dtype=tf.int32)
+
+    return (words, nwords), None  # second element (the labels position of an input_fn) is None
+
+
+if __name__ == '__main__':  # script entry: build the estimator on MODELDIR and tag LINE
+    with Path(PARAMS).open() as f:
+        params = json.load(f)  # params dict read from the JSON file at PARAMS
+
+    params['words'] = str(Path(DATADIR, 'vocab.words.txt'))  # set the resource paths relative to DATADIR
+    params['chars'] = str(Path(DATADIR, 'vocab.chars.txt'))  # NOTE(review): predict_input_fn here builds no char features; confirm model_fn reads this key
+    params['tags'] = str(Path(DATADIR, 'vocab.tags.txt'))
+    params['glove'] = str(Path(DATADIR, 'glove.npz'))
+
+    estimator = tf.estimator.Estimator(model_fn, MODELDIR, params=params)  # MODELDIR fills the model_dir position
+    predict_inpf = functools.partial(predict_input_fn, LINE)  # zero-argument input_fn bound to LINE
+    for pred in estimator.predict(predict_inpf):
+        pretty_print(LINE, pred['tags_ema'])  # this variant reads the 'tags_ema' entry of the prediction
+        break  # only the first yielded prediction is printed