Commit
formatted & cleaned up code✨
Ki6an committed Mar 18, 2021
1 parent 8dda859 commit 3555f11
Showing 9 changed files with 371 additions and 266 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -160,15 +160,15 @@ The results were tested for English to French translation with beam search numbe

## Get Help
- Contact me at [email protected]
- - If appropriate, [open an issue](https://github.com/Ki6an/fastT5-draft/issues/new/choose) on GitHub
+ - If appropriate, [open an issue](https://github.com/Ki6an/fastT5/issues/new/choose) on GitHub


## Acknowledgements
- [original T5 paper](https://arxiv.org/pdf/1910.10683.pdf)
- [transformers](https://github.com/huggingface/transformers) by huggingface
- [onnx](https://github.com/onnx/onnx)
- [onnxruntime ](https://github.com/microsoft/onnxruntime) by microsoft
- - [onnxt5]()
+ - [onnxt5](https://github.com/abelriboulot/onnxt5)

```
@article{2019t5,
22 changes: 13 additions & 9 deletions examples/custom_infer.py
@@ -1,10 +1,15 @@
- from fastT5 import (OnnxT5, export_and_get_onnx_model,
-                     get_onnx_model, get_onnx_runtime_sessions,
-                     generate_onnx_representation, quantize)
+ from fastT5 import (
+     OnnxT5,
+     export_and_get_onnx_model,
+     get_onnx_model,
+     get_onnx_runtime_sessions,
+     generate_onnx_representation,
+     quantize,
+ )

from transformers import AutoTokenizer

- model_or_model_path = 't5-small'
+ model_or_model_path = "t5-small"

# Step 1. convert huggingfaces t5 model to onnx
onnx_model_paths = generate_onnx_representation(model_or_model_path)
@@ -24,13 +29,12 @@

t_input = "translate English to French: The universe is a dark forest."

- token = tokenizer(t_input, return_tensors='pt')
+ token = tokenizer(t_input, return_tensors="pt")

- input_ids = token['input_ids']
- attention_mask = token['attention_mask']
+ input_ids = token["input_ids"]
+ attention_mask = token["attention_mask"]
# 'set num_beams = 1' for greedy search
- tokens = model.generate(input_ids=input_ids,
-                         attention_mask=attention_mask, num_beams=2)
+ tokens = model.generate(input_ids=input_ids, attention_mask=attention_mask, num_beams=2)

output = tokenizer.decode(tokens.squeeze(), skip_special_tokens=True)

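Note: the collapsed region between the two hunks above hides steps 2 and 3 of this example. For orientation, a minimal sketch of the full flow, inferred from the functions imported at the top of the file — the `quantize`, `get_onnx_runtime_sessions`, and `OnnxT5` call shapes are assumptions, not taken from this diff:

```python
# Hedged sketch of the full custom-inference pipeline. The calls marked
# (assumed) are inferred from the imports above and are not shown in the diff.
from fastT5 import (
    OnnxT5,
    generate_onnx_representation,
    get_onnx_runtime_sessions,
    quantize,
)
from transformers import AutoTokenizer

model_or_model_path = "t5-small"

# Step 1. convert the huggingface t5 model to onnx (shown in the diff above)
onnx_model_paths = generate_onnx_representation(model_or_model_path)

# Step 2. (assumed) quantize the exported encoder/decoder models
quant_model_paths = quantize(onnx_model_paths)

# Step 3. (assumed) build onnxruntime sessions and wrap them in an OnnxT5 model
model_sessions = get_onnx_runtime_sessions(quant_model_paths)
model = OnnxT5(model_or_model_path, model_sessions)

tokenizer = AutoTokenizer.from_pretrained(model_or_model_path)
```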
13 changes: 6 additions & 7 deletions examples/inference.py
@@ -2,25 +2,24 @@
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from fastT5.model_testing_tools import speed_test

- model_or_model_path = 't5-small'
+ model_or_model_path = "t5-small"

model = export_and_get_onnx_model(model_or_model_path)

# if you've already exported the models
# model = get_onnx_model(model_or_model_path)

tokenizer = AutoTokenizer.from_pretrained(model_or_model_path)

t_input = "translate English to French: The universe is a dark forest."

- token = tokenizer(t_input, return_tensors='pt')
+ token = tokenizer(t_input, return_tensors="pt")

- input_ids = token['input_ids']
- attention_mask = token['attention_mask']
+ input_ids = token["input_ids"]
+ attention_mask = token["attention_mask"]

# 'set num_beams = 1' for greedy search
- tokens = model.generate(input_ids=input_ids,
-                         attention_mask=attention_mask, num_beams=2)
+ tokens = model.generate(input_ids=input_ids, attention_mask=attention_mask, num_beams=2)

output = tokenizer.decode(tokens.squeeze(), skip_special_tokens=True)

90 changes: 49 additions & 41 deletions fastT5/model_testing_tools.py
@@ -1,24 +1,30 @@
from time import perf_counter as pc
from matplotlib import pyplot as plt
- import numpy as np
from transformers import AutoTokenizer

+ import numpy as np

- def speed_test(onnx_model, torch_model, beam_range: range = range(1, 10, 1),
-                seq_length_range: range = range(10, 500, 50), input_text=None,):
-     '''
-     method prints the time took for onnx and pytorch model to finish a text generation task
-
-     args:
-         input_text (str) : text input for the model.
-         onnx_model : onnx representation of the t5 model,
-         torch_model : torch represention of the t5 model,
-         beam_range (range) : provide a range, which takes starting end and steps (don't start with 0)
-         sequence_length-range (range) : takes the start, end and steps as a range (start with 10)
-     return :
-         onnx_model_latency : numpy array of latency for each beam number and sequence length
-         pytorch_model_latency : numpy array of latency for each beam number and sequence length
-     '''
+ def speed_test(
+     onnx_model,
+     torch_model,
+     beam_range: range = range(1, 10, 1),
+     seq_length_range: range = range(10, 500, 50),
+     input_text=None,
+ ):
+     """
+     method prints the time took for onnx and pytorch model to finish a text generation task
+     args:
+         input_text (str) : text input for the model.
+         onnx_model : onnx representation of the t5 model,
+         torch_model : torch represention of the t5 model,
+         beam_range (range) : provide a range, which takes starting end and steps (don't start with 0)
+         sequence_length-range (range) : takes the start, end and steps as a range (start with 10)
+     return :
+         onnx_model_latency : numpy array of latency for each beam number and sequence length
+         pytorch_model_latency : numpy array of latency for each beam number and sequence length
+     """

if input_text is None:
input_text = """translate English to French: A nucleus is a collection of a large number of up and down quarks, confined into triplets (neutrons and protons). According to the strange matter hypothesis, strangelets are more stable than nuclei, so nuclei are expected to decay into strangelets. But this process may be extremely slow because there is a large energy barrier to overcome:
@@ -40,40 +46,43 @@ def speed_test(onnx_model, torch_model, beam_range: range = range(1, 10, 1),
        prev = [1, 2]
        for i in seq_length_range:

-            token = tokenizer(input_text,
-                              padding=True,
-                              truncation=True,
-                              max_length=i,
-                              pad_to_max_length=i,
-                              return_tensors='pt')
+            token = tokenizer(
+                input_text,
+                padding=True,
+                truncation=True,
+                max_length=i,
+                pad_to_max_length=i,
+                return_tensors="pt",
+            )

-            input_ids = token['input_ids']
-            attention_mask = token['attention_mask']
+            input_ids = token["input_ids"]
+            attention_mask = token["attention_mask"]

            a = pc()
-            out = onnx_model.generate(input_ids=input_ids,
-                                      attention_mask=attention_mask,
-                                      max_length=i,
-                                      num_beams=j
-                                      )
+            out = onnx_model.generate(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                max_length=i,
+                num_beams=j,
+            )
            b = pc()
-            x.append(b-a)
+            x.append(b - a)

            c = pc()
-            o = torch_model.generate(input_ids=input_ids,
-                                     attention_mask=attention_mask,
-                                     max_length=i,
-                                     num_beams=j
-                                     )
+            o = torch_model.generate(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                max_length=i,
+                num_beams=j,
+            )
            d = pc()
-            y.append(d-c)
+            y.append(d - c)

            mean_y = np.mean(y)
            mean_x = np.mean(x)
-            mean_ratio = mean_y/mean_x
+            mean_ratio = mean_y / mean_x

-            print(
-                f'seqL : {i}, onnx-{b-a}, pt-{d-c} .. X faster {(d-c)/(b-a)}')
+            print(f"seqL : {i}, onnx-{b-a}, pt-{d-c} .. X faster {(d-c)/(b-a)}")

            # ...bleu_score-{bleu.compute(predictions=, references=[[tokenizer.decode(o.squeeze(), skip_special_tokens=True)], ])}')
            # print(f'o---{tokenizer.decode(out.squeeze(), skip_special_tokens=True)}...p---{tokenizer.decode(o.squeeze(), skip_special_tokens=True)}')
@@ -83,13 +92,12 @@ def speed_test(onnx_model, torch_model, beam_range: range = range(1, 10, 1),

            prev.append(o.shape[1])

-        print(f'beam no.- {j} onnx-{mean_x} pt-{mean_y} X ratio-{mean_ratio}')
+        print(f"beam no.- {j} onnx-{mean_x} pt-{mean_y} X ratio-{mean_ratio}")

        xx.append(x)
        yy.append(y)
-        plt.plot(x, 'g', y, 'r')
+        plt.plot(x, "g", y, "r")
        plt.pause(0.05)

    plt.show()
    return np.array(xx), np.array(yy)
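
The docstring above describes speed_test's arguments and return values; a minimal usage sketch, assuming the import pattern shown in examples/inference.py — the ranges and the AutoModelForSeq2SeqLM baseline are illustrative choices, not taken from this commit:

```python
# Hedged usage sketch for speed_test, combining the imports shown in
# examples/inference.py with the signature above. Ranges are illustrative.
from fastT5 import export_and_get_onnx_model
from fastT5.model_testing_tools import speed_test
from transformers import AutoModelForSeq2SeqLM

model_or_model_path = "t5-small"

onnx_model = export_and_get_onnx_model(model_or_model_path)
torch_model = AutoModelForSeq2SeqLM.from_pretrained(model_or_model_path)

# Prints per-configuration timings, updates a live matplotlib plot, and
# returns latency arrays with one row per beam width (per the docstring).
onnx_latency, torch_latency = speed_test(
    onnx_model,
    torch_model,
    beam_range=range(1, 3),          # don't start with 0
    seq_length_range=range(10, 110, 50),  # start with 10
)
```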

