forked from shibing624/text2vec
Commit
1 parent 9d82044, commit 21ac3ee
Showing 4 changed files with 195 additions and 3 deletions.
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description:
This example starts multiple processes (one per GPU), which encode
sentences in parallel. This gives a near-linear speed-up
when encoding large text collections.
This basic example loads a pre-trained model from the web and uses it to
generate sentence embeddings for a given list of sentences.
"""
import sys

sys.path.append('..')
from text2vec import SentenceModel


def main():
    # Create a large list of sentences
    sentences = ["This is sentence {}".format(i) for i in range(10000)]
    model = SentenceModel("shibing624/text2vec-base-chinese")
    print(f"Sentences size: {len(sentences)}, model: {model}")

    # Start the multi-process pool on all available CUDA devices
    pool = model.start_multi_process_pool()

    # Compute the embeddings using the multi-process pool
    emb = model.encode_multi_process(sentences, pool)
    print(f"Embeddings computed. Shape: {emb.shape}")

    # Optional: stop the processes in the pool
    model.stop_multi_process_pool(pool)


if __name__ == "__main__":
    main()
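For reference, encoding is not tied to every visible GPU: as the test below shows, start_multi_process_pool also accepts an explicit list of target devices. A minimal sketch of pinning the pool to two named devices, assuming the text2vec API mirrors the sentence-transformers multi-process interface (the device names here are illustrative):

# Sketch only: restrict the pool to two explicitly named devices instead of all CUDA devices.
from text2vec import SentenceModel

model = SentenceModel("shibing624/text2vec-base-chinese")
sentences = ["This is sentence {}".format(i) for i in range(10000)]

# Illustrative device list; the test below passes ['cpu', 'cpu'] the same way.
pool = model.start_multi_process_pool(['cuda:0', 'cuda:1'])
emb = model.encode_multi_process(sentences, pool)
model.stop_multi_process_pool(pool)
print(emb.shape)  # expected (10000, 768) for this base model, matching the test's assertion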
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description:
code copied from: SentenceTransformers tests/test_multi_process.py
"""
import sys
import unittest

sys.path.append('..')
from text2vec import SentenceModel
import numpy as np


class ComputeMultiProcessTest(unittest.TestCase):
    def setUp(self):
        self.model = SentenceModel()

    def test_multi_gpu_encode(self):
        # Start the multi-process pool on two CPU workers (no GPU required for the test)
        pool = self.model.start_multi_process_pool(['cpu', 'cpu'])

        sentences = ["This is sentence {}".format(i) for i in range(1000)]

        # Compute the embeddings using the multi-process pool
        emb = self.model.encode_multi_process(sentences, pool, chunk_size=50)
        assert emb.shape == (len(sentences), 768)

        # The multi-process result should match single-process encoding
        emb_normal = self.model.encode(sentences)

        diff = np.max(np.abs(emb - emb_normal))
        print("Max multi proc diff", diff)
        assert diff < 0.001
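The chunk_size=50 argument above only controls how the sentence list is sliced into work items before they are handed to the worker processes; larger chunks mean fewer, larger work items per process. A toy illustration of that slicing (not the library's internal code):

# Toy illustration: how chunk_size=50 partitions 1000 sentences into work items.
sentences = ["This is sentence {}".format(i) for i in range(1000)]
chunk_size = 50
chunks = [sentences[i:i + chunk_size] for i in range(0, len(sentences), chunk_size)]
print(len(chunks), len(chunks[0]))  # 20 work items of 50 sentences each, spread across the pool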