from setuptools import find_packages, setup

setup(
name="llm-serve",
version="0.0.1",
description="An LLM inference solution to quickly deploy productive LLM service",
author="llm-serve authors",
author_email="[email protected]",
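    # Package only the llmserve tree; non-Python files under llmserve/models/
    # are shipped alongside the code as package data.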
    packages=find_packages(include=["llmserve*"]),
    include_package_data=True,
    package_data={"llmserve": ["models/*"]},
    entry_points={
        "console_scripts": [
            "llm-serve=llmserve.api.cli:app",
        ]
    },
    install_requires=["typer>=0.9", "rich"],
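    # Optional dependency groups; install one with e.g.:
    #   pip install "llm-serve[backend]"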
    extras_require={
        "backend": [
            "async_timeout",
            "datasets",
            "ftfy",
            "tensorboard",
            "sentencepiece",
            "Jinja2",
            "numexpr>=2.7.3",
            "hf_transfer",
            "evaluate",
            "bitsandbytes",
            "numpy<1.24",
            "ninja",
            "protobuf<3.21.0",
            "optimum",
            "safetensors",
            "pydantic==2.7.1",
            "einops",
            "markdown-it-py[plugins]==2.2.0",
            "scipy==1.11.1",
            "jieba==0.42.1",
            "rouge_chinese==1.0.3",
            "nltk==3.8.1",
            "sqlalchemy==1.4.41",
            "typing-extensions>=4.6.1",
            "linkify-it-py==2.0.2",
            "gradio",
            "httpx[socks]==0.23.3",
            "torch==2.2.1",
            "torchaudio",
            "torchvision",
            "accelerate==0.25.0",
            "deepspeed==0.14.0",
            "torchmetrics==1.2.1",
            "llama_cpp_python==0.2.57",
            "transformers==4.40.0",
            "ray[serve]==2.20.0",
        ],
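        # Alternative vLLM-based serving backend.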
"vllm": [
"vllm==0.2.6",
"xformers==0.0.23.post1",
],
"frontend": [
"gradio",
"aiorwlock",
"pymongo",
"pandas",
"boto3",
],
"dev": [
"pre-commit",
"ruff==0.0.270",
"black==23.3.0",
],
"test": [
"pytest",
],
"docs": [
"mkdocs-material",
],
},
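    # NOTE: dependency_links is deprecated and ignored by modern pip; pass
    # --extra-index-url https://download.pytorch.org/whl/cu118 at install
    # time to pull CUDA 11.8 wheels instead.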
    dependency_links=["https://download.pytorch.org/whl/cu118"],
    python_requires=">=3.10",
)