-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
351 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,11 @@ | ||
.vscode | ||
.env | ||
.env | ||
|
||
# Jan inference | ||
jan-inference/llm/models/** | ||
jan-inference/llm/.env | ||
|
||
jan-inference/sd/models/** | ||
jan-inference/sd/output/** | ||
jan-inference/sd/.env | ||
jan-inference/sd/sd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "jan-inference/sd/sd_cpp"] | ||
path = jan-inference/sd/sd_cpp | ||
url = https://github.com/leejet/stable-diffusion.cpp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Inference - LLM | ||
|
||
```bash | ||
docker network create traefik_public | ||
cp .env.example .env | ||
# -> Update MODEL_URL in `.env` file | ||
docker compose up -d --scale llm=2 | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
version: '3' | ||
|
||
services: | ||
|
||
# Service to download a model file. | ||
downloader: | ||
image: busybox | ||
# The command extracts the model filename from MODEL_URL and downloads it if it doesn't exist. | ||
command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /models/$LLM_MODEL_FILE ]; then wget -O /models/$LLM_MODEL_FILE ${MODEL_URL}; fi" | ||
# Mount a local directory to store the downloaded model. | ||
volumes: | ||
- ./models:/models | ||
|
||
# Service to wait for the downloader service to finish downloading the model. | ||
wait-for-downloader: | ||
image: busybox | ||
# The command waits until the model file (specified in MODEL_URL) exists. | ||
command: /bin/sh -c "LLM_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/$LLM_MODEL_FILE ]; do sleep 1; done; echo 'Model downloaded!'" | ||
# Specifies that this service should start after the downloader service has started. | ||
depends_on: | ||
downloader: | ||
condition: service_started | ||
# Mount the same local directory to check for the downloaded model. | ||
volumes: | ||
- ./models:/models | ||
|
||
# Service to run the Llama web application. | ||
llm: | ||
image: ghcr.io/abetlen/llama-cpp-python:latest | ||
# Mount the directory that contains the downloaded model. | ||
volumes: | ||
- ./models:/models | ||
environment: | ||
# Specify the path to the model for the web application. | ||
MODEL: /models/llama-2-7b-chat.ggmlv3.q4_1.bin | ||
labels: | ||
# Instead of using the Host rule, set a PathPrefix rule | ||
- "traefik.http.routers.web.rule=PathPrefix(`/inference/llm`)" | ||
# This tells Traefik where to forward the traffic for this route. | ||
- "traefik.http.routers.web.service=llm" | ||
# Define a service for the llm and specify its load balancer configuration | ||
- "traefik.http.services.llm-service.loadbalancer.server.port=8000" | ||
|
||
- "traefik.http.middlewares.strip-llm-prefix.stripprefix.prefixes=/inference/llm" | ||
- "traefik.http.routers.web.middlewares=strip-llm-prefix" | ||
# Health check configuration | ||
healthcheck: | ||
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"] | ||
interval: 30s | ||
timeout: 10s | ||
retries: 3 | ||
start_period: 30s | ||
# Restart policy configuration | ||
restart: on-failure | ||
# Specifies that this service should start only after wait-for-downloader has completed successfully. | ||
depends_on: | ||
wait-for-downloader: | ||
condition: service_completed_successfully | ||
# Connect this service to two networks: inference_net and traefik_public. | ||
networks: | ||
- inference_net | ||
- traefik_public | ||
|
||
# Service for Traefik, a modern HTTP reverse proxy and load balancer. | ||
traefik: | ||
image: traefik:v2.5 | ||
command: | ||
# Enable the Traefik API dashboard without TLS (not recommended for production). | ||
- "--api.insecure=true" | ||
# Enable Traefik to use Docker as a provider. | ||
- "--providers.docker=true" | ||
# Do not expose services by default. Explicitly specify in each service if it should be exposed. | ||
- "--providers.docker.exposedbydefault=false" | ||
# Specify the default entry point on port 80. | ||
- "--entrypoints.web.address=:80" | ||
ports: | ||
# Map port 80 in the container to port 80 on the host. | ||
- "80:80" | ||
# Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host. | ||
- "8080:8080" | ||
# Mount the Docker socket to allow Traefik to listen to Docker's API. | ||
volumes: | ||
- /var/run/docker.sock:/var/run/docker.sock | ||
# Connect this service to the traefik_public network. | ||
networks: | ||
- traefik_public | ||
|
||
# Define networks used in this docker-compose file. | ||
networks: | ||
# Network for the llm service (used for inference). | ||
inference_net: | ||
# Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file. | ||
traefik_public: | ||
external: true |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
output/ | ||
models/ | ||
|
||
sd_cpp/.git | ||
sd_cpp/.github | ||
|
||
sd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
FROM python:3.9.17 | ||
|
||
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y | ||
ENV PATH=/root/.cargo/bin:$PATH | ||
|
||
WORKDIR /sd.cpp | ||
|
||
COPY . . | ||
|
||
RUN pip install -r compile.requirements.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Compiling | ||
-r sd_cpp/models/requirements.txt | ||
|
||
# diffusers | ||
# torch | ||
# ftfy | ||
# scipy | ||
# transformers | ||
# accelerate | ||
# huggingface-hub | ||
# xformers | ||
# omegaconf | ||
# safetensors | ||
# cog | ||
# tomesd | ||
# compel | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
version: '3' | ||
|
||
services: | ||
|
||
# Service to download a model file. | ||
downloader: | ||
build: | ||
context: ./ | ||
dockerfile: compile.Dockerfile | ||
# platform: "linux/amd64" | ||
    # The command downloads MODEL_URL and converts it (q4_0) unless a *.bin file already exists in /converted_models. | ||
command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); if [ ! -f /converted_models/*.bin ]; then wget -O /converted_models/$SD_MODEL_FILE ${MODEL_URL}; python /sd.cpp/models/convert.py --out_type q4_0 --out_file /converted_models/$SD_MODEL_FILE; fi" | ||
# Mount a local directory to store the downloaded model. | ||
volumes: | ||
- ./models:/converted_models | ||
|
||
# Service to wait for the downloader service to finish downloading the model. | ||
wait-for-downloader: | ||
image: busybox | ||
    # The command waits until a *.bin model file exists in /models (the same host directory the downloader writes to). | ||
command: /bin/sh -c "SD_MODEL_FILE=$(basename ${MODEL_URL}); echo 'Waiting for downloader to finish'; while [ ! -f /models/*.bin ]; do sleep 1; done; echo 'Model downloaded and converted!'" | ||
# Specifies that this service should start after the downloader service has started. | ||
depends_on: | ||
downloader: | ||
condition: service_started | ||
# Mount the same local directory to check for the downloaded model. | ||
volumes: | ||
- ./models:/models | ||
|
||
# Service to run the SD web application. | ||
sd: | ||
build: | ||
context: ./ | ||
dockerfile: inference.Dockerfile | ||
# Mount the directory that contains the downloaded model. | ||
volumes: | ||
- ./models:/models | ||
- ./output/:/serving/output | ||
command: /bin/bash -c "python -m uvicorn main:app --proxy-headers --host 0.0.0.0 --port 8000" | ||
# platform: "linux/amd64" | ||
environment: | ||
      # Settings read by the web application: its public base URL, the model file name and directory, and the path to the sd binary. | ||
BASE_URL: http://0.0.0.0:8000 | ||
MODEL_NAME: "v1-5-pruned-emaonly-ggml-model-q5_0.bin" | ||
MODEL_DIR: "/models" | ||
SD_PATH: "/sd" | ||
PYTHONUNBUFFERED: 1 | ||
ports: | ||
- 8000:8000 | ||
# Health check configuration | ||
healthcheck: | ||
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8000"] | ||
interval: 30s | ||
timeout: 10s | ||
retries: 3 | ||
start_period: 30s | ||
# Restart policy configuration | ||
restart: on-failure | ||
# Specifies that this service should start only after wait-for-downloader has completed successfully. | ||
depends_on: | ||
wait-for-downloader: | ||
condition: service_completed_successfully | ||
# Connect this service to two networks: inference_net and traefik_public. | ||
networks: | ||
- inference_net | ||
- traefik_public | ||
|
||
# Service for Traefik, a modern HTTP reverse proxy and load balancer. | ||
traefik: | ||
image: traefik:v2.5 | ||
command: | ||
# Enable the Traefik API dashboard without TLS (not recommended for production). | ||
- "--api.insecure=true" | ||
# Enable Traefik to use Docker as a provider. | ||
- "--providers.docker=true" | ||
# Do not expose services by default. Explicitly specify in each service if it should be exposed. | ||
- "--providers.docker.exposedbydefault=false" | ||
# Specify the default entry point on port 80. | ||
- "--entrypoints.web.address=:80" | ||
ports: | ||
# Map port 80 in the container to port 80 on the host. | ||
- "80:80" | ||
# Map port 8080 in the container (Traefik's dashboard) to port 8080 on the host. | ||
- "8080:8080" | ||
# Mount the Docker socket to allow Traefik to listen to Docker's API. | ||
volumes: | ||
- /var/run/docker.sock:/var/run/docker.sock | ||
# Connect this service to the traefik_public network. | ||
networks: | ||
- traefik_public | ||
|
||
# Define networks used in this docker-compose file. | ||
networks: | ||
  # Network for the sd service (used for inference). | ||
inference_net: | ||
# Public-facing network that Traefik uses. Marked as external to indicate it may be defined outside this file. | ||
traefik_public: | ||
external: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
FROM python:3.9.17 as build | ||
|
||
RUN apt-get update && apt-get install -y build-essential git cmake | ||
|
||
WORKDIR /sd.cpp | ||
|
||
COPY sd_cpp /sd.cpp | ||
|
||
RUN mkdir build && cd build && cmake .. && cmake --build . --config Release | ||
|
||
FROM python:3.9.17 as runtime | ||
|
||
COPY --from=build /sd.cpp/build/bin/sd /sd | ||
|
||
WORKDIR /serving | ||
|
||
COPY . /serving/ | ||
|
||
RUN pip install -r inference.requirements.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Inference | ||
fastapi | ||
uvicorn | ||
python-multipart |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
from fastapi import FastAPI, BackgroundTasks, HTTPException, Form | ||
from fastapi.responses import FileResponse | ||
from fastapi.staticfiles import StaticFiles | ||
import subprocess | ||
import os | ||
from uuid import uuid4 | ||
|
||
app = FastAPI()

# Runtime configuration. Everything except OUTPUT_DIR can be overridden
# through an environment variable of the same name.
OUTPUT_DIR = "output"
SD_PATH = os.environ.get("SD_PATH", "./sd")
MODEL_DIR = os.environ.get("MODEL_DIR", "./models")
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
MODEL_NAME = os.environ.get(
    "MODEL_NAME", "v1-5-pruned-emaonly-ggml-model-q5_0.bin")

# Create the OUTPUT_DIR directory if it does not exist.
# exist_ok avoids the check-then-create race when several workers start at once.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Create the MODEL_DIR directory if it does not exist.
os.makedirs(MODEL_DIR, exist_ok=True)

# Serve the generated files in OUTPUT_DIR under the "/output" URL prefix.
app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")
|
||
|
||
def run_command(prompt: str, filename: str):
    """Generate an image for *prompt* by running the stable-diffusion binary.

    The binary at SD_PATH is invoked with the model MODEL_DIR/MODEL_NAME and
    writes its result to OUTPUT_DIR/filename. The arguments are passed as a
    list (no shell), so the prompt is never interpreted by a shell.

    Args:
        prompt: Text prompt forwarded to the binary via ``-p``.
        filename: Name of the output file created inside OUTPUT_DIR.

    Raises:
        HTTPException: 500 when the binary exits with a non-zero status or
            does not finish within five minutes.
    """
    command = [
        SD_PATH,
        "-m", os.path.join(MODEL_DIR, MODEL_NAME),
        "-p", prompt,
        "-o", os.path.join(OUTPUT_DIR, filename),
    ]

    try:
        sub_output = subprocess.run(command, timeout=5*60, capture_output=True,
                                    check=True, encoding="utf-8")
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as err:
        # Surface whatever the binary reported; otherwise the failure reason
        # is lost together with the captured streams.
        if err.stderr:
            print(err.stderr)
        raise HTTPException(
            status_code=500, detail="Failed to execute the command.") from err

    print(sub_output.stdout)
|
||
|
||
@app.post("/inference/")
async def run_inference(background_tasks: BackgroundTasks, prompt: str = Form()):
    """Queue an image generation for *prompt* and return where it will appear.

    The generation itself is handed to ``background_tasks`` so the request
    returns immediately; the file may not exist yet when the client first
    fetches the returned URL.

    Returns:
        A dict with a single ``url`` key pointing at the ``/serve/`` route
        for the file that is going to be produced.
    """
    # A random UUID keeps concurrent requests from colliding on a file name.
    image_name = str(uuid4()) + ".png"

    # Schedule the generation; it runs after the response has been sent.
    background_tasks.add_task(run_command, prompt, image_name)

    return {"url": BASE_URL + "/serve/" + image_name}
|
||
|
||
@app.get("/serve/{filename}")
async def serve_file(filename: str):
    """Return a generated file from OUTPUT_DIR.

    Args:
        filename: Bare name of a file inside OUTPUT_DIR.

    Returns:
        A FileResponse for the requested file.

    Raises:
        HTTPException: 404 when *filename* is not a bare file name or no
            regular file with that name exists in OUTPUT_DIR.
    """
    # Only bare names are served: anything carrying a directory component,
    # or naming the current/parent directory, must not escape OUTPUT_DIR.
    if filename in ("", ".", "..") or os.path.basename(filename) != filename:
        raise HTTPException(status_code=404, detail="File not found")

    file_path = os.path.join(OUTPUT_DIR, filename)

    # isfile rather than exists: a directory exists but is not something
    # that can be returned as a file.
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="File not found")

    return FileResponse(file_path)
|
||
|
||
if __name__ == "__main__":
    # Executed directly as a script: serve the app on all interfaces, port 8000.
    from uvicorn import run as serve
    serve(app, host="0.0.0.0", port=8000)