Skip to content

Commit

Permalink
Update .gitignore and Dockerfile, add .env file
Browse files Browse the repository at this point in the history
and modify test batch
  • Loading branch information
dpsalvatierra authored and manyoso committed Nov 21, 2023
1 parent f3eaa33 commit db70f17
Show file tree
Hide file tree
Showing 10 changed files with 48 additions and 30 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,7 @@ build_*
build-*

# IntelliJ
.idea/
.idea/

# LLM models
*.gguf
6 changes: 4 additions & 2 deletions gpt4all-api/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ services:
restart: always #restart on error (usually code compilation from save during bad state)
ports:
- "4891:4891"
env_file:
- .env
environment:
- APP_ENVIRONMENT=dev
- WEB_CONCURRENCY=2
- LOGLEVEL=debug
- PORT=4891
- model=${MODEL_ID}
- model=${MODEL_BIN} # using variable from .env file
- inference_mode=cpu
volumes:
- './gpt4all_api/app:/app'
- './gpt4all_api/models:/models'
- './gpt4all_api/models:/models' # models are mounted in the container
command: ["/start-reload.sh"]
6 changes: 0 additions & 6 deletions gpt4all-api/gpt4all_api/Dockerfile.buildkit
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# syntax=docker/dockerfile:1.0.0-experimental
FROM tiangolo/uvicorn-gunicorn:python3.11

ARG MODEL_BIN=ggml-mpt-7b-chat.bin

# Put first so anytime this file changes other cached layers are invalidated.
COPY gpt4all_api/requirements.txt /requirements.txt

Expand All @@ -17,7 +15,3 @@ COPY gpt4all_api/app /app

RUN mkdir -p /models

# Include the following line to bake a model into the image and not have to download it on API start.
RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \
&& md5sum /models/${MODEL_BIN}

Empty file.
Empty file.
Empty file.
42 changes: 30 additions & 12 deletions gpt4all-api/gpt4all_api/app/tests/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,34 @@
Use the OpenAI python API to test gpt4all models.
"""
from typing import List, get_args
import os
from dotenv import load_dotenv

import openai

openai.api_base = "http://localhost:4891/v1"

openai.api_key = "not needed for a local LLM"

# Load the .env file
env_path = 'gpt4all-api/gpt4all_api/.env'
load_dotenv(dotenv_path=env_path)

# Fetch MODEL_BIN and EMBEDDING from the environment (loaded from the .env file), with placeholder defaults
model_id = os.getenv('MODEL_BIN', 'default_model_id')
embedding = os.getenv('EMBEDDING', 'default_embedding_model_id')
print (model_id)
print (embedding)

def test_completion():
model = "ggml-mpt-7b-chat.bin"
model = model_id
prompt = "Who is Michael Jordan?"
response = openai.Completion.create(
model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
)
assert len(response['choices'][0]['text']) > len(prompt)

def test_streaming_completion():
model = "ggml-mpt-7b-chat.bin"
model = model_id
prompt = "Who is Michael Jordan?"
tokens = []
for resp in openai.Completion.create(
Expand All @@ -36,24 +46,32 @@ def test_streaming_completion():
assert (len(tokens) > 0)
assert (len("".join(tokens)) > len(prompt))


# Modified batch test: the batched request ran into a KeyError in the response, so completions are requested one at a time
def test_batched_completion():
model = "ggml-mpt-7b-chat.bin"
model = model_id # replace with your specific model ID
prompt = "Who is Michael Jordan?"
response = openai.Completion.create(
model=model, prompt=[prompt] * 3, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
)
assert len(response['choices'][0]['text']) > len(prompt)
assert len(response['choices']) == 3
responses = []

# Loop to create completions one at a time
for _ in range(3):
response = openai.Completion.create(
model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
)
responses.append(response)

# Assertions to check the responses
for response in responses:
assert len(response['choices'][0]['text']) > len(prompt)

assert len(responses) == 3

def test_embedding():
model = "ggml-all-MiniLM-L6-v2-f16.bin"
model = embedding
prompt = "Who is Michael Jordan?"
response = openai.Embedding.create(model=model, input=prompt)
output = response["data"][0]["embedding"]
args = get_args(List[float])

assert response["model"] == model
assert isinstance(output, list)
assert all(isinstance(x, args) for x in output)
assert all(isinstance(x, args) for x in output)
2 changes: 1 addition & 1 deletion gpt4all-api/gpt4all_api/models/README.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file
### Drop GGUF-compatible models here, and make sure the model filename matches MODEL_BIN in your .env file
5 changes: 3 additions & 2 deletions gpt4all-api/gpt4all_api/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ fastapi>=0.95.0
Jinja2>=3.0
gpt4all>=1.0.0
pytest
openai
openai==0.28.0
black
isort
isort
python-dotenv
12 changes: 6 additions & 6 deletions gpt4all-api/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ testenv_gpu: clean_testenv test_build
docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up --build

testenv_d: clean_testenv test_build
docker compose up --build -d
docker compose env up --build -d

test:
docker compose exec $(APP_NAME) pytest -svv --disable-warnings -p no:cacheprovider /app/tests
Expand All @@ -28,19 +28,19 @@ clean_testenv:
fresh_testenv: clean_testenv testenv

venv:
if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi
if [ ! -d $(ROOT_DIR)/venv ]; then $(PYTHON) -m venv $(ROOT_DIR)/venv; fi

dependencies: venv
source $(ROOT_DIR)/env/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt
source $(ROOT_DIR)/venv/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt

clean: clean_testenv
# Remove existing environment
rm -rf $(ROOT_DIR)/env;
rm -rf $(ROOT_DIR)/venv;
rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc;


black:
source $(ROOT_DIR)/env/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)
source $(ROOT_DIR)/venv/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)

isort:
source $(ROOT_DIR)/env/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)
source $(ROOT_DIR)/venv/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)

0 comments on commit db70f17

Please sign in to comment.