Support OpenAI Response
kobkrit committed Oct 5, 2024
1 parent 1346004 commit 027a73a
Showing 5 changed files with 155 additions and 14 deletions.
81 changes: 77 additions & 4 deletions README.md
@@ -159,7 +159,7 @@ Published Date:
```
You can see more examples at `/docs`.

## Getting RAG's response.
## Getting RAG's Response.
To get a response from the RAG system, you can use the `/v1/completions` endpoint. This endpoint accepts a POST request with a JSON payload containing the user's query and optional parameters.

Here's a list of query parameters supported by the `/v1/completions` endpoint:
@@ -182,7 +182,7 @@ Here's a list of query parameters supported by the `/v1/completions` endpoint:
Note: Some parameters may not be applicable depending on the specific model and configuration of your OpenThaiRAG setup.
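
For orientation, here is a minimal Python sketch of such a request. It is only an illustration: it assumes the `requests` package is installed, that the server is reachable at `http://localhost:5000`, and it uses only the parameters that appear in the examples below (`prompt`, `stream`, `temperature`, `max_tokens`, `top_p`, `top_k`). The curl examples in the following sections show the same endpoint.

```python
import requests  # assumed dependency; any HTTP client works

def query_rag(prompt: str) -> str:
    """Send a non-streaming completion request to the RAG server (illustrative sketch)."""
    payload = {
        "prompt": prompt,      # the user's query
        "stream": False,       # ask for a single JSON response instead of a stream
        "temperature": 0.7,
        "max_tokens": 512,
        "top_p": 0.8,
        "top_k": 40,
    }
    resp = requests.post("http://localhost:5000/v1/completions", json=payload, timeout=120)
    resp.raise_for_status()
    # The reply follows the OpenAI text-completion format: the answer is in choices[0].text.
    return resp.json()["choices"][0]["text"]

print(query_rag("วัดพระแก้ว กทม. เดินทางไปอย่างไร"))
```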


### Non-Streaming
### via API: Non-Streaming
```bash
>>>Request
curl --location 'http://localhost:5000/v1/completions' \
@@ -217,7 +217,7 @@ curl --location 'http://localhost:5000/v1/completions' \
}
```

### Streaming
### via API: Streaming
```bash
>>>Request
curl --location 'http://localhost:5000/v1/completions' \
@@ -241,7 +241,80 @@ data: {"id":"cmpl-8dbd8bdfbcfb4310bf611cd6f6f7c2e4","object":"text_completion","
data: [DONE]
```
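
Each streamed line is a server-sent `data:` chunk in the OpenAI text-completion format, and the stream ends with `data: [DONE]`. If you call the endpoint directly rather than through the OpenAI client described in the next section, a minimal consumer sketch (assuming the `requests` package and a server at `http://localhost:5000`) could look like this:

```python
import json

import requests  # assumed dependency

def stream_rag(prompt: str) -> None:
    """Print a streamed completion chunk by chunk (illustrative sketch)."""
    payload = {"prompt": prompt, "stream": True, "max_tokens": 512, "temperature": 0.7}
    with requests.post("http://localhost:5000/v1/completions",
                       json=payload, stream=True, timeout=120) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines(decode_unicode=True):
            if not raw or not raw.startswith("data: "):
                continue  # skip keep-alive blank lines
            data = raw[len("data: "):]
            if data == "[DONE]":
                break
            chunk = json.loads(data)
            print(chunk["choices"][0]["text"], end="", flush=True)
    print()  # final newline

stream_rag("วัดพระแก้ว กทม. เดินทางไปอย่างไร")
```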

## API Documentation
### via OpenAI Library
A complete, runnable example is provided in `/app/query_rag_using_openai.py`. To get RAG responses through the OpenAI Python library, follow these steps:

1. Install the OpenAI library:
```bash
pip install openai==0.28
```

2. Configure the OpenAI client to use the vLLM server:
```python
import openai

openai.api_base = "http://127.0.0.1:5000"
openai.api_key = "dummy" # vLLM doesn't require a real API key
```

3. Define your prompt:
```python
prompt = "วัดพระแก้ว กทม. เดินทางไปอย่างไร"  # "How do I get to Wat Phra Kaew (Temple of the Emerald Buddha), Bangkok?"
```

4. For a non-streaming response:
```python
def response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"]
        )
        print("Generated Text:", response.choices[0].text)
    except Exception as e:
        print("Error:", str(e))

# Example usage
print("Non-streaming response:")
response(prompt)
```

5. For a streaming response:
```python
def stream_response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"],
            stream=True  # Enable streaming
        )

        for chunk in response:
            if chunk.choices[0].text:
                print(chunk.choices[0].text, end='', flush=True)
        print()  # Print a newline at the end
    except Exception as e:
        print("Error:", str(e))

# Example usage
print("Streaming response:")
stream_response(prompt)
```

You can find the complete example in the `/app/query_rag_using_openai.py` file.

## Full API Documentation

For detailed API documentation and examples, please refer to our Postman collection:
[OpenThaiRAG API Postman Collection](https://universal-capsule-630444.postman.co/workspace/Travel-LLM~43ad4794-de74-4579-bf8f-24dbe26da1e5/collection/5145656-81239b64-fc7e-4f61-acfd-8e5916e037ce?action=share&creator=5145656)
Binary file modified app/__pycache__/web.cpython-312.pyc
Binary file not shown.
53 changes: 53 additions & 0 deletions app/query_rag_using_openai.py
@@ -0,0 +1,53 @@
import openai

# Configure OpenAI client to use vLLM server
openai.api_base = "http://127.0.0.1:5000"
openai.api_key = "dummy"  # vLLM doesn't require a real API key

prompt = "วัดพระแก้ว กทม. เดินทางไปอย่างไร"  # "How do I get to Wat Phra Kaew (Temple of the Emerald Buddha), Bangkok?"

# Non-Streaming Response
def response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"]
        )
        print("Generated Text:", response.choices[0].text)
    except Exception as e:
        print("Error:", str(e))

# Example usage of non-streaming version
print("Non-streaming response:")
response(prompt)

# Streaming version
def stream_response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"],
            stream=True  # Enable streaming
        )

        for chunk in response:
            if chunk.choices[0].text:
                print(chunk.choices[0].text, end='', flush=True)
        print()  # Print a newline at the end
    except Exception as e:
        print("Error:", str(e))

# Example usage of streaming version
print("Streaming response:")
stream_response(prompt)

32 changes: 23 additions & 9 deletions app/web.py
@@ -8,12 +8,11 @@
import os
import numpy as np
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', force=True)
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', force=True)
logger = logging.getLogger(__name__)

logger.info("Logger initialized for Flask application")


# Configuration for Milvus and vLLM hosts
MILVUS_HOST = os.environ.get('MILVUS_HOST', 'milvus')
MILVUS_PORT = os.environ.get('MILVUS_PORT', '19530')
@@ -149,12 +148,18 @@ def index_text():
except Exception as e:
return jsonify({"error": str(e)}), 500

# Flask route for deleting all indexed documents
@app.route("/delete", methods=["DELETE"])
def delete_all_documents():
# Flask route for deleting indexed documents
@app.route("/delete/<doc_id>", methods=["DELETE"])
def delete_documents(doc_id):
try:
# Delete all entities in the collection
delete_result = collection.delete(expr="id >= 0") # Use a condition that matches all documents
if doc_id == '*':
# Delete all entities in the collection
delete_result = collection.delete(expr="id >= 0")
message = "All documents deleted successfully"
else:
# Delete specific document
delete_result = collection.delete(expr=f"id == {doc_id}")
message = f"Document with id {doc_id} deleted successfully"

# Log the delete result
logger.info(f"Delete result: {delete_result}")
@@ -163,7 +168,7 @@ def delete_all_documents():
collection.flush()

return jsonify({
"message": "All documents deleted successfully",
"message": message,
"num_deleted": delete_result.delete_count
}), 200
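
As a usage sketch of the updated route (assuming the Flask app is reachable directly at `http://localhost:5000` without a path prefix, and using a made-up document id purely for illustration):

```python
import requests  # assumed dependency; illustrative only

BASE_URL = "http://localhost:5000"  # assumed base URL for the Flask app

# Delete one document by its id (hypothetical id).
resp = requests.delete(f"{BASE_URL}/delete/42", timeout=30)
print(resp.json())  # e.g. {"message": "Document with id 42 deleted successfully", ...}

# Delete every indexed document by passing '*' as the id.
resp = requests.delete(f"{BASE_URL}/delete/*", timeout=30)
print(resp.json())  # e.g. {"message": "All documents deleted successfully", ...}
```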

@@ -222,7 +227,7 @@ def list_documents():
# Flask route for handling user queries
@app.route("/completions", methods=["POST"])
@app.route("/query", methods=["POST"]) #For backward compatability with the previouse release.
def completions():
def completions():
# Get user query and parameters from request
data = request.get_json()
query = data.get("prompt", "")
@@ -232,6 +237,15 @@ def completions():
top_p = data.get("top_p", 1.0)
top_k = data.get("top_k", -1)

# Print all params for debugging
logger.debug("[Completion] Request data:")
logger.debug(f"Query: {data.get('prompt')}")
logger.debug(f"Stream: {data.get('stream')}")
logger.debug(f"Temperature: {data.get('temperature')}")
logger.debug(f"Max tokens: {data.get('max_tokens')}")
logger.debug(f"Top p: {data.get('top_p')}")
logger.debug(f"Top k: {data.get('top_k')}")

# Step 1: Generate query embedding
query_embedding = generate_embedding(query).numpy().flatten().tolist()

3 changes: 2 additions & 1 deletion requirements.txt
@@ -4,4 +4,5 @@ transformers
torch
scikit-learn
gunicorn
numpy
numpy
openai==0.28
