Support OpenAI Response
kobkrit committed Oct 5, 2024
1 parent 1346004 commit 027a73a
Showing 5 changed files with 155 additions and 14 deletions.
81 changes: 77 additions & 4 deletions README.md
@@ -159,7 +159,7 @@ Published Date:
```
You can see more examples at `/docs`.

## Getting RAG's response.
## Getting RAG's Response.
To get a response from the RAG system, you can use the `/v1/completions` endpoint. This endpoint accepts a POST request with a JSON payload containing the user's query and optional parameters.

Here's a list of query parameters supported by the `/v1/completions` endpoint:
@@ -182,7 +182,7 @@ Here's a list of query parameters supported by the `/v1/completions` endpoint:
Note: Some parameters may not be applicable depending on the specific model and configuration of your OpenThaiRAG setup.
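
For orientation, here is a minimal Python sketch of such a request. It is only an illustration: it assumes the `requests` package is installed, that the server is reachable at `http://localhost:5000`, and it uses only the parameters that appear in the examples below (`prompt`, `stream`, `temperature`, `max_tokens`, `top_p`, `top_k`). The curl examples in the following sections show the same endpoint.

```python
import requests  # assumed dependency; any HTTP client works

def query_rag(prompt: str) -> str:
    """Send a non-streaming completion request to the RAG server (illustrative sketch)."""
    payload = {
        "prompt": prompt,      # the user's query
        "stream": False,       # ask for a single JSON response instead of a stream
        "temperature": 0.7,
        "max_tokens": 512,
        "top_p": 0.8,
        "top_k": 40,
    }
    resp = requests.post("http://localhost:5000/v1/completions", json=payload, timeout=120)
    resp.raise_for_status()
    # The reply follows the OpenAI text-completion format: the answer is in choices[0].text.
    return resp.json()["choices"][0]["text"]

print(query_rag("วัดพระแก้ว กทม. เดินทางไปอย่างไร"))
```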


### Non-Streaming
### via API: Non-Streaming
```bash
>>>Request
curl --location 'http://localhost:5000/v1/completions' \
@@ -217,7 +217,7 @@ curl --location 'http://localhost:5000/v1/completions' \
}
```

### Streaming
### via API: Streaming
```bash
>>>Request
curl --location 'http://localhost:5000/v1/completions' \
@@ -241,7 +241,80 @@ data: {"id":"cmpl-8dbd8bdfbcfb4310bf611cd6f6f7c2e4","object":"text_completion","
data: [DONE]
```
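
Each streamed line is a server-sent `data:` chunk in the OpenAI text-completion format, and the stream ends with `data: [DONE]`. If you call the endpoint directly rather than through the OpenAI client described in the next section, a minimal consumer sketch (assuming the `requests` package and a server at `http://localhost:5000`) could look like this:

```python
import json

import requests  # assumed dependency

def stream_rag(prompt: str) -> None:
    """Print a streamed completion chunk by chunk (illustrative sketch)."""
    payload = {"prompt": prompt, "stream": True, "max_tokens": 512, "temperature": 0.7}
    with requests.post("http://localhost:5000/v1/completions",
                       json=payload, stream=True, timeout=120) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines(decode_unicode=True):
            if not raw or not raw.startswith("data: "):
                continue  # skip keep-alive blank lines
            data = raw[len("data: "):]
            if data == "[DONE]":
                break
            chunk = json.loads(data)
            print(chunk["choices"][0]["text"], end="", flush=True)
    print()  # final newline

stream_rag("วัดพระแก้ว กทม. เดินทางไปอย่างไร")
```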

## API Documentation
### via OpenAI Library
A complete, runnable example is provided in `/app/query_rag_using_openai.py`. To get RAG responses through the OpenAI Python library, follow these steps:

1. Install the OpenAI library:
```bash
pip install openai==0.28
```

2. Configure the OpenAI client to use the vLLM server:
```python
import openai

openai.api_base = "http://127.0.0.1:5000"
openai.api_key = "dummy" # vLLM doesn't require a real API key
```

3. Define your prompt:
```python
prompt = "วัดพระแก้ว กทม. เดินทางไปอย่างไร"  # "How do I get to Wat Phra Kaew (Temple of the Emerald Buddha), Bangkok?"
```

4. For a non-streaming response:
```python
def response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"]
        )
        print("Generated Text:", response.choices[0].text)
    except Exception as e:
        print("Error:", str(e))

# Example usage
print("Non-streaming response:")
response(prompt)
```

5. For a streaming response:
```python
def stream_response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"],
            stream=True  # Enable streaming
        )

        for chunk in response:
            if chunk.choices[0].text:
                print(chunk.choices[0].text, end='', flush=True)
        print()  # Print a newline at the end
    except Exception as e:
        print("Error:", str(e))

# Example usage
print("Streaming response:")
stream_response(prompt)
```

You can find the complete example in the `/app/query_rag_using_openai.py` file.

## Full API Documentation

For detailed API documentation and examples, please refer to our Postman collection:
[OpenThaiRAG API Postman Collection](https://universal-capsule-630444.postman.co/workspace/Travel-LLM~43ad4794-de74-4579-bf8f-24dbe26da1e5/collection/5145656-81239b64-fc7e-4f61-acfd-8e5916e037ce?action=share&creator=5145656)
Binary file modified app/__pycache__/web.cpython-312.pyc
Binary file not shown.
53 changes: 53 additions & 0 deletions app/query_rag_using_openai.py
@@ -0,0 +1,53 @@
import openai

# Configure OpenAI client to use vLLM server
openai.api_base = "http://127.0.0.1:5000"
openai.api_key = "dummy"  # vLLM doesn't require a real API key

prompt = "วัดพระแก้ว กทม. เดินทางไปอย่างไร"  # "How do I get to Wat Phra Kaew (Temple of the Emerald Buddha), Bangkok?"

# Non-Streaming Response
def response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"]
        )
        print("Generated Text:", response.choices[0].text)
    except Exception as e:
        print("Error:", str(e))

# Example usage of non-streaming version
print("Non-streaming response:")
response(prompt)

# Streaming version
def stream_response(prompt):
    try:
        response = openai.Completion.create(
            model=".",  # Specify the model you're using with vLLM
            prompt=prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.8,
            top_k=40,
            stop=["<|im_end|>"],
            stream=True  # Enable streaming
        )

        for chunk in response:
            if chunk.choices[0].text:
                print(chunk.choices[0].text, end='', flush=True)
        print()  # Print a newline at the end
    except Exception as e:
        print("Error:", str(e))

# Example usage of streaming version
print("Streaming response:")
stream_response(prompt)

32 changes: 23 additions & 9 deletions app/web.py
@@ -8,12 +8,11 @@
import os
import numpy as np
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', force=True)
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', force=True)
logger = logging.getLogger(__name__)

logger.info("Logger initialized for Flask application")


# Configuration for Milvus and vLLM hosts
MILVUS_HOST = os.environ.get('MILVUS_HOST', 'milvus')
MILVUS_PORT = os.environ.get('MILVUS_PORT', '19530')
@@ -149,12 +148,18 @@ def index_text():
except Exception as e:
return jsonify({"error": str(e)}), 500

# Flask route for deleting all indexed documents
@app.route("/delete", methods=["DELETE"])
def delete_all_documents():
# Flask route for deleting indexed documents
@app.route("/delete/<doc_id>", methods=["DELETE"])
def delete_documents(doc_id):
try:
# Delete all entities in the collection
delete_result = collection.delete(expr="id >= 0") # Use a condition that matches all documents
if doc_id == '*':
# Delete all entities in the collection
delete_result = collection.delete(expr="id >= 0")
message = "All documents deleted successfully"
else:
# Delete specific document
delete_result = collection.delete(expr=f"id == {doc_id}")
message = f"Document with id {doc_id} deleted successfully"

# Log the delete result
logger.info(f"Delete result: {delete_result}")
@@ -163,7 +168,7 @@ def delete_all_documents():
collection.flush()

return jsonify({
"message": "All documents deleted successfully",
"message": message,
"num_deleted": delete_result.delete_count
}), 200
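
As a usage sketch of the updated route (assuming the Flask app is reachable directly at `http://localhost:5000` without a path prefix, and using a made-up document id purely for illustration):

```python
import requests  # assumed dependency; illustrative only

BASE_URL = "http://localhost:5000"  # assumed base URL for the Flask app

# Delete one document by its id (hypothetical id).
resp = requests.delete(f"{BASE_URL}/delete/42", timeout=30)
print(resp.json())  # e.g. {"message": "Document with id 42 deleted successfully", ...}

# Delete every indexed document by passing '*' as the id.
resp = requests.delete(f"{BASE_URL}/delete/*", timeout=30)
print(resp.json())  # e.g. {"message": "All documents deleted successfully", ...}
```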

@@ -222,7 +227,7 @@ def list_documents():
# Flask route for handling user queries
@app.route("/completions", methods=["POST"])
@app.route("/query", methods=["POST"]) #For backward compatability with the previouse release.
def completions():
def completions():
# Get user query and parameters from request
data = request.get_json()
query = data.get("prompt", "")
@@ -232,6 +237,15 @@ def completions():
top_p = data.get("top_p", 1.0)
top_k = data.get("top_k", -1)

# Print all params for debugging
logger.debug("[Completion] Request data:")
logger.debug(f"Query: {data.get('prompt')}")
logger.debug(f"Stream: {data.get('stream')}")
logger.debug(f"Temperature: {data.get('temperature')}")
logger.debug(f"Max tokens: {data.get('max_tokens')}")
logger.debug(f"Top p: {data.get('top_p')}")
logger.debug(f"Top k: {data.get('top_k')}")

# Step 1: Generate query embedding
query_embedding = generate_embedding(query).numpy().flatten().tolist()

3 changes: 2 additions & 1 deletion requirements.txt
@@ -4,4 +4,5 @@ transformers
torch
scikit-learn
gunicorn
numpy
numpy
openai==0.28
