forked from ruoccofabrizio/azure-open-ai-embeddings-qna
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
37f01a0
commit 24e7a94
Showing
24 changed files
with
720 additions
and
721 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,24 @@ | ||
import logging, json, os, io | ||
import logging, json | ||
import azure.functions as func | ||
from azure.storage.blob import BlobServiceClient, generate_blob_sas | ||
from datetime import datetime, timedelta | ||
from utilities.formrecognizer import analyze_read | ||
from utilities.azureblobstorage import upload_file, upsert_blob_metadata | ||
from utilities.redisembeddings import set_document | ||
from utilities.utils import chunk_and_embed | ||
from utilities.utils import add_embeddings, convert_file_and_add_embeddings, initialize | ||
|
||
account_name = os.environ['BLOB_ACCOUNT_NAME'] | ||
account_key = os.environ['BLOB_ACCOUNT_KEY'] | ||
connect_str = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={account_key};EndpointSuffix=core.windows.net" | ||
container_name = os.environ['BLOB_CONTAINER_NAME'] | ||
from utilities.helper import LLMHelper | ||
|
||
def main(msg: func.QueueMessage) -> None: | ||
logging.info('Python queue trigger function processed a queue item: %s', | ||
msg.get_body().decode('utf-8')) | ||
|
||
# Set up Azure OpenAI connection | ||
initialize() | ||
|
||
# Set up LLM Helper | ||
llm_helper = LLMHelper() | ||
# Get the file name from the message | ||
file_name = json.loads(msg.get_body().decode('utf-8'))['filename'] | ||
# Generate the SAS URL for the file | ||
file_sas = llm_helper.blob_client.get_blob_sas(file_name) | ||
|
||
# Check the file extension | ||
if file_name.endswith('.txt'): | ||
# Read the file from Blob Storage | ||
blob_client = BlobServiceClient.from_connection_string(connect_str).get_blob_client(container=container_name, blob=file_name) | ||
file_content = blob_client.download_blob().readall().decode('utf-8') | ||
|
||
# Embed the file | ||
data = chunk_and_embed(file_content, file_name) | ||
|
||
# Set the document in Redis | ||
set_document(data) | ||
# Add the text to the embeddings | ||
llm_helper.add_embeddings_lc(file_sas) | ||
else: | ||
file_sas = generate_blob_sas(account_name, container_name, file_name, account_key= account_key, permission='r', expiry=datetime.utcnow() + timedelta(hours=1)) | ||
convert_file_and_add_embeddings(f"https://{account_name}.blob.core.windows.net/{container_name}/{file_name}?{file_sas}" , file_name) | ||
# Get OCR with Layout API and then add embeddings | ||
llm_helper.convert_file_and_add_embeddings(file_sas , file_name) | ||
|
||
upsert_blob_metadata(file_name, {'embeddings_added': 'true'}) | ||
llm_helper.blob_client.upsert_blob_metadata(file_name, {'embeddings_added': 'true'}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import streamlit as st | ||
from streamlit_chat import message | ||
from utilities.helper import LLMHelper | ||
|
||
# Initialize chat history | ||
if 'chat_history' not in st.session_state: | ||
st.session_state['chat_history'] = [] | ||
if 'source_documents' not in st.session_state: | ||
st.session_state['source_documents'] = [] | ||
|
||
llm_helper = LLMHelper() | ||
|
||
# Chat | ||
input_text = st.text_input("You: ", placeholder="type your question", key="input") | ||
|
||
if input_text: | ||
question = input_text | ||
input_text = "" | ||
question, result, _, sources = llm_helper.get_semantic_answer_lang_chain(question, st.session_state['chat_history']) | ||
st.session_state['chat_history'].append((question, result)) | ||
st.session_state['source_documents'].append(sources) | ||
|
||
if st.session_state['chat_history']: | ||
for i in range(len(st.session_state['chat_history'])-1, -1, -1): | ||
message(st.session_state['chat_history'][i][1], key=str(i)) | ||
st.markdown(f'\n\nSources: {st.session_state["source_documents"][i]}') | ||
message(st.session_state['chat_history'][i][0], is_user=True, key=str(i) + '_user') |
Oops, something went wrong.