forked from ruoccofabrizio/azure-open-ai-embeddings-qna
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
49efa86
commit 3980cb7
Showing
4 changed files
with
158 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import streamlit as st | ||
import os, json, re | ||
from urllib.error import URLError | ||
import requests | ||
from utilities.formrecognizer import analyze_read | ||
from os import path | ||
from utilities.azureblobstorage import upload_file, get_all_files | ||
|
||
def convert_file(fullpath, filename):
    """Run OCR on a stored document and save the extracted text beside it.

    Args:
        fullpath: URL of the source document; passed unchanged to
            ``analyze_read``.
        filename: Name of the source document. The extracted text is uploaded
            under the blob name ``converted/<filename>.txt``.
    """
    # Extract the text from the file
    text = analyze_read(fullpath)
    # Upload the text to Azure Blob Storage. 'text/plain' is the registered
    # MIME type for plain text (the previous 'application/text' is not a
    # registered type, so browsers would not render the file inline).
    upload_file(text, f'converted/{filename}.txt', 'text/plain; charset=utf-8')
|
||
########## START - MAIN ##########
# Streamlit page: upload a PDF, run OCR on it, then list every stored document
# with a link to its converted text or a button that triggers the conversion.
import html  # used below to escape blob names/URLs that are written as raw HTML

try:
    # Set page layout to wide screen and menu item
    menu_items = {
        'Get help': None,
        'Report a bug': None,
        'About': '''
## Embeddings App
Document Reader Sample Demo.
'''
    }
    st.set_page_config(layout="wide", menu_items=menu_items)

    # Hide the default Streamlit menu and footer
    hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

    uploaded_file = st.file_uploader("Upload a document", type=['pdf'])
    if uploaded_file is not None:
        # Upload and OCR only when the selected file differs from the one
        # already recorded in the session state.
        if st.session_state.get('filename', '') != uploaded_file.name:
            # To read file as bytes:
            bytes_data = uploaded_file.getvalue()
            # Upload a new file
            st.session_state['filename'] = uploaded_file.name
            st.session_state['file_url'] = upload_file(bytes_data, st.session_state['filename'])
            # Get OCR with Read API
            st.session_state['text'] = analyze_read(st.session_state['file_url'])

    files_data = get_all_files()

    # Header row of the file table
    cols = st.columns([2,2,1])
    cols[0].write('Original File')
    cols[1].write('Converted File')
    cols[2].write('Convert')

    for x in files_data:
        col1, col2, col3 = st.columns([2,2,1])
        # Blob names originate from uploaded files, so escape everything that
        # is interpolated into HTML rendered with unsafe_allow_html=True.
        safe_name = html.escape(x["filename"])
        col1.write(f'<a href="{html.escape(x["fullpath"])}">{safe_name}</a>', unsafe_allow_html=True)
        if x['converted_path']:
            col2.write(f'<a href="{html.escape(x["converted_path"])}">{safe_name}.txt</a>', unsafe_allow_html=True)
        if not x['converted']:
            col3.button('Convert', key=x['filename']+'_button', on_click=convert_file, args=(x['fullpath'], x['filename']))

except URLError as e:
    st.error(
        """
**This demo requires internet access.**
Connection error: %s
"""
        % e.reason
    )

########## END - MAIN ##########
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import os | ||
from datetime import datetime, timedelta | ||
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, generate_blob_sas, generate_container_sas, ContentSettings | ||
|
||
def upload_file(bytes_data, file_name, content_type='application/pdf'):
    """Upload data as a blob and return a URL that can read it.

    The storage account name, account key and container name are read from the
    ``Blob_Account_Name``, ``Blob_Account_Key`` and ``Blob_Container_Name``
    environment variables.

    Args:
        bytes_data: Payload handed to ``upload_blob``.
        file_name: Blob name to write; an existing blob is overwritten.
        content_type: Content type stored in the blob's content settings.

    Returns:
        The blob URL followed by ``?`` and a read-only SAS token whose expiry
        is set three hours after the call.
    """
    account = os.environ['Blob_Account_Name']
    key = os.environ['Blob_Account_Key']
    connection_string = (
        "DefaultEndpointsProtocol=https;"
        f"AccountName={account};"
        f"AccountKey={key};"
        "EndpointSuffix=core.windows.net"
    )
    container = os.environ['Blob_Container_Name']

    # Resolve a client for the target blob and push the payload
    service = BlobServiceClient.from_connection_string(connection_string)
    target = service.get_blob_client(container=container, blob=file_name)
    target.upload_blob(
        bytes_data,
        overwrite=True,
        content_settings=ContentSettings(content_type=content_type),
    )

    blob_url = target.url
    # Read-only token so the returned link can be opened without the account key
    token = generate_blob_sas(
        account,
        container,
        file_name,
        account_key=key,
        permission="r",
        expiry=datetime.utcnow() + timedelta(hours=3),
    )
    return f"{blob_url}?{token}"
|
||
def get_all_files():
    """List the documents in the blob container with their conversion status.

    The storage account name, account key and container name are read from the
    ``Blob_Account_Name``, ``Blob_Account_Key`` and ``Blob_Container_Name``
    environment variables.

    Returns:
        A list with one dict per blob whose name does not start with
        ``converted/``. Each dict has the keys ``filename`` (blob name),
        ``fullpath`` (URL with a read-only container SAS), ``converted``
        (True when ``converted/<filename>.txt`` exists) and
        ``converted_path`` (URL of that text blob, or ``""``).
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    # Get all files in the container from Azure Blob Storage
    account_name = os.environ['Blob_Account_Name']
    account_key = os.environ['Blob_Account_Key']
    connect_str = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={account_key};EndpointSuffix=core.windows.net"
    container_name = os.environ['Blob_Container_Name']
    # Create the BlobServiceClient object
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    # Get files in the container
    container_client = blob_service_client.get_container_client(container_name)
    blob_list = container_client.list_blobs()
    # One container-level token is shared by every link in the listing
    sas = generate_container_sas(account_name, container_name, account_key=account_key, permission="r", expiry=datetime.utcnow() + timedelta(hours=3))
    base_url = f"https://{account_name}.blob.core.windows.net/{container_name}"

    def blob_url(blob_name):
        # Percent-encode the name so spaces, '#', '?' or '%' in it cannot
        # break the URL; '/' is left intact as the path separator.
        return f"{base_url}/{quote(blob_name)}?{sas}"

    files = []
    converted_files = {}
    for blob in blob_list:
        if blob.name.startswith('converted/'):
            converted_files[blob.name] = blob_url(blob.name)
        else:
            files.append({
                "filename": blob.name,
                "converted": False,
                "fullpath": blob_url(blob.name),
                "converted_path": "",
            })

    # Pair each original with its converted text, when one exists
    for file in files:
        converted_path = converted_files.get(f"converted/{file['filename']}.txt")
        if converted_path is not None:
            file['converted'] = True
            file['converted_path'] = converted_path

    return files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from azure.core.credentials import AzureKeyCredential | ||
from azure.ai.formrecognizer import DocumentAnalysisClient | ||
import os | ||
|
||
def analyze_read(formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/rest-api/read.png"):
    """Analyze the document at a URL with the "prebuilt-read" model.

    The service endpoint and key are read from the ``Form_Recognizer_Endpoint``
    and ``Form_Recognizer_Key`` environment variables.

    Args:
        formUrl: URL of the document to analyze; defaults to a sample image.

    Returns:
        The ``content`` attribute of the analysis result.
    """
    endpoint = os.environ['Form_Recognizer_Endpoint']
    credential = AzureKeyCredential(os.environ['Form_Recognizer_Key'])
    client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)

    # Start the analysis and block until the poller yields its result
    analysis = client.begin_analyze_document_from_url("prebuilt-read", formUrl).result()
    return analysis.content
|