-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathS2QA.py
222 lines (193 loc) · 8.37 KB
/
S2QA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import streamlit as st
from backend import (
create_index,
get_chat_engine,
citation_query_engine,
generate_sample_questions,
)
from datetime import datetime
from utils import (
get_twitter_badge,
get_link_tree_badge,
get_github_badge,
display_questions,
extract_numbers_in_brackets,
generate_used_reference_display,
documents_to_df,
)
import openai
import time
# Shareable-URL format this app understands:
# ?query=deep%20learning%20for%20nlp&num_papers=50&full_text=True
# TODO update URL with query and num_papers after button press: add share button
# TODO add github logo in same style as twitter and linktree
# TODO [](https://<your-custom-subdomain>.streamlit.app)
from supabase import create_client, Client

# Credentials are read from Streamlit's secrets store
# (.streamlit/secrets.toml, section [db_credentials]).
SUPABASE_URL = st.secrets.db_credentials.supabase_url
SUPABASE_KEY = st.secrets.db_credentials.supabase_key
user_uid = st.secrets.db_credentials.user_uid
# Module-level client; used below to insert rows into the "s2qa_logs" table.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
# --- URL query-parameter handling ---------------------------------------
# A research space can be shared by link; read the settings back out of the
# URL so a shared link restores the same query / paper count / mode.
url_params = st.experimental_get_query_params()

# Defaults used when the URL carries no parameters.
num_papers = 10
query = "large language models"
full_text = False

if "query" in url_params:
    query = url_params["query"][0]
if "num_papers" in url_params:
    num_papers = int(url_params["num_papers"][0])
if "full_text" in url_params:
    # Bug fix: bool("False") is True — bool() on any non-empty string is
    # truthy, so a shared URL with full_text=False used to enable full-text
    # mode. Compare the text explicitly instead.
    full_text = url_params["full_text"][0].strip().lower() == "true"
def dump_logs_to_supabase(query, response, success=True):
    """Insert one query/response pair into the Supabase ``s2qa_logs`` table.

    Parameters
    ----------
    query : str
        The user question (or research-space string) that was asked.
    response : str
        The full generated answer text.
    success : bool, optional
        Whether the query completed without error. Defaults to True.
    """
    # UTC timestamp, formatted "YYYY-MM-DD HH:MM:SS" — same output as the
    # old datetime.utcfromtimestamp(time.time()) round-trip, but without
    # using the API deprecated in Python 3.12.
    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    query_details = {
        "timestamp": current_time,
        "query": query,
        "response": response,
        "success": success,
    }
    supabase.table("s2qa_logs").insert(query_details).execute()
# --- Sidebar: research-space controls -----------------------------------
with st.sidebar:
    st.title("📚🤖 S2QA: Query your Research Space")
    # Social badges rendered as raw HTML.
    st.markdown(
        f" {get_github_badge()} {get_twitter_badge()} ",
        unsafe_allow_html=True,
    )
    st.markdown("Ask deeper questions about your research space")
    # openai_api_key = st.text_input("OpenAI API Key", "OPENAI_API_KEY", type="password")
    # The OpenAI key comes from Streamlit secrets, not from user input.
    openai.api_key = st.secrets.db_credentials.openai_key
    openai_api_key = openai.api_key
    # "🔑 [Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
    research_space_box = st.empty()
    # hidden text input placeholder
    # Pre-filled with `query`, which may have come from the shared URL.
    research_space_query = st.text_input(
        "Enter your research space, \n e.g. machine learning, large language models, covid 19 vaccine",
        query,
    )
    # NOTE(review): a num_papers URL value outside the 5-10 slider range
    # will make this call raise — confirm and clamp upstream if needed.
    num_papers = st.slider("Number of papers you want to chat with ", 5, 10, num_papers)
    full_text = st.toggle(
        "Full Text (initial setup is slow as we first download the pdfs: default set to 10 papers)"
    )
    if full_text:
        # Full-text mode downloads PDFs, so cap the paper count to keep
        # the initial setup time reasonable.
        full_text = True
        num_papers = 5
    button = st.button("Set Research Space", type="primary")
    num_papers = int(num_papers)
# Welcome to S2QA
# Static landing-page copy shown in the main area on every run.
st.markdown(
    """
# 🚀 Welcome to S2QA 🚀
## 🧩 Ask deeper questions about your research
### How to use this tool? 🧐
📌 **Step One:** Enter your research space (For example: machine learning, large language models, covid 19 vaccine)
📌 **Step Two:** Select the number of papers you want to get the answers from
📌 **Step Three:** Click on the button "Set Research Space" 🖱️. Please be patient, this could take a few minutes to set up the index
📌 **Step Four:** Congrats🎉! Now you have set your research space, you can delve into and ask questions about it
📌 **Step Five:** Once a research space is established, you can share the URL with your colleagues 👥 to collaborate on the research space
Happy Exploring! 🕵️♂️
"""
)
# --- Research-space setup: runs once when the sidebar button is pressed --
if button and research_space_query:
    # Flag consumed by the chat section below on subsequent reruns.
    st.session_state["show_chat"] = True
    with st.sidebar:
        if not openai_api_key:
            st.info("Please add your OpenAI API key to continue.")
            st.stop()
        with st.status("🦙🦙 LlaMa's are working together . . ."):
            st.write("Fetching papers for research space: " + research_space_query)
            try:
                # Fetch papers and build the index over them.
                index, documents = create_index(
                    research_space_query.lower(), num_papers, full_text
                )
                # Persist settings in the URL so the space can be shared by link.
                st.experimental_set_query_params(
                    query=[research_space_query],
                    num_papers=[num_papers],
                    full_text=[full_text],
                )
            except Exception as e:
                st.error("Error creating index: " + str(e))
                st.error(
                    "Please check your API key or try reducing the number of papers."
                )
                st.stop()
            st.write("Getting Query Engine ready . . .")
            sample_questions = generate_sample_questions(documents)
            # NOTE(review): positional args presumably (index, top_k,
            # streaming, chunk_size) — confirm against backend.citation_query_engine.
            chat_engine = citation_query_engine(index, 10, True, 512)
            # Cache engine + documents in session_state so follow-up
            # questions on later reruns reuse them without rebuilding.
            st.session_state["chat_engine"] = chat_engine
            st.session_state["documents"] = documents
    st.markdown(display_questions(sample_questions))
    with st.expander("📚 Papers in the index: ", expanded=False):
        st.dataframe(documents_to_df(documents))
    st.success(
        "###### 🤖 Summary of Research Space *"
        + research_space_query.lower()
        + "* with "
        + str(num_papers)
        + " papers is ready 🚀"
    )
    with st.chat_message("assistant"):
        if "messages" not in st.session_state:
            st.session_state.messages = []
        # First automatic answer: an elaboration of the research space itself.
        response = chat_engine.query("elaborate on " + research_space_query)
        full_response = ""
        placeholder = st.empty()
        # Stream tokens into the placeholder with a cursor glyph.
        for text in response.response_gen:
            # Appending response content if available
            full_response += text
            # Displaying the response to the user
            placeholder.markdown(full_response + "▌")
        # Pull bracketed citation numbers (e.g. "[1]") out of the answer text.
        used_nodes = extract_numbers_in_brackets(full_response)
        dump_logs_to_supabase(research_space_query, full_response, success=True)
        if used_nodes:
            # Append the titles of the sources actually cited in the answer.
            list_titles = generate_used_reference_display(
                response.source_nodes, used_nodes
            )
            full_response = str(full_response) + list_titles
            documents = st.session_state["documents"]
            questions = display_questions(generate_sample_questions(documents))
        else:
            questions = ""
        placeholder.markdown(full_response + "\n" + questions)
        st.session_state.messages.append(
            {"role": "assistant", "content": full_response}
        )
        # st.session_state.messages = []
# --- Chat loop: shown only after a research space has been set up --------
if st.session_state.get("show_chat", False):
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Replay prior conversation. NOTE(review): [1:] skips the first stored
    # message — presumably the auto-generated summary already rendered by
    # the setup section; confirm.
    for message in st.session_state.messages[1:]:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    if prompt := st.chat_input("Ask me anything about " + research_space_query):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            # The user prompt was just appended, so messages[-1] is it.
            last_query = st.session_state.messages[-1]["content"]
            # Reuse the engine built during setup (cached in session_state).
            chat_engine = st.session_state["chat_engine"]
            response = chat_engine.query(last_query)
            # Stream tokens into the placeholder with a cursor glyph.
            for text in response.response_gen:
                # Appending response content if available
                full_response += text
                # Displaying the response to the user
                message_placeholder.markdown(full_response + "▌")
            dump_logs_to_supabase(last_query, full_response, success=True)
            # Bracketed citation numbers (e.g. "[1]") found in the answer.
            used_nodes = extract_numbers_in_brackets(full_response)
            if used_nodes:
                # Append the titles of the sources actually cited.
                list_titles = generate_used_reference_display(
                    response.source_nodes, used_nodes
                )
                full_response = str(full_response) + list_titles
                documents = st.session_state["documents"]
                questions = display_questions(generate_sample_questions(documents))
                # questions = ""
            else:
                questions = ""
            message_placeholder.markdown(full_response + "\n" + questions)
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response}
            )