#!/usr/bin/python3
"""
Web based LLM Lab Bench
Web based LLM Lab Bench is a web application that lets you interact with a Language
Model (LLM) through an OpenAI-compatible API. The interface provides tools to test the
LLM with RAG prompts backed by different vector databases. The application is built on
FastAPI and served by the Uvicorn ASGI high-speed web server.
Features:
* Uses the OpenAI API to talk to the LLM
* Works with locally hosted OpenAI API compatible LLMs, e.g. llama-cpp-python[server]
* Streams the response, rendering LLM chunks as they arrive instead of waiting for the full response
* Supports multiple concurrent client sessions
* Supports commands to reset context, get version, etc.
* Uses FastAPI and the Uvicorn ASGI high-speed web server
* (Optional) Supports RAG prompts using a Weaviate vector database
Requirements:
* pip install fastapi uvicorn python-socketio jinja2 openai bs4 pypdf requests lxml aiohttp
* pip install weaviate-client (optional, for Weaviate RAG support)
Environment variables:
* PORT - Port for the web server to listen on
* PROMPT_FILE - File to store system prompts
* PROMPT_RO - Set to True to make system prompts read-only
* DEBUG - Set to True to enable debug mode
* OPENAI_API_KEY - Required only for OpenAI
* OPENAI_API_BASE - URL to OpenAI API Server or locally hosted version
* LLM_MODEL - LLM Model to Use
* USE_SYSTEM - Use system role in chat prompt if True
* TOKEN - Secret TinyLLM token for admin functions (e.g. /alert)
* ONESHOT - Set to True to run without conversation context
* RAG_ONLY - Set to True to enable RAG only mode
* MAXCLIENTS - Maximum number of clients to allow
* MAXTOKENS - Maximum number of tokens to send to LLM
* TEMPERATURE - LLM temperature
* AGENT_NAME - Name for Bot
* WEAVIATE_HOST - Weaviate Host for RAG (Optional)
* WEAVIATE_LIBRARY - Weaviate Library for RAG (Optional)
* RESULTS - Number of results to return from RAG query
* ALPHA_KEY - Alpha Vantage API key for stock lookups (Optional)
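Example startup (hypothetical values - adjust for your environment):
* OPENAI_API_BASE=http://localhost:8000/v1 LLM_MODEL=gpt-3.5-turbo PORT=5000 python3 server.py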
Author: Jason A. Cox
17 Jun 2024
https://github.com/jasonacox/TinyLLM
"""
# Import Libraries
import asyncio
import datetime
import io
import json
import logging
import os
import time
import openai
import requests
import socketio
import uvicorn
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from fastapi.responses import FileResponse
from pypdf import PdfReader
import aiohttp
# TinyLLM Version
VERSION = "v0.14.7"
# Set up logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(levelname)s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.info("TinyLLM %s" % VERSION)
def log(text):
logger.info(text)
# Configuration Settings
api_key = os.environ.get("OPENAI_API_KEY", "open_api_key") # Required, use bogus string for Llama.cpp
api_base = os.environ.get("OPENAI_API_BASE", "http://localhost:8000/v1") # Required, use https://api.openai.com for OpenAI
agentname = os.environ.get("AGENT_NAME", "") # Set the name of your bot
mymodel = os.environ.get("LLM_MODEL", "models/7B/gguf-model.bin") # Pick model to use e.g. gpt-3.5-turbo for OpenAI
DEBUG = os.environ.get("DEBUG", "false").lower() == "true" # Set to True to enable debug mode
MAXCLIENTS = int(os.environ.get("MAXCLIENTS", 1000)) # Maximum number of concurrent clients
MAXTOKENS = int(os.environ.get("MAXTOKENS", 16*1024)) # Maximum number of tokens to send to LLM
TEMPERATURE = float(os.environ.get("TEMPERATURE", 0.0)) # LLM temperature
PORT = int(os.environ.get("PORT", 5000)) # Port to listen on
PROMPT_FILE = os.environ.get("PROMPT_FILE", ".tinyllm/prompts.json") # File to store system prompts
PROMPT_RO = os.environ.get("PROMPT_RO", "false").lower() == "true" # Set to True to enable read-only prompts
USE_SYSTEM = os.environ.get("USE_SYSTEM", "false").lower() == "true" # Use system in chat prompt if True
TOKEN = os.environ.get("TOKEN", "secret") # Secret TinyLLM token for admin functions
ONESHOT = os.environ.get("ONESHOT", "false").lower() == "true" # Set to True to enable one-shot mode
RAG_ONLY = os.environ.get("RAG_ONLY", "false").lower() == "true" # Set to True to enable RAG only mode
# RAG Configuration Settings
WEAVIATE_HOST = os.environ.get("WEAVIATE_HOST", "") # Empty = no Weaviate support
WEAVIATE_LIBRARY = os.environ.get("WEAVIATE_LIBRARY", "tinyllm") # Weaviate library to use
RESULTS = int(os.environ.get("RESULTS", 1)) # Number of results to return from RAG query
ALPHA_KEY = os.environ.get("ALPHA_KEY", "alpha_key") # Optional - Alpha Vantage API Key
# Prompt Defaults
default_prompts = {}
default_prompts["greeting"] = "Hi"
default_prompts["agentname"] = "Jarvis"
default_prompts["baseprompt"] = "You are TinyLLM, a highly intelligent assistant. The current date is {date}.\n\nYou should give concise responses to very simple questions, but provide thorough responses to more complex and open-ended questions."
default_prompts["weather"] = "You are a weather forecaster. Keep your answers brief and accurate. Current date is {date} and weather conditions:\n[DATA]{context_str}[/DATA]\nProvide a weather update, current weather alerts, conditions, precipitation and forecast for {location} and answer this: {prompt}."
default_prompts["stock"] = "You are a stock analyst. Keep your answers brief and accurate. Current date is {date}."
default_prompts["news"] = "You are a newscaster who specializes in providing headline news. Use only the following context provided by Google News to summarize the top 10 headlines for today. Rank headlines by most important to least important. Always include the news organization and ID. Do not add any commentary.\nAlways use this format:\n#. [News Item] - [News Source] - LnkID:[ID]\nHere are some examples: \n1. The World is Round - Science - LnkID:91\n2. The Election is over and Children have won - US News - LnkID:22\n3. Storms Hit the Southern Coast - ABC - LnkID:55\nContext: {context_str}\nTop 10 Headlines with Source and LnkID:"
default_prompts["clarify"] = "You are a highly intelligent assistant. Keep your answers brief and accurate. {format}."
default_prompts["location"] = "What location is specified in this prompt, state None if there isn't one. Use a single word answer. [BEGIN] {prompt} [END]"
default_prompts["company"] = "What company is related to the stock price in this prompt? Please state none if there isn't one. Use a single word answer: [BEGIN] {prompt} [END]"
default_prompts["rag"] = "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Back up your answer using facts from the following context.\\nContext: {context_str}\\nQuestion: {prompt}\\nAnswer:"
default_prompts["website"] = "Summarize the following text from URL {url}:\n[BEGIN] {website_text} [END]\nThe above article is about:"
default_prompts["LLM_temperature"] = TEMPERATURE
default_prompts["LLM_max_tokens"] = MAXTOKENS
# Log ONE_SHOT mode
if ONESHOT:
log("ONESHOT mode enabled.")
# Test OpenAI API
def test_model():
global api_key, api_base, mymodel, MAXTOKENS
log("Testing OpenAI API...")
try:
log(f"Using openai library version {openai.__version__}")
log(f"Connecting to OpenAI API at {api_base} using model {mymodel}")
llm = openai.OpenAI(api_key=api_key, base_url=api_base)
# Get models
try:
models = llm.models.list()
if len(models.data) == 0:
log("LLM: No models available - proceeding.")
except Exception as e:
log(f"LLM: Unable to get models, using default: {str(e)}")
models = mymodel
else:
# build list of models
model_list = [model.id for model in models.data]
log(f"LLM: Models available: {model_list}")
if not mymodel in model_list:
log(f"LLM: Model {mymodel} not found in models list.")
if len(model_list) == 1:
log("LLM: Switching to default model")
mymodel = model_list[0]
else:
log(f"LLM: Unable to find requested model {mymodel} in models list.")
raise Exception(f"Model {mymodel} not found")
# Test LLM
log(f"LLM: Using and testing model {mymodel}")
llm.chat.completions.create(
model=mymodel,
max_tokens=MAXTOKENS,
stream=False,
temperature=TEMPERATURE,
messages=[{"role": "user", "content": "Hello"}],
extra_body={"stop_token_ids":[128001, 128009]},
)
return True
except Exception as e:
log("OpenAI API Error: %s" % e)
log(f"Unable to connect to OpenAI API at {api_base} using model {mymodel}.")
if "maximum context length" in str(e):
if MAXTOKENS > 1024:
MAXTOKENS = int(MAXTOKENS / 2)
log(f"LLM: Maximum context length exceeded reducing MAXTOKENS to {MAXTOKENS}.")
return False
while True:
if test_model():
break
else:
log("Sleeping 5 seconds...")
time.sleep(5)
# Test Weaviate Connection
if WEAVIATE_HOST != "":
import weaviate
import weaviate.classes as wvc
try:
client = weaviate.connect_to_local(
host=WEAVIATE_HOST,
port=8080,
grpc_port=50051,
additional_config=weaviate.config.AdditionalConfig(timeout=(15, 115))
)
log(f"RAG: Connected to Weaviate at {WEAVIATE_HOST}")
client.close()
except Exception as e:
log(f"RAG: Unable to connect to Weaviate at {WEAVIATE_HOST}: {str(e)}")
WEAVIATE_HOST = "" # Disable RAG support
log("RAG: RAG support disabled.")
# Find document closely related to query
def query_index(query, library, num_results=RESULTS):
references = "References:"
content = ""
try:
client = weaviate.connect_to_local(
host=WEAVIATE_HOST,
port=8080,
grpc_port=50051,
additional_config=weaviate.config.AdditionalConfig(timeout=(15, 115))
)
hr = client.collections.get(library)
results = hr.query.near_text(
query=query,
limit=num_results
)
for ans in results.objects:
references = references + f"\n - {ans.properties['title']} - {ans.properties['file']}"
content = content + f"Document: {ans.properties['title']}\nDocument Source: {ans.properties['file']}\nContent: {ans.properties['content']}\n---\n"
if (len(content)/4) > MAXTOKENS/2:
break
client.close()
log(f"RAG: Retrieved: {references}")
return content, references
except Exception as er:
log(f"Error querying Weaviate: {str(er)}")
return None, None
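# Example (illustrative): pull the top matching document from the default library
#   content, references = query_index("vector databases", WEAVIATE_LIBRARY, num_results=1)
#   # content -> concatenated document text used to build the RAG prompt
#   # references -> "References:" list of matching titles/files for display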
# Globals
client = {}
prompts = {}
stats = {
"start_time": time.time(),
"errors": 0,
"ask": 0,
"ask_llm": 0,
}
#
# Configure FastAPI App and SocketIO
#
log("Starting server...")
sio = socketio.AsyncServer(async_mode="asgi")
socket_app = socketio.ASGIApp(sio)
app = FastAPI()
# Load system prompts from PROMPT_FILE
def load_prompts():
global prompts
try:
with open(PROMPT_FILE, "r") as f:
prompts = json.load(f)
# Ensure prompts always include all keys from default_prompts
for k in default_prompts:
if k not in prompts:
prompts[k] = default_prompts[k]
except:
log(f"Unable to load system prompts file {PROMPT_FILE}, creating with defaults.")
reset_prompts()
save_prompts()
# Save prompts to PROMPT_FILE
def save_prompts():
global prompts
try:
os.makedirs(os.path.dirname(PROMPT_FILE), exist_ok=True) # Create path if it doesn't exist
with open(PROMPT_FILE, "w") as f:
json.dump(prompts, f)
log(f"Saved {len(prompts)} prompts.")
except:
log("Unable to save prompts.")
# Expand variables in prompt to values
def expand_prompt(prompt, values):
# Always use current {date} and {time}
current_date = datetime.datetime.now()
values["date"] = current_date.strftime("%B %-d, %Y")
values["time"] = current_date.strftime("%-I:%M %p")
for k in values:
prompt = prompt.replace(f"{{{k}}}", values[k])
return prompt
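# Example (illustrative): {date} and {time} are always filled in with current values
#   expand_prompt("Hello {agentname}, today is {date}.", {"agentname": "Jarvis"})
#   # -> "Hello Jarvis, today is June 17, 2024."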
# Reset prompts
def reset_prompts():
global prompts
prompts = {}
for k in default_prompts:
prompts[k] = default_prompts[k]
# Load prompts
load_prompts()
log(f"Loaded {len(prompts)} prompts.")
# Function to return base conversation prompt
def base_prompt(content=None, template=None):
global baseprompt, agentname, USE_SYSTEM, prompts
if agentname == "":
agentname = prompts["agentname"]
current_date = datetime.datetime.now()
formatted_date = current_date.strftime("%B %-d, %Y")
values = {"agentname": agentname, "date": formatted_date}
if template:
baseprompt = expand_prompt(template, values)
else:
baseprompt = expand_prompt(prompts["baseprompt"], values)
if not content:
content = baseprompt
if USE_SYSTEM:
return [{"role": "system", "content": content}]
else:
return [{"role": "user", "content": content}, {"role": "assistant", "content": "Okay, let's get started."}]
# Function - Send user prompt to LLM for response
async def ask(prompt, sid=None, ctx="context"):
global client, stats
stats["ask"] += 1
response = False
while not response:
try:
# remember context
if ONESHOT:
client[sid][ctx] = base_prompt()
client[sid][ctx].append({"role": "user", "content": prompt})
log(f"messages = {client[sid]['context']} - model = {mymodel}")
llm = openai.OpenAI(api_key=api_key, base_url=api_base)
response = llm.chat.completions.create(
model=mymodel,
max_tokens=MAXTOKENS,
stream=True, # Send response chunks as LLM computes next tokens
temperature=TEMPERATURE,
messages=client[sid][ctx],
extra_body={"stop_token_ids":[128001, 128009]},
)
except openai.OpenAIError as er:
# If we get an error, try to recover
client[sid][ctx].pop()
if "does not exist" in str(er):
await sio.emit('update', {'update': '[Model Unavailable... Retrying]', 'voice': 'user'},room=sid)
log(f"Model does not exist - retrying")
test_model()
await sio.emit('update', {'update': mymodel, 'voice': 'model'})
elif "maximum context length" in str(er):
if len(prompt) > 1000:
# assume we have very large prompt - cut out the middle
prompt = prompt[:len(prompt)//4] + " ... " + prompt[-len(prompt)//4:]
log(f"Reduce prompt size - now ~{len(prompt)/4}")
elif len(client[sid][ctx]) > 4:
# our context has grown too large, truncate the top
client[sid][ctx] = client[sid][ctx][:1] + client[sid][ctx][3:]
log(f"Truncate context: ~{len(client[sid][ctx])/4}")
else:
# our context has grown too large, reset
client[sid][ctx] = base_prompt()
log(f"Reset context ~{len(client[sid][ctx])/4}")
else:
log(f"ERROR: {er}")
stats["errors"] += 1
await sio.emit('update', {'update': str(er), 'voice': 'user'}, room=sid)
if not client[sid]["remember"]:
client[sid]["remember"] = True
client[sid][ctx].pop()
client[sid][ctx].append({"role": "user", "content": "Help me remember."})
return response
async def ask_llm(query, format=""):
# Ask LLM a question
global stats
stats["ask_llm"] += 1
if format == "":
format = f"Respond in {format}."
content = base_prompt(expand_prompt(prompts["clarify"], {"format": format})) + [{"role": "user",
"content": query}]
log(f"ask_llm: {content}")
llm = openai.AsyncOpenAI(api_key=api_key, base_url=api_base)
response = await llm.chat.completions.create(
model=mymodel,
max_tokens=MAXTOKENS,
stream=False,
temperature=TEMPERATURE,
messages=content,
extra_body={"stop_token_ids":[128001, 128009]},
)
log(f"ask_llm -> {response.choices[0].message.content.strip()}")
return response.choices[0].message.content.strip()
# Function - Get weather for location
async def get_weather(location):
# Look up weather for location
if location == "":
location = "Los Angeles"
location = location.replace(" ", "+")
url = "https://wttr.in/%s?format=j2" % location
log(f"Fetching weather for {location} from {url}")
response = requests.get(url)
if response.status_code == 200:
return response.text
else:
return "Unable to fetch weather for %s" % location
# Function - Get stock price for company
async def get_stock(company):
if ALPHA_KEY == "alpha_key":
return "Unable to fetch stock price for %s - No Alpha Vantage API Key" % company
# First try to get the ticker symbol
symbol = await ask_llm(f"What is the stock symbol for {company}? Respond with symbol.")
if "none" in symbol.lower():
return "Unable to fetch stock price for %s - No matching symbol" % company
# Check to see if response has multiple lines and if so, pick the first one
symbol = symbol.split("\n")[0].strip()
# Check to see if there are multiple words and if so, pick the last one
if len(symbol.split()) > 1:
symbol = symbol.split()[-1]
# Strip off any spaces or non-alpha characters
symbol = ''.join(e for e in symbol if e.isalnum())
# Now get the stock price
url = "https://www.alphavantage.co/query?function=GLOBAL_QUOTE&symbol=%s&apikey=%s" % (symbol.upper(), ALPHA_KEY)
log(f"Fetching stock price for {company} from {url}")
response = requests.get(url)
if response.status_code == 200:
try:
price = response.json()["Global Quote"]["05. price"]
return f"The price of {company} (symbol {symbol}) is ${price}."
except:
return "Unable to fetch stock price for %s - No data available." % company
else:
return "Unable to fetch stock price for %s - API request failed." % company
# Function - Get news for topic
async def get_top_articles(url, max=10):
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
soup = BeautifulSoup(await response.text(), 'xml')
items = soup.findAll('item')
articles = ""
links = {}
count = 1
for item in items:
title = item.find('title').string.strip()
#pubdate = item.find('pubDate').string.strip()
#description = item.find('description').string.strip()
link = item.find('link').string.strip()
links[f"LnkID:{count+100}"] = link
articles += f"Headline: {title} - LnkID:{count+100}\n"
count += 1
if count > max:
break
return articles, links
# Function - Fetch news for topic
async def get_news(topic, max=10):
if "none" in topic.lower() or "current" in topic.lower():
url = "https://news.google.com/rss/"
else:
topic = topic.replace(" ", "+")
url = "https://news.google.com/rss/search?q=%s" % topic
log(f"Fetching news for {topic} from {url}")
articles, links = await get_top_articles(url, max)
return articles, links
# Function - Extract text from URL
async def extract_text_from_url(url):
try:
async with aiohttp.ClientSession() as session:
async with session.get(url, allow_redirects=True) as response:
if response.status == 200:
# Route extraction based on content type
if ";" in response.headers["Content-Type"]:
content_type = response.headers["Content-Type"].split(";")[0]
else:
content_type = response.headers["Content-Type"]
content_handlers = {
"application/pdf": extract_text_from_pdf,
"text/plain": extract_text_from_text,
"text/csv": extract_text_from_text,
"text/xml": extract_text_from_text,
"application/json": extract_text_from_text,
"text/html": extract_text_from_html,
"application/xml": extract_text_from_text,
}
if content_type in content_handlers:
return await content_handlers[content_type](response)
else:
return "Unsupported content type"
else:
m = f"Failed to fetch the webpage. Status code: {response.status}"
log(m)
return m
except Exception as er:
log(f"An error occurred: {str(er)}")
# Function - Extract text from PDF
async def extract_text_from_pdf(response):
# Convert PDF to text
pdf_content = await response.read()
pdf2text = ""
f = io.BytesIO(pdf_content)
reader = PdfReader(f)
for page in reader.pages:
pdf2text = pdf2text + (page.extract_text() or "") + "\n"
return pdf2text
# Function - Extract text from text
async def extract_text_from_text(response):
return await response.text()
# Function - Extract text from HTML
async def extract_text_from_html(response):
html_content = await response.text()
# get title of page from html
source = "Document Source: " + str(response.url)
soup = BeautifulSoup(html_content, 'html.parser')
title = ("Document Title: " + soup.title.string + "\n") if soup.title else ""
paragraphs = soup.find_all(['p', 'code', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'pre', 'ol'])
website_text = f"{title}{source}\nDocument Content:\n" + '\n\n'.join([p.get_text() for p in paragraphs])
return website_text
#
# FastAPI Routes
#
templates = Jinja2Templates(directory="templates")
# Display the main chatbot page
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
return templates.TemplateResponse(request, "index.html")
# Serve static socket.io.js
@app.get("/socket.io.js")
def serve_socket_io_js():
return FileResponse("templates/socket.io.js", media_type="application/javascript")
# Display settings and stats
@app.get("/stats")
async def home(format: str = None):
global client, stats
# Create a simple status page
data = {
"TinyLLM Chatbot Version": VERSION,
"Start Time": datetime.datetime.fromtimestamp(stats["start_time"]).strftime("%Y-%m-%d %H:%M:%S"),
"Uptime": str(datetime.timedelta(seconds=int(time.time() - stats["start_time"]))),
"Errors": stats["errors"],
"User Queries": stats["ask"],
"LLM Queries": stats["ask_llm"],
"OpenAI API Key (OPENAI_API_KEY)": "************" if api_key != "" else "Not Set",
"OpenAI API URL (OPENAI_API_URL)": api_base,
"Agent Name (AGENT_NAME)": agentname,
"LLM Model (LLM_MODEL)": mymodel,
"Debug Mode (DEBUG)": DEBUG,
"Current Clients (MAXCLIENTS)": f"{len(client)} of {MAXCLIENTS}",
"LLM Max tokens Limit (MAXTOKENS)": MAXTOKENS,
"LLM Temperature (TEMPERATURE)": TEMPERATURE,
"Server Port (PORT)": PORT,
"Saved Prompts (PROMPT_FILE)": PROMPT_FILE,
"Read-Only Prompts (PROMPT_RO)": PROMPT_RO,
"LLM System Tags in Prompts (USE_SYSTEM)": USE_SYSTEM,
"Run without conversation context (ONESHOT).": ONESHOT,
"RAG: Run in RAG Only Mode (RAG_ONLY)": RAG_ONLY,
"RAG: Weaviate (WEAVIATE_HOST)": WEAVIATE_HOST,
"RAG: default Library (WEAVIATE_LIBRARY)": WEAVIATE_LIBRARY,
"RAG: Default Results Retrieved (RESULTS)": RESULTS,
}
if format == "json":
return data
# Build a simple HTML page based on data facets
html = "<html><head><title>TinyLLM Chatbot Status</title>"
html += "<style>body { font-family: Helvetica, Arial, sans-serif; }</style>"
html += "</head><body>"
html += "<h1>TinyLLM Chatbot Status</h1>"
# Provide link to project
html += "<p>Settings and Current Status for <a href='https://github.com/jasonacox/TinyLLM/tree/main/chatbot'>TinyLLM Chatbot</a></p>"
html += "<table>"
for key in data:
html += f"<tr><td>{key}</td><td>{data[key]}</td></tr>"
html += "</table>"
# Add JS to refresh page every 5 seconds
html += "<script>setTimeout(function(){location.reload()},5000);</script>"
html += "</body></html>"
return HTMLResponse(content=html, status_code=200)
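# Example (illustrative, assumes default PORT): fetch machine-readable stats
#   requests.get("http://localhost:5000/stats?format=json").json()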
# Return the current prompts
@app.get('/prompts')
async def get_prompts():
global prompts
# Update TEMPERATURE and MAXTOKENS
prompts["LLM_temperature"] = TEMPERATURE
prompts["LLM_max_tokens"] = MAXTOKENS
if PROMPT_RO:
prompts["READONLY"] = True
return prompts
# POST requests to update prompts
@app.post('/saveprompts')
async def update_prompts(data: dict):
global prompts, baseprompt, sio, TEMPERATURE, MAXTOKENS, agentname
if PROMPT_RO:
return ({"Result": "Prompts are read-only"})
oldbaseprompt = prompts["baseprompt"]
oldagentname = prompts["agentname"]
log(f"Received prompts: {data}")
# Update prompts
for key in data:
prompts[key] = data[key]
save_prompts()
if oldbaseprompt != prompts["baseprompt"] or oldagentname != prompts["agentname"]:
# Update baseprompt
agentname = prompts["agentname"]
current_date = datetime.datetime.now()
formatted_date = current_date.strftime("%B %-d, %Y")
values = {"agentname": agentname, "date": formatted_date}
baseprompt = expand_prompt(prompts["baseprompt"], values)
# Update TEMPERATURE and MAXTOKENS
if "LLM_temperature" in data:
TEMPERATURE = float(data["LLM_temperature"])
if "LLM_max_tokens" in data:
MAXTOKENS = int(data["LLM_max_tokens"])
# Notify all clients of update
log("Base prompt updated - notifying users")
await sio.emit('update', {'update': '[Prompts Updated - Refresh to reload]', 'voice': 'user'})
return ({"Result": "Prompts updated"})
# Reset prompts to default
@app.get('/resetprompts')
async def reset_prompts_route():
# Send the user the default prompts
global default_prompts
return (default_prompts)
# Return the current version and LLM model
@app.get('/version')
async def show_version_web():
global VERSION, DEBUG
log(f"Version requested - DEBUG={DEBUG}")
if DEBUG:
return {'version': "%s DEBUG MODE" % VERSION}
return {'version': VERSION, 'model': mymodel}
# Send an alert to all clients
@app.post('/alert')
async def alert(data: dict):
# Send an alert to all clients
# Make sure TOKEN is set and matches
if "token" in data and "message" in data and data["token"] == TOKEN:
log(f"Received alert: {data}")
await sio.emit('update', {'update': data["message"], 'voice': 'user'})
return ({'status': 'Alert sent'})
else:
log(f"Invalid token or missing message: {data}")
return ({'status': 'Invalid Token or missing message'})
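# Example (illustrative, assumes default PORT and TOKEN): broadcast a message to all clients
#   requests.post("http://localhost:5000/alert", json={"token": "secret", "message": "Server restarting at 5pm"})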
#
# SocketIO Events
#
app.mount("/", socket_app) # Here we mount socket app to main fastapi app
# Client connected - start thread to send updates
@sio.on('connect')
async def handle_connect(session_id, env):
log(f"Client connected: {session_id}")
# Convert each character to its hex representation
def string_to_hex(input_string):
hex_values = [hex(ord(char)) for char in input_string]
return hex_values
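# Example: string_to_hex("Hi") -> ['0x48', '0x69']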
# Continuous thread to send updates to connected clients
async def send_update(session_id, watch="prompt"):
global client
if watch == "prompt":
topic = "update"
ctx = "context"
else:
topic = "rag"
ctx = "ragcontext"
log(f"Starting send_update thread for {session_id}")
# Verify session is valid
if session_id not in client:
log(f"Invalid session {session_id}")
return
client[session_id][watch] = ""
try:
while not client[session_id]["stop_thread_flag"]:
if client[session_id][watch] == "":
await sio.sleep(0.1)
else:
try:
# Ask LLM for answers
response = await ask(client[session_id][watch], session_id, ctx)
completion_text = ''
tokens = 0
# iterate through the stream of events and relay chunks to the client
stime = time.time()
for event in response:
event_text = event.choices[0].delta.content
tokens += 1
if event_text:
chunk = event_text
completion_text += chunk
if DEBUG:
print(string_to_hex(chunk), end="")
print(f" = [{chunk}]")
await sio.emit(topic, {'update': chunk, 'voice': 'ai'},room=session_id)
# Update footer with stats
await sio.emit(topic, {'update':
f"TinyLLM Chatbot {VERSION} - {mymodel} - Tokens: {tokens} - TPS: {tokens/(time.time()-stime):.1f}",
'voice': 'footer'},room=session_id)
# Check for link injection
if client[session_id]["links"]:
await sio.emit(topic, {'update': json.dumps(client[session_id]["links"]), 'voice': 'links'},room=session_id)
client[session_id]["links"] = ""
# Check for references
if client[session_id]["references"]:
await sio.emit(topic, {'update': client[session_id]["references"], 'voice': 'ref'},room=session_id)
client[session_id]["references"] = ""
if not ONESHOT:
# remember context
client[session_id][ctx].append({"role": "assistant", "content" : completion_text})
except Exception as er:
# Unable to process prompt, give error
log(f"ERROR {er}")
await sio.emit(topic, {'update': 'An error occurred - unable to complete.', 'voice': 'ai'},room=session_id)
# Reset context
client[session_id][ctx] = base_prompt()
# Signal response is done
await sio.emit(topic, {'update': '', 'voice': 'done'},room=session_id)
client[session_id][watch] = ''
if DEBUG:
print(f"AI: {completion_text}")
except KeyError:
log(f"Thread ended: {session_id}")
except Exception as er:
log(f"Thread error: {er}")
if session_id in client:
# Client reconnected - restart thread
#client[session_id]["thread"].join()
log(f"Client reconnected: {session_id}")
# Send defaults to client
log(f"Sending defaults to {session_id}")
data = {
"baseprompt": client[session_id]["baseprompt"],
"ragprompt": client[session_id]["ragprompt"],
"vectorNum": client[session_id]["vectorNum"],
"vectordb": client[session_id]["vectordb"],
}
await sio.emit('defaults', data, room=session_id)
else:
# New client connected
log(f"New client connected: {session_id}")
# Limit number of clients
if len(client) > MAXCLIENTS:
log(f"Too many clients connected: {len(client)}")
await sio.emit('update', {'update': 'Too many clients connected. Try again later.', 'voice': 'user'},room=session_id)
return
# Create client session
client[session_id]={}
# Initialize context for this client
client[session_id]["context"] = base_prompt()
client[session_id]["ragcontext"] = base_prompt()
client[session_id]["remember"] = True
client[session_id]["visible"] = True
client[session_id]["prompt"] = ""
client[session_id]["stop_thread_flag"] = False
client[session_id]["references"] = ""
client[session_id]["links"] = {}
# Set up defaults for Lab
client[session_id]["baseprompt"] = prompts["baseprompt"]
client[session_id]["ragprompt"] = prompts["rag"]
client[session_id]["vectorNum"] = RESULTS
client[session_id]["vectordb"] = WEAVIATE_LIBRARY
# Send defaults to client
log(f"Sending defaults to {session_id}")
data = {
"baseprompt": client[session_id]["baseprompt"],
"ragprompt": client[session_id]["ragprompt"],
"vectorNum": client[session_id]["vectorNum"],
"vectordb": client[session_id]["vectordb"],
}
await sio.emit('defaults', data, room=session_id)
# Start continuous tasks to send updates
asyncio.create_task(send_update(session_id, "prompt"))
asyncio.create_task(send_update(session_id, "rag"))
# Client disconnected
@sio.on('disconnect')
async def handle_disconnect(session_id):
log(f"Client disconnected: {session_id}")
# Remove client
if session_id in client:
# shutdown thread
client[session_id]["stop_thread_flag"] = True
client.pop(session_id)
# Client requests update baseprompt
@sio.on('baseprompt')
async def handle_baseprompt(session_id, data):
global client
if session_id not in client:
log(f"Invalid session {session_id}")
return
client[session_id]["baseprompt"] = data["prompt"]
client[session_id]["context"] = base_prompt(template=data["prompt"])
log(f"Client requests baseprompt {session_id} - {data['prompt']}")
await sio.emit('update', {'update': 'Base Prompt Updated', 'voice': 'user'}, room=session_id)
# Client requests defaults
@sio.on('defaults')
async def handle_defaults(session_id, data):
global client
log(f"Client requests defaults {session_id}")
if session_id not in client:
log(f"Invalid session {session_id}")
return
data = {
"baseprompt": client[session_id]["baseprompt"],
"ragprompt": client[session_id]["ragprompt"],
"vectorNum": client[session_id]["vectorNum"],
"vectordb": client[session_id]["vectordb"],
}
await sio.emit('defaults', data, room=session_id)
await sio.emit('update', {'update': 'Data Refreshed', 'voice': 'user'}, room=session_id)
# Run User Prompt - Poll vector database for similar document
@sio.on('userprompt')
async def handle_userprompt(session_id, data):
global client
log(f"Client requests userprompt {session_id}")
if session_id not in client:
log(f"Invalid session {session_id}")
return
client[session_id]["userprompt"] = data["prompt"]
client[session_id]["vectordb"] = data["vectordb"]
client[session_id]["vectorNum"] = data["num"]
_, references = query_index(data["prompt"],
client[session_id]["vectordb"],
int(client[session_id]["vectorNum"]))
await sio.emit('docs-found', {'update': references, 'voice': 'user'}, room=session_id)
# Run RAG Prompt - Import and summarize topic from library
@sio.on('rag')
async def handle_rag(session_id, data):
global client
log(f"Client requests rag {session_id}")
if session_id not in client:
log(f"Invalid session {session_id}")
return
# Grab data from client
client[session_id]["baseprompt"] = data["baseprompt"]
client[session_id]["vectordb"] = data["vectordb"]
client[session_id]["vectorNum"] = data["num"]
client[session_id]["userprompt"] = data["userprompt"]
client[session_id]["ragtemplate"] = data["ragtemplate"]
# Query Weaviate for documents
log(f"VectorDB: {client[session_id]['vectordb']} - VectorNum: {client[session_id]['vectorNum']}")
results, references = query_index(client[session_id]["userprompt"],
client[session_id]["vectordb"],
int(client[session_id]["vectorNum"]))
await sio.emit('docs-found', {'update': references, 'voice': 'user'}, room=session_id)
# Create RAG prompt from documents
log(f"RAG: {results}")
client[session_id]["rag"] = expand_prompt(client[session_id]["ragtemplate"],
{"context_str": results,
"prompt": client[session_id]["userprompt"]})
await sio.emit('update', {'update': '[RAG: Running...]', 'voice': 'user'}, room=session_id)
# Client sent a message - handle it
@sio.on('message')
async def handle_message(session_id, data):
global client
# Handle incoming user prompts and store them
log(f'Received message from {session_id}')
log(f"Received Data: {data}")
if session_id not in client:
log(f"Invalid session {session_id}")
await handle_invalid_session(session_id)
return
p = data["prompt"]
client[session_id]["visible"] = data["show"]
# Did we get a start command? Use greeting prompt.
if p == "{start}":
p = prompts["greeting"]
# Did we get asked to fetch a URL?
if p.startswith("http"):
await handle_url_prompt(session_id, p)
elif p.startswith("/"):
await handle_command(session_id, p)
else:
await handle_normal_prompt(session_id, p)
return {'status': 'Message received'}
async def handle_invalid_session(session_id):
await sio.emit('update', {'update': '[Session Unrecognized - Try Refresh]', 'voice': 'user'}, room=session_id)
async def handle_url_prompt(session_id, p):
url = p.strip()
client[session_id]["visible"] = False
client[session_id]["remember"] = True
website_text = await extract_text_from_url(url)
if website_text:
log(f"* Reading {len(website_text)} bytes {url}")
await sio.emit('update', {'update': '%s [Reading...]' % url, 'voice': 'user'}, room=session_id)
url_encoded = requests.utils.quote(url)
client[session_id]["prompt"] = expand_prompt(prompts["website"], {"url": url_encoded, "website_text": website_text})
else:
await sio.emit('update', {'update': '%s [ERROR: Unable to read URL]' % url, 'voice': 'user'}, room=session_id)
client[session_id]["prompt"] = ''
async def handle_command(session_id, p):
command = p[1:].split(" ")[0].lower()
if command == "":
await sio.emit('update', {'update': '[Commands: /reset /version /sessions /rag /news /weather /stock]', 'voice': 'user'}, room=session_id)
client[session_id]["prompt"] = ''
elif command == "reset":
await reset_context(session_id)
elif command == "version":
await show_version(session_id)
elif command == "sessions":
await show_sessions(session_id)
elif command == "news":
await fetch_news(session_id, p)
elif command == "rag":
await handle_rag_command(session_id, p)
elif command == "weather":
await handle_weather_command(session_id, p)
elif command == "stock":
await handle_stock_command(session_id, p)
else:
await sio.emit('update', {'update': '[Invalid command]', 'voice': 'user'}, room=session_id)
client[session_id]["prompt"] = ''
async def reset_context(session_id):
client[session_id]["context"] = base_prompt()
await sio.emit('update', {'update': '[Memory Reset]', 'voice': 'user'}, room=session_id)
client[session_id]["prompt"] = prompts["greeting"]
client[session_id]["visible"] = False
async def show_version(session_id):
await sio.emit('update', {'update': '[TinyLLM Version: %s - Session: %s]' % (VERSION, session_id), 'voice': 'user'}, room=session_id)
client[session_id]["prompt"] = ''
async def show_sessions(session_id):
result = ""
x = 1
for s in client:
result += f"* {x}: {s}\n"
x += 1
await sio.emit('update', {'update': '[Sessions: %s]\n%s' % (len(client), result), 'voice': 'user'}, room=session_id)
client[session_id]["prompt"] = ''
async def fetch_news(session_id, p):
log("News requested")
topic = p[5:].strip() or "none"
await sio.emit('update', {'update': '%s [Fetching News]' % p, 'voice': 'user'}, room=session_id)
context_str, links = await get_news(topic, 25)
log(f"News Raw Context = {context_str}")
client[session_id]["visible"] = False
client[session_id]["remember"] = True
client[session_id]["links"] = links
client[session_id]["prompt"] = expand_prompt(prompts["news"], {"context_str": context_str})
async def handle_rag_command(session_id, p):
rag = p[4:].strip()
parts = rag.split()
library = ""
prompt = ""
number = RESULTS
if len(parts) >= 2:
library = parts[0]
if parts[1].isdigit():
number = int(parts[1])
prompt = ' '.join(parts[2:])
else:
number = RESULTS
prompt = ' '.join(parts[1:])
if not library or not prompt:
await sio.emit('update', {'update': '[Usage: /rag {library} {opt:number} {prompt}] - Import and summarize topic from library.', 'voice': 'user'}, room=session_id)
else:
if WEAVIATE_HOST:
log(f"Pulling {number} entries from {library} with prompt {prompt}")
if not ONESHOT:
await sio.emit('update', {'update': '%s [RAG Command Running...]' % p, 'voice': 'user'}, room=session_id)
results, references = query_index(prompt, library, number)
if results:
context_str = ""
client[session_id]["visible"] = False
client[session_id]["remember"] = True
context_str = results
log(f" = {references}")
client[session_id]["references"] = references
client[session_id]["prompt"] = expand_prompt(prompts["rag"], {"context_str": context_str, "prompt": prompt})
else:
await sio.emit('update', {'update': '[Unable to access Vector Database for %s]' % library, 'voice': 'user'}, room=session_id)
else:
await sio.emit('update', {'update': '[RAG Support Disabled - Check Config]', 'voice': 'user'}, room=session_id)
async def handle_weather_command(session_id, p):
log("Weather prompt")
await sio.emit('update', {'update': '%s [Weather Command Running...]' % p, 'voice': 'user'}, room=session_id)
location = await ask_llm(expand_prompt(prompts["location"], {"prompt": p}))
location = ''.join(e for e in location if e.isalnum() or e.isspace())
if "none" in location.lower():
context_str = await get_weather("")
else:
context_str = await get_weather(location)
client[session_id]["visible"] = False