forked from TheExplainthis/ChatGPT-Line-Bot
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
5 changed files
with
177 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
line-bot-sdk==2.4.1 | ||
python-dotenv==0.21.1 | ||
Flask==2.2.2 | ||
opencc-python-reimplemented==0.1.4 | ||
opencc-python-reimplemented==0.1.4 | ||
beautifulsoup4==4.11.2 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import os | ||
import re | ||
import requests | ||
from bs4 import BeautifulSoup | ||
|
||
|
||
# Default system prompt for website summarisation. WebsiteReader falls back to
# it when the WEBSITE_SYSTEM_MESSAGE environment variable is unset or empty.
WEBSITE_SYSTEM_MESSAGE = "你現在非常擅於做資料的整理、總結、歸納、統整,並能專注於細節、且能提出觀點"
# Default user-prompt template for website summarisation. WebsiteReader falls
# back to it when the WEBSITE_MESSAGE_FORMAT environment variable is unset or
# empty. The single `{}` placeholder is filled with the page text through
# str.format, so any replacement template must keep exactly one positional
# placeholder.
WEBSITE_MESSAGE_FORMAT = """
針對這個連結的內容:
\"\"\"
{}
\"\"\"
請關注幾個點:
1. 他的主題為何?
2. 他的重點為何?
3. 他獨特的觀點為何?
你需要回傳的格式是:
- 主題: '...'
- 重點: '...'
- 獨特觀點: '...'
"""
|
||
|
||
class Website:
    """Helpers for finding a URL in chat text and scraping a page's main text."""

    # Compiled once at class-definition time rather than on every call.
    _URL_PATTERN = re.compile(r'https?://\S+')

    def get_url_from_text(self, text: str):
        """Return the first http(s) URL found in *text*, or None if there is none.

        A URL is ``http://`` or ``https://`` followed by a run of
        non-whitespace characters, so adjacent punctuation that is not
        separated by whitespace is part of the match.
        """
        match = self._URL_PATTERN.search(text)
        return match.group() if match else None

    def get_content_from_url(self, url: str, timeout: float = 10):
        """Download *url* and return the stripped text of its content elements.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests.get`` can block indefinitely on a
                stalled connection.

        Returns:
            A list with the stripped text of every ``<article>`` element; if
            the page has none, the stripped text of every ``<div>`` whose
            class is ``content``. The list is empty when neither is present.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        hotpage = requests.get(url, timeout=timeout)
        main = BeautifulSoup(hotpage.text, 'html.parser')
        chunks = [article.text.strip() for article in main.find_all('article')]
        if not chunks:
            # Fallback for pages that do not use <article> markup.
            chunks = [block.text.strip() for block in main.find_all('div', class_='content')]
        return chunks
|
||
|
||
class WebsiteReader:
    """Summarise scraped website text with a chat-completion model."""

    def __init__(self, model=None, text_length_limit=1800):
        """Set up prompts, the model and the truncation limit.

        Args:
            model: Object exposing ``chat_completion(messages)`` that returns
                a ``(role, content)`` pair.
            text_length_limit: Maximum number of characters of page text put
                into the prompt. ``None`` disables truncation.
        """
        # Environment variables override the built-in prompts; an empty value
        # counts as "not set" because of the `or` fallback.
        self.system_message = os.getenv('WEBSITE_SYSTEM_MESSAGE') or WEBSITE_SYSTEM_MESSAGE
        self.message_format = os.getenv('WEBSITE_MESSAGE_FORMAT') or WEBSITE_MESSAGE_FORMAT
        self.model = model
        self.text_length_limit = text_length_limit

    def send_msg(self, msg):
        """Send the message list *msg* to the model and return ``(role, content)``."""
        role, content = self.model.chat_completion(msg)
        return role, content

    def summarize(self, chunks):
        """Ask the model to summarise the page text in *chunks*.

        Args:
            chunks: Iterable of strings. They are joined with newlines and
                cut to ``text_length_limit`` characters before being placed
                into the prompt template.

        Returns:
            The ``(role, response)`` pair returned by the model.
        """
        text = '\n'.join(chunks)[:self.text_length_limit]
        msgs = [
            {'role': 'system', 'content': self.system_message},
            {'role': 'user', 'content': self.message_format.format(text)},
        ]
        role, response = self.send_msg(msgs)
        return role, response
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import math | ||
import os | ||
import re | ||
|
||
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled | ||
|
||
|
||
# Default prompts for YouTube transcript summarisation. YoutubeTranscriptReader
# falls back to each of them when the environment variable of the same name is
# unset or empty.

# System prompt sent with every summarisation request.
YOUTUBE_SYSTEM_MESSAGE = "你現在非常擅於做資料的整理、總結、歸納、統整,並能專注於細節、且能提出觀點"
# Per-chunk prompt. Filled with str.format(index, chunk_text, index), so it
# needs three positional placeholders.
PART_MESSAGE_FORMAT = """ PART {} START
下面是一個 Youtube 影片的部分字幕: \"\"\"{}\"\"\" \n\n請總結出這部影片的重點與一些細節,字數約 100 字左右
PART {} END
"""
# Final prompt for multi-chunk transcripts. Its single placeholder receives the
# newline-joined per-chunk summaries.
WHOLE_MESSAGE_FORMAT = "下面是每一個部分的小結論:\"\"\"{}\"\"\" \n\n 請給我全部小結論的總結,字數約 100 字左右"
# Prompt for transcripts that fit in one chunk. Its single placeholder receives
# the chunk text.
SINGLE_MESSAGE_FORMAT = "下面是一個 Youtube 影片的字幕: \"\"\"{}\"\"\" \n\n請總結出這部影片的重點與一些細節,字數約 100 字左右"
|
||
|
||
class Youtube:
    """Fetch YouTube transcripts and split them into prompt-sized chunks."""

    # Language codes passed to the transcript API as its `languages` argument.
    TRANSCRIPT_LANGUAGES = ('zh-TW', 'zh', 'ja', 'zh-Hant', 'zh-Hans', 'en', 'ko')

    # Captures the 11-character video id from youtube.com and youtu.be URLs.
    _VIDEO_ID_PATTERN = re.compile(
        r'(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})'
    )

    def __init__(self, step, chunk_size=150):
        """Configure transcript sampling.

        Args:
            step: Keep every ``step``-th transcript entry (1 keeps them all).
            chunk_size: Number of kept entries joined into one chunk.
        """
        self.step = step
        self.chunk_size = chunk_size

    def _split_into_chunks(self, transcript):
        """Sample every ``step``-th entry's text and group it into chunks."""
        lines = [entry.get('text') for index, entry in enumerate(transcript) if index % self.step == 0]
        return [
            '\n'.join(lines[start:start + self.chunk_size])
            for start in range(0, len(lines), self.chunk_size)
        ]

    def get_transcript_chunks(self, video_id):
        """Fetch the transcript of *video_id* and return it as text chunks.

        Returns:
            A ``(success, chunks, error_message)`` triple. On success it is
            ``(True, chunks, None)``; on any failure it is
            ``(False, [], message)`` and no exception propagates.
        """
        try:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id, languages=list(self.TRANSCRIPT_LANGUAGES)
            )
            # Kept inside the try so chunking errors are also reported through
            # the error-message slot instead of propagating.
            chunks = self._split_into_chunks(transcript)
        except NoTranscriptFound:
            return False, [], '目前只支援:中文、英文、日文、韓文'
        except TranscriptsDisabled:
            return False, [], '本影片無開啟字幕功能'
        except Exception as e:
            return False, [], str(e)
        return True, chunks, None

    def retrieve_video_id(self, url):
        """Return the 11-character video id found in *url*, or None if absent."""
        match = self._VIDEO_ID_PATTERN.search(url)
        return match.group(1) if match else None
|
||
|
||
class YoutubeTranscriptReader:
    """Summarise YouTube transcript chunks with a chat-completion model."""

    def __init__(self, model=None):
        """Load the prompts and store the model.

        Args:
            model: Object exposing ``chat_completion(messages)`` that returns
                a ``(role, content)`` pair.
        """
        # Environment variables override the built-in prompts; an empty value
        # counts as "not set" because of the `or` fallback.
        self.summary_system_prompt = os.getenv('YOUTUBE_SYSTEM_MESSAGE') or YOUTUBE_SYSTEM_MESSAGE
        self.part_message_format = os.getenv('PART_MESSAGE_FORMAT') or PART_MESSAGE_FORMAT
        self.whole_message_format = os.getenv('WHOLE_MESSAGE_FORMAT') or WHOLE_MESSAGE_FORMAT
        self.single_message_format = os.getenv('SINGLE_MESSAGE_FORMAT') or SINGLE_MESSAGE_FORMAT
        self.model = model

    def send_msg(self, msg):
        """Send the message list *msg* to the model and return ``(role, content)``."""
        role, content = self.model.chat_completion(msg)
        return role, content

    def _build_messages(self, user_content):
        """Return the system prompt followed by one user message."""
        return [
            {'role': 'system', 'content': self.summary_system_prompt},
            {'role': 'user', 'content': user_content},
        ]

    def summarize(self, chunks):
        """Summarise the transcript *chunks*.

        A single chunk is summarised with one request. Several chunks are
        each summarised separately (numbered from 0), and those partial
        summaries are then condensed by one final request.

        Args:
            chunks: Non-empty sequence of transcript text chunks.

        Returns:
            The ``(role, response)`` pair of the final model call.

        Raises:
            ValueError: If *chunks* is empty. This is raised before any model
                call, replacing the bare IndexError the lookup of the first
                chunk would otherwise produce.
        """
        if not chunks:
            raise ValueError('chunks must contain at least one transcript chunk')

        if len(chunks) == 1:
            prompt = self.single_message_format.format(chunks[0])
        else:
            part_summaries = []
            for i, chunk in enumerate(chunks):
                part_prompt = self.part_message_format.format(i, chunk, i)
                _, content = self.send_msg(self._build_messages(part_prompt))
                part_summaries.append(content)
            prompt = self.whole_message_format.format('\n'.join(part_summaries))

        role, response = self.send_msg(self._build_messages(prompt))
        return role, response