feat: support multiple language voice
madawei2699 committed Mar 22, 2023
1 parent d96ea0e commit 391ef88
Showing 3 changed files with 30 additions and 10 deletions.
10 changes: 10 additions & 0 deletions README.md
@@ -40,6 +40,7 @@ For now it is in development, but you can try it out by joining this [channel](http
- [ ] Consider fine-tuning the chunk size of the index node and the prompt to save cost
  - If the chunk size is too big, the index node will be too large and the cost will be high.
- [x] Bot can read historical messages from the same thread, thus providing context to chatGPT
- [x] [Changing the number of output tokens](https://github.com/jerryjliu/llama_index/issues/778#issuecomment-1478303173)
- Index fine-tuning
  - [x] Use the [GPTListIndex](https://github.com/jerryjliu/llama_index/issues/753#issuecomment-1472387421) to summarize multiple URLs
  - [ ] Use the `GPTTreeIndex` with `summarize` mode to summarize a single web page
@@ -69,6 +70,15 @@ For now it is in development, but you can try it out by joining this [channel](http
- may use GPT4
- [x] Support voice reading ~~with self-hosting [whisper](https://github.com/aarnphm/whispercpp)~~
  - (whisper -> chatGPT -> Azure text-to-speech) for spoken language practice 💥 (a minimal pipeline sketch follows this diff)
- Supported languages
  - Chinese
  - English
    - 🇺🇸
    - 🇬🇧
    - 🇦🇺
    - 🇮🇳
  - Japanese
  - German
- [ ] Integrate with Azure OpenAI Service
- [ ] User access limit
  - Limit the number of requests to the bot per user per day to save cost
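The voice feature chains three services: whisper transcribes the user's speech, chatGPT answers, and Azure text-to-speech reads the answer back. Below is a minimal sketch of that flow, not the project's exact code; the file names, prompt, and voice are placeholder assumptions, and it requires the `openai` and `azure-cognitiveservices-speech` packages with valid keys.

```python
import os
import openai
from azure.cognitiveservices.speech import (
    SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat)
from azure.cognitiveservices.speech.audio import AudioOutputConfig

openai.api_key = os.environ["OPENAI_API_KEY"]

# 1) Speech -> text with OpenAI whisper
with open("question.mp3", "rb") as audio_file:  # placeholder input recording
    question = openai.Audio.transcribe("whisper-1", audio_file)["text"]

# 2) Text -> answer with chatGPT
answer = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": question}],
)["choices"][0]["message"]["content"]

# 3) Answer -> speech with Azure text-to-speech
speech_config = SpeechConfig(subscription=os.environ["SPEECH_KEY"],
                             region=os.environ["SPEECH_REGION"])
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
speech_config.speech_synthesis_voice_name = "en-US-JennyNeural"  # placeholder voice
synthesizer = SpeechSynthesizer(
    speech_config=speech_config,
    audio_config=AudioOutputConfig(filename="answer.mp3"))
synthesizer.speak_text_async(answer).get()
```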
28 changes: 19 additions & 9 deletions app/gpt.py
@@ -2,14 +2,15 @@
import os
import logging
import hashlib
import random
import uuid
import openai
from langdetect import detect
from llama_index import GPTSimpleVectorIndex, LLMPredictor, RssReader, SimpleDirectoryReader
from llama_index.prompts.prompts import QuestionAnswerPrompt
from llama_index.readers.schema.base import Document
from langchain.chat_models import ChatOpenAI
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason, CancellationReason, SpeechSynthesisOutputFormat, AudioDataStream
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason, CancellationReason, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

from app.fetch_web_post import get_urls, scrape_website, scrape_website_by_phantomjscloud
@@ -20,7 +21,7 @@
openai.api_key = OPENAI_API_KEY

llm_predictor = LLMPredictor(llm=ChatOpenAI(
    temperature=0.2, model_name="gpt-3.5-turbo"))
    temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=4097))

index_cache_web_dir = '/tmp/myGPTReader/cache_web/'
index_cache_voice_dir = '/tmp/myGPTReader/voice/'
@@ -150,19 +151,28 @@ def get_text_from_whisper(voice_file_path):
def remove_prompt_from_text(text):
    return text.replace('AI:', '').strip()

def convert_to_ssml(text):
lang_code_voice_map = {
    'zh': ['zh-CN-XiaoxiaoNeural', 'zh-CN-XiaohanNeural', 'zh-CN-YunxiNeural', 'zh-CN-YunyangNeural'],
    'en': ['en-US-JennyNeural', 'en-US-RogerNeural', 'en-IN-NeerjaNeural', 'en-IN-PrabhatNeural', 'en-AU-AnnetteNeural', 'en-AU-CarlyNeural', 'en-GB-AbbiNeural', 'en-GB-AlfieNeural'],
    'ja': ['ja-JP-AoiNeural', 'ja-JP-DaichiNeural'],
    'de': ['de-DE-AmalaNeural', 'de-DE-BerndNeural'],
}

def convert_to_ssml(text, voice_name=None):
    lang_code = detect(text)
    text = remove_prompt_from_text(text)
    if voice_name is None:
        try:
            voice_name = random.choice(lang_code_voice_map[lang_code.split('-')[0]])
        except KeyError:
            voice_name = random.choice(lang_code_voice_map['en'])
    ssml = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="zh-CN">'
    if lang_code == 'zh-cn':
        ssml += f'<voice name="zh-CN-XiaoxiaoNeural">{text}</voice>'
    else:
        ssml += f'<voice name="en-US-JennyNeural">{text}</voice>'
    ssml += f'<voice name="{voice_name}">{text}</voice>'
    ssml += '</speak>'

    return ssml

def get_voice_file_from_text(text):
def get_voice_file_from_text(text, voice_name=None):
    speech_config = SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
    speech_config.set_speech_synthesis_output_format(
        SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
@@ -171,7 +181,7 @@ def get_voice_file_from_text(text):
    file_config = AudioOutputConfig(filename=file_name)
    synthesizer = SpeechSynthesizer(
        speech_config=speech_config, audio_config=file_config)
    ssml = convert_to_ssml(text)
    ssml = convert_to_ssml(text, voice_name)
    result = synthesizer.speak_ssml_async(ssml).get()
    if result.reason == ResultReason.SynthesizingAudioCompleted:
        logging.info("Speech synthesized for text [{}], and the audio was saved to [{}]".format(
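With this change, callers of `get_voice_file_from_text` can either let the bot pick a random Azure neural voice for the language detected by `langdetect`, or pass an explicit `voice_name`. A small usage sketch, assuming the module path `app.gpt` and that the Azure `SPEECH_KEY`/`SPEECH_REGION` environment variables are set:

```python
from app.gpt import get_voice_file_from_text

# Language is auto-detected; a matching voice from lang_code_voice_map
# is picked at random (here one of the ja-JP neural voices).
japanese_mp3 = get_voice_file_from_text("こんにちは。今日は日本語を練習しましょう。")

# Or pin a specific voice instead of letting the bot choose.
english_mp3 = get_voice_file_from_text("Let's practice English today.",
                                       voice_name="en-GB-AbbiNeural")
print(japanese_mp3, english_mp3)
```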
2 changes: 1 addition & 1 deletion app/server.py
@@ -192,8 +192,8 @@ def handle_mentions(event, say, logger):
            say(f'<@{user}>, {gpt_response}', thread_ts=thread_ts)
        else:
            voice_file_path = get_voice_file_from_text(gpt_response)
            logger.info(f'=====> Voice file path is {voice_file_path}')
            slack_app.client.files_upload_v2(file=voice_file_path, channel=channel, thread_ts=thread_ts)
            say(f'<@{user}>', thread_ts=thread_ts)
    except concurrent.futures.TimeoutError:
        future.cancel()
        err_msg = 'Task timedout(5m) and was canceled.'
