Skip to content

Commit

Permalink
🎉 Adds realistic voice
Browse files (browse the repository at this point in the history)
  • Loading branch information
Alexandre Sajus committed Dec 16, 2023
1 parent c7915e9 commit 5056c07
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 12 deletions.
21 changes: 14 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from gtts import gTTS
import pygame
from pygame import mixer
import elevenlabs

from record import SpeechToText

Expand All @@ -17,10 +18,11 @@

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
elevenlabs.set_api_key(os.getenv("ELEVENLABS_API_KEY"))
RECORDING_PATH = "wavs/recording.wav"

gpt_client = openai.Client(api_key=OPENAI_API_KEY)
context = "You are Sam, Alex's helpful secretary. Your answers should be limited to 1-2 short sentences."
context = "You are Jarvis, Alex's helpful and witty assistant. Your answers should be limited to 1-2 short sentences."

mixer.init()

Expand Down Expand Up @@ -69,26 +71,31 @@ async def transcribe(
if __name__ == "__main__":
while True:
# Record audio
print("Listening...", end="")
SpeechToText()
# Transcribe audio
print("Transcribing...")
deepgram = Deepgram(DEEPGRAM_API_KEY)
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
words = loop.run_until_complete(transcribe(RECORDING_PATH))
string_words = " ".join(
word_dict.get("word") for word_dict in words if "word" in word_dict
)
print(f"USER: {string_words}")
# Get response from GPT-3
context += f"\nAlex: {string_words}\nSam: "
print("Generating response...")
context += f"\nAlex: {string_words}\nJarvis: "
response = request_gpt(context)
print(f"AI: {response}")
context += response
# Convert response to audio
tts = gTTS(response)
tts.save("wavs/response.wav")
print("Converting to audio...")
audio = elevenlabs.generate(
text=response, voice="Adam", model="eleven_monolingual_v1"
)
elevenlabs.save(audio, "wavs/response.wav")
# Play response
print("Speaking...")
sound = mixer.Sound("wavs/response.wav")
sound.play()
pygame.time.wait(int(sound.get_length() * 1000))
print("LOG: Response played")
print(f"\n --- USER: {string_words}\n --- JARVIS: {response}\n")
4 changes: 0 additions & 4 deletions record.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def SpeechToText() -> None:
frames_per_buffer=960,
)
audio_source.start_stream()
print("LOG: Listening...")

def buffer_to_wav(buffer: bytes) -> bytes:
"""Wraps a buffer of raw audio data in a WAV"""
Expand Down Expand Up @@ -77,8 +76,6 @@ def buffer_to_wav(buffer: bytes) -> bytes:
)
wav_bytes = buffer_to_wav(audio_data)
wav_path.write_bytes(wav_bytes)
print(wav_path)
print("file saved")
break
elif wav_sink:
# Write to WAV file
Expand All @@ -90,7 +87,6 @@ def buffer_to_wav(buffer: bytes) -> bytes:
finally:
try:
audio_source.close_stream()
print("LOG: Recording Saved")
except Exception:
pass

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ openai==1.4.0
deepgram-sdk==2.12.0
pyaudio==0.2.14
rhasspy-silence==0.4.0
gTTS==2.4.0
elevenlabs==0.2.27
pygame==2.5.2
Binary file modified wavs/recording.wav
Binary file not shown.
Binary file modified wavs/response.wav
Binary file not shown.

0 comments on commit 5056c07

Please sign in to comment.