Skip to content

Commit

Permalink
Merge branch 'feat/new_tts_talkByData' into feat/new_stt_and_tts
Browse files: browse the repository at this point in the history
  • Loading branch information
vitoranello committed Nov 6, 2024
2 parents 5dbfff9 + 77f8f75 commit c03ce6d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 25 deletions.
3 changes: 1 addition & 2 deletions launch/synthesizer_speech.launch
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
<?xml version="1.0" ?>
<launch>
<arg name="speech_synthesizer_machine" default="jetson"/>
<arg name="audio_player_machine" default="jetson"/>
<arg name="use_machine" default="true"/>

<rosparam file="$(find butia_speech)/config/ros.yaml" command="load"/>
<rosparam file="$(find butia_speech)/config/butia_speech_synthesizer.yaml" command="load"/>
<rosparam file="$(find butia_speech)/config/butia_audio_player.yaml" command="load"/>

<node name="audio_player" pkg="butia_speech" type="audio_player.py" output="screen" machine="$(arg audio_player_machine)"/>
<node name="audio_player" pkg="butia_speech" type="audio_player.py" output="screen"/>

<node name="speech_synthesizer" pkg="butia_speech" type="speech_synthesizer.py" output="screen" machine="$(arg speech_synthesizer_machine)"/>

Expand Down
34 changes: 20 additions & 14 deletions nodes/speech_synthesizer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python3
# coding: utf-8
from butia_speech.srv import AudioPlayer, SynthesizeSpeech, SynthesizeSpeechResponse
from butia_speech.srv import AudioPlayer,AudioPlayerByData, AudioPlayerByDataRequest, SynthesizeSpeech, SynthesizeSpeechResponse
from butia_speech.msg import SynthesizeSpeechMessage
from std_msgs.msg import Bool
from audio_common_msgs.msg import AudioData, AudioInfo

from scipy.io import wavfile
import os
Expand Down Expand Up @@ -52,21 +53,26 @@ def synthesize_speech(req):
)
# Convert the response audio to a NumPy array
audio_samples = np.frombuffer(resp.audio, dtype=np.int16)
try:
# Write the audio samples to a WAV file
wavfile.write(FILENAME, configs["sample_rate_hz"], audio_samples)
print("success") # Print success message if the file is written successfully
except:
print("error") # Print error message if there is an issue writing the file

# Fetch the audio player service parameter
audio_player_service_param = rospy.get_param("services/audio_player/service", "/butia_speech/ap/audio_player")
# Wait for the audio player service to be available
rospy.wait_for_service(audio_player_service_param, timeout=rospy.Duration(10))
try:
audio_player = rospy.ServiceProxy(audio_player_service_param, AudioPlayer)
audio_player(FILENAME)
audio_data = AudioData()
audio_data.data = audio_samples.tobytes()
audio_info = AudioInfo()
audio_info.sample_rate = configs["sample_rate_hz"]
audio_info.channels = 1
audio_info.sample_format = '16' # Assuming 16-bit PCM

# Fetch the audio player by data service parameter
audio_player_by_data_service_param = rospy.get_param("services/audio_player_by_data/service", "/butia_speech/ap/audio_player_by_data")
# Wait for the audio player by data service to be available
rospy.wait_for_service(audio_player_by_data_service_param, timeout=rospy.Duration(10))
try:
audio_player_by_data = rospy.ServiceProxy(audio_player_by_data_service_param, AudioPlayerByData)

request = AudioPlayerByDataRequest()
request.data = audio_data
request.audio_info = audio_info
audio_player_by_data(request)

response = SynthesizeSpeechResponse()
response.success = True
return response
Expand Down
33 changes: 24 additions & 9 deletions nodes/speech_synthesizer_old.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env python3
# coding: utf-8
from butia_speech.srv import AudioPlayer, SynthesizeSpeech, SynthesizeSpeechResponse
from butia_speech.srv import AudioPlayer, AudioPlayerByData, AudioPlayerByDataRequest, SynthesizeSpeech, SynthesizeSpeechResponse
from butia_speech.msg import SynthesizeSpeechMessage
from audio_common_msgs.msg import AudioData, AudioInfo
from std_msgs.msg import Bool

from espnet2.bin.tts_inference import Text2Speech
Expand Down Expand Up @@ -29,18 +30,32 @@

def synthesize_speech(req):
speech = req.text
lang = "en" # lang = req.lang
lang = "en" # lang = req.lang

with torch.no_grad():
wav = text2speech(speech)["wav"]
wavfile.write(FILENAME, text2speech.fs, (wav.view(-1).cpu().numpy()*32768).astype(np.int16))

audio_player_service_param = rospy.get_param("services/audio_player/service", "/butia_speech/ap/audio_player")
rospy.wait_for_service(audio_player_service_param, timeout=rospy.Duration(10))
try:
audio_player = rospy.ServiceProxy(audio_player_service_param, AudioPlayer)
audio_player(FILENAME)
wav_data = (wav.view(-1).cpu().numpy() * 32768).astype(np.int16)
wavfile.write(FILENAME, text2speech.fs, wav_data)

audio_data = AudioData()
audio_data.data = wav_data.tobytes()
audio_info = AudioInfo()
audio_info.sample_rate = text2speech.fs
audio_info.channels = 1
audio_info.sample_format = '16' # Assuming 16-bit PCM

# Fetch the audio player by data service parameter
audio_player_by_data_service_param = rospy.get_param("services/audio_player_by_data/service", "/butia_speech/ap/audio_player_by_data")
# Wait for the audio player by data service to be available
rospy.wait_for_service(audio_player_by_data_service_param, timeout=rospy.Duration(10))
try:
audio_player_by_data = rospy.ServiceProxy(audio_player_by_data_service_param, AudioPlayerByData)

request = AudioPlayerByDataRequest()
request.data = audio_data
request.audio_info = audio_info
audio_player_by_data(request)

response = True
return response
except rospy.ServiceException as exc:
Expand Down

0 comments on commit c03ce6d

Please sign in to comment.