Commit

ask in prompt to send 200 dialogues; calc wpm using length of the audio
mkagenius committed Jan 4, 2025
1 parent 1e89324 commit b90c41e
Showing 3 changed files with 25 additions and 9 deletions.
backend/app/prompts.py (1 addition & 1 deletion)
@@ -19,7 +19,7 @@
Discuss in the podcast, the main components of the system (e.g., frontend, backend, database, building, external services).
Discuss the relationships and interactions between these components.
Sometimes the answers can also be single word or very small so that it seems natural. Long answers all the time makes it monotonous.
Make it a 10 minute long or longer podcast if possible. Give atleast 80 voice tags for the host which is probably Ava here (160 including closing tag). + Same amount of voice tags for guest. Slowly count them and re-write the ssml if its falling short and then return the ssml."""
Make it a 20 minute long or longer podcast if possible. Give atleast 200 voice tags for the host + Same amount of voice tags for guest. Slowly count them and re-write the ssml if its falling short and then return the ssml."""

SYSTEM_FIRST_PROMPT = """
You are tasked with explaining to a principal software engineer how to draw the best and most accurate system design diagram / architecture of a given project. This explanation should be tailored to the specific project's purpose and structure. To accomplish this, you will be provided with two key pieces of information:
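The prompt change leans on the model to count its own <voice> tags and rewrite the SSML if the count falls short. As a rough caller-side sanity check, something like the sketch below could verify the returned SSML before synthesis. This is a hypothetical illustration, not code from this commit: the 200-per-speaker threshold comes from the prompt text, and the name="..." attribute convention (e.g. a host voice such as Ava) is an assumption about how the SSML is tagged.

import re
from collections import Counter

def count_voice_tags(ssml: str) -> Counter:
    # Hypothetical check; assumes speaker turns look like <voice name="en-US-AvaNeural">...</voice>.
    return Counter(re.findall(r'<voice[^>]*name="([^"]+)"', ssml))

def has_enough_dialogue(ssml: str, min_tags_per_speaker: int = 200) -> bool:
    # The prompt asks for roughly 200 turns for the host and the same for the guest.
    counts = count_voice_tags(ssml)
    return bool(counts) and all(c >= min_tags_per_speaker for c in counts.values())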
backend/app/routers/generate.py (23 additions & 7 deletions)
@@ -13,6 +13,8 @@
from tempfile import NamedTemporaryFile
import base64
import xml.etree.ElementTree as ET
from pydub import AudioSegment
import io

load_dotenv()

@@ -37,7 +39,8 @@ def get_cached_github_data(username: str, repo: str):
file_list = openai_service.get_important_files(file_tree)
for fpath in file_list:
content = github_service.get_github_file_content(username, repo, fpath)
file_content += f"FPATH: fpath CONTENT:{content}"
discuss_or_not = "- discuss this file." if '.md' not in fpath else ""
file_content += f"FPATH: {fpath} {discuss_or_not} \n CONTENT:{content}"
except Exception as e:
print(f"Some error in getting github file content {e}. Proceeding.")

@@ -90,7 +93,15 @@ def calculate_duration(text_line, wpm=135):
seconds = minutes * 60
return seconds

def ssml_to_webvtt(ssml_content, max_line_length=45, max_words_per_cue=30):
def no_of_words(text_lines):
if isinstance(text_lines, str): # If it's a single string
return len([word for word in text_lines.split() if word])
elif isinstance(text_lines, list): # If it's a list of strings
return sum(len([word for word in line.split() if word]) for line in text_lines)
else:
return 0

def ssml_to_webvtt(ssml_content, duration_in_seconds, max_line_length=45, max_words_per_cue=30):
# Helper function to insert line breaks at appropriate places
def add_line_breaks(text, max_length):
words = text.split()
@@ -110,13 +121,14 @@ def add_line_breaks(text, max_length):
text_content = re.sub(r'<speak[^>]*>|</speak>|<break[^>]*>', '', ssml_content)
text_content = re.sub(r'<voice[^>]*>', '\n\n', text_content)
text_content = re.sub(r'</voice>', '', text_content)
text_lines = filter(None, [line.strip() for line in text_content.splitlines()])
text_lines = list(filter(None, [line.strip() for line in text_content.splitlines()]))

# Step 2: Generate WebVTT content with sequential timestamps
vtt_content = "WEBVTT\n\n"
cumulative_time = 0.0
cue_index = 0

wpm = int(no_of_words(text_lines) / duration_in_seconds * 60)
print(wpm, " Words per minute")
for i, line in enumerate(text_lines):

# Break the line if it's too long into sub-lines based on word count
@@ -128,7 +140,7 @@ def add_line_breaks(text, max_length):

# Generate VTT for each sub-line
for sub_line in sub_lines:
duration = calculate_duration(sub_line)
duration = calculate_duration(sub_line, wpm=wpm)
start_time = cumulative_time
end_time = start_time + duration
cumulative_time = end_time # Update cumulative time for next line
@@ -170,7 +182,7 @@ async def generate(request: Request, body: ApiRequest):
file_content = github_data["file_content"]

# Check combined token count
combined_content = f"{file_tree}\n{readme}\n{file_content}"
combined_content = f"FILE TREE: {file_tree}\n README: {readme}\n IMPORTANT FILES: {file_content}"
combined_content = combined_content[:250000]
print(combined_content)
try:
@@ -218,7 +230,11 @@ async def generate(request: Request, body: ApiRequest):
# mp3_bytes = convert_wav_to_mp3(audio_bytes)
if audio_bytes:
response = Response(content=audio_bytes, media_type="audio/mpeg", headers={"Content-Disposition": "attachment; filename=explanation.mp3"})
vtt_content = ssml_to_webvtt(ssml_response)
# Assuming audio_bytes contains the audio data
audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="mp3")
duration_in_seconds = len(audio) / 1000.0
print("duration in sec", duration_in_seconds)
vtt_content = ssml_to_webvtt(ssml_response, duration_in_seconds)
encoded_vtt_content = base64.b64encode(vtt_content.encode('utf-8')).decode('utf-8')
response.headers["X-VTT-Content"] = encoded_vtt_content
# Add CORS headers
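Taken together, the generate.py changes replace the hard-coded 135 wpm pacing with a rate measured from the actual audio: decode the MP3 returned by TTS, take its real length, count the words in the cleaned SSML text, and use the resulting words-per-minute to time the WebVTT cues. A condensed, self-contained sketch of that flow follows, under two stated assumptions: format_timestamp is not part of this diff, so the helper here is a stand-in, and the SSML stripping is simplified to a single tag-removing regex.

import io
import re
from pydub import AudioSegment

def format_timestamp(seconds: float) -> str:
    # Assumed stand-in: render seconds as an HH:MM:SS.mmm WebVTT timestamp.
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    return f"{hours:02d}:{minutes:02d}:{seconds % 60:06.3f}"

def measured_wpm(ssml_content: str, audio_bytes: bytes) -> int:
    # Words actually spoken: drop all markup, keep the text.
    words = len(re.sub(r'<[^>]+>', ' ', ssml_content).split())
    # pydub reports segment length in milliseconds.
    audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="mp3")
    duration_in_seconds = len(audio) / 1000.0
    return int(words / duration_in_seconds * 60)

def cues_from_lines(text_lines, wpm):
    # Pace each caption line in proportion to its word count, as calculate_duration does.
    vtt = "WEBVTT\n\n"
    t = 0.0
    for line in text_lines:
        duration = len(line.split()) / wpm * 60
        vtt += f"{format_timestamp(t)} --> {format_timestamp(t + duration)}\n{line}\n\n"
        t += duration
    return vtt

Because the per-cue rate is derived from the full audio length, the cue timeline ends very close to the real end of the file, which is what keeps the captions roughly in sync without word-level timestamps.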
backend/app/services/openai_service.py (1 addition & 1 deletion)
@@ -29,7 +29,7 @@ def call_openai_for_response(self, files_path, ssml_prompt_text):
"""
# Read the content of the file specified by files_path
with open(files_path[0], 'r') as file:
file_content = file.read()
file_content = file.read() # this has everything readme + tree + other files
# Send the prompt to Azure OpenAI for processing
response = openai.chat.completions.create(
model=self.model_name,
