Skip to content

Commit

Permalink
Merge branch 'main' into feature/camel_integration
Browse files Browse the repository at this point in the history
  • Loading branch information
snova-rodrigom authored Dec 10, 2024
2 parents e702e3e + 02eac7d commit 6a0a134
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 47 deletions.
Binary file removed sambanova_scribe/data/reddit_sample1.mp3
Binary file not shown.
Binary file removed sambanova_scribe/data/reddit_sample2.mp3
Binary file not shown.
Binary file removed sambanova_scribe/data/reddit_sample3.mp3
Binary file not shown.
9 changes: 9 additions & 0 deletions sambanova_scribe/data/sample_yt_cookies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

.youtube.com TRUE / FALSE 0 PREF f6=40000000&f7=4100&tz=UTC&f4=4000000&hl=en
.youtube.com TRUE / TRUE 1733852835 GPS 1
.youtube.com TRUE / TRUE 0 SOCS CAI
.youtube.com TRUE / TRUE 0 YSC hbKEH5VLxFQ
.youtube.com TRUE / TRUE 1749403555 VISITOR_INFO1_LIVE ri8ciLc5j3k
.youtube.com TRUE / TRUE 1749403555 VISITOR_PRIVACY_METADATA CgJDTxIEGgAgLQ%3D%3D
93 changes: 46 additions & 47 deletions sambanova_scribe/notebooks/speech_asr_and_reasoning.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 1,
"metadata": {},
"outputs": [
{
Expand All @@ -18,7 +18,7 @@
"True"
]
},
"execution_count": 4,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -60,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -153,7 +153,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -168,7 +168,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -185,7 +185,7 @@
"<IPython.lib.display.Audio object>"
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -198,16 +198,16 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='I am a large language model created by Alibaba Cloud. I am called QianWen.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'completion_tokens': 19, 'completion_tokens_after_first_per_sec': 17.649233237868433, 'completion_tokens_after_first_per_sec_first_ten': 712.6685753608537, 'completion_tokens_per_sec': 16.163798158434954, 'end_time': 1733341412.6134014, 'is_last_response': True, 'prompt_tokens': 107, 'start_time': 1733341411.437935, 'time_to_first_token': 0.1555919647216797, 'total_latency': 1.1754662990570068, 'total_tokens': 126, 'total_tokens_per_sec': 107.1915035769897}, 'model_name': 'Qwen2-Audio-7B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1733341411}, id='9bc3b921-030f-4ac7-9228-b11955f62f6a')"
"AIMessage(content='I am a large language model created by Alibaba Cloud. I am called QianWen.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'completion_tokens': 19, 'completion_tokens_after_first_per_sec': 15.938705497327486, 'completion_tokens_after_first_per_sec_first_ten': 672.4065568514288, 'completion_tokens_per_sec': 14.751775486275731, 'end_time': 1733873051.7118666, 'is_last_response': True, 'prompt_tokens': 108, 'start_time': 1733873050.423886, 'time_to_first_token': 0.15865421295166016, 'total_latency': 1.287980556488037, 'total_tokens': 127, 'total_tokens_per_sec': 98.60397298721146}, 'model_name': 'Qwen2-Audio-7B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1733873050}, id='2272a3e6-ebd8-4a97-a708-296151743c7d')"
]
},
"execution_count": 12,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -235,7 +235,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -247,16 +247,16 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='I\\'m an artificial intelligence model known as Llama. Llama stands for \"Large Language Model Meta AI.\"', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 7, 'completion_tokens': 22, 'completion_tokens_after_first_per_sec': 343.13913280610853, 'completion_tokens_after_first_per_sec_first_ten': 604.9892437667422, 'completion_tokens_per_sec': 87.0119562786763, 'end_time': 1733351159.5725753, 'is_last_response': True, 'prompt_tokens': 41, 'start_time': 1733351159.2949011, 'time_to_first_token': 0.2164745330810547, 'total_latency': 0.25283881596156527, 'total_tokens': 63, 'total_tokens_per_sec': 249.17060207075485}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1733351159}, id='4e725824-a33c-477b-907c-88e63a79b906')"
"AIMessage(content='I\\'m an artificial intelligence model known as Llama. Llama stands for \"Large Language Model Meta AI.\"', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 7, 'completion_tokens': 22, 'completion_tokens_after_first_per_sec': 504.0740312584055, 'completion_tokens_after_first_per_sec_first_ten': 584.6534708670198, 'completion_tokens_per_sec': 105.12519139628144, 'end_time': 1733873055.6752837, 'is_last_response': True, 'prompt_tokens': 41, 'start_time': 1733873055.461978, 'time_to_first_token': 0.1716451644897461, 'total_latency': 0.2092742919921875, 'total_tokens': 63, 'total_tokens_per_sec': 301.04032081662416}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1733873055}, id='7360d655-9998-49e4-bd6a-81917bc83a70')"
]
},
"execution_count": 9,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -275,7 +275,7 @@
},
{
"cell_type": "code",
"execution_count": 407,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand All @@ -292,7 +292,7 @@
"<IPython.lib.display.Audio object>"
]
},
"execution_count": 407,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -312,7 +312,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -360,19 +360,18 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'audio_path' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m result \u001b[38;5;241m=\u001b[39m simple_asr(\u001b[43maudio_path\u001b[49m)\n\u001b[1;32m 2\u001b[0m result\n",
"\u001b[0;31mNameError\u001b[0m: name 'audio_path' is not defined"
]
"data": {
"text/plain": [
"\"Hi, who is going? Not bad, just go back from my meeting. How about you? I'm good, just got some work done. So what was the meeting about? It was about the new project we're working on. We are going to be using a new software tool. Oh, cool. I hear of that tool before. It is going to be easy to use? Yeah, it's pretty user-friendly. I think we will be able to get up and running quickly. Great, I'm looking forward to learning more about it.\""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
Expand All @@ -382,7 +381,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -396,7 +395,7 @@
"[youtube] L-HCCaLe35w: Downloading m3u8 information\n",
"[info] L-HCCaLe35w: Downloading 1 format(s): 251\n",
"[download] Destination: /Users/jorgep/Documents/ask_public_own/ai-starter-kit-snova/sambanova_scribe/data/An Ensemble of AI Models.webm\n",
"[download] 100% of 371.51KiB in 00:00:00 at 3.13MiB/s \n",
"[download] 100% of 371.51KiB in 00:00:00 at 3.90MiB/s \n",
"[ExtractAudio] Destination: /Users/jorgep/Documents/ask_public_own/ai-starter-kit-snova/sambanova_scribe/data/An Ensemble of AI Models.mp3\n",
"Deleting original file /Users/jorgep/Documents/ask_public_own/ai-starter-kit-snova/sambanova_scribe/data/An Ensemble of AI Models.webm (pass -k to keep)\n",
"Successfully downloaded audio from: https://www.youtube.com/watch?v=L-HCCaLe35w\n"
Expand All @@ -408,7 +407,7 @@
"\"and something you said, i think, is so important to this discussion is the idea of an ensemble of models of the future. i really think we're going to see more and more of this. models are best for this, but i think we need two or three more to get to the best answer. to find best, as i say, the first time somebody uses a large language model to put something in the president's daily briefing book and it's wrong will be the last time somebody puts something in the president's daily briefing book. so getting it right is extraordinarily important here.\""
]
},
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -431,7 +430,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -447,7 +446,7 @@
},
{
"cell_type": "code",
"execution_count": 415,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -524,7 +523,7 @@
},
{
"cell_type": "code",
"execution_count": 416,
"execution_count": 14,
"metadata": {},
"outputs": [
{
Expand All @@ -544,7 +543,7 @@
" TurnTranscription(speaker=2, gender='female', sentiment='neutral', transcription=\"great i'm looking forward to learning more about it\")]"
]
},
"execution_count": 416,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -571,7 +570,7 @@
},
{
"cell_type": "code",
"execution_count": 394,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -597,7 +596,7 @@
},
{
"cell_type": "code",
"execution_count": 395,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand All @@ -606,7 +605,7 @@
"\"The mood of the person working on the project seems to be positive and enthusiastic. This can be inferred from their casual greeting, the use of words like 'cool' and 'great', and the anticipation of getting started with the new software tool.\""
]
},
"execution_count": 395,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -618,7 +617,7 @@
},
{
"cell_type": "code",
"execution_count": 396,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand All @@ -627,7 +626,7 @@
"'One interesting fact is that the tools they are going to use for the new project were discussed in a meeting.'"
]
},
"execution_count": 396,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -639,7 +638,7 @@
},
{
"cell_type": "code",
"execution_count": 397,
"execution_count": 19,
"metadata": {},
"outputs": [
{
Expand All @@ -648,7 +647,7 @@
"'two people are talking in the audio.'"
]
},
"execution_count": 397,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -675,7 +674,7 @@
},
{
"cell_type": "code",
"execution_count": 398,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -706,7 +705,7 @@
},
{
"cell_type": "code",
"execution_count": 399,
"execution_count": 21,
"metadata": {},
"outputs": [
{
Expand All @@ -715,7 +714,7 @@
"'The mood of the person working on the project appears to be positive and enthusiastic. They seem to be looking forward to learning more about the new software tool and express optimism about getting started with it quickly.'"
]
},
"execution_count": 399,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -727,7 +726,7 @@
},
{
"cell_type": "code",
"execution_count": 400,
"execution_count": 22,
"metadata": {},
"outputs": [
{
Expand All @@ -736,7 +735,7 @@
"'One interesting fact about the conversation is that the tools they are going to use for the new project were discussed in a meeting, specifically a new software tool that is user-friendly and expected to get them up and running quickly.'"
]
},
"execution_count": 400,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -748,7 +747,7 @@
},
{
"cell_type": "code",
"execution_count": 401,
"execution_count": 23,
"metadata": {},
"outputs": [
{
Expand All @@ -757,7 +756,7 @@
"'There are 2 people talking in the audio.'"
]
},
"execution_count": 401,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
4 changes: 4 additions & 0 deletions sambanova_scribe/src/scribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ def progress_hook(d: Dict[str, Any]) -> None:
],
'outtmpl': output_path + '/%(title)s.%(ext)s',
'progress_hooks': [progress_hook],
'cookiefile': os.path.join(kit_dir, 'data', 'sample_yt_cookies.txt'),
'username': os.environ.get('YOUTUBE_USERNAME'),
'password': os.environ.get('YOUTUBE_PASSWORD'),
'verbose': True,
}

try:
Expand Down

0 comments on commit 6a0a134

Please sign in to comment.