Skip to content

Commit

Permalink
[fr] Implement multilingual support - fixes #3
Browse files Browse the repository at this point in the history
  • Loading branch information
addyosmani committed Dec 16, 2024
1 parent 755666b commit fc6c0f5
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ dist-ssr
*.njsproj
*.sln
*.sw?

# Local Netlify folder
.netlify
15 changes: 15 additions & 0 deletions src/components/AudioManager.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import Constants from "../utils/Constants";
import { Transcriber } from "../hooks/useTranscriber";
import Progress from "./Progress";
import AudioRecorder from "./AudioRecorder";
import { ModelSelector } from "./ModelSelector";

export enum AudioSource {
URL = "URL",
Expand Down Expand Up @@ -151,6 +152,13 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) {
transcriber.start(audioData.buffer);
}, [audioData, transcriber]);

// Applies a model picked in the selector and keeps the transcriber's
// multilingual flag in step with it.
const handleModelChange = useCallback(
  (modelId: string) => {
    transcriber.setModel(modelId);
    // Ids ending in ".en" are treated as English-only; everything else is
    // treated as multilingual.
    const supportsMultipleLanguages = !modelId.endsWith('.en');
    transcriber.setMultilingual(supportsMultipleLanguages);
  },
  [transcriber],
);

const convertToMp3 = async (audioBuffer: AudioBuffer): Promise<Blob> => {
// Create an offline audio context
const offlineCtx = new OfflineAudioContext(
Expand Down Expand Up @@ -238,6 +246,7 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) {

return (
<div className="space-y-6">

{!audioData && (
<div className="flex flex-col items-center gap-4">
<button
Expand Down Expand Up @@ -276,6 +285,12 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) {
</div>
</div>
)}

<ModelSelector
selectedModel={transcriber.model}
onModelChange={handleModelChange}
className="mb-6"
/>

{isAudioLoading && (
<div className="w-full bg-gray-200 rounded-full h-1">
Expand Down
145 changes: 145 additions & 0 deletions src/components/ModelSelector.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import React, { ChangeEvent } from 'react';

/**
 * Describes one entry offered in the model drop-down.
 */
interface ModelOption {
/** Model identifier; used as the <option> value and handed to onModelChange. */
id: string;
/** Short label shown at the start of the option text. */
name: string;
/** Longer blurb shown after the name in the option text. */
description: string;
/** True for English-only models; decides which hint is shown under the drop-down. */
isEnglishOnly: boolean;
/** Size tier of the model. Not read by the component in this file. */
size: 'tiny' | 'small' | 'base' | 'medium' | 'large' | 'large-v2';
/** Marks the model as beta; the option label flags beta entries. */
isBeta?: boolean;
}

// Catalogue of models offered in the drop-down; options are rendered in array order.
// In every entry below, isEnglishOnly is true exactly when the id ends in ".en".
// NOTE(review): the "Small" entries are listed before the "Base" entries — confirm
// this is the intended display order.
// NOTE(review): there is a 'Xenova/whisper-medium.en' entry but no multilingual
// medium counterpart — confirm the omission is deliberate.
const modelOptions: ModelOption[] = [
{
id: 'Xenova/whisper-tiny.en',
name: 'Tiny (English)',
description: 'Fast, lightweight model optimized for English transcription',
isEnglishOnly: true,
size: 'tiny'
},
{
id: 'Xenova/whisper-tiny',
name: 'Tiny (Multilingual)',
description: 'Fast, lightweight model supporting multiple languages',
isEnglishOnly: false,
size: 'tiny'
},
{
id: 'Xenova/whisper-small.en',
name: 'Small (English)',
description: 'Balanced performance for English transcription',
isEnglishOnly: true,
size: 'small'
},
{
id: 'Xenova/whisper-small',
name: 'Small (Multilingual)',
description: 'Balanced performance supporting multiple languages',
isEnglishOnly: false,
size: 'small'
},
{
id: 'Xenova/whisper-base.en',
name: 'Base (English)',
description: 'Standard model for English transcription',
isEnglishOnly: true,
size: 'base'
},
{
id: 'Xenova/whisper-base',
name: 'Base (Multilingual)',
description: 'Standard model supporting multiple languages',
isEnglishOnly: false,
size: 'base'
},
{
id: 'Xenova/whisper-medium.en',
name: 'Medium (English)',
description: 'High accuracy for English transcription',
isEnglishOnly: true,
size: 'medium'
},
{
id: 'Xenova/whisper-large',
name: 'Large',
description: 'Highest accuracy for multilingual transcription',
isEnglishOnly: false,
size: 'large'
},
{
id: 'Xenova/whisper-large-v2',
name: 'Large V2',
description: 'Latest version with improved accuracy',
isEnglishOnly: false,
size: 'large-v2'
},
// Beta entries (isBeta: true). Their names already contain the word "Beta".
// NOTE(review): the "nb-whisper" ids look like a different model family from the
// "whisper-*" ids above — confirm the generic descriptions match what these
// checkpoints actually are.
{
id: 'Xenova/nb-whisper-tiny-beta',
name: 'Tiny Beta',
description: 'Experimental tiny model with new features',
isEnglishOnly: false,
size: 'tiny',
isBeta: true
},
{
id: 'Xenova/nb-whisper-small-beta',
name: 'Small Beta',
description: 'Experimental small model with new features',
isEnglishOnly: false,
size: 'small',
isBeta: true
},
{
id: 'Xenova/nb-whisper-base-beta',
name: 'Base Beta',
description: 'Experimental base model with new features',
isEnglishOnly: false,
size: 'base',
isBeta: true
},
{
id: 'Xenova/nb-whisper-medium-beta',
name: 'Medium Beta',
description: 'Experimental medium model with new features',
isEnglishOnly: false,
size: 'medium',
isBeta: true
}
];

/** Props accepted by ModelSelector. */
interface Props {
/** Id of the currently selected model; used as the <select> value. */
selectedModel: string;
/** Called with the chosen model id whenever the selection changes. */
onModelChange: (modelId: string) => void;
/** Extra CSS classes appended to the wrapper element's class list; defaults to ''. */
className?: string;
}

/**
 * Drop-down for choosing the transcription model, with a one-line hint that
 * says whether the current model is English-only or multilingual.
 *
 * The component is controlled: it renders `selectedModel` and reports changes
 * through `onModelChange`; it keeps no state of its own.
 *
 * @param selectedModel - Id of the model currently in use. It may be an id that
 *   is not part of `modelOptions`; it is then shown as an extra option so the
 *   drop-down reflects the real selection.
 * @param onModelChange - Invoked with the newly chosen model id.
 * @param className - Optional extra CSS classes for the wrapper element.
 * @returns The label, select and hint wrapped in a single element.
 */
export function ModelSelector({ selectedModel, onModelChange, className = '' }: Props): React.ReactElement {
  const handleChange = (e: ChangeEvent<HTMLSelectElement>) => {
    onModelChange(e.target.value);
  };

  const selectedOption = modelOptions.find((m) => m.id === selectedModel);

  // A controlled <select> whose value matches none of its options displays the
  // first option instead, which would misreport the model in use. Surface ids
  // that are missing from the catalogue as their own option.
  const isUnlisted = selectedModel !== '' && selectedOption === undefined;

  // For ids missing from the catalogue, fall back to the ".en" suffix: every
  // English-only entry in modelOptions ends with it and no multilingual one does.
  const isEnglishOnly = selectedOption?.isEnglishOnly ?? selectedModel.endsWith('.en');

  return (
    <div className={`space-y-2 ${className}`}>
      <label htmlFor="model-select" className="block text-sm font-medium text-slate-600">
        Transcription Model
      </label>
      <select
        id="model-select"
        value={selectedModel}
        onChange={handleChange}
        className="w-full px-3 py-2 bg-white border border-slate-300 rounded-lg shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent text-slate-700"
      >
        {isUnlisted && (
          <option value={selectedModel}>{selectedModel}</option>
        )}
        {modelOptions.map((option) => {
          // Only append the marker when the name does not already say "Beta",
          // otherwise the label reads e.g. "Tiny Beta (Beta)".
          const betaSuffix = option.isBeta && !/\bbeta\b/i.test(option.name) ? ' (Beta)' : '';
          return (
            <option key={option.id} value={option.id}>
              {`${option.name}${betaSuffix} - ${option.description}`}
            </option>
          );
        })}
      </select>
      <p className="text-sm text-slate-500">
        {isEnglishOnly
          ? 'This model is optimized for English only.'
          : 'This model supports multiple languages.'}
      </p>
    </div>
  );
}
6 changes: 2 additions & 4 deletions src/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,8 @@ const transcribe = async (

const isDistilWhisper = model.startsWith("distil-whisper/");

let modelName = model;
if (!isDistilWhisper && !multilingual) {
modelName += ".en"
}
// Use the model name directly since it's already properly formatted in ModelSelector.tsx
const modelName = model;

const p = AutomaticSpeechRecognitionPipelineFactory;
if (p.model !== modelName || p.quantized !== quantized) {
Expand Down

0 comments on commit fc6c0f5

Please sign in to comment.