[fr] Implement multilingual support - fixes #3

addyosmani · Dec 16, 2024 · fc6c0f5 · fc6c0f5
1 parent 755666b
commit fc6c0f5
Show file tree

Hide file tree

Showing 4 changed files with 165 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -22,3 +22,6 @@ dist-ssr
 *.njsproj
 *.sln
 *.sw?
+
+# Local Netlify folder
+.netlify
diff --git a/src/components/AudioManager.tsx b/src/components/AudioManager.tsx
@@ -8,6 +8,7 @@ import Constants from "../utils/Constants";
 import { Transcriber } from "../hooks/useTranscriber";
 import Progress from "./Progress";
 import AudioRecorder from "./AudioRecorder";
+import { ModelSelector } from "./ModelSelector";
 
 export enum AudioSource {
     URL = "URL",
@@ -151,6 +152,13 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) {
         transcriber.start(audioData.buffer);
     }, [audioData, transcriber]);
 
+    // Applies the model picked in the selector. Ids ending in ".en" are
+    // treated as English-only, so the multilingual flag is set to the
+    // opposite of that suffix check.
+    const handleModelChange = useCallback(
+        (modelId: string) => {
+            transcriber.setModel(modelId);
+            transcriber.setMultilingual(!modelId.endsWith('.en'));
+        },
+        [transcriber]
+    );
+
     const convertToMp3 = async (audioBuffer: AudioBuffer): Promise<Blob> => {
         // Create an offline audio context
         const offlineCtx = new OfflineAudioContext(
@@ -238,6 +246,7 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) {
 
     return (
         <div className="space-y-6">
+
             {!audioData && (
                 <div className="flex flex-col items-center gap-4">
                     <button
@@ -276,6 +285,12 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) {
                     </div>
                 </div>
             )}
+
+            <ModelSelector 
+                selectedModel={transcriber.model}
+                onModelChange={handleModelChange}
+                className="mb-6"
+            />
 
             {isAudioLoading && (
                 <div className="w-full bg-gray-200 rounded-full h-1">

diff --git a/src/components/ModelSelector.tsx b/src/components/ModelSelector.tsx
@@ -0,0 +1,145 @@
+import React, { ChangeEvent } from 'react';
+
+/** Size tiers a dropdown entry can be tagged with. */
+type ModelSize = 'tiny' | 'small' | 'base' | 'medium' | 'large' | 'large-v2';
+
+/** One entry of the transcription-model dropdown. */
+interface ModelOption {
+    /** Model identifier; used as the <option> value and as the React key. */
+    id: string;
+    /** Short label shown first in the option text. */
+    name: string;
+    /** Longer explanation shown after the label in the option text. */
+    description: string;
+    /** Selects which language hint is shown below the dropdown. */
+    isEnglishOnly: boolean;
+    /** Size tier of the model; not read by the component in this file. */
+    size: ModelSize;
+    /** When true, "(Beta)" is appended to the option label. */
+    isBeta?: boolean;
+}
+
+/**
+ * Models offered in the dropdown, in display order.
+ *
+ * Entries are grouped by family and listed from smallest to largest; Whisper's
+ * size tiers run tiny < base < small < medium < large.
+ *
+ * `isEnglishOnly` must agree with the ".en" suffix of `id`, because
+ * AudioManager derives the multilingual flag from that suffix.
+ *
+ * `name` must not repeat the word "Beta": the renderer already appends
+ * "(Beta)" for entries with `isBeta`, so a name such as "Tiny Beta" would be
+ * displayed as "Tiny Beta (Beta)".
+ */
+const modelOptions: ModelOption[] = [
+    {
+        id: 'Xenova/whisper-tiny.en',
+        name: 'Tiny (English)',
+        description: 'Fast, lightweight model optimized for English transcription',
+        isEnglishOnly: true,
+        size: 'tiny'
+    },
+    {
+        id: 'Xenova/whisper-tiny',
+        name: 'Tiny (Multilingual)',
+        description: 'Fast, lightweight model supporting multiple languages',
+        isEnglishOnly: false,
+        size: 'tiny'
+    },
+    {
+        id: 'Xenova/whisper-base.en',
+        name: 'Base (English)',
+        description: 'Standard model for English transcription',
+        isEnglishOnly: true,
+        size: 'base'
+    },
+    {
+        id: 'Xenova/whisper-base',
+        name: 'Base (Multilingual)',
+        description: 'Standard model supporting multiple languages',
+        isEnglishOnly: false,
+        size: 'base'
+    },
+    {
+        id: 'Xenova/whisper-small.en',
+        name: 'Small (English)',
+        description: 'Balanced performance for English transcription',
+        isEnglishOnly: true,
+        size: 'small'
+    },
+    {
+        id: 'Xenova/whisper-small',
+        name: 'Small (Multilingual)',
+        description: 'Balanced performance supporting multiple languages',
+        isEnglishOnly: false,
+        size: 'small'
+    },
+    {
+        id: 'Xenova/whisper-medium.en',
+        name: 'Medium (English)',
+        description: 'High accuracy for English transcription',
+        isEnglishOnly: true,
+        size: 'medium'
+    },
+    {
+        id: 'Xenova/whisper-large',
+        name: 'Large',
+        description: 'Highest accuracy for multilingual transcription',
+        isEnglishOnly: false,
+        size: 'large'
+    },
+    {
+        id: 'Xenova/whisper-large-v2',
+        name: 'Large V2',
+        description: 'Latest version with improved accuracy',
+        isEnglishOnly: false,
+        size: 'large-v2'
+    },
+    // NOTE(review): the "nb-whisper" ids look like the Norwegian NB-Whisper
+    // checkpoints; if so, the generic "new features" descriptions below should
+    // say that instead. Confirm against the model cards before rewording.
+    {
+        id: 'Xenova/nb-whisper-tiny-beta',
+        name: 'NB-Whisper Tiny',
+        description: 'Experimental tiny model with new features',
+        isEnglishOnly: false,
+        size: 'tiny',
+        isBeta: true
+    },
+    {
+        id: 'Xenova/nb-whisper-base-beta',
+        name: 'NB-Whisper Base',
+        description: 'Experimental base model with new features',
+        isEnglishOnly: false,
+        size: 'base',
+        isBeta: true
+    },
+    {
+        id: 'Xenova/nb-whisper-small-beta',
+        name: 'NB-Whisper Small',
+        description: 'Experimental small model with new features',
+        isEnglishOnly: false,
+        size: 'small',
+        isBeta: true
+    },
+    {
+        id: 'Xenova/nb-whisper-medium-beta',
+        name: 'NB-Whisper Medium',
+        description: 'Experimental medium model with new features',
+        isEnglishOnly: false,
+        size: 'medium',
+        isBeta: true
+    }
+];
+
+/** Props accepted by ModelSelector. */
+interface Props {
+    /** Id of the currently active model; used as the value of the <select>. */
+    selectedModel: string;
+    /** Called with the chosen option's id whenever the selection changes. */
+    onModelChange: (modelId: string) => void;
+    /** Extra CSS classes appended to the wrapper element. Defaults to ''. */
+    className?: string;
+}
+
+/**
+ * Dropdown for choosing the transcription model, with a hint about whether the
+ * current model is English-only or multilingual.
+ *
+ * `selectedModel` is not guaranteed to be one of `modelOptions`. When it is
+ * not, an extra option carrying the raw id is rendered so the controlled
+ * <select> has a matching entry, and the language hint falls back to the
+ * ".en" suffix rule that AudioManager uses to set the multilingual flag.
+ */
+export function ModelSelector({ selectedModel, onModelChange, className = '' }: Props): React.ReactElement {
+    const selectedOption = modelOptions.find((option) => option.id === selectedModel);
+    // Prefer the table's flag; only guess from the id when the model is unknown.
+    const isEnglishOnly = selectedOption?.isEnglishOnly ?? selectedModel.endsWith('.en');
+
+    const handleChange = (e: ChangeEvent<HTMLSelectElement>) => {
+        onModelChange(e.target.value);
+    };
+
+    return (
+        <div className={`space-y-2 ${className}`}>
+            <label htmlFor="model-select" className="block text-sm font-medium text-slate-600">
+                Transcription Model
+            </label>
+            <select
+                id="model-select"
+                value={selectedModel}
+                onChange={handleChange}
+                className="w-full px-3 py-2 bg-white border border-slate-300 rounded-lg shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent text-slate-700"
+            >
+                {!selectedOption && (
+                    <option value={selectedModel}>{selectedModel}</option>
+                )}
+                {modelOptions.map((option) => (
+                    <option key={option.id} value={option.id}>
+                        {option.name} {option.isBeta ? '(Beta)' : ''} - {option.description}
+                    </option>
+                ))}
+            </select>
+            <p className="text-sm text-slate-500">
+                {isEnglishOnly
+                    ? 'This model is optimized for English only.'
+                    : 'This model supports multiple languages.'}
+            </p>
+        </div>
+    );
+}
diff --git a/src/worker.js b/src/worker.js
@@ -73,10 +73,8 @@ const transcribe = async (
 
     const isDistilWhisper = model.startsWith("distil-whisper/");
 
-    let modelName = model;
-    if (!isDistilWhisper && !multilingual) {
-        modelName += ".en"
-    }
+    // Use the model name directly since it's already properly formatted in ModelSelector.tsx
+    const modelName = model;
 
     const p = AutomaticSpeechRecognitionPipelineFactory;
     if (p.model !== modelName || p.quantized !== quantized) {