diff --git a/.gitignore b/.gitignore index a547bf3..c3bb0db 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,6 @@ dist-ssr *.njsproj *.sln *.sw? + +# Local Netlify folder +.netlify diff --git a/src/components/AudioManager.tsx b/src/components/AudioManager.tsx index c540a2b..1a0470c 100644 --- a/src/components/AudioManager.tsx +++ b/src/components/AudioManager.tsx @@ -8,6 +8,7 @@ import Constants from "../utils/Constants"; import { Transcriber } from "../hooks/useTranscriber"; import Progress from "./Progress"; import AudioRecorder from "./AudioRecorder"; +import { ModelSelector } from "./ModelSelector"; export enum AudioSource { URL = "URL", @@ -151,6 +152,13 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) { transcriber.start(audioData.buffer); }, [audioData, transcriber]); + const handleModelChange = useCallback((modelId: string) => { + transcriber.setModel(modelId); + // Update multilingual setting based on model selection + const isEnglishOnly = modelId.endsWith('.en'); + transcriber.setMultilingual(!isEnglishOnly); + }, [transcriber]); + const convertToMp3 = async (audioBuffer: AudioBuffer): Promise => { // Create an offline audio context const offlineCtx = new OfflineAudioContext( @@ -238,6 +246,7 @@ export function AudioManager({ transcriber, onTranscriptionComplete }: Props) { return (
+ {!audioData && (
)} + + {isAudioLoading && (
diff --git a/src/components/ModelSelector.tsx b/src/components/ModelSelector.tsx new file mode 100644 index 0000000..afbcd1a --- /dev/null +++ b/src/components/ModelSelector.tsx @@ -0,0 +1,145 @@ +import React, { ChangeEvent } from 'react'; + +interface ModelOption { + id: string; + name: string; + description: string; + isEnglishOnly: boolean; + size: 'tiny' | 'small' | 'base' | 'medium' | 'large' | 'large-v2'; + isBeta?: boolean; +} + +const modelOptions: ModelOption[] = [ + { + id: 'Xenova/whisper-tiny.en', + name: 'Tiny (English)', + description: 'Fast, lightweight model optimized for English transcription', + isEnglishOnly: true, + size: 'tiny' + }, + { + id: 'Xenova/whisper-tiny', + name: 'Tiny (Multilingual)', + description: 'Fast, lightweight model supporting multiple languages', + isEnglishOnly: false, + size: 'tiny' + }, + { + id: 'Xenova/whisper-small.en', + name: 'Small (English)', + description: 'Balanced performance for English transcription', + isEnglishOnly: true, + size: 'small' + }, + { + id: 'Xenova/whisper-small', + name: 'Small (Multilingual)', + description: 'Balanced performance supporting multiple languages', + isEnglishOnly: false, + size: 'small' + }, + { + id: 'Xenova/whisper-base.en', + name: 'Base (English)', + description: 'Standard model for English transcription', + isEnglishOnly: true, + size: 'base' + }, + { + id: 'Xenova/whisper-base', + name: 'Base (Multilingual)', + description: 'Standard model supporting multiple languages', + isEnglishOnly: false, + size: 'base' + }, + { + id: 'Xenova/whisper-medium.en', + name: 'Medium (English)', + description: 'High accuracy for English transcription', + isEnglishOnly: true, + size: 'medium' + }, + { + id: 'Xenova/whisper-large', + name: 'Large', + description: 'Highest accuracy for multilingual transcription', + isEnglishOnly: false, + size: 'large' + }, + { + id: 'Xenova/whisper-large-v2', + name: 'Large V2', + description: 'Latest version with improved accuracy', + isEnglishOnly: false, + size: 'large-v2' + }, + { + id: 'Xenova/nb-whisper-tiny-beta', + name: 'Tiny Beta', + description: 'Experimental tiny model with new features', + isEnglishOnly: false, + size: 'tiny', + isBeta: true + }, + { + id: 'Xenova/nb-whisper-small-beta', + name: 'Small Beta', + description: 'Experimental small model with new features', + isEnglishOnly: false, + size: 'small', + isBeta: true + }, + { + id: 'Xenova/nb-whisper-base-beta', + name: 'Base Beta', + description: 'Experimental base model with new features', + isEnglishOnly: false, + size: 'base', + isBeta: true + }, + { + id: 'Xenova/nb-whisper-medium-beta', + name: 'Medium Beta', + description: 'Experimental medium model with new features', + isEnglishOnly: false, + size: 'medium', + isBeta: true + } +]; + +interface Props { + selectedModel: string; + onModelChange: (modelId: string) => void; + className?: string; +} + +export function ModelSelector({ selectedModel, onModelChange, className = '' }: Props): React.ReactElement { + const handleChange = (e: ChangeEvent) => { + onModelChange(e.target.value); + }; + + return ( +
+ + +

+ {modelOptions.find(m => m.id === selectedModel)?.isEnglishOnly + ? 'This model is optimized for English only.' + : 'This model supports multiple languages.'} +

+
+ ); +} diff --git a/src/worker.js b/src/worker.js index 602edf3..5a33d86 100644 --- a/src/worker.js +++ b/src/worker.js @@ -73,10 +73,8 @@ const transcribe = async ( const isDistilWhisper = model.startsWith("distil-whisper/"); - let modelName = model; - if (!isDistilWhisper && !multilingual) { - modelName += ".en" - } + // Use the model name directly since it's already properly formatted in ModelSelector.tsx + const modelName = model; const p = AutomaticSpeechRecognitionPipelineFactory; if (p.model !== modelName || p.quantized !== quantized) {