Commit

Merge branch 'main' into bungles/main

Bunglepaws committed Nov 3, 2023
2 parents 675737b + e49186b commit 8fd1419
Showing 79 changed files with 421,365 additions and 760 deletions.
20 changes: 20 additions & 0 deletions README.md
@@ -56,6 +56,26 @@ You don't need any programming or machine learning experience. The only required



# Setting up the development environment

Note: these installation instructions are for Windows. For Linux, use the `requirements_linux_py3_10_6.txt` file instead.

Create the environment using virtualenv and Python 3.10 (prerequisite: Python 3.10):
`virtualenv envXVATrainerP310 --python=C:\Users\Dan\AppData\Local\Programs\Python\Python310\python.exe`

Activate your environment. Do this every time you launch a new terminal to work with xVATrainer:
`envXVATrainerP310\Scripts\activate`

Install PyTorch v2.0.x with CUDA. Get the v2.0 link from the PyTorch website (prerequisite: CUDA drivers from NVIDIA):
e.g. (might be outdated): `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118`
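
Optionally, verify that the CUDA build is working:
`python -c "import torch; print(torch.cuda.is_available())"`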

Install the dependencies through pip: `pip install -r reqs_gpu.txt`

Copy the folders from `./lib/_dev` into the environment (into `envXVATrainerP310/Lib/site-packages`). These are library files that needed custom modifications/bug fixes to integrate with everything else. Overwrite existing files as necessary.
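
e.g. on Windows (assuming the environment name used above):
`xcopy lib\_dev envXVATrainerP310\Lib\site-packages /E /Y`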

Make sure that you're using librosa==0.8.1 (check with `pip list`; uninstall and re-install with this version if not).
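
e.g.:
`pip uninstall librosa`
`pip install librosa==0.8.1`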


# Contributing

If you'd like to help improve xVATrainer, get in touch (e.g. via an Issue, though Discord is best) and let me know. The main areas of interest for community contributions are (though let me know your ideas!):
2 changes: 1 addition & 1 deletion index.html
@@ -713,7 +713,7 @@ <h2>Config</h2>
<div style="height: 60px;">
<div style="align-items: center;">
<span style="margin-bottom: 5px">Base batch size</span>
<span style="font-size: 10pt;color: gray;">(8 for a GPU with 6GB of VRAM, 32 for 24GB, etc)</span>
<span style="font-size: 10pt;color: gray;">(6 for a GPU with 6GB of VRAM, 24 for 24GB, etc)</span>
</div>
<div>
<input id="trainingAddConfigBatchSizeInput" type="number" value="8" min="1">
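
The revised hint scales the suggested batch size roughly linearly with available VRAM, at about one sample per GB. A minimal sketch of that rule of thumb (illustrative only; `vramGB` is a hypothetical input, not a value the app reads):

const suggestBatchSize = (vramGB) => Math.max(1, Math.floor(vramGB)) // 6GB -> 6, 24GB -> 24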
18 changes: 6 additions & 12 deletions javascript/script.js
@@ -1,5 +1,5 @@
"use strict"
window.appVersion = "1.2.0"
window.appVersion = "1.2.1"
app_version.innerHTML = "v"+window.appVersion
window.PRODUCTION = module.filename.includes("resources")
const path = PRODUCTION ? `${__dirname.replace(/\\/g,"/")}` : `${__dirname.replace(/\\/g,"/")}`
@@ -100,7 +100,7 @@ const initWebSocket = () => {
window.errorModal(errorMessage).then(() => {
trainingStopBtn.click()
})
} else if (event.data.includes("Finished training HiFi-GAN")) {
} else if (event.data.includes("Finished training")) {

window.training_state.datasetsQueue[window.training_state.trainingQueueItem].status = `Finished`
const allRowElems = Array.from(document.querySelectorAll("#trainingQueueRecordsContainer>div"))
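
Dropping the "HiFi-GAN" suffix makes the completion check stage-agnostic: any server message that contains the shorter substring now marks the queue item as finished. An illustrative check (the second message string is a hypothetical example):

// Both the old HiFi-GAN message and any other completion message now match:
["Finished training HiFi-GAN", "Finished training xVAPitch"].every(msg => msg.includes("Finished training")) // true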
@@ -588,7 +588,7 @@ btn_deleteall.addEventListener("click", () => {
})
btn_autotranscribe.addEventListener("click", () => {
if (window.appState.currentDataset!=undefined) {
-confirmModal(`Are you sure you'd like to kick off the auto-transcription process?<br>This will run for all 22050Hz audio with no text transcript.`).then(confirmation => {
+confirmModal(`Are you sure you'd like to kick off the auto-transcription process?<br>This will run for all 22050Hz audio with no text transcript.<br><br>Configure the transcription model and language in the tools setting first.`).then(confirmation => {
if (confirmation) {
setTimeout(() => {
createModal("spinner", "Auto-transcribing...<br>This may take a few minutes if there are hundreds of lines.<br>Audio files must be mono 22050Hz<br><br>This window will close if there is an error.")
@@ -599,7 +599,7 @@ btn_autotranscribe.addEventListener("click", () => {

window.tools_state.taskId = "transcribe"
window.tools_state.inputDirectory = inputDirectory
window.tools_state.outputDirectory = ""
window.tools_state.outputDirectory = `${window.userSettings.datasetsPath}/${window.appState.currentDataset}`
window.tools_state.inputFileType = "folder"

window.tools_state.spinnerElem = toolsSpinner
@@ -1481,14 +1481,8 @@ window.showUpdates()
// Patreon
// =======
window.setupModal(patreonIcon, patreonContainer, () => {
-const data = fs.readFileSync(`${path}/patreon.txt`, "utf8")
-const names = new Set()
-data.split("\r\n").forEach(name => names.add(name))
-
-let content = ``
-creditsList.innerHTML = ""
-names.forEach(name => content += `<br>${name}`)
-creditsList.innerHTML = content
+const data = fs.readFileSync(`${path}/patreon.txt`, "utf8") + ", minermanb"
+creditsList.innerHTML = data
})

// Training
19 changes: 9 additions & 10 deletions javascript/tools.js
@@ -38,9 +38,7 @@ const lang_names = {
}

const makeTranscriptionModelDropdown = () => {
-const languages = fs.readdirSync(`${window.path}/python/transcribe/wav2vec2`)
-.filter(name => !name.startsWith("_")&&!name.includes("."))
-.map(langCode => {return [langCode, lang_names[langCode]]}).sort((a,b) => a[1]<b[1]?-1:1)
+const languages = Object.keys(lang_names).map(langCode => {return [langCode, lang_names[langCode]]}).sort((a,b) => a[1]<b[1]?-1:1)
const selectElem = createElem("select")

// Whisper
@@ -64,20 +62,21 @@ const makeTranscriptionModelDropdown = () => {
whisperLangSelect.value = "en"

// Wav2vec2
-languages.forEach(lang => {
-const optionElem = createElem("option", {value: `wav2vec2_${lang[0]}`})
-optionElem.innerHTML = `Wav2vec2: ${lang[1]}`
-selectElem.appendChild(optionElem)
-})
+// languages.forEach(lang => {
+// const optionElem = createElem("option", {value: `wav2vec2_${lang[0]}`})
+// optionElem.innerHTML = `Wav2vec2: ${lang[1]}`
+// selectElem.appendChild(optionElem)
+// })
selectElem.value = "whisper_medium"
const modelDescription = createElem("div", "Transcription model (more available on nexus)")
const rowItemModel = createElem("div", createElem("div", modelDescription), createElem("div", selectElem))

const whisperLangDescription = createElem("div", "Whisper language")
const whisperLangDescription = createElem("div", "Language")
const rowItemWhisperLang = createElem("div", createElem("div", whisperLangDescription), createElem("div", whisperLangSelect))
return [rowItemModel, selectElem, rowItemWhisperLang, whisperLangSelect]
}


const tools = {
"Audio formatting": {
taskId: "formatting",
@@ -507,7 +506,7 @@ const tools = {
}
},
}

+delete tools["AI speaker diarization"] // TEMPORARY, until pyannote dep is fixed

// Brute force progress indicator, for when the WebSockets don't work
setInterval(() => {
41 changes: 24 additions & 17 deletions javascript/train.js
@@ -337,8 +337,8 @@ window.refreshTrainingQueueList = () => {

configButton.addEventListener("click", () => {
configAnExistingItem = true
-window.training_state.datasetsQueue[di].dataset_path = window.training_state.datasetsQueue[di].dataset_path.replaceAll(/\\/, "/")
-window.training_state.datasetsQueue[di].output_path = window.training_state.datasetsQueue[di].output_path.replaceAll(/\\/, "/")
+window.training_state.datasetsQueue[di].dataset_path = window.training_state.datasetsQueue[di].dataset_path.replace(/\\/g, "/")
+window.training_state.datasetsQueue[di].output_path = window.training_state.datasetsQueue[di].output_path.replace(/\\/g, "/")
window.showConfigMenu(window.training_state.datasetsQueue[di], di)
})
})
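
The change repeated throughout this file fixes a runtime bug: String.prototype.replaceAll throws a TypeError when passed a regex without the g flag, so the old replaceAll(/\\/, "/") calls failed on Windows-style paths. A minimal sketch of the difference (the sample path is hypothetical):

const p = "C:\\Users\\example\\dataset" // hypothetical Windows-style path
// p.replaceAll(/\\/, "/")              // throws TypeError: non-global RegExp
p.replace(/\\/g, "/")                   // "C:/Users/example/dataset"
p.replaceAll("\\", "/")                 // the string form also works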
@@ -656,15 +656,15 @@ cancelConfig.addEventListener("click", () => {
queueItemConfigModalContainer.style.display = "none"
})
trainingAddConfigDatasetPathInput.addEventListener("keyup", e => {
-trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replaceAll(/\\/, "/")
+trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replace(/\\/g, "/")
})
trainingAddConfigOutputPathInput.addEventListener("keyup", e => {
-trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/")
+trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replace(/\\/g, "/")
})
acceptConfig.addEventListener("click", () => {

-trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replaceAll(/\\/, "/")
-trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/")
+trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replace(/\\/g, "/")
+trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replace(/\\/g, "/")

if (!trainingAddConfigDatasetPathInput.value.trim().length) {
return window.errorModal("You need to specify where your dataset is located.", queueItemConfigModalContainer)
@@ -699,7 +699,7 @@ acceptConfig.addEventListener("click", () => {

let xvapitch_checkpoint = "[base]"
if (xvapitch_ckpt_option_other.checked) {
-xvapitch_checkpoint = trainingAddConfigCkptPathInput.replaceAll(/\\/, "/")
+xvapitch_checkpoint = trainingAddConfigCkptPathInput.replace(/\\/g, "/")
}

// TODO
@@ -708,10 +708,10 @@ acceptConfig.addEventListener("click", () => {
const queueIndex = window.training_state.currentlyConfiguringDatasetI

const configData = {
"dataset_path": window.training_state.datasetsQueue[queueIndex].dataset_path.replaceAll(/\\/, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/"),
"dataset_path": window.training_state.datasetsQueue[queueIndex].dataset_path.replace(/\\/g, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replace(/\\/g, "/"),
"checkpoint": xvapitch_checkpoint,
// "hifigan_checkpoint": hg_ckpt.replaceAll(/\\/, "/"),
// "hifigan_checkpoint": hg_ckpt.replace(/\\/g, "/"),

"use_amp": trainingAddConfigUseAmp.checked ? "true" : "false",
"num_workers": parseInt(trainingAddConfigWorkersInput.value),
@@ -731,10 +731,10 @@ acceptConfig.addEventListener("click", () => {
const configData = {
"status": "Ready",

"dataset_path": trainingAddConfigDatasetPathInput.value.replaceAll(/\\/, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/"),
"dataset_path": trainingAddConfigDatasetPathInput.value.replace(/\\/g, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replace(/\\/g, "/"),
"checkpoint": xvapitch_checkpoint,
// "hifigan_checkpoint": hg_ckpt.replaceAll(/\\/, "/"),
// "hifigan_checkpoint": hg_ckpt.replace(/\\/g, "/"),

"use_amp": trainingAddConfigUseAmp.checked ? "true" : "false",
"num_workers": parseInt(trainingAddConfigWorkersInput.value),
@@ -752,7 +752,7 @@ acceptConfig.addEventListener("click", () => {
window.refreshTrainingQueueList()
}

-let xvap_ckpt = trainingAddConfigCkptPathInput.value.trim().replaceAll(/\\/, "/")
+let xvap_ckpt = trainingAddConfigCkptPathInput.value.trim().replace(/\\/g, "/")
if (xvapitch_ckpt_option_base.checked) {
xvap_ckpt = "[base]"
}
@@ -780,6 +780,7 @@ trainingQueueBtnClear.addEventListener("click", () => {
window.confirmModal("Are you sure you'd like to clear the training queue, losing all configured model training runs?").then(resp => {
if (resp) {
window.training_state.datasetsQueue = []
+window.training_state.selectedQueueItem = undefined
window.refreshTrainingQueueList()
fs.writeFileSync(`${window.path}/training_queue.json`, JSON.stringify(window.training_state.datasetsQueue, null, 4))
}
@@ -895,9 +896,15 @@ exportSubmitButton.addEventListener("click", () => {
const metadataJSON = JSON.parse(fs.readFileSync(`${window.userSettings.datasetsPath}/${window.appState.currentDataset}/dataset_metadata.json`, "utf8"))
const voiceId = metadataJSON.games[0].voiceId

-metadataJSON.games[0].resemblyzer = trainingJSON.games[0].resemblyzer
-metadataJSON.games[0].voiceId = voiceId//window.appState.currentDataset
-fs.writeFileSync(`${modelExport_outputDir.value.trim()}/${voiceId}.json`, JSON.stringify(metadataJSON, null, 4))
+trainingJSON.author = metadataJSON.author
+trainingJSON.license = metadataJSON.license
+trainingJSON.lang = metadataJSON.lang
+trainingJSON.games[0].gameId = metadataJSON.games[0].gameId
+trainingJSON.games[0].voiceId = metadataJSON.games[0].voiceId
+trainingJSON.games[0].gender = metadataJSON.games[0].gender
+trainingJSON.games[0].voiceName = metadataJSON.games[0].voiceName
+
+fs.writeFileSync(`${modelExport_outputDir.value.trim()}/${voiceId}.json`, JSON.stringify(trainingJSON, null, 4))
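
The export now writes trainingJSON, with identifying fields copied over from dataset_metadata.json, instead of writing the metadata file itself. A condensed, hypothetical helper equivalent to the assignments above (not in the codebase; assumes the same object shapes):

const copyMetadata = (meta, training) => {
    ["author", "license", "lang"].forEach(k => training[k] = meta[k])
    ;["gameId", "voiceId", "gender", "voiceName"].forEach(k => training.games[0][k] = meta.games[0][k])
    return training
}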



1 change: 1 addition & 0 deletions lib/_dev/g2pC-0.9.9.3.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip