Commit

Merge branch 'main' into bungles/main

Bunglepaws committed Nov 3, 2023
2 parents 675737b + e49186b commit 8fd1419
Showing 79 changed files with 421,365 additions and 760 deletions.
20 changes: 20 additions & 0 deletions README.md
@@ -56,6 +56,26 @@ You don't need any programming or machine learning experience. The only required



# Setting up the development environment

Note: these installation instructions are for Windows. For Linux, use the `requirements_linux_py3_10_6.txt` file instead.

Create the environment using virtualenv and Python 3.10 (prerequisite: Python 3.10):
`virtualenv envXVATrainerP310 --python=C:\Users\Dan\AppData\Local\Programs\Python\Python310\python.exe`

Activate your environment. Do this every time you launch a new terminal to work with xVATrainer:
`envXVATrainerP310\Scripts\activate`

Install PyTorch v2.0.x with CUDA. Get the v2.0 link from the PyTorch website (prerequisite: CUDA drivers from NVIDIA):
e.g. (might be outdated): `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118`
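
Optionally, verify that the CUDA build is working:
`python -c "import torch; print(torch.cuda.is_available())"`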

Install the dependencies through pip: `pip install -r reqs_gpu.txt`

Copy the folders from `./lib/_dev` into the environment (into `envXVATrainerP310/Lib/site-packages`). These are library files that needed custom modifications/bug fixes to integrate with everything else. Overwrite existing files as necessary.
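
e.g. on Windows (assuming the environment name used above):
`xcopy lib\_dev envXVATrainerP310\Lib\site-packages /E /Y`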

Make sure that you're using librosa==0.8.1 (check with `pip list`; uninstall and re-install with this version if not).
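
e.g.:
`pip uninstall librosa`
`pip install librosa==0.8.1`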


# Contributing

If you'd like to help improve xVATrainer, get in touch (e.g. via an Issue, though Discord is best) and let me know. The main areas of interest for community contributions are (though let me know your ideas!):
2 changes: 1 addition & 1 deletion index.html
@@ -713,7 +713,7 @@ <h2>Config</h2>
<div style="height: 60px;">
<div style="align-items: center;">
<span style="margin-bottom: 5px">Base batch size</span>
<span style="font-size: 10pt;color: gray;">(8 for a GPU with 6GB of VRAM, 32 for 24GB, etc)</span>
<span style="font-size: 10pt;color: gray;">(6 for a GPU with 6GB of VRAM, 24 for 24GB, etc)</span>
</div>
<div>
<input id="trainingAddConfigBatchSizeInput" type="number" value="8" min="1">
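
The revised hint scales the suggested batch size roughly linearly with available VRAM, at about one sample per GB. A minimal sketch of that rule of thumb (illustrative only; `vramGB` is a hypothetical input, not a value the app reads):

const suggestBatchSize = (vramGB) => Math.max(1, Math.floor(vramGB)) // 6GB -> 6, 24GB -> 24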
18 changes: 6 additions & 12 deletions javascript/script.js
@@ -1,5 +1,5 @@
"use strict"
window.appVersion = "1.2.0"
window.appVersion = "1.2.1"
app_version.innerHTML = "v"+window.appVersion
window.PRODUCTION = module.filename.includes("resources")
const path = PRODUCTION ? `${__dirname.replace(/\\/g,"/")}` : `${__dirname.replace(/\\/g,"/")}`
@@ -100,7 +100,7 @@ const initWebSocket = () => {
window.errorModal(errorMessage).then(() => {
trainingStopBtn.click()
})
} else if (event.data.includes("Finished training HiFi-GAN")) {
} else if (event.data.includes("Finished training")) {

window.training_state.datasetsQueue[window.training_state.trainingQueueItem].status = `Finished`
const allRowElems = Array.from(document.querySelectorAll("#trainingQueueRecordsContainer>div"))
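
Dropping the "HiFi-GAN" suffix makes the completion check stage-agnostic: any server message that contains the shorter substring now marks the queue item as finished. An illustrative check (the second message string is a hypothetical example):

// Both the old HiFi-GAN message and any other completion message now match:
["Finished training HiFi-GAN", "Finished training xVAPitch"].every(msg => msg.includes("Finished training")) // true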
@@ -588,7 +588,7 @@ btn_deleteall.addEventListener("click", () => {
})
btn_autotranscribe.addEventListener("click", () => {
if (window.appState.currentDataset!=undefined) {
-confirmModal(`Are you sure you'd like to kick off the auto-transcription process?<br>This will run for all 22050Hz audio with no text transcript.`).then(confirmation => {
+confirmModal(`Are you sure you'd like to kick off the auto-transcription process?<br>This will run for all 22050Hz audio with no text transcript.<br><br>Configure the transcription model and language in the tools setting first.`).then(confirmation => {
if (confirmation) {
setTimeout(() => {
createModal("spinner", "Auto-transcribing...<br>This may take a few minutes if there are hundreds of lines.<br>Audio files must be mono 22050Hz<br><br>This window will close if there is an error.")
@@ -599,7 +599,7 @@ btn_autotranscribe.addEventListener("click", () => {

window.tools_state.taskId = "transcribe"
window.tools_state.inputDirectory = inputDirectory
window.tools_state.outputDirectory = ""
window.tools_state.outputDirectory = `${window.userSettings.datasetsPath}/${window.appState.currentDataset}`
window.tools_state.inputFileType = "folder"

window.tools_state.spinnerElem = toolsSpinner
@@ -1481,14 +1481,8 @@ window.showUpdates()
// Patreon
// =======
window.setupModal(patreonIcon, patreonContainer, () => {
-const data = fs.readFileSync(`${path}/patreon.txt`, "utf8")
-const names = new Set()
-data.split("\r\n").forEach(name => names.add(name))
-
-let content = ``
-creditsList.innerHTML = ""
-names.forEach(name => content += `<br>${name}`)
-creditsList.innerHTML = content
+const data = fs.readFileSync(`${path}/patreon.txt`, "utf8") + ", minermanb"
+creditsList.innerHTML = data
})

// Training
19 changes: 9 additions & 10 deletions javascript/tools.js
@@ -38,9 +38,7 @@ const lang_names = {
}

const makeTranscriptionModelDropdown = () => {
-const languages = fs.readdirSync(`${window.path}/python/transcribe/wav2vec2`)
-.filter(name => !name.startsWith("_")&&!name.includes("."))
-.map(langCode => {return [langCode, lang_names[langCode]]}).sort((a,b) => a[1]<b[1]?-1:1)
+const languages = Object.keys(lang_names).map(langCode => {return [langCode, lang_names[langCode]]}).sort((a,b) => a[1]<b[1]?-1:1)
const selectElem = createElem("select")

// Whisper
@@ -64,20 +62,21 @@ const makeTranscriptionModelDropdown = () => {
whisperLangSelect.value = "en"

// Wav2vec2
-languages.forEach(lang => {
-const optionElem = createElem("option", {value: `wav2vec2_${lang[0]}`})
-optionElem.innerHTML = `Wav2vec2: ${lang[1]}`
-selectElem.appendChild(optionElem)
-})
+// languages.forEach(lang => {
+// const optionElem = createElem("option", {value: `wav2vec2_${lang[0]}`})
+// optionElem.innerHTML = `Wav2vec2: ${lang[1]}`
+// selectElem.appendChild(optionElem)
+// })
selectElem.value = "whisper_medium"
const modelDescription = createElem("div", "Transcription model (more available on nexus)")
const rowItemModel = createElem("div", createElem("div", modelDescription), createElem("div", selectElem))

const whisperLangDescription = createElem("div", "Whisper language")
const whisperLangDescription = createElem("div", "Language")
const rowItemWhisperLang = createElem("div", createElem("div", whisperLangDescription), createElem("div", whisperLangSelect))
return [rowItemModel, selectElem, rowItemWhisperLang, whisperLangSelect]
}


const tools = {
"Audio formatting": {
taskId: "formatting",
@@ -507,7 +506,7 @@ const tools = {
}
},
}

+delete tools["AI speaker diarization"] // TEMPORARY, until pyannote dep is fixed

// Brute force progress indicator, for when the WebSockets don't work
setInterval(() => {
41 changes: 24 additions & 17 deletions javascript/train.js
@@ -337,8 +337,8 @@ window.refreshTrainingQueueList = () => {

configButton.addEventListener("click", () => {
configAnExistingItem = true
-window.training_state.datasetsQueue[di].dataset_path = window.training_state.datasetsQueue[di].dataset_path.replaceAll(/\\/, "/")
-window.training_state.datasetsQueue[di].output_path = window.training_state.datasetsQueue[di].output_path.replaceAll(/\\/, "/")
+window.training_state.datasetsQueue[di].dataset_path = window.training_state.datasetsQueue[di].dataset_path.replace(/\\/g, "/")
+window.training_state.datasetsQueue[di].output_path = window.training_state.datasetsQueue[di].output_path.replace(/\\/g, "/")
window.showConfigMenu(window.training_state.datasetsQueue[di], di)
})
})
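
The change repeated throughout this file fixes a runtime bug: String.prototype.replaceAll throws a TypeError when passed a regex without the g flag, so the old replaceAll(/\\/, "/") calls failed on Windows-style paths. A minimal sketch of the difference (the sample path is hypothetical):

const p = "C:\\Users\\example\\dataset" // hypothetical Windows-style path
// p.replaceAll(/\\/, "/")              // throws TypeError: non-global RegExp
p.replace(/\\/g, "/")                   // "C:/Users/example/dataset"
p.replaceAll("\\", "/")                 // the string form also works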
@@ -656,15 +656,15 @@ cancelConfig.addEventListener("click", () => {
queueItemConfigModalContainer.style.display = "none"
})
trainingAddConfigDatasetPathInput.addEventListener("keyup", e => {
-trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replaceAll(/\\/, "/")
+trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replace(/\\/g, "/")
})
trainingAddConfigOutputPathInput.addEventListener("keyup", e => {
-trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/")
+trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replace(/\\/g, "/")
})
acceptConfig.addEventListener("click", () => {

-trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replaceAll(/\\/, "/")
-trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/")
+trainingAddConfigDatasetPathInput.value = trainingAddConfigDatasetPathInput.value.replace(/\\/g, "/")
+trainingAddConfigOutputPathInput.value = trainingAddConfigOutputPathInput.value.replace(/\\/g, "/")

if (!trainingAddConfigDatasetPathInput.value.trim().length) {
return window.errorModal("You need to specify where your dataset is located.", queueItemConfigModalContainer)
@@ -699,7 +699,7 @@ acceptConfig.addEventListener("click", () => {

let xvapitch_checkpoint = "[base]"
if (xvapitch_ckpt_option_other.checked) {
-xvapitch_checkpoint = trainingAddConfigCkptPathInput.replaceAll(/\\/, "/")
+xvapitch_checkpoint = trainingAddConfigCkptPathInput.replace(/\\/g, "/")
}

// TODO
@@ -708,10 +708,10 @@ acceptConfig.addEventListener("click", () => {
const queueIndex = window.training_state.currentlyConfiguringDatasetI

const configData = {
"dataset_path": window.training_state.datasetsQueue[queueIndex].dataset_path.replaceAll(/\\/, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/"),
"dataset_path": window.training_state.datasetsQueue[queueIndex].dataset_path.replace(/\\/g, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replace(/\\/g, "/"),
"checkpoint": xvapitch_checkpoint,
// "hifigan_checkpoint": hg_ckpt.replaceAll(/\\/, "/"),
// "hifigan_checkpoint": hg_ckpt.replace(/\\/g, "/"),

"use_amp": trainingAddConfigUseAmp.checked ? "true" : "false",
"num_workers": parseInt(trainingAddConfigWorkersInput.value),
@@ -731,10 +731,10 @@ acceptConfig.addEventListener("click", () => {
const configData = {
"status": "Ready",

"dataset_path": trainingAddConfigDatasetPathInput.value.replaceAll(/\\/, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replaceAll(/\\/, "/"),
"dataset_path": trainingAddConfigDatasetPathInput.value.replace(/\\/g, "/"),
"output_path": trainingAddConfigOutputPathInput.value.replace(/\\/g, "/"),
"checkpoint": xvapitch_checkpoint,
// "hifigan_checkpoint": hg_ckpt.replaceAll(/\\/, "/"),
// "hifigan_checkpoint": hg_ckpt.replace(/\\/g, "/"),

"use_amp": trainingAddConfigUseAmp.checked ? "true" : "false",
"num_workers": parseInt(trainingAddConfigWorkersInput.value),
@@ -752,7 +752,7 @@ acceptConfig.addEventListener("click", () => {
window.refreshTrainingQueueList()
}

-let xvap_ckpt = trainingAddConfigCkptPathInput.value.trim().replaceAll(/\\/, "/")
+let xvap_ckpt = trainingAddConfigCkptPathInput.value.trim().replace(/\\/g, "/")
if (xvapitch_ckpt_option_base.checked) {
xvap_ckpt = "[base]"
}
@@ -780,6 +780,7 @@ trainingQueueBtnClear.addEventListener("click", () => {
window.confirmModal("Are you sure you'd like to clear the training queue, losing all configured model training runs?").then(resp => {
if (resp) {
window.training_state.datasetsQueue = []
+window.training_state.selectedQueueItem = undefined
window.refreshTrainingQueueList()
fs.writeFileSync(`${window.path}/training_queue.json`, JSON.stringify(window.training_state.datasetsQueue, null, 4))
}
@@ -895,9 +896,15 @@ exportSubmitButton.addEventListener("click", () => {
const metadataJSON = JSON.parse(fs.readFileSync(`${window.userSettings.datasetsPath}/${window.appState.currentDataset}/dataset_metadata.json`, "utf8"))
const voiceId = metadataJSON.games[0].voiceId

-metadataJSON.games[0].resemblyzer = trainingJSON.games[0].resemblyzer
-metadataJSON.games[0].voiceId = voiceId//window.appState.currentDataset
-fs.writeFileSync(`${modelExport_outputDir.value.trim()}/${voiceId}.json`, JSON.stringify(metadataJSON, null, 4))
+trainingJSON.author = metadataJSON.author
+trainingJSON.license = metadataJSON.license
+trainingJSON.lang = metadataJSON.lang
+trainingJSON.games[0].gameId = metadataJSON.games[0].gameId
+trainingJSON.games[0].voiceId = metadataJSON.games[0].voiceId
+trainingJSON.games[0].gender = metadataJSON.games[0].gender
+trainingJSON.games[0].voiceName = metadataJSON.games[0].voiceName
+
+fs.writeFileSync(`${modelExport_outputDir.value.trim()}/${voiceId}.json`, JSON.stringify(trainingJSON, null, 4))
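
The export now writes trainingJSON, with identifying fields copied over from dataset_metadata.json, instead of writing the metadata file itself. A condensed, hypothetical helper equivalent to the assignments above (not in the codebase; assumes the same object shapes):

const copyMetadata = (meta, training) => {
    ["author", "license", "lang"].forEach(k => training[k] = meta[k])
    ;["gameId", "voiceId", "gender", "voiceName"].forEach(k => training.games[0][k] = meta.games[0][k])
    return training
}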



1 change: 1 addition & 0 deletions lib/_dev/g2pC-0.9.9.3.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip