forked from vovanphuc/hum2song
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
39 changed files
with
2,891 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
*.txt | ||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
.idea/ | ||
data_faiss/ | ||
checkpoints/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
!requirements.txt | ||
!checkpoints/ | ||
!README.txt | ||
!data_val.txt | ||
!data_train.txt | ||
!full_data_val.txt | ||
!full_data_train.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Build on NVIDIA's NGC PyTorch image (release 21.07, Python 3).
FROM nvcr.io/nvidia/pytorch:21.07-py3
USER root

# System packages. The apt package lists are deleted in the same layer so they
# are not baked into the image (refreshing them again after the install, as was
# done before, only added size).
RUN apt-get update && \
    DEBIAN_FRONTEND="noninteractive" apt-get install ffmpeg libavcodec-extra -y && \
    rm -rf /var/lib/apt/lists/*

# Python packages. librosa is pinned to 0.8.0; llvmlite is reinstalled over the
# copy shipped in the base image before librosa is installed.
RUN python3 -m pip install --upgrade pip
RUN pip install llvmlite --ignore-installed && \
    pip install librosa==0.8.0
RUN pip install pydub && \
    pip install faiss-gpu

# NOTE: if the sox line below is re-enabled, prefix it with `apt-get update &&`
# because the package lists are removed above.
# RUN apt-get install libsox-fmt-all libsox-dev sox > /dev/null
RUN python -m pip install torchaudio > /dev/null && \
    python -m pip install git+https://github.com/facebookresearch/WavAugment.git > /dev/null

RUN pip install opencv-python && \
    pip install visdom

# Copy the repository into the image and make it the working directory.
COPY . /model
WORKDIR /model

# Exec form: bash runs directly instead of being wrapped in `/bin/sh -c`.
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Zalo AI challenge 2021 task Hum to Song
|
||
## pipeline: | ||
<p align="center"> | ||
<img src="image/pipeline.png"> | ||
</p> | ||
|
||
|
||
## Chuẩn bị dữ liệu cho quá trình train: | ||
|
||
- Sửa các file đường dẫn trong config/preprocess.yaml | ||
|
||
+ raw_path: đường dẫn đến data thô | ||
+ preprocessed_path: đường dẫn đầu ra của quá trình rút trích mel | ||
+ temp_dir: đường dẫn chứa dữ liệu mp3 được chuẩn hóa | ||
- Chạy lần lượt các lệnh sau: | ||
|
||
``` | ||
python preprocessing.py | ||
python utils/split_train_val_by_id.py | ||
python utils/augment_mp3.py | ||
python utils/preprocess_augment.py | ||
``` | ||
## Train model:
|
||
- Sửa các file đường dẫn trong config/config.py | ||
|
||
+ meta_train: đường dẫn đến file train_meta.csv trong preprocessed_path | ||
+ train_root: đường dẫn đến dữ liệu mel đã tiền xử lý | ||
+ train_list = 'full_data_train.txt' | ||
+ val_list = 'full_data_val.txt' | ||
- Chạy lần lượt các lệnh sau: | ||
``` | ||
python convert_data.py | ||
python train.py | ||
``` | ||
## Infer public test: | ||
|
||
- Đặt dữ liệu mp3 thô ở địa chỉ /data/public_test (bên trong chứa 2 thư mục full_song và hum) | ||
|
||
- Chạy lần lượt các lệnh sau: | ||
|
||
```/model/predict.sh``` | ||
|
||
## Infer private test: | ||
|
||
Đặt dữ liệu mp3 thô ở địa chỉ /data/private_test (bên trong chứa 2 thư mục full_song và hum) | ||
|
||
Chạy lần lượt các lệnh sau: | ||
|
||
```/model/predict_private_test.sh``` | ||
|
||
## Team: | ||
[Võ Văn Phúc](https://github.com/vovanphuc) | ||
|
||
[Nguyễn Văn Thiều](https://github.com/theluckygod) | ||
|
||
[Lâm Bá Thịnh](https://github.com/sasukepn1999) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
import audio.tools | ||
import audio.stft | ||
import audio.audio_processing |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import torch | ||
import numpy as np | ||
import librosa.util as librosa_util | ||
from scipy.signal import get_window | ||
|
||
|
||
def window_sumsquare(
    window,
    n_frames,
    hop_length,
    win_length,
    n_fft,
    dtype=np.float32,
    norm=None,
):
    """Compute the sum-square envelope of a window function at a given hop length.

    Adapted from librosa 0.6. This is used to estimate modulation effects
    induced by windowing observations in short-time Fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`
    n_frames : int > 0
        The number of analysis frames
    hop_length : int > 0
        The number of samples to advance between frames
    win_length : int or None
        The length of the window function. When `None`, `n_fft` is used.
    n_fft : int > 0
        The length of each analysis frame.
    dtype : np.dtype
        The data type of the output
    norm : optional
        Forwarded unchanged as the `norm` argument of
        `librosa.util.normalize` before the window is squared.

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
    # `size` is passed by keyword: older librosa accepts it either way, while
    # librosa >= 0.10 rejects the positional form.
    win_sq = librosa_util.pad_center(win_sq, size=n_fft)

    # Fill the envelope by overlap-adding the squared window at every hop
    for i in range(n_frames):
        sample = i * hop_length
        x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
    return x
|
||
|
||
def griffin_lim(magnitudes, stft_fn, n_iters=30):
    """Reconstruct a signal from magnitudes by iterating inverse/forward STFT.

    Starts from a random phase, inverts it together with `magnitudes`, and
    then repeatedly re-estimates the phase from the current signal while
    keeping `magnitudes` fixed.

    PARAMS
    ------
    magnitudes: spectrogram magnitudes (a torch tensor; its `.size()` and
        `.device` are used to build the initial phase)
    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods;
        `transform(signal)` must return a pair whose second item is the phase
    n_iters: number of phase re-estimation rounds after the initial inverse

    RETURNS
    -------
    The result of the last `stft_fn.inverse(...)` call with dimension 1
    squeezed out.
    """
    # Random initial phase, one value per magnitude bin.
    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
    # Keep the phase on the same device as the magnitudes so that `inverse`
    # does not mix CPU and GPU tensors.
    angles = torch.from_numpy(angles.astype(np.float32)).to(magnitudes.device)
    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)

    for _ in range(n_iters):
        # Only the phase of the re-analysed signal is kept; the magnitudes
        # supplied by the caller are reused on every round.
        _, angles = stft_fn.transform(signal)
        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
    return signal
|
||
|
||
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """Log-compress `x` after flooring it at `clip_val`.

    PARAMS
    ------
    x: input tensor
    C: compression factor; multiplies the floored input before the log
    clip_val: lower bound applied to `x` before scaling

    RETURNS
    -------
    log(max(x, clip_val) * C), computed element-wise
    """
    floored = torch.clamp(x, min=clip_val)
    scaled = floored * C
    return torch.log(scaled)
|
||
|
||
def dynamic_range_decompression(x, C=1):
    """Undo the log compression: exponentiate `x`, then divide by `C`.

    PARAMS
    ------
    x: log-compressed tensor
    C: compression factor used to compress

    RETURNS
    -------
    exp(x) / C, computed element-wise
    """
    expanded = torch.exp(x)
    return expanded / C
Oops, something went wrong.