release

giaabaoo · Dec 22, 2021 · a67fdc6 · a67fdc6
1 parent 3968c92
commit a67fdc6
Show file tree

Hide file tree

Showing 39 changed files with 2,891 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,149 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Plain-text files (the ones that must stay tracked are re-included at the end of this file)
+*.txt
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+.idea/
+data_faiss/
+checkpoints/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# Exceptions: keep these tracked even though the patterns above would ignore them
+!requirements.txt
+!checkpoints/
+!README.txt
+!data_val.txt
+!data_train.txt
+!full_data_val.txt
+!full_data_train.txt
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,23 @@
+# Training / inference image, based on NVIDIA's PyTorch container.
+FROM nvcr.io/nvidia/pytorch:21.07-py3
+USER root
+
+# System packages: ffmpeg and the extra libavcodec codecs.
+# The package lists are removed in the same layer so they do not bloat the
+# image; a second `apt-get update` after the install is not needed.
+RUN apt-get update && \
+    DEBIAN_FRONTEND="noninteractive" apt-get install ffmpeg libavcodec-extra -y && \
+    rm -rf /var/lib/apt/lists/*
+RUN python3 -m pip install --upgrade pip
+# NOTE(review): llvmlite is installed with --ignore-installed, presumably
+# because the copy shipped in the base image cannot be uninstalled by pip -
+# confirm before removing the flag.
+RUN pip install llvmlite --ignore-installed && \
+    pip install librosa==0.8.0
+RUN pip install pydub && \
+    pip install faiss-gpu
+
+# RUN apt-get install libsox-fmt-all libsox-dev sox > /dev/null
+# pip output is discarded here to keep the build log short.
+RUN python -m pip install torchaudio > /dev/null && \
+    python -m pip install git+https://github.com/facebookresearch/WavAugment.git > /dev/null
+
+RUN pip install opencv-python && \
+    pip install visdom
+
+# The whole build context becomes /model, which is also the working directory.
+COPY . /model
+WORKDIR /model
+
+# Exec form: bash runs directly instead of being wrapped in `/bin/sh -c`.
+CMD ["/bin/bash"]
diff --git a/README.md b/README.md
@@ -0,0 +1,70 @@
+# Zalo AI challenge 2021 task Hum to Song
+
+## Pipeline:
+<p align="center">
+    <img src="image/pipeline.png">
+</p> 
+
+
+## Chuẩn bị dữ liệu cho quá trình train:
+
+    - Sửa các file đường dẫn trong config/preprocess.yaml
+
+        + raw_path: đường dẫn đến data thô
+      
+        + preprocessed_path: đường dẫn đầu ra của quá trình rút trích mel
+      
+        + temp_dir: đường dẫn chứa dữ liệu mp3 được chuẩn hóa
+      
+    - Chạy lần lượt các lệnh sau:
+
+```   
+        python preprocessing.py
+
+        python utils/split_train_val_by_id.py
+   
+        python utils/augment_mp3.py
+   
+        python utils/preprocess_augment.py
+   ```
+## Train model:
+
+    - Sửa các file đường dẫn trong config/config.py
+
+        + meta_train: đường dẫn đến file train_meta.csv trong preprocessed_path
+      
+        + train_root: đường dẫn đến dữ liệu mel đã tiền xử lý
+      
+        + train_list = 'full_data_train.txt'
+      
+        + val_list = 'full_data_val.txt'
+      
+    - Chạy lần lượt các lệnh sau:
+```   
+        python convert_data.py
+   
+        python train.py
+   ```
+## Infer public test:
+
+    - Đặt dữ liệu mp3 thô ở địa chỉ /data/public_test (bên trong chứa 2 thư mục full_song và hum)
+
+    - Chạy lần lượt các lệnh sau: 
+
+   ```/model/predict.sh```
+
+## Infer private test:
+
+    Đặt dữ liệu mp3 thô ở địa chỉ /data/private_test (bên trong chứa 2 thư mục full_song và hum)
+
+    Chạy lần lượt các lệnh sau:
+
+   ```/model/predict_private_test.sh```
+
+## Team:
+   [Võ Văn Phúc](https://github.com/vovanphuc)
+
+   [Nguyễn Văn Thiều](https://github.com/theluckygod)
+
+   [Lâm Bá Thịnh](https://github.com/sasukepn1999)
+
diff --git a/audio/__init__.py b/audio/__init__.py
@@ -0,0 +1,3 @@
+import audio.tools
+import audio.stft
+import audio.audio_processing
diff --git a/audio/audio_processing.py b/audio/audio_processing.py
@@ -0,0 +1,100 @@
+import torch
+import numpy as np
+import librosa.util as librosa_util
+from scipy.signal import get_window
+
+
+def window_sumsquare(
+    window,
+    n_frames,
+    hop_length,
+    win_length,
+    n_fft,
+    dtype=np.float32,
+    norm=None,
+):
+    """
+    # from librosa 0.6
+    Compute the sum-square envelope of a window function at a given hop length.
+
+    This is used to estimate modulation effects induced by windowing
+    observations in short-time fourier transforms.
+
+    Parameters
+    ----------
+    window : string, tuple, number, callable, or list-like
+        Window specification, as in `get_window`
+
+    n_frames : int > 0
+        The number of analysis frames
+
+    hop_length : int > 0
+        The number of samples to advance between frames
+
+    win_length : [optional]
+        The length of the window function.  By default, this matches `n_fft`.
+
+    n_fft : int > 0
+        The length of each analysis frame.
+
+    dtype : np.dtype
+        The data type of the output
+
+    Returns
+    -------
+    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
+        The sum-squared envelope of the window function
+    """
+    if win_length is None:
+        win_length = n_fft
+
+    n = n_fft + hop_length * (n_frames - 1)
+    x = np.zeros(n, dtype=dtype)
+
+    # Compute the squared window at the desired length
+    win_sq = get_window(window, win_length, fftbins=True)
+    win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
+    win_sq = librosa_util.pad_center(win_sq, n_fft)
+
+    # Fill the envelope
+    for i in range(n_frames):
+        sample = i * hop_length
+        x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
+    return x
+
+
+def griffin_lim(magnitudes, stft_fn, n_iters=30):
+    """
+    PARAMS
+    ------
+    magnitudes: spectrogram magnitudes
+    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
+    """
+
+    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
+    angles = angles.astype(np.float32)
+    angles = torch.autograd.Variable(torch.from_numpy(angles))
+    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
+
+    for i in range(n_iters):
+        _, angles = stft_fn.transform(signal)
+        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
+    return signal
+
+
+def dynamic_range_compression(x, C=1, clip_val=1e-5):
+    """
+    PARAMS
+    ------
+    C: compression factor
+    """
+    return torch.log(torch.clamp(x, min=clip_val) * C)
+
+
+def dynamic_range_decompression(x, C=1):
+    """
+    PARAMS
+    ------
+    C: compression factor used to compress
+    """
+    return torch.exp(x) / C