Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
Victorwz committed Jun 13, 2023
1 parent bbb58e5 commit bc14427
Show file tree
Hide file tree
Showing 1,342 changed files with 271,526 additions and 35 deletions.
47 changes: 13 additions & 34 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@ __pycache__/
*.py[cod]
*$py.class

# Project-specific local files and output directories.
debug.py
tensorboard.py
output
tensorboard
checkpoints
plot
.amltignore
.amltconfig
# Anchored to the repository root. A leading "./" is not understood by
# gitignore, so the previous "./amlt" pattern never matched anything.
/amlt

# C extensions
*.so

Expand All @@ -20,6 +30,7 @@ parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
Expand Down Expand Up @@ -49,7 +60,6 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
Expand All @@ -72,7 +82,6 @@ instance/
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
Expand All @@ -83,9 +92,7 @@ profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
Expand All @@ -94,22 +101,7 @@ ipython_config.py
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
Expand Down Expand Up @@ -145,16 +137,3 @@ dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
# LongMem
# NeurIPS 2023 Submission: Language Models Augmented with Decoupled Memory

## Project Structure
Pre-trained LLM Class (L24, E1024, Alibi POS_ENCODING): ``fairseq/fairseq/models/newgpt.py``

Transformer Decoder with SideNetwork (L12, E1024, Alibi POS_ENCODING): ``fairseq/fairseq/models/sidenet/transformer_decoder_sidenet.py``

Transformer Language Model with SideNetwork Class: ``fairseq/fairseq/models/transformer_lm_sidenet.py``

Memory Bank and Retrieval: ``fairseq/fairseq/modules/dynamic_memory_with_chunk.py``

Joint Attention for Memory Fusion: ``fairseq/fairseq/modules/joint_multihead_attention_sum.py``
171 changes: 171 additions & 0 deletions fairseq/.circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# CircleCI config schema version; 2.1 is needed for orbs.
version: 2.1

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
# Shared machine settings for the GPU jobs; each job pulls these in with
# `<<: *gpu`.
gpu: &gpu
  machine:
    image: ubuntu-2004-cuda-11.2:202103-01
  resource_class: gpu.nvidia.medium.multi
  environment:
    CUDA_VERSION: "11.2"


# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
# Cache key template used by restore_cache/save_cache: it varies with the job
# name and with the checksums of .circleci/config.yml and setup.py.
cache_key: &cache_key 'cache-key-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/config.yml" }}-{{ checksum "setup.py"}}'

# Step list: installs the Python packages shared by all GPU jobs into the
# `fairseq` conda environment, then prints the torch version and the
# collected environment info.
install_dep_common: &install_dep_common
  - run:
      name: Install Common Dependencies
      # pytest is part of the main package list, so it needs no separate
      # `pip install` call.
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.0.7 omegaconf==2.0.6
        pip install --progress-bar off fairscale
        pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env
# Step list: builds NVIDIA apex (pinned to a fixed commit) with its C++/CUDA
# extensions, then installs Megatron-LM v2.4 in editable mode. Everything is
# cloned under the home directory.
install_dep_fused_ops: &install_dep_fused_ops
  - run:
      name: Install Megatron/Apex Dependencies
      working_directory: ~/
      # The sed call prefixes lines 101-107 of apex's setup.py with '#'
      # before building. NOTE(review): presumably this disables a check that
      # would otherwise block the build at the pinned commit -- confirm.
      command: |
        source activate fairseq
        git clone https://github.com/NVIDIA/apex
        cd apex
        git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
        sed -i '101,107 s/^/#/' setup.py
        pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
        cd ~/
        git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git
        cd Megatron-LM
        pip install -e .
# Step list: clones xFormers into the home directory and installs it
# (requirements first, then the package in editable mode).
install_dep_xformers: &install_dep_xformers
  - run:
      name: Install xFormers Dependencies
      working_directory: ~/
      command: |
        source activate fairseq
        git clone https://github.com/facebookresearch/xformers.git
        cd xformers
        pip install -r requirements.txt
        pip install -e .
# Step list: installs the PyTorch 1.9.1 (cu111) stack with the matching
# torchvision/torchaudio releases, then prints the installed torch version.
install_dep_pt19: &install_dep_pt19
  - run:
      name: Install Pytorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'
# Step list: installs the PyTorch 1.8.1 (cu111) stack with the matching
# torchvision/torchaudio releases, then prints the installed torch version.
install_dep_pt18: &install_dep_pt18
  - run:
      name: Install Pytorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'
# Step list: installs the checked-out repository with pip, then builds its
# extension modules in place.
install_repo: &install_repo
  - run:
      name: Install Repository
      command: |
        source activate fairseq
        pip install .
        python setup.py build_ext --inplace
# Step list: runs the GPU binary test module with pytest inside the
# `fairseq` conda environment.
run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        source activate fairseq
        pytest tests/gpu/test_binaries_gpu.py
# Step list: diagnostic step that lists the pyenv Python versions and prints
# the `nvidia-smi` report.
check_nvidia_driver: &check_nvidia_driver
  - run:
      name: Check NVIDIA Driver
      working_directory: ~/
      command: |
        pyenv versions
        nvidia-smi
# Step list: downloads and installs Miniconda into $HOME/miniconda, puts it
# on PATH for later steps via $BASH_ENV, creates the `fairseq` environment
# (Python 3.8) when it does not exist yet, and upgrades pip inside it.
create_conda_env: &create_conda_env
  - run:
      name: Install and Create Conda Environment
      # curl: `-f` makes an HTTP error fail the step instead of saving an
      # error page that would then be executed as the installer; `-L` follows
      # redirects; `-sS` hides the progress meter but still prints errors.
      # The stray `-O` next to `-o` was redundant and has been removed.
      command: |
        curl -fsSL -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
        chmod +x ~/miniconda.sh
        ~/miniconda.sh -b -p $HOME/miniconda
        rm ~/miniconda.sh
        echo 'export PATH=$HOME/miniconda/bin:$PATH' >> $BASH_ENV
        source $BASH_ENV
        if [ ! -d ~/miniconda/envs/fairseq ]
        then
          conda create -y -n fairseq python=3.8
        fi
        source activate fairseq
        python --version
        pip install --upgrade pip
# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------

# Two GPU test jobs that differ only in the PyTorch install step
# (install_dep_pt19 vs. install_dep_pt18). Each job saves ~/miniconda/ to the
# cache after the dependency installs and before the repository install.
jobs:
  gpu_tests_pt19:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt19
      - <<: *install_dep_common
      - <<: *install_dep_fused_ops
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

  gpu_tests_pt18:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt18
      - <<: *install_dep_common
      - <<: *install_dep_fused_ops
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

# A single workflow, `build`, that runs both GPU test jobs.
workflows:
  version: 2
  build:
    jobs:
      - gpu_tests_pt18
      - gpu_tests_pt19
2 changes: 2 additions & 0 deletions fairseq/.isort.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[settings]
# Module names that isort should classify as third-party when grouping imports.
known_third_party = _cffi_backend,agg_results,aml,bitarray,boto3,botocore,dump_hubert_feature,dynamicconv_cuda,editdistance,faiss,fasttext,feature_utils,ffmpeg,g2p_en,h5py,hydra,hypothesis,indicnlp,inflect,iopath,joblib,kaldi_io,kenlm,libfb,librosa,lightconv_cuda,matplotlib,misc,mmpt,mmpt_cli,model,nltk,npy_append_array,numpy,omegaconf,pandas,pathbuilder,preprocessing,progressbar,pythainlp,random_sequence_shuffler,regex,sacrebleu,sacremoses,scipy,sentencepiece,setuptools,six,sklearn,soundfile,sweep,sweep_wmt_en2de_transformer_big_common,tabulate,torch,torchaudio,tqdm,unidecode,utils,videoreader,wav2vec_cluster_faiss,wget,yaml
40 changes: 40 additions & 0 deletions fairseq/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# pre-commit configuration: whitespace/AST/merge-conflict checks, black
# formatting, a minimal flake8 pass, and isort import ordering.

# Paths matching this regex are skipped by every hook.
exclude: 'build|stubs'

default_language_version:
  python: python3

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.1.0
    hooks:
      - id: trailing-whitespace
      - id: check-ast
      - id: check-merge-conflict
      - id: no-commit-to-branch
        args: ['--branch=master']
      - id: check-added-large-files
        args: ['--maxkb=500']
      - id: end-of-file-fixer

  # black is hosted under the `psf` organisation; the old `ambv` URL only
  # works through a redirect.
  - repo: https://github.com/psf/black
    rev: 22.3.0
    hooks:
      - id: black
        language_version: python3.8

  # flake8 is hosted on GitHub. The former GitLab location is no longer
  # available, so pointing pre-commit at it makes hook installation fail.
  - repo: https://github.com/PyCQA/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
        args:
          # only error for syntax errors and undefined names
          - "--select=E9,F63,F7,F82"

  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        exclude: README.md
        additional_dependencies: [toml]
        args: ["--profile", "black"]
Loading

0 comments on commit bc14427

Please sign in to comment.