feat: support transformers library in model-hub [DET-4823, 4719, 4721…

…, 4720] (determined-ai#2068) * feat: support transformers library in model-hub * address comments * address 2nd round of comments * address detailed comments * add additional hf examples (determined-ai#2125) * fix model-hub tests * docs: add documentation for model-hub [DET-5031] (determined-ai#2276) * docs: add documentation for model-hub * address comments * add release note
ioga · May 8, 2021 · f677fa7 · f677fa7
1 parent fc27d77
commit f677fa7
Show file tree

Hide file tree

Showing 79 changed files with 5,625 additions and 8 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -29,3 +29,18 @@ values =
 [bumpversion:file:webui/react/config-overrides.js]
 
 [bumpversion:file:helm/charts/determined/Chart.yaml]
+
+[bumpversion:file:model_hub/model_hub/__version__.py]
+
+[bumpversion:file:model_hub/setup.py]
+
+[bumpversion:file:model_hub/examples/huggingface/token-classification/ner_config.yaml]
+
+[bumpversion:file:model_hub/examples/huggingface/language-modeling/clm_config.yaml]
+[bumpversion:file:model_hub/examples/huggingface/language-modeling/mlm_config.yaml]
+[bumpversion:file:model_hub/examples/huggingface/language-modeling/plm_config.yaml]
+
+[bumpversion:file:model_hub/examples/huggingface/multiple-choice/swag_config.yaml]
+
+[bumpversion:file:model_hub/examples/huggingface/text-classification/glue_config.yaml]
+[bumpversion:file:model_hub/examples/huggingface/text-classification/xnli_config.yaml]
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -185,6 +185,9 @@ commands:
       determined:
         type: boolean
         default: false
+      model-hub: 
+        type: boolean
+        default: false
       extras-requires:
         type: string
         default: ""
@@ -210,6 +213,9 @@ commands:
             if [ "<<parameters.determined>>" = "true" ]; then
               cat harness/setup.py >> /tmp/cachefile
             fi
+            if [ "<<parameters.model-hub>>" = "true" ]; then
+              cat model_hub/setup.py >> /tmp/cachefile
+            fi
             echo <<parameters.extras-requires>> >> /tmp/cachefile
             if [ -n <<parameters.extra-requirements-file>> ]; then
               cat <<parameters.extra-requirements-file>> >> /tmp/cachefile
@@ -224,6 +230,12 @@ commands:
             - install-wheel:
                 package-name: determined
                 package-location: ~/project/harness
+      - when:
+          condition: <<parameters.model-hub>>
+          steps:
+            - install-wheel:
+                package-name: model-hub
+                package-location: ~/project/model_hub
       - run:
           name: Install <<parameters.extras-requires>>
           command: |
@@ -763,6 +775,7 @@ jobs:
       - setup-python-venv:
           determined: true
           extras-requires: "tensorflow==2.4.1 torch==1.8"
+          model-hub: true
           extra-requirements-file: "docs/requirements.txt"
           executor: determinedai/cimg-base:stable
       - run: make -C examples build
@@ -777,6 +790,7 @@ jobs:
             - cli/dist
             - common/dist
             - harness/dist
+            - model_hub/dist
             - docs/site/html
       - store_artifacts:
           path: docs/site/html
@@ -837,6 +851,20 @@ jobs:
       - run: make package
       - run: make -C master publish-dev
       - run: make -C agent publish-dev
+      - run:
+          name: Build and publish model_hub docker images
+          command: |
+            if [ ${CIRCLE_BRANCH} = 'master' ] || [[ ${CIRCLE_BRANCH} == *"release-"* ]]; then
+                # For master and release branches, we will tag and publish both the environment
+                # with the git hash as well as the version.  This will make that image available
+                # immediately for nightly tests.
+                make -C model_hub build-docker
+                make -C model_hub publish-docker
+            else
+                # Otherwise, only tag and publish the environment with the git hash.
+                make -C model_hub build-docker-dev
+                make -C model_hub publish-docker-dev
+            fi
 
   package-and-push-system-rc:
     docker:
@@ -861,6 +889,8 @@ jobs:
       - run: make package
       - run: make -C master publish
       - run: make -C agent publish
+      - run: make -C model_hub build-docker
+      - run: make -C model_hub publish-docker
 
   package-and-push-system-release:
     docker:
@@ -884,6 +914,8 @@ jobs:
       - pre-package-and-push-system
       - run: make -C master release
       - run: make -C agent release
+      - run: make -C model_hub build-docker
+      - run: make -C model_hub publish-docker
 
   publish-helm:
     docker:
@@ -1144,12 +1176,14 @@ jobs:
       - setup-python-venv:
           determined: true
           extras-requires: "torch==1.7.1"
+          model-hub: true
           extra-requirements-file: "requirements.txt"
           executor: determinedai/cimg-base:stable
       - run: make -C cli check
       - run: make -C common check
       - run: make -C harness check
       - run: make -C deploy check
+      - run: make -C model_hub check
       - run: make -C e2e_tests check
       - run: make -C examples check
       - run: make -C tools check
@@ -1179,6 +1213,19 @@ jobs:
           executor: determinedai/cimg-base:stable
       - run: make -C harness test-tf2
 
+  test-unit-model-hub:
+    docker:
+      - image: determinedai/cimg-base:stable
+    steps:
+      - checkout
+      - setup-python-venv:
+          determined: true
+          model-hub: true
+          extras-requires: "tensorflow==2.4.1 torch==1.7.1 torchvision==0.8.2"
+          extra-requirements-file: "model_hub/tests/requirements.txt"
+          executor: determinedai/cimg-base:stable
+      - run: make -C model_hub test
+
   test-examples:
     docker:
       - image: determinedai/cimg-base:stable
@@ -1595,6 +1642,7 @@ workflows:
             - build-bindings
       - test-unit-harness
       - test-unit-harness-tf2
+      - test-unit-model-hub
       - test-examples
 
   test-intg:
@@ -1782,6 +1830,26 @@ workflows:
               mark: ["e2e_gpu"]
               slack-mentions: ["${SLACK_USER_ID}"]
 
+      - request-model-hub-tests:
+          type: approval
+          filters: *upstream-feature-branch
+
+      - test-e2e-aws:
+          name: test-e2e-model-hub
+          context: aws
+          filters: *upstream-feature-branch
+          requires:
+            - request-model-hub-tests
+            - package-and-push-system-dev
+          matrix:
+            parameters:
+              gpu-agent-instance-type: ["p2.8xlarge"]
+              cpu-agent-instance-type: ["m5.large"]
+              cluster-id-prefix: ["model-hub"]
+              mark: ["model_hub"]
+              slack-mentions: ["${SLACK_USER_ID}"]
+              max-dynamic-agents: [2]
+
       - test-e2e-gke:
           name: test-e2e-gke-single-gpu
           context: gcp
@@ -1923,7 +1991,7 @@ workflows:
               mark: ["distributed"]
               gpu-agent-instance-type: ["p2.8xlarge"]
               cpu-agent-instance-type: ["m5.large"]
-              max-dynamic-agents: [2]
+              max-dynamic-agents: [3]
               slack-mentions: ["channel"]
               slack-channel: ["ml-ag"]
 
@@ -1984,7 +2052,7 @@ workflows:
       - publish-python-package:
           matrix:
             parameters:
-              path: ["harness", "common", "cli", "deploy"]
+              path: ["harness", "common", "cli", "deploy", "model_hub"]
           context: determined-production
           filters: *release-and-rc-filters
 

diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,7 @@ __pycache__/
 .Python
 env/
 build/
+build-examples/
 develop-eggs/
 downloads/
 eggs/

diff --git a/Makefile b/Makefile
@@ -30,7 +30,7 @@ build-%:
 	$(MAKE) -C $(subst -,/,$*) build
 
 .PHONY: build-docs
-build-docs: build-common build-harness build-cli build-deploy build-examples build-helm build-proto
+build-docs: build-common build-harness build-cli build-deploy build-model_hub build-examples build-helm build-proto 
 	$(MAKE) -C docs build
 
 .PHONY: build-bindings
@@ -56,23 +56,23 @@ build: build-master build-agent
 clean-%:
 	$(MAKE) -C $(subst -,/,$*) clean
 .PHONY: clean
-clean: clean-tools clean-proto clean-common clean-harness clean-cli clean-deploy clean-examples clean-docs clean-webui clean-master clean-agent clean-bindings
+clean: clean-tools clean-proto clean-common clean-harness clean-cli clean-deploy clean-model_hub clean-examples clean-docs clean-webui clean-master clean-agent clean-bindings 
 
 .PHONY: check-%
 check-%:
 	$(MAKE) -C $(subst -,/,$*) check
 .PHONY: check
-check: check-common check-proto check-harness check-cli check-deploy check-e2e_tests check-tools check-master check-webui check-examples check-docs check-schemas
+check: check-common check-proto check-harness check-cli check-deploy check-model_hub check-e2e_tests check-tools check-master check-webui check-examples check-docs check-schemas
 	$(MAKE) check-agent
 
 .PHONY: fmt-%
 fmt-%:
 	$(MAKE) -C $(subst -,/,$*) fmt
 .PHONY: fmt
-fmt: fmt-common fmt-harness fmt-cli fmt-deploy fmt-e2e_tests fmt-tools fmt-master fmt-agent fmt-webui fmt-examples fmt-docs fmt-schemas fmt-proto
+fmt: fmt-common fmt-harness fmt-cli fmt-deploy fmt-model_hub fmt-e2e_tests fmt-tools fmt-master fmt-agent fmt-webui fmt-examples fmt-docs fmt-schemas fmt-proto 
 
 .PHONY: test-%
 test-%:
 	$(MAKE) -C $(subst -,/,$*) test
 .PHONY: test
-test: test-harness test-cli test-common test-master test-agent test-webui
+test: test-harness test-cli test-common test-model_hub test-master test-agent test-webui 
diff --git a/docs/Makefile b/docs/Makefile
@@ -7,6 +7,7 @@ SPHINXBUILD   = sphinx-build
 .PHONY: build-examples
 build-examples:
 	$(MAKE) -C ../examples build
+	$(MAKE) -C ../model_hub examples
 
 .PHONY: build-helm
 build-helm:
@@ -19,6 +20,7 @@ reference/attributions.txt: $(shell find ../tools/scripts/licenses -type f)
 build: build-examples build-helm reference/attributions.txt
 	mkdir -p site/downloads/examples
 	cp ../examples/build/* site/downloads/examples
+	cp ../model_hub/build-examples/* site/downloads/examples
 	mkdir -p site/downloads/helm
 	cp ../helm/build/* site/downloads/helm
 	$(MAKE) sp-html

diff --git a/docs/examples.txt b/docs/examples.txt
@@ -122,6 +122,35 @@ MNIST <pytorch-mnist-tutorial>` and :ref:`tf.keras MNIST
       -  :download:`bert_glue_pytorch.tgz
          </examples/bert_glue_pytorch.tgz>`
 
+   -  -  PyTorch (:ref:`Model Hub Transformers
+         <model-hub-transformers>`)
+      -  WikiText-2
+      -  :download:`language-modeling.tgz
+         </examples/language-modeling.tgz>`
+
+   -  -  PyTorch (:ref:`Model Hub Transformers
+         <model-hub-transformers>`)
+      -  SWAG
+      -  :download:`multiple-choice.tgz </examples/multiple-choice.tgz>`
+
+   -  -  PyTorch (:ref:`Model Hub Transformers
+         <model-hub-transformers>`)
+      -  SQuAD v1 and v2
+      -  :download:`question-answering.tgz
+         </examples/question-answering.tgz>`
+
+   -  -  PyTorch (:ref:`Model Hub Transformers
+         <model-hub-transformers>`)
+      -  GLUE and XNLI
+      -  :download:`text-classification.tgz
+         </examples/text-classification.tgz>`
+
+   -  -  PyTorch (:ref:`Model Hub Transformers
+         <model-hub-transformers>`)
+      -  CoNLL-2003
+      -  :download:`token-classification.tgz
+         </examples/token-classification.tgz>`
+
 ************************
  HP Search Benchmarking
 ************************

diff --git a/docs/index.txt b/docs/index.txt
@@ -8,6 +8,7 @@
    reference/index
    faq
    examples
+   model-hub/index
    release-notes
 
 ###############

diff --git a/docs/model-hub/index.txt b/docs/model-hub/index.txt
@@ -0,0 +1,71 @@
+.. _model-hub:
+
+###########
+ Model Hub
+###########
+
+.. rubric:: **Overview**
+
+Determined's **model-hub** library makes it easy to train models from
+popular third-party libraries with a Determined cluster. With
+**model-hub**, use trusted implementations of model architectures with
+Determined's ability to easily scale to distributed training, track
+experiments, share resources, and perform hyperparameter searches.
+
+Each supported third-party library in **model-hub** is accompanied by:
+
+-  Official examples checked for correctness and thoroughly tested for
+   use with Determined.
+-  A base Determined Trial class with common functionality implemented
+   for the user.
+-  A prebuilt docker environment with all dependencies installed and
+   versioned for reproducibility.
+-  A suite of helper functions (if applicable) to allow users to easily
+   write their own Trial classes for use with the third-party library.
+
+.. rubric:: **Getting Started**
+
+For a given task, deep learning practitioners often adapt existing model
+implementations from a trusted third-party library, such as HuggingFace
+Transformers. When beginning your deep learning project in this way, we
+suggest using **model-hub** by following these steps:
+
+-  Check for a Model Hub library that supports model implementations for
+   your task.
+-  If the Model Hub Library includes an official example fit for your
+   task, copy, customize, and deploy it.
+-  If the Model Hub Library does not include an example fit for your
+   task, copy the base Determined Trial class and customize it.
+
+For detailed instructions, check out the documentation for your Model
+Hub library of choice.
+
+.. rubric:: **Available Libraries**
+
+Released libraries
+
+-  :ref:`Model Hub Transformers <model-hub-transformers>`
+
+Future libraries on our roadmap
+
+-  `mmdetection <https://github.com/open-mmlab/mmdetection>`_
+-  `detectron2 <https://github.com/facebookresearch/detectron2>`_
+
+Our initial release of **model-hub** includes support for the
+`HuggingFace Transformers library
+<https://github.com/huggingface/transformers>`_. We are actively working
+on releasing new third-party libraries. Please check back for updates.
+If you have additional libraries you want to see supported in
+**model-hub** please let us know by filing an issue on `GitHub
+<https://github.com/determined-ai/determined>`_ or reaching out on our
+community `Slack
+<https://join.slack.com/t/determined-community/shared_invite/zt-cnj7802v-KcVbaUrIzQOwmkmY7gP0Ew>`_.
+
+For next steps, learn more about :ref:`Model Hub Transformers
+<model-hub-transformers>`!
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   transformers/index
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,7 @@ __pycache__/ @@
     .Python
     env/
     build/
+    build-examples/
     develop-eggs/
     downloads/
     eggs/
@@ Expand Down @@