feat(ml): export clip models to ONNX and host models on Hugging Face (immich-app#4700)

* export clip models * export to hf refactored export code * export mclip, general refactoring cleanup * updated conda deps * do transforms with pillow and numpy, add tokenization config to export, general refactoring * moved conda dockerfile, re-added poetry * minor fixes * updated link * updated tests * removed `requirements.txt` from workflow * fixed mimalloc path * removed torchvision * cleaner np typing * review suggestions * update default model name * update test
sellnat77 · Oct 31, 2023 · 87a0ba3 · 87a0ba3
1 parent 3212a47
commit 87a0ba3
Show file tree

Hide file tree

Showing 29 changed files with 6,196 additions and 2,047 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -166,7 +166,6 @@ jobs:
       - name: Install dependencies
         run: |
           poetry install --with dev
-          poetry run pip install --no-deps -r requirements.txt
       - name: Lint with ruff
         run: |
           poetry run ruff check --format=github app

diff --git a/machine-learning/Dockerfile b/machine-learning/Dockerfile
@@ -10,9 +10,8 @@ RUN poetry config installer.max-workers 10 && \
 RUN python -m venv /opt/venv
 ENV VIRTUAL_ENV="/opt/venv" PATH="/opt/venv/bin:${PATH}"
 
-COPY poetry.lock pyproject.toml requirements.txt ./
+COPY poetry.lock pyproject.toml ./
 RUN poetry install --sync --no-interaction --no-ansi --no-root --only main
-RUN pip install --no-deps -r requirements.txt
 
 FROM python:3.11-slim-bookworm
 

diff --git a/machine-learning/app/conftest.py b/machine-learning/app/conftest.py
@@ -1,5 +1,6 @@
 import json
-from typing import Any, Iterator, TypeAlias
+from pathlib import Path
+from typing import Any, Iterator
 from unittest import mock
 
 import numpy as np
@@ -8,8 +9,7 @@
 from PIL import Image
 
 from .main import app, init_state
-
-ndarray: TypeAlias = np.ndarray[int, np.dtype[np.float32]]
+from .schemas import ndarray_f32
 
 
 @pytest.fixture
@@ -18,13 +18,13 @@ def pil_image() -> Image.Image:
 
 
 @pytest.fixture
-def cv_image(pil_image: Image.Image) -> ndarray:
+def cv_image(pil_image: Image.Image) -> ndarray_f32:
     return np.asarray(pil_image)[:, :, ::-1]  # PIL uses RGB while cv2 uses BGR
 
 
 @pytest.fixture
 def mock_get_model() -> Iterator[mock.Mock]:
-    with mock.patch("app.models.cache.InferenceModel.from_model_type", autospec=True) as mocked:
+    with mock.patch("app.models.cache.from_model_type", autospec=True) as mocked:
         yield mocked
 
 
@@ -37,3 +37,25 @@ def deployed_app() -> TestClient:
 @pytest.fixture(scope="session")
 def responses() -> dict[str, Any]:
     return json.load(open("responses.json", "r"))
+
+
+@pytest.fixture(scope="session")
+def clip_model_cfg() -> dict[str, Any]:
+    return {
+        "embed_dim": 512,
+        "vision_cfg": {"image_size": 224, "layers": 12, "width": 768, "patch_size": 32},
+        "text_cfg": {"context_length": 77, "vocab_size": 49408, "width": 512, "heads": 8, "layers": 12},
+    }
+
+
+@pytest.fixture(scope="session")
+def clip_preprocess_cfg() -> dict[str, Any]:
+    return {
+        "size": [224, 224],
+        "mode": "RGB",
+        "mean": [0.48145466, 0.4578275, 0.40821073],
+        "std": [0.26862954, 0.26130258, 0.27577711],
+        "interpolation": "bicubic",
+        "resize_mode": "shortest",
+        "fill_color": 0,
+    }
diff --git a/machine-learning/app/models/__init__.py b/machine-learning/app/models/__init__.py
@@ -1,3 +1,25 @@
-from .clip import CLIPEncoder
+from typing import Any
+
+from app.schemas import ModelType
+
+from .base import InferenceModel
+from .clip import MCLIPEncoder, OpenCLIPEncoder, is_mclip, is_openclip
 from .facial_recognition import FaceRecognizer
 from .image_classification import ImageClassifier
+
+
+def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any) -> InferenceModel:
+    match model_type:
+        case ModelType.CLIP:
+            if is_openclip(model_name):
+                return OpenCLIPEncoder(model_name, **model_kwargs)
+            elif is_mclip(model_name):
+                return MCLIPEncoder(model_name, **model_kwargs)
+            else:
+                raise ValueError(f"Unknown CLIP model {model_name}")
+        case ModelType.FACIAL_RECOGNITION:
+            return FaceRecognizer(model_name, **model_kwargs)
+        case ModelType.IMAGE_CLASSIFICATION:
+            return ImageClassifier(model_name, **model_kwargs)
+        case _:
+            raise ValueError(f"Unknown model type {model_type}")
diff --git a/machine-learning/app/models/base.py b/machine-learning/app/models/base.py
@@ -25,7 +25,7 @@ def __init__(
     ) -> None:
         self.model_name = model_name
         self.loaded = False
-        self._cache_dir = Path(cache_dir) if cache_dir is not None else get_cache_dir(model_name, self.model_type)
+        self._cache_dir = Path(cache_dir) if cache_dir is not None else None
         self.providers = model_kwargs.pop("providers", ["CPUExecutionProvider"])
         #  don't pre-allocate more memory than needed
         self.provider_options = model_kwargs.pop(
@@ -92,7 +92,7 @@ def model_type(self) -> ModelType:
 
     @property
     def cache_dir(self) -> Path:
-        return self._cache_dir
+        return self._cache_dir if self._cache_dir is not None else get_cache_dir(self.model_name, self.model_type)
 
     @cache_dir.setter
     def cache_dir(self, cache_dir: Path) -> None:

diff --git a/machine-learning/app/models/cache.py b/machine-learning/app/models/cache.py
@@ -4,6 +4,8 @@
 from aiocache.lock import OptimisticLock
 from aiocache.plugins import BasePlugin, TimingPlugin
 
+from app.models import from_model_type
+
 from ..schemas import ModelType
 from .base import InferenceModel
 
@@ -50,7 +52,7 @@ async def get(self, model_name: str, model_type: ModelType, **model_kwargs: Any)
         async with OptimisticLock(self.cache, key) as lock:
             model = await self.cache.get(key)
             if model is None:
-                model = InferenceModel.from_model_type(model_type, model_name, **model_kwargs)
+                model = from_model_type(model_type, model_name, **model_kwargs)
                 await lock.cas(model, ttl=self.ttl)
         return model