Register feature extractor (huggingface#15634)

* Rework AutoFeatureExtractor.from_pretrained internals
* Custom feature extractor
* Add more tests
* Add support for custom feature extractor code
* Clean up
* Add register API to AutoFeatureExtractor
ShungJhon · Feb 14, 2022 · 2e11a04 · 2e11a04
1 parent 0f71c29
commit 2e11a04
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 2 deletions.
diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py
@@ -68,6 +68,10 @@ def feature_extractor_class_from_name(class_name: str):
             return getattr(module, class_name)
             break
 
+    for config, extractor in FEATURE_EXTRACTOR_MAPPING._extra_content.items():
+        if getattr(extractor, "__name__", None) == class_name:
+            return extractor
+
     return None
 
 
@@ -301,3 +305,15 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
             f"`feature_extractor_type` key in its {FEATURE_EXTRACTOR_NAME} of {CONFIG_NAME}, or one of the following "
             "`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES.keys())}"
         )
+
+    @staticmethod
+    def register(config_class, feature_extractor_class):
+        """
+        Register a new feature extractor class in `FEATURE_EXTRACTOR_MAPPING` under its configuration class.
+
+        Args:
+            config_class ([`PretrainedConfig`]):
+                The configuration corresponding to the model to register.
+            feature_extractor_class ([`FeatureExtractionMixin`]): The feature extractor to register.
+        """
+        FEATURE_EXTRACTOR_MAPPING.register(config_class, feature_extractor_class)
diff --git a/tests/test_feature_extraction_auto.py b/tests/test_feature_extraction_auto.py
@@ -15,13 +15,28 @@
 
 import json
 import os
+import sys
 import tempfile
 import unittest
-
-from transformers import AutoFeatureExtractor, Wav2Vec2Config, Wav2Vec2FeatureExtractor
+from pathlib import Path
+
+from transformers import (
+    CONFIG_MAPPING,
+    FEATURE_EXTRACTOR_MAPPING,
+    AutoConfig,
+    AutoFeatureExtractor,
+    Wav2Vec2Config,
+    Wav2Vec2FeatureExtractor,
+)
 from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER
 
 
+sys.path.append(str(Path(__file__).parent.parent / "utils"))
+
+from test_module.custom_configuration import CustomConfig  # noqa E402
+from test_module.custom_feature_extraction import CustomFeatureExtractor  # noqa E402
+
+
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures")
 SAMPLE_FEATURE_EXTRACTION_CONFIG = os.path.join(
     os.path.dirname(os.path.abspath(__file__)), "fixtures/dummy_feature_extractor_config.json"
@@ -88,3 +103,24 @@ def test_from_pretrained_dynamic_feature_extractor(self):
             "hf-internal-testing/test_dynamic_feature_extractor", trust_remote_code=True
         )
         self.assertEqual(model.__class__.__name__, "NewFeatureExtractor")
+
+    def test_new_feature_extractor_registration(self):
+        try:
+            AutoConfig.register("custom", CustomConfig)
+            AutoFeatureExtractor.register(CustomConfig, CustomFeatureExtractor)
+            # Registering a pair that already ships with the Transformers library must raise a ValueError
+            with self.assertRaises(ValueError):
+                AutoFeatureExtractor.register(Wav2Vec2Config, Wav2Vec2FeatureExtractor)
+
+            # Now that the feature extractor is registered, AutoFeatureExtractor can load it like any built-in one
+            feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                feature_extractor.save_pretrained(tmp_dir)
+                new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_dir)
+                self.assertIsInstance(new_feature_extractor, CustomFeatureExtractor)
+
+        finally:
+            if "custom" in CONFIG_MAPPING._extra_content:
+                del CONFIG_MAPPING._extra_content["custom"]
+            if CustomConfig in FEATURE_EXTRACTOR_MAPPING._extra_content:
+                del FEATURE_EXTRACTOR_MAPPING._extra_content[CustomConfig]
diff --git a/tests/test_feature_extraction_common.py b/tests/test_feature_extraction_common.py
@@ -43,6 +43,9 @@
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures")
 
 
+SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures")
+
+
 def prepare_image_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
     """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
     or a list of PyTorch tensors if one specifies torchify=True.