Skip to content

Commit

Permalink
Register feature extractor (huggingface#15634)
Browse files Browse the repository at this point in the history
* Rework AutoFeatureExtractor.from_pretrained internal

* Custom feature extractor

* Add more tests

* Add support for custom feature extractor code

* Clean up

* Add register API to AutoFeatureExtractor
  • Loading branch information
sgugger authored Feb 14, 2022
1 parent 0f71c29 commit 2e11a04
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 2 deletions.
16 changes: 16 additions & 0 deletions src/transformers/models/auto/feature_extraction_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ def feature_extractor_class_from_name(class_name: str):
return getattr(module, class_name)
break

for config, extractor in FEATURE_EXTRACTOR_MAPPING._extra_content.items():
if getattr(extractor, "__name__", None) == class_name:
return extractor

return None


Expand Down Expand Up @@ -301,3 +305,15 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
f"`feature_extractor_type` key in its {FEATURE_EXTRACTOR_NAME} of {CONFIG_NAME}, or one of the following "
"`model_type` keys in its {CONFIG_NAME}: {', '.join(c for c in FEATURE_EXTRACTOR_MAPPING_NAMES.keys())}"
)

@staticmethod
def register(config_class, feature_extractor_class):
"""
Register a new feature extractor for this class.
Args:
config_class ([`PretrainedConfig`]):
The configuration corresponding to the model to register.
feature_extractor_class ([`FeatureExtractorMixin`]): The feature extractor to register.
"""
FEATURE_EXTRACTOR_MAPPING.register(config_class, feature_extractor_class)
40 changes: 38 additions & 2 deletions tests/test_feature_extraction_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,28 @@

import json
import os
import sys
import tempfile
import unittest

from transformers import AutoFeatureExtractor, Wav2Vec2Config, Wav2Vec2FeatureExtractor
from pathlib import Path

from transformers import (
CONFIG_MAPPING,
FEATURE_EXTRACTOR_MAPPING,
AutoConfig,
AutoFeatureExtractor,
Wav2Vec2Config,
Wav2Vec2FeatureExtractor,
)
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER


sys.path.append(str(Path(__file__).parent.parent / "utils"))

from test_module.custom_configuration import CustomConfig # noqa E402
from test_module.custom_feature_extraction import CustomFeatureExtractor # noqa E402


SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures")
SAMPLE_FEATURE_EXTRACTION_CONFIG = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "fixtures/dummy_feature_extractor_config.json"
Expand Down Expand Up @@ -88,3 +103,24 @@ def test_from_pretrained_dynamic_feature_extractor(self):
"hf-internal-testing/test_dynamic_feature_extractor", trust_remote_code=True
)
self.assertEqual(model.__class__.__name__, "NewFeatureExtractor")

def test_new_feature_extractor_registration(self):
try:
AutoConfig.register("custom", CustomConfig)
AutoFeatureExtractor.register(CustomConfig, CustomFeatureExtractor)
# Trying to register something existing in the Transformers library will raise an error
with self.assertRaises(ValueError):
AutoFeatureExtractor.register(Wav2Vec2Config, Wav2Vec2FeatureExtractor)

# Now that the config is registered, it can be used as any other config with the auto-API
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
with tempfile.TemporaryDirectory() as tmp_dir:
feature_extractor.save_pretrained(tmp_dir)
new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_dir)
self.assertIsInstance(new_feature_extractor, CustomFeatureExtractor)

finally:
if "custom" in CONFIG_MAPPING._extra_content:
del CONFIG_MAPPING._extra_content["custom"]
if CustomConfig in FEATURE_EXTRACTOR_MAPPING._extra_content:
del FEATURE_EXTRACTOR_MAPPING._extra_content[CustomConfig]
3 changes: 3 additions & 0 deletions tests/test_feature_extraction_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures")


SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures")


def prepare_image_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
"""This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
or a list of PyTorch tensors if one specifies torchify=True.
Expand Down

0 comments on commit 2e11a04

Please sign in to comment.