make BaseFeaturizer an ABC with abstractmethods
janosh committed Jul 22, 2020
1 parent 66573ca commit 33a9c48
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions matminer/featurizers/base.py
@@ -3,19 +3,20 @@
 import sys
 import traceback
 import warnings
-from multiprocessing import Pool, cpu_count
+from abc import ABC, abstractmethod
 from functools import partial
+from multiprocessing import Pool, cpu_count
 
 import numpy as np
 import pandas as pd
-from six import string_types, reraise
-from sklearn.base import TransformerMixin, BaseEstimator, is_classifier
+from six import reraise, string_types
+from sklearn.base import BaseEstimator, TransformerMixin, is_classifier
 from tqdm.auto import tqdm
 
 from matminer.utils.utils import homogenize_multiindex
 
 
-class BaseFeaturizer(BaseEstimator, TransformerMixin):
+class BaseFeaturizer(BaseEstimator, TransformerMixin, ABC):
     """
     Abstract class to calculate features from raw materials input data
     such a compound formula or a pymatgen crystal structure or
@@ -99,7 +100,7 @@ class BaseFeaturizer(BaseEstimator, TransformerMixin):
     An additional factor to consider is the chunksize for data parallelisation.
     For lightweight computational tasks, the overhead associated with passing
-    data from `multiprocessing.Pool.map()` to the function being parallelised
+    data from `multiprocessing.Pool.map()` to the function being parallelized
     can increase the time taken for all tasks to be completed. By setting
     the `self._chunksize` argument, the overhead associated with passing data
     to the tasks can be reduced. Note that there is only an advantage to using
@@ -108,7 +109,7 @@ class BaseFeaturizer(BaseEstimator, TransformerMixin):
     itself. By default, we allow the Python multiprocessing library to determine
     the chunk size automatically based on the size of the list being featurized.
     You may want to specify a small chunk size for computationally-expensive
-    featurizers, which will enable better distribution of taks across threads.
+    featurizers, which will enable better distribution of tasks across threads.
     In contrast, for more lightweight featurizers, it is recommended that
     the implementor trial a range of chunksize values to find the optimum.
     As a general rule of thumb, if the featurize function takes 0.1 seconds or
@@ -140,7 +141,7 @@ class BaseFeaturizer(BaseEstimator, TransformerMixin):
     """
 
     def set_n_jobs(self, n_jobs):
-        """Set the number of threads for this """
+        """Set the number of threads for this."""
         self._n_jobs = n_jobs
 
     @property
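
Usage note (not part of this diff): the parallelism settings described in the docstring above are applied directly to a featurizer instance. A minimal sketch, assuming an already-constructed featurizer `f`:

    f.set_n_jobs(4)       # featurize across 4 worker processes instead of all available cores
    f._chunksize = 100    # pass 100 entries per task to reduce Pool.map() overhead;
                          # leaving it at its default lets multiprocessing pick the
                          # chunk size automatically, as the docstring notes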
@@ -503,6 +504,7 @@ def featurize_wrapper(self, x, return_errors=False, ignore_errors=False):
                 "featurize_many(), featurize_dataframe(), etc.)."
             reraise(type(e), type(e)(msg), sys.exc_info()[2])
 
+    @abstractmethod
     def featurize(self, *x):
         """
         Main featurizer function, which has to be implemented
@@ -517,6 +519,7 @@ def featurize(self, *x):
 
         raise NotImplementedError("featurize() is not defined!")
 
+    @abstractmethod
     def feature_labels(self):
         """
         Generate attribute names.
@@ -527,6 +530,7 @@ def feature_labels(self):
 
         raise NotImplementedError("feature_labels() is not defined!")
 
+    @abstractmethod
     def citations(self):
         """
         Citation(s) and reference(s) for this feature.
@@ -538,6 +542,7 @@ def citations(self):
 
         raise NotImplementedError("citations() is not defined!")
 
+    @abstractmethod
     def implementors(self):
         """
         List of implementors of the feature.
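
For context, a minimal sketch (not part of this commit) of what the change means for downstream code: a concrete subclass must now override all four abstract methods, otherwise Python refuses to instantiate it with a TypeError at construction time rather than raising NotImplementedError later when the missing method is called. All names below other than BaseFeaturizer and its methods are hypothetical:

    from matminer.featurizers.base import BaseFeaturizer

    class ElementCountFeaturizer(BaseFeaturizer):
        # hypothetical featurizer: counts the elements in a pymatgen Composition
        def featurize(self, comp):
            return [len(comp.elements)]

        def feature_labels(self):
            return ["n_elements"]

        def citations(self):
            return []

        def implementors(self):
            return ["example contributor"]

    f = ElementCountFeaturizer()  # works: every abstract method is implemented

    # A subclass that omits any of featurize / feature_labels / citations / implementors
    # now fails immediately at instantiation: TypeError: Can't instantiate abstract class ...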
