Merge pull request getpelican#931 from Cartroo/slugsubstitutions

Allow text substitutions when generating slugs
chrisng · Jul 14, 2013 · commit fe33d3e
2 parents 7ec4d5f + 39518e1

Showing 6 changed files with 28 additions and 8 deletions.
diff --git a/docs/settings.rst b/docs/settings.rst
@@ -258,6 +258,10 @@ Setting name (default value)                            What does it do?
                                                         posts.
 `DAY_ARCHIVE_SAVE_AS` (False)                           The location to save per-day archives of your
                                                         posts.
+`SLUG_SUBSTITUTIONS`  (``()``)                          Substitutions to make prior to stripping out
+                                                        non-alphanumerics when generating slugs. Specified
+                                                        as a list of 2-tuples of ``(from, to)`` which are
+                                                        applied in order.
 ====================================================    =====================================================
 
 .. note::

diff --git a/pelican/contents.py b/pelican/contents.py
@@ -86,7 +86,8 @@ def __init__(self, content, metadata=None, settings=None,
 
         # create the slug if not existing, from the title
         if not hasattr(self, 'slug') and hasattr(self, 'title'):
-            self.slug = slugify(self.title)
+            self.slug = slugify(self.title,
+                                settings.get('SLUG_SUBSTITUTIONS', ()))
 
         self.source_path = source_path
 

diff --git a/pelican/settings.py b/pelican/settings.py
@@ -105,6 +105,7 @@
     'PLUGINS': [],
     'TEMPLATE_PAGES': {},
     'IGNORE_FILES': ['.#*'],
+    'SLUG_SUBSTITUTIONS': (),
     }
 
 def read_settings(path=None, override=None):

diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py
@@ -94,6 +94,17 @@ def test_slugify(self):
         for value, expected in samples:
             self.assertEqual(utils.slugify(value), expected)
 
+    def test_slugify_substitute(self):
+
+        samples = (('C++ is based on C', 'cpp-is-based-on-c'),
+                   ('C+++ test C+ test', 'cpp-test-c-test'),
+                   ('c++, c#, C#, C++', 'cpp-c-sharp-c-sharp-cpp'),
+                   ('c++-streams', 'cpp-streams'),)
+
+        subs = (('C++', 'CPP'), ('C#', 'C-SHARP'))
+        for value, expected in samples:
+            self.assertEqual(utils.slugify(value, subs), expected)
+
     def test_get_relative_path(self):
 
         samples = ((os.path.join('test', 'test.html'), os.pardir),

diff --git a/pelican/urlwrappers.py b/pelican/urlwrappers.py
@@ -15,10 +15,10 @@ class URLWrapper(object):
     def __init__(self, name, settings):
         # settings must be assigned first: the name setter reads it.
         # the _name and slug assignments below are redundant with the
         # setter of the name property but are here for clarity
+        self.settings = settings
         self._name = name
-        self.slug = slugify(name)
+        self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
         self.name = name
-        self.settings = settings
 
     @property
     def name(self):
@@ -27,7 +27,7 @@ def name(self):
     @name.setter
     def name(self, name):
         self._name = name
-        self.slug = slugify(name)
+        self.slug = slugify(name, self.settings.get('SLUG_SUBSTITUTIONS', ()))
 
     def as_dict(self):
         d = self.__dict__
@@ -41,7 +41,8 @@ def _key(self):
         return self.slug
 
     def _normalize_key(self, key):
-        return six.text_type(slugify(key))
+        subs = self.settings.get('SLUG_SUBSTITUTIONS', ())
+        return six.text_type(slugify(key, subs))
 
     def __eq__(self, other):
         return self._key() == self._normalize_key(other)

diff --git a/pelican/utils.py b/pelican/utils.py
@@ -231,7 +231,7 @@ def __exit__(self, exc_type, exc_value, traceback):
         pass
 
 
-def slugify(value):
+def slugify(value, substitutions=()):
     """
     Normalizes string, converts to lowercase, removes non-alpha characters,
     and converts spaces to hyphens.
@@ -249,8 +249,10 @@ def slugify(value):
     if isinstance(value, six.binary_type):
         value = value.decode('ascii')
     # still unicode
-    value = unicodedata.normalize('NFKD', value)
-    value = re.sub('[^\w\s-]', '', value).strip().lower()
+    value = unicodedata.normalize('NFKD', value).lower()
+    for src, dst in substitutions:
+        value = value.replace(src.lower(), dst.lower())
+    value = re.sub('[^\w\s-]', '', value).strip()
     value = re.sub('[-\s]+', '-', value)
     # we want only ASCII chars
     value = value.encode('ascii', 'ignore')