Included code snippets: don't just slap them in a <pre>; apply syntax highlighting

The Pygments syntax-highlighting library has built-in functionality to guess a code snippet's language based on its filename and content. We hadn't hooked it up; this change hooks it up. Testing Done: https://travis-ci.org/pantsbuild/pants/builds/39542386 ; also patched this change and https://rbcommons.com/s/twitter/r/1191/ into a branch and test-generated the doc site: http://pantsbuild.github.io/sitegen-test/JVMProjects.html ; Bugs closed: 693; Reviewed at https://rbcommons.com/s/twitter/r/1182/
rgbenson · Oct 31, 2014 · 3e789d6 · 3e789d6
1 parent 1625379
commit 3e789d6
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 43 deletions.
diff --git a/src/python/pants/backend/core/tasks/markdown_to_html.py b/src/python/pants/backend/core/tasks/markdown_to_html.py
@@ -11,8 +11,11 @@
 
 import markdown
 from pkg_resources import resource_string
+from pygments import highlight
 from pygments.formatters.html import HtmlFormatter
+from pygments.lexers import guess_lexer_for_filename, PythonLexer, TextLexer
 from pygments.styles import get_all_styles
+from pygments.util import ClassNotFound
 
 from pants import binary_util
 from pants.base.address import SyntheticAddress
@@ -57,7 +60,7 @@ def extendMarkdown(self, md, md_globals):
 INCLUDE_PATTERN = r'!inc(\[(?P<params>[^]]*)\])?\((?P<path>[^' + '\n' + r']*)\)'
 
 
-def choose_include_lines(s, params, source_path):
+def choose_include_text(s, params, source_path):
   """Given the contents of a file and !inc[these params], return matching lines
 
   If there was a problem matching parameters, return empty list.
@@ -104,7 +107,7 @@ def choose_include_lines(s, params, source_path):
       break
   else:
     # never started recording:
-    return []
+    return ''
   for line_ix in range(line_ix, len(lines)):
     line = lines[line_ix]
     if end_before is not None and end_before in line:
@@ -115,8 +118,8 @@ def choose_include_lines(s, params, source_path):
   else:
     if (end_before or end_at):
       # we had an end- filter, but never encountered it.
-      return []
-  return chosen_lines
+      return ''
+  return '\n'.join(chosen_lines)
 
 
 class IncludeExcerptPattern(markdown.inlinepatterns.Pattern):
@@ -134,19 +137,32 @@ def handleMatch(self, match):
     include_path = os.path.join(source_dir, rel_include_path)
     try:
       with open(include_path) as include_file:
-        include_text = include_file.read()
+        file_text = include_file.read()
     except IOError as e:
       raise IOError('Markdown file {0} tried to include file {1}, got '
                     '{2}'.format(self.source_path,
                                  rel_include_path,
                                  e.strerror))
-    include_lines = choose_include_lines(include_text, params, self.source_path)
-    if not include_lines:
+    include_text = choose_include_text(file_text, params, self.source_path)
+    if not include_text:
       raise TaskError('Markdown file {0} tried to include file {1} but '
                       'filtered out everything'.format(self.source_path,
                                                        rel_include_path))
-    el = markdown.util.etree.Element('pre')
-    el.text = markdown.util.AtomicString('\n'.join(include_lines))
+    el = markdown.util.etree.Element('div')
+    el.set('class', 'md-included-snippet')
+    try:
+      lexer = guess_lexer_for_filename(include_path, file_text)
+    except ClassNotFound:
+      # e.g., ClassNotFound: no lexer for filename u'BUILD' found
+      if 'BUILD' in include_path:
+        lexer = PythonLexer()
+      else:
+        lexer = TextLexer()  # the boring plain-text lexer
+
+    html_snippet = highlight(include_text,
+                             lexer,
+                             HtmlFormatter(cssclass='codehilite'))
+    el.text = html_snippet
     return el
 
 

diff --git a/tests/python/pants_test/tasks/test_markdown_to_html.py b/tests/python/pants_test/tasks/test_markdown_to_html.py
@@ -17,74 +17,74 @@
 class ChooseLinesTest(unittest.TestCase):
   def test_include_no_params(self):
     self.assertEquals(
-        markdown_to_html.choose_include_lines(ABC, '', 'fake.md'),
-        ['able', 'baker', 'charlie'])
+        markdown_to_html.choose_include_text(ABC, '', 'fake.md'),
+        '\n'.join(['able', 'baker', 'charlie']))
 
   def test_include_start_at(self):
     self.assertEquals(
-        markdown_to_html.choose_include_lines(ABC, 'start-at=abl', 'fake.md'),
-        ['able', 'baker', 'charlie'])
+        markdown_to_html.choose_include_text(ABC, 'start-at=abl', 'fake.md'),
+        '\n'.join(['able', 'baker', 'charlie']))
 
     self.assertEquals(
-        markdown_to_html.choose_include_lines(ABC, 'start-at=bak', 'fake.md'),
-        ['baker', 'charlie'])
+        markdown_to_html.choose_include_text(ABC, 'start-at=bak', 'fake.md'),
+        '\n'.join(['baker', 'charlie']))
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-at=xxx', 'fake.md'),
-      [])
+      markdown_to_html.choose_include_text(ABC, 'start-at=xxx', 'fake.md'),
+      '')
 
   def test_include_start_after(self):
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-after=bak', 'fake.md'),
-      ['charlie'])
+      markdown_to_html.choose_include_text(ABC, 'start-after=bak', 'fake.md'),
+      'charlie')
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-after=cha', 'fake.md'),
-      [])
+      markdown_to_html.choose_include_text(ABC, 'start-after=cha', 'fake.md'),
+      '')
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-after=xxx', 'fake.md'),
-      [])
+      markdown_to_html.choose_include_text(ABC, 'start-after=xxx', 'fake.md'),
+      '')
 
   def test_include_end_at(self):
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'end-at=abl', 'fake.md'),
-      ['able'])
+      markdown_to_html.choose_include_text(ABC, 'end-at=abl', 'fake.md'),
+      'able')
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'end-at=bak', 'fake.md'),
-      ['able', 'baker'])
+      markdown_to_html.choose_include_text(ABC, 'end-at=bak', 'fake.md'),
+      '\n'.join(['able', 'baker']))
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'end-at=xxx', 'fake.md'),
-      [])
+      markdown_to_html.choose_include_text(ABC, 'end-at=xxx', 'fake.md'),
+      '')
 
   def test_include_end_before(self):
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'end-before=abl', 'fake.md'),
-      [])
+      markdown_to_html.choose_include_text(ABC, 'end-before=abl', 'fake.md'),
+      '')
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'end-before=xxx', 'fake.md'),
-      [])
+      markdown_to_html.choose_include_text(ABC, 'end-before=xxx', 'fake.md'),
+      '')
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'end-before=bak', 'fake.md'),
-      ['able'])
+      markdown_to_html.choose_include_text(ABC, 'end-before=bak', 'fake.md'),
+      'able')
 
   def test_include_start_at_end_at(self):
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-at=abl&end-at=abl', 'fake.md'),
-      ['able'])
+      markdown_to_html.choose_include_text(ABC, 'start-at=abl&end-at=abl', 'fake.md'),
+      'able')
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-at=cha&end-at=cha', 'fake.md'),
-      ['charlie'])
+      markdown_to_html.choose_include_text(ABC, 'start-at=cha&end-at=cha', 'fake.md'),
+      'charlie')
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-at=abl&end-at=bak', 'fake.md'),
-      ['able', 'baker'])
+      markdown_to_html.choose_include_text(ABC, 'start-at=abl&end-at=bak', 'fake.md'),
+      '\n'.join(['able', 'baker']))
 
     self.assertEquals(
-      markdown_to_html.choose_include_lines(ABC, 'start-at=bak&end-at=abl', 'fake.md'),
-      [])
+      markdown_to_html.choose_include_text(ABC, 'start-at=bak&end-at=abl', 'fake.md'),
+      '')