Merge branch '1.1.0_pandoc_with_mirror_tests' into 1.1.0_rc1

kamisamahuang · Apr 6, 2019 · 172b910 · 172b910
2 parents 355b969 + c26760f
commit 172b910
Show file tree

Hide file tree

Showing 28 changed files with 1,196 additions and 19 deletions.
diff --git a/jupytext/combine.py b/jupytext/combine.py
@@ -13,7 +13,7 @@
 def black_invariant(text, chars=None):
     """Remove characters that may be changed when reformatting the text with black"""
     if chars is None:
-        chars = [' ', '\n', ',', "'", '"', '(', ')', '\\']
+        chars = [' ', '\t', '\n', ',', "'", '"', '(', ')', '\\']
 
     for char in chars:
         text = text.replace(char, '')

diff --git a/jupytext/compare.py b/jupytext/compare.py
@@ -82,6 +82,7 @@ def compare_notebooks(notebook_expected,
     allow_missing_code_cell_metadata = allow_expected_differences and format_name == 'sphinx'
     allow_missing_markdown_cell_metadata = allow_expected_differences and format_name in ['sphinx', 'spin']
     allow_removed_final_blank_line = allow_expected_differences
+    replace_tabs_with_spaces = format_name == 'pandoc'
 
     cell_metadata_filter = notebook_actual.get('jupytext', {}).get('cell_metadata_filter')
 
@@ -93,6 +94,9 @@ def compare_notebooks(notebook_expected,
     modified_cells = set()
     modified_cell_metadata = set()
     for i, ref_cell in enumerate(notebook_expected.cells, 1):
+        if replace_tabs_with_spaces and '\t' in ref_cell.source:
+            ref_cell = copy(ref_cell)
+            ref_cell.source = ref_cell.source.replace('\t', '    ')
         try:
             test_cell = next(test_cell_iter)
         except StopIteration:

diff --git a/jupytext/formats.py b/jupytext/formats.py
@@ -16,6 +16,7 @@
 from .metadata_filter import metadata_filter_as_string
 from .stringparser import StringParser
 from .languages import _SCRIPT_EXTENSIONS, _COMMENT_CHARS
+from .pandoc import pandoc_version, is_pandoc_available
 
 
 class JupytextFormatError(ValueError):
@@ -134,6 +135,15 @@ def __init__(self,
             current_version_number='1.1')
     ]
 
+# Register the 'pandoc' flavor of Markdown only when is_pandoc_available() succeeds.
+# No cell reader/exporter class is given for this format; its version number is the
+# one reported by pandoc_version().
+if is_pandoc_available():
+    JUPYTEXT_FORMATS.append(NotebookFormatDescription(
+        format_name='pandoc',
+        extension='.md',
+        header_prefix='',
+        cell_reader_class=None,
+        cell_exporter_class=None,
+        current_version_number=pandoc_version()))
+
 NOTEBOOK_EXTENSIONS = list(dict.fromkeys(['.ipynb'] + [fmt.extension for fmt in JUPYTEXT_FORMATS]))
 EXTENSION_PREFIXES = ['.lgt', '.spx', '.pct', '.hyd', '.nb']
 
@@ -151,6 +161,9 @@ def get_format_implementation(ext, format_name=None):
             formats_for_extension.append(fmt.format_name)
 
     if formats_for_extension:
+        if ext == '.md' and format_name == 'pandoc':
+            raise JupytextFormatError('Please install pandoc>=2.7.1')
+
         raise JupytextFormatError("Format '{}' is not associated to extension '{}'. "
                                   "Please choose one of: {}.".format(format_name, ext,
                                                                      ', '.join(formats_for_extension)))
@@ -254,6 +267,11 @@ def guess_format(text, ext):
         if rspin_comment_count >= 1:
             return 'spin', {}
 
+    if ext == '.md':
+        for line in lines:
+            if line.startswith(':::'):  # Pandoc div
+                # Return a (format_name, format_options) pair, like the other branches
+                return 'pandoc', {}
+
     # Default format
     return get_format_implementation(ext).format_name, {}
 

diff --git a/jupytext/header.py b/jupytext/header.py
@@ -70,6 +70,23 @@ def encoding_and_executable(notebook, metadata, ext):
     return lines
 
 
+def insert_jupytext_info_and_filter_metadata(metadata, ext, text_format):
+    """Add the Jupytext text representation information to the notebook metadata
+    (when version numbers are to be inserted), drop an empty 'jupytext' section,
+    and return the metadata filtered with the default or user filter.
+
+    Note that the input metadata is modified in place."""
+    if insert_or_test_version_number():
+        text_representation = {
+            'extension': ext,
+            'format_name': text_format.format_name,
+            'format_version': text_format.current_version_number,
+            'jupytext_version': __version__}
+        metadata.setdefault('jupytext', {})['text_representation'] = text_representation
+
+    # An empty 'jupytext' section is not worth being saved
+    if 'jupytext' in metadata and not metadata['jupytext']:
+        del metadata['jupytext']
+
+    user_filter = metadata.get('jupytext', {}).get('notebook_metadata_filter')
+    return filter_metadata(metadata, user_filter, _DEFAULT_NOTEBOOK_METADATA)
+
+
 def metadata_and_cell_to_header(notebook, metadata, text_format, ext):
     """
     Return the text header corresponding to a notebook, and remove the
@@ -90,18 +107,7 @@ def metadata_and_cell_to_header(notebook, metadata, text_format, ext):
                 lines_to_next_cell = cell.metadata.get('lines_to_next_cell')
                 notebook.cells = notebook.cells[1:]
 
-    if insert_or_test_version_number():
-        metadata.setdefault('jupytext', {})['text_representation'] = {
-            'extension': ext,
-            'format_name': text_format.format_name,
-            'format_version': text_format.current_version_number,
-            'jupytext_version': __version__}
-
-    if 'jupytext' in metadata and not metadata['jupytext']:
-        del metadata['jupytext']
-
-    notebook_metadata_filter = metadata.get('jupytext', {}).get('notebook_metadata_filter')
-    metadata = filter_metadata(metadata, notebook_metadata_filter, _DEFAULT_NOTEBOOK_METADATA)
+    metadata = insert_jupytext_info_and_filter_metadata(metadata, ext, text_format)
 
     if metadata:
         header.extend(yaml.safe_dump({'jupyter': metadata}, default_flow_style=False).splitlines())

diff --git a/jupytext/jupytext.py b/jupytext/jupytext.py
@@ -6,16 +6,18 @@
 import logging
 from copy import copy, deepcopy
 from nbformat.v4.rwbase import NotebookReader, NotebookWriter
-from nbformat.v4.nbbase import new_notebook, new_code_cell
+from nbformat.v4.nbbase import new_notebook, new_code_cell, NotebookNode
 import nbformat
 from .formats import _VALID_FORMAT_OPTIONS
 from .formats import read_format_from_metadata, update_jupytext_formats_metadata, rearrange_jupytext_metadata
 from .formats import format_name_for_ext, guess_format, divine_format, get_format_implementation, long_form_one_format
-from .header import header_to_metadata_and_cell, metadata_and_cell_to_header
+from .header import header_to_metadata_and_cell, metadata_and_cell_to_header, insert_jupytext_info_and_filter_metadata
 from .header import encoding_and_executable, insert_or_test_version_number
-from .metadata_filter import update_metadata_filters
+from .metadata_filter import update_metadata_filters, filter_metadata
+from .cell_metadata import _IGNORE_CELL_METADATA
 from .languages import default_language_from_metadata_and_ext, set_main_and_cell_language
 from .pep8 import pep8_lines_between_cells
+from .pandoc import md_to_notebook, notebook_to_md
 
 
 class TextNotebookConverter(NotebookReader, NotebookWriter):
@@ -42,6 +44,9 @@ def update_fmt_with_notebook_options(self, metadata):
 
     def reads(self, s, **_):
         """Read a notebook represented as text"""
+        if self.fmt.get('format_name') == 'pandoc':
+            return md_to_notebook(s)
+
         lines = s.splitlines()
 
         cells = []
@@ -86,6 +91,21 @@ def reads(self, s, **_):
 
     def writes(self, nb, metadata=None, **kwargs):
         """Return the text representation of the notebook"""
+        if self.fmt.get('format_name') == 'pandoc':
+            # Fall back on the notebook metadata when none is given, and work on a copy
+            # since the helper below modifies its input (same convention as for the
+            # other formats, see below)
+            metadata = insert_jupytext_info_and_filter_metadata(
+                deepcopy(metadata or nb.metadata), self.ext, self.implementation)
+
+            # Export the cells with filtered metadata only (outputs are not passed on)
+            cells = []
+            for cell in nb.cells:
+                cell_metadata = filter_metadata(copy(cell.metadata),
+                                                self.fmt.get('cell_metadata_filter'),
+                                                _IGNORE_CELL_METADATA)
+                if cell.cell_type == 'code':
+                    cells.append(new_code_cell(source=cell.source, metadata=cell_metadata))
+                else:
+                    cells.append(NotebookNode(source=cell.source, metadata=cell_metadata, cell_type=cell.cell_type))
+
+            return notebook_to_md(new_notebook(metadata=metadata, cells=cells))
+
         # Copy the notebook, in order to be sure we do not modify the original notebook
         nb = new_notebook(cells=nb.cells, metadata=deepcopy(metadata or nb.metadata))
         metadata = nb.metadata

diff --git a/jupytext/pandoc.py b/jupytext/pandoc.py
@@ -0,0 +1,80 @@
+"""Jupyter notebook to Markdown and back, using Pandoc"""
+
+import os
+import subprocess
+import tempfile
+import nbformat
+from pkg_resources import parse_version
+
+
+class PandocError(OSError):
+    """Raised when Pandoc fails, or when its version is not supported"""
+
+
+def pandoc(args, filein=None, fileout=None):
+    """Execute pandoc with the given arguments
+
+    :param args: the command line options, as a single whitespace separated string
+    :param filein: optional path to the input file
+    :param fileout: optional path to the output file (passed to pandoc with -o)
+    :return: the standard output of pandoc, decoded as UTF-8
+    :raises PandocError: when pandoc exits with a non-zero return code
+    """
+    cmd = [u'pandoc']
+
+    if filein:
+        cmd.append(filein)
+
+    if fileout:
+        cmd.append('-o')
+        cmd.append(fileout)
+
+    cmd.extend(args.split())
+
+    # stderr has to be piped as well: otherwise communicate() returns None for it,
+    # and the error message below would carry no information on the failure
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    out, err = proc.communicate()
+    if proc.returncode:
+        raise PandocError(u'pandoc exited with return code {}\n{}'.format(
+            proc.returncode, err.decode('utf-8', 'replace')))
+    return out.decode('utf-8')
+
+
+def is_pandoc_available():
+    """Return True when pandoc_version() succeeds (i.e. Pandoc>=2.7.1 was found), and False otherwise"""
+    try:
+        pandoc_version()
+    except (IOError, OSError, PandocError):
+        # Pandoc could not be executed, or it is too old
+        return False
+    return True
+
+
+def pandoc_version():
+    """Return Pandoc's version number, and raise a PandocError if it is older than 2.7.1"""
+    # The version is the second word on the first line of 'pandoc --version'
+    first_line = pandoc(u'--version').splitlines()[0]
+    version = first_line.split()[1]
+    if parse_version(version) < parse_version('2.7.1'):
+        raise PandocError('Please install pandoc>=2.7.1 (found version {})'.format(version))
+
+    return version
+
+
+def md_to_notebook(text):
+    """Convert a Markdown text to a Jupyter notebook, using Pandoc
+
+    :param text: the Markdown document, as a string
+    :return: the notebook read by nbformat (version 4) from Pandoc's output
+    :raises PandocError: when pandoc exits with a non-zero return code
+    """
+    # Pandoc reads from and writes to the same temporary file
+    tmp_file = tempfile.NamedTemporaryFile(delete=False)
+    try:
+        tmp_file.write(text.encode('utf-8'))
+        tmp_file.close()
+
+        pandoc(u'--from markdown --to ipynb -s --atx-headers --wrap=preserve', tmp_file.name, tmp_file.name)
+
+        with open(tmp_file.name, encoding='utf-8') as opened_file:
+            notebook = nbformat.read(opened_file, as_version=4)
+    finally:
+        # Always remove the temporary file, including when the conversion fails
+        tmp_file.close()
+        os.unlink(tmp_file.name)
+
+    return notebook
+
+
+def notebook_to_md(notebook):
+    """Convert a notebook to its Markdown representation, using Pandoc
+
+    :param notebook: the notebook, in a form that nbformat.writes can serialize
+    :return: the Markdown text, with lines joined by a line feed and no final end of line
+    :raises PandocError: when pandoc exits with a non-zero return code
+    """
+    # Pandoc reads from and writes to the same temporary file
+    tmp_file = tempfile.NamedTemporaryFile(delete=False)
+    try:
+        tmp_file.write(nbformat.writes(notebook).encode('utf-8'))
+        tmp_file.close()
+
+        pandoc(u'--from ipynb --to markdown -s --atx-headers --wrap=preserve', tmp_file.name, tmp_file.name)
+
+        with open(tmp_file.name, encoding='utf-8') as opened_file:
+            text = opened_file.read()
+    finally:
+        # Always remove the temporary file, including when the conversion fails
+        tmp_file.close()
+        os.unlink(tmp_file.name)
+
+    return '\n'.join(text.splitlines())
diff --git a/...otebooks/mirror/ipynb_to_pandoc/Notebook with function and cell metadata 164.md b/...otebooks/mirror/ipynb_to_pandoc/Notebook with function and cell metadata 164.md
@@ -0,0 +1,43 @@
+---
+jupyter:
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  nbformat: 4
+  nbformat_minor: 2
+---
+
+::: {.cell .code}
+``` {.python}
+1 + 1
+```
+:::
+
+::: {.cell .markdown}
+A markdown cell
+And below, the cell for function f has non trivial cell metadata. And the next cell as well.
+:::
+
+::: {.cell .code attributes="{\"n\":\"10\",\"id\":\"\",\"classes\":[]}"}
+``` {.python}
+def f(x):
+    return x
+```
+:::
+
+::: {.cell .code attributes="{\"n\":\"10\",\"id\":\"\",\"classes\":[]}"}
+``` {.python}
+f(5)
+```
+:::
+
+::: {.cell .markdown}
+More text
+:::
+
+::: {.cell .code}
+``` {.python}
+2 + 2
+```
+:::
diff --git a/tests/notebooks/mirror/ipynb_to_pandoc/Notebook with html and latex cells.md b/tests/notebooks/mirror/ipynb_to_pandoc/Notebook with html and latex cells.md
@@ -0,0 +1,45 @@
+---
+jupyter:
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  nbformat: 4
+  nbformat_minor: 2
+---
+
+::: {.cell .code}
+``` {.python}
+%%html
+<p><a href="https://github.com/mwouts/jupytext", style="color: rgb(0,0,255)">Jupytext</a> on GitHub</p>
+```
+:::
+
+::: {.cell .code}
+``` {.python}
+%%latex
+$\frac{\pi}{2}$
+```
+:::
+
+::: {.cell .code}
+``` {.python}
+%load_ext rpy2.ipython
+```
+:::
+
+::: {.cell .code}
+``` {.python}
+%%R
+library(ggplot2)
+ggplot(data=data.frame(x=c('A', 'B'), y=c(5, 2)), aes(x,weight=y)) + geom_bar()
+```
+:::
+
+::: {.cell .code}
+``` {.python}
+%matplotlib inline
+import pandas as pd
+pd.Series({'A':5, 'B':2}).plot(figsize=(3,2), kind='bar')
+```
+:::
diff --git a/tests/notebooks/mirror/ipynb_to_pandoc/Notebook with many hash signs.md b/tests/notebooks/mirror/ipynb_to_pandoc/Notebook with many hash signs.md
@@ -0,0 +1,40 @@
+---
+jupyter:
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  nbformat: 4
+  nbformat_minor: 2
+---
+
+::: {.cell .markdown}
+################################################################## 
+
+This is a notebook that contains many hash signs.
+Hopefully its python representation is not recognized as a Sphinx Gallery script\...
+
+################################################################## 
+:::
+
+::: {.cell .code}
+``` {.python}
+some = 1
+code = 2
+some+code
+
+##################################################################
+# A comment
+##################################################################
+# Another comment
+```
+:::
+
+::: {.cell .markdown}
+##################################################################  {#section}
+
+This is a notebook that contains many hash signs.
+Hopefully its python representation is not recognized as a Sphinx Gallery script\...
+
+##################################################################  {#section-1}
+:::