Skip to content

Commit

Permalink
Merge branch '1.1.0_pandoc_with_mirror_tests' into 1.1.0_rc1
Browse files Browse the repository at this point in the history
  • Loading branch information
mwouts committed Apr 6, 2019
2 parents 355b969 + c26760f commit 172b910
Show file tree
Hide file tree
Showing 28 changed files with 1,196 additions and 19 deletions.
2 changes: 1 addition & 1 deletion jupytext/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
def black_invariant(text, chars=None):
"""Remove characters that may be changed when reformatting the text with black"""
if chars is None:
chars = [' ', '\n', ',', "'", '"', '(', ')', '\\']
chars = [' ', '\t', '\n', ',', "'", '"', '(', ')', '\\']

for char in chars:
text = text.replace(char, '')
Expand Down
4 changes: 4 additions & 0 deletions jupytext/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def compare_notebooks(notebook_expected,
allow_missing_code_cell_metadata = allow_expected_differences and format_name == 'sphinx'
allow_missing_markdown_cell_metadata = allow_expected_differences and format_name in ['sphinx', 'spin']
allow_removed_final_blank_line = allow_expected_differences
replace_tabs_with_spaces = format_name == 'pandoc'

cell_metadata_filter = notebook_actual.get('jupytext', {}).get('cell_metadata_filter')

Expand All @@ -93,6 +94,9 @@ def compare_notebooks(notebook_expected,
modified_cells = set()
modified_cell_metadata = set()
for i, ref_cell in enumerate(notebook_expected.cells, 1):
if replace_tabs_with_spaces and '\t' in ref_cell.source:
ref_cell = copy(ref_cell)
ref_cell.source = ref_cell.source.replace('\t', ' ')
try:
test_cell = next(test_cell_iter)
except StopIteration:
Expand Down
18 changes: 18 additions & 0 deletions jupytext/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .metadata_filter import metadata_filter_as_string
from .stringparser import StringParser
from .languages import _SCRIPT_EXTENSIONS, _COMMENT_CHARS
from .pandoc import pandoc_version, is_pandoc_available


class JupytextFormatError(ValueError):
Expand Down Expand Up @@ -134,6 +135,15 @@ def __init__(self,
current_version_number='1.1')
]

if is_pandoc_available():
JUPYTEXT_FORMATS.append(NotebookFormatDescription(
format_name='pandoc',
extension='.md',
header_prefix='',
cell_reader_class=None,
cell_exporter_class=None,
current_version_number=pandoc_version()))

NOTEBOOK_EXTENSIONS = list(dict.fromkeys(['.ipynb'] + [fmt.extension for fmt in JUPYTEXT_FORMATS]))
EXTENSION_PREFIXES = ['.lgt', '.spx', '.pct', '.hyd', '.nb']

Expand All @@ -151,6 +161,9 @@ def get_format_implementation(ext, format_name=None):
formats_for_extension.append(fmt.format_name)

if formats_for_extension:
if ext == '.md' and format_name == 'pandoc':
raise JupytextFormatError('Please install pandoc>=2.7.1')

raise JupytextFormatError("Format '{}' is not associated to extension '{}'. "
"Please choose one of: {}.".format(format_name, ext,
', '.join(formats_for_extension)))
Expand Down Expand Up @@ -254,6 +267,11 @@ def guess_format(text, ext):
if rspin_comment_count >= 1:
return 'spin', {}

if ext == '.md':
for line in lines:
if line.startswith(':::'): # Pandoc div
return 'pandoc'

# Default format
return get_format_implementation(ext).format_name, {}

Expand Down
30 changes: 18 additions & 12 deletions jupytext/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,23 @@ def encoding_and_executable(notebook, metadata, ext):
return lines


def insert_jupytext_info_and_filter_metadata(metadata, ext, text_format):
    """Add the Jupytext text representation to the notebook metadata, then filter it.

    The ``metadata`` dictionary is modified in place: when
    ``insert_or_test_version_number()`` is true, its
    ``jupytext.text_representation`` entry is set from ``ext``, from the name
    and version of ``text_format``, and from the Jupytext version. An empty
    ``jupytext`` entry is removed.

    The return value is the result of ``filter_metadata`` applied to the
    metadata, with the notebook metadata filter found under the ``jupytext``
    entry (``None`` if there is none) and ``_DEFAULT_NOTEBOOK_METADATA``.
    """
    if insert_or_test_version_number():
        jupytext_section = metadata.setdefault('jupytext', {})
        jupytext_section['text_representation'] = dict(
            extension=ext,
            format_name=text_format.format_name,
            format_version=text_format.current_version_number,
            jupytext_version=__version__)

    # A 'jupytext' entry that is present but empty carries no information:
    # remove it (a missing entry yields the True default and is left alone)
    if not metadata.get('jupytext', True):
        del metadata['jupytext']

    user_filter = metadata.get('jupytext', {}).get('notebook_metadata_filter')
    return filter_metadata(metadata, user_filter, _DEFAULT_NOTEBOOK_METADATA)


def metadata_and_cell_to_header(notebook, metadata, text_format, ext):
"""
Return the text header corresponding to a notebook, and remove the
Expand All @@ -90,18 +107,7 @@ def metadata_and_cell_to_header(notebook, metadata, text_format, ext):
lines_to_next_cell = cell.metadata.get('lines_to_next_cell')
notebook.cells = notebook.cells[1:]

if insert_or_test_version_number():
metadata.setdefault('jupytext', {})['text_representation'] = {
'extension': ext,
'format_name': text_format.format_name,
'format_version': text_format.current_version_number,
'jupytext_version': __version__}

if 'jupytext' in metadata and not metadata['jupytext']:
del metadata['jupytext']

notebook_metadata_filter = metadata.get('jupytext', {}).get('notebook_metadata_filter')
metadata = filter_metadata(metadata, notebook_metadata_filter, _DEFAULT_NOTEBOOK_METADATA)
metadata = insert_jupytext_info_and_filter_metadata(metadata, ext, text_format)

if metadata:
header.extend(yaml.safe_dump({'jupyter': metadata}, default_flow_style=False).splitlines())
Expand Down
26 changes: 23 additions & 3 deletions jupytext/jupytext.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
import logging
from copy import copy, deepcopy
from nbformat.v4.rwbase import NotebookReader, NotebookWriter
from nbformat.v4.nbbase import new_notebook, new_code_cell
from nbformat.v4.nbbase import new_notebook, new_code_cell, NotebookNode
import nbformat
from .formats import _VALID_FORMAT_OPTIONS
from .formats import read_format_from_metadata, update_jupytext_formats_metadata, rearrange_jupytext_metadata
from .formats import format_name_for_ext, guess_format, divine_format, get_format_implementation, long_form_one_format
from .header import header_to_metadata_and_cell, metadata_and_cell_to_header
from .header import header_to_metadata_and_cell, metadata_and_cell_to_header, insert_jupytext_info_and_filter_metadata
from .header import encoding_and_executable, insert_or_test_version_number
from .metadata_filter import update_metadata_filters
from .metadata_filter import update_metadata_filters, filter_metadata
from .cell_metadata import _IGNORE_CELL_METADATA
from .languages import default_language_from_metadata_and_ext, set_main_and_cell_language
from .pep8 import pep8_lines_between_cells
from .pandoc import md_to_notebook, notebook_to_md


class TextNotebookConverter(NotebookReader, NotebookWriter):
Expand All @@ -42,6 +44,9 @@ def update_fmt_with_notebook_options(self, metadata):

def reads(self, s, **_):
"""Read a notebook represented as text"""
if self.fmt.get('format_name') == 'pandoc':
return md_to_notebook(s)

lines = s.splitlines()

cells = []
Expand Down Expand Up @@ -86,6 +91,21 @@ def reads(self, s, **_):

def writes(self, nb, metadata=None, **kwargs):
"""Return the text representation of the notebook"""
if self.fmt.get('format_name') == 'pandoc':
metadata = insert_jupytext_info_and_filter_metadata(metadata, self.ext, self.implementation)

cells = []
for cell in nb.cells:
cell_metadata = filter_metadata(copy(cell.metadata),
self.fmt.get('cell_metadata_filter'),
_IGNORE_CELL_METADATA)
if cell.cell_type == 'code':
cells.append(new_code_cell(source=cell.source, metadata=cell_metadata))
else:
cells.append(NotebookNode(source=cell.source, metadata=cell_metadata, cell_type=cell.cell_type))

return notebook_to_md(new_notebook(metadata=metadata, cells=cells))

# Copy the notebook, in order to be sure we do not modify the original notebook
nb = new_notebook(cells=nb.cells, metadata=deepcopy(metadata or nb.metadata))
metadata = nb.metadata
Expand Down
80 changes: 80 additions & 0 deletions jupytext/pandoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""Jupyter notebook to Markdown and back, using Pandoc"""

import os
import subprocess
import tempfile
import nbformat
from pkg_resources import parse_version


class PandocError(OSError):
    """An error related to Pandoc.

    This is a subclass of OSError, so code that catches OSError also
    catches Pandoc failures.
    """


def pandoc(args, filein=None, fileout=None):
    """Execute pandoc with the given arguments and return its standard output.

    ``args`` is a single string of command line options; it is split on
    whitespace, so an individual option cannot contain a space.
    ``filein``, when given, is passed to pandoc as the input file, and
    ``fileout``, when given, is passed with the ``-o`` option.

    The standard output of pandoc is returned, decoded as UTF-8.
    A PandocError is raised when pandoc exits with a non-zero return code;
    the message contains the return code and the standard error of pandoc.
    """
    cmd = [u'pandoc']

    if filein:
        cmd.append(filein)

    if fileout:
        cmd.extend(['-o', fileout])

    cmd.extend(args.split())

    # stderr has to be piped as well: communicate() returns None for a stream
    # that is not piped, and the error message below would then end in 'None'.
    # As a consequence, pandoc's stderr is only reported when pandoc fails.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode:
        # Decode leniently: a diagnostic message should never fail to display
        raise PandocError(u'pandoc exited with return code {}\n{}'.format(
            proc.returncode, err.decode('utf-8', 'replace')))
    return out.decode('utf-8')


def is_pandoc_available():
    """Return True when pandoc_version() succeeds, i.e. Pandoc>=2.7.1 is available.

    Any IOError, OSError or PandocError raised while querying the version
    is interpreted as Pandoc not being available, and False is returned.
    """
    try:
        pandoc_version()
    except (IOError, OSError, PandocError):
        return False
    else:
        return True


def pandoc_version():
    """Return the version number of Pandoc, as a string.

    The version is the second word on the first line of the output of
    ``pandoc --version``. A PandocError is raised if that version is
    lower than 2.7.1.
    """
    first_line = pandoc(u'--version').splitlines()[0]
    version = first_line.split()[1]

    if parse_version(version) < parse_version('2.7.1'):
        raise PandocError('Please install pandoc>=2.7.1 (found version {})'.format(version))

    return version


def md_to_notebook(text):
    """Convert a Markdown text to a Jupyter notebook, using Pandoc.

    The text is written, encoded as UTF-8, to a temporary file that Pandoc
    converts in place to the ipynb format. The resulting file is read with
    nbformat (as a version 4 notebook) and returned.

    The temporary file is removed in every case, including when Pandoc or
    nbformat raises an error.
    """
    # delete=False: the file must outlive close(), since Pandoc reopens it by name
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        tmp_file.write(text.encode('utf-8'))
        tmp_file.close()

        pandoc(u'--from markdown --to ipynb -s --atx-headers --wrap=preserve', tmp_file.name, tmp_file.name)

        with open(tmp_file.name, encoding='utf-8') as opened_file:
            return nbformat.read(opened_file, as_version=4)
    finally:
        # close() is a no-op when the file is already closed; it matters only
        # if the write above failed before the explicit close
        tmp_file.close()
        os.unlink(tmp_file.name)


def notebook_to_md(notebook):
    """Convert a notebook to its Markdown representation, using Pandoc.

    The notebook is serialized with nbformat and written, encoded as UTF-8,
    to a temporary file that Pandoc converts in place to Markdown. The text
    of that file is returned with its lines joined by ``\\n``, and thus
    without a trailing end of line.

    The temporary file is removed in every case, including when nbformat or
    Pandoc raises an error.
    """
    # delete=False: the file must outlive close(), since Pandoc reopens it by name
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        tmp_file.write(nbformat.writes(notebook).encode('utf-8'))
        tmp_file.close()

        pandoc(u'--from ipynb --to markdown -s --atx-headers --wrap=preserve', tmp_file.name, tmp_file.name)

        with open(tmp_file.name, encoding='utf-8') as opened_file:
            text = opened_file.read()
    finally:
        # close() is a no-op when the file is already closed; it matters only
        # if the write above failed before the explicit close
        tmp_file.close()
        os.unlink(tmp_file.name)

    return '\n'.join(text.splitlines())
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
---
jupyter:
kernelspec:
display_name: Python 3
language: python
name: python3
nbformat: 4
nbformat_minor: 2
---

::: {.cell .code}
``` {.python}
1 + 1
```
:::

::: {.cell .markdown}
A markdown cell
And below, the cell for function f has non trivial cell metadata. And the next cell as well.
:::

::: {.cell .code attributes="{\"n\":\"10\",\"id\":\"\",\"classes\":[]}"}
``` {.python}
def f(x):
return x
```
:::

::: {.cell .code attributes="{\"n\":\"10\",\"id\":\"\",\"classes\":[]}"}
``` {.python}
f(5)
```
:::

::: {.cell .markdown}
More text
:::

::: {.cell .code}
``` {.python}
2 + 2
```
:::
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
jupyter:
kernelspec:
display_name: Python 3
language: python
name: python3
nbformat: 4
nbformat_minor: 2
---

::: {.cell .code}
``` {.python}
%%html
<p><a href="https://github.com/mwouts/jupytext", style="color: rgb(0,0,255)">Jupytext</a> on GitHub</p>
```
:::

::: {.cell .code}
``` {.python}
%%latex
$\frac{\pi}{2}$
```
:::

::: {.cell .code}
``` {.python}
%load_ext rpy2.ipython
```
:::

::: {.cell .code}
``` {.python}
%%R
library(ggplot2)
ggplot(data=data.frame(x=c('A', 'B'), y=c(5, 2)), aes(x,weight=y)) + geom_bar()
```
:::

::: {.cell .code}
``` {.python}
%matplotlib inline
import pandas as pd
pd.Series({'A':5, 'B':2}).plot(figsize=(3,2), kind='bar')
```
:::
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
jupyter:
kernelspec:
display_name: Python 3
language: python
name: python3
nbformat: 4
nbformat_minor: 2
---

::: {.cell .markdown}
##################################################################

This is a notebook that contains many hash signs.
Hopefully its python representation is not recognized as a Sphinx Gallery script\...

##################################################################
:::

::: {.cell .code}
``` {.python}
some = 1
code = 2
some+code
##################################################################
# A comment
##################################################################
# Another comment
```
:::

::: {.cell .markdown}
################################################################## {#section}

This is a notebook that contains many hash signs.
Hopefully its python representation is not recognized as a Sphinx Gallery script\...

################################################################## {#section-1}
:::
Loading

0 comments on commit 172b910

Please sign in to comment.