Skip to content

Commit

Permalink
Merge pull request #126 from gvwilson/consolidating-links
Browse files Browse the repository at this point in the history
Checking consolidated links
  • Loading branch information
rgaiacs authored Feb 4, 2017
2 parents 3d04cbb + d271051 commit 220eedc
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 11 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ ${RMD_DST} : ${RMD_SRC}

## lesson-check : validate lesson Markdown.
lesson-check :
@bin/lesson_check.py -s . -p ${PARSER}
@bin/lesson_check.py -s . -p ${PARSER} -r _includes/links.md

## lesson-check-all : validate lesson Markdown, checking line lengths and trailing whitespace.
lesson-check-all :
Expand Down
88 changes: 78 additions & 10 deletions bin/lesson_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX

__version__ = '0.2'
__version__ = '0.3'

# Where to look for source Markdown files.
SOURCE_DIRS = ['', '_episodes', '_extras']
Expand Down Expand Up @@ -48,7 +48,10 @@
P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')

# Pattern to match internally-defined Markdown links.
P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]')
P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]')

# Pattern to match reference links (to resolve internally-defined references).
P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)')

# What kinds of blockquotes are allowed?
KNOWN_BLOCKQUOTES = {
Expand Down Expand Up @@ -103,13 +106,16 @@ def main():
args = parse_args()
args.reporter = Reporter()
check_config(args.reporter, args.source_dir)
args.references = read_references(args.reporter, args.reference_path)

docs = read_all_markdown(args.source_dir, args.parser)
check_fileset(args.source_dir, args.reporter, docs.keys())
check_unwanted_files(args.source_dir, args.reporter)
for filename in docs.keys():
checker = create_checker(args, filename, docs[filename])
checker.check()
check_figures(args.source_dir, args.reporter)

args.reporter.report()


Expand All @@ -126,6 +132,10 @@ def parse_args():
default=None,
dest='parser',
help='path to Markdown parser')
parser.add_option('-r', '--references',
default=None,
dest='reference_path',
help='path to Markdown file of external references')
parser.add_option('-s', '--source',
default=os.curdir,
dest='source_dir',
Expand Down Expand Up @@ -160,6 +170,37 @@ def check_config(reporter, source_dir):
'"root" not set to ".." in configuration')


def read_references(reporter, ref_path):
    """Load the shared link-definition file into a lookup table.

    Every line of the file at `ref_path` is matched against
    P_INTERNAL_LINK_DEF (`[name]: url`).  `require` is called with the
    match result and again with the name, so a line that does not match
    (or has an empty name) is handled by `require`.  Repeated names and
    repeated URLs are passed to `reporter.check`; a later definition of a
    name replaces the earlier one in the returned table.

    Returns a dictionary {symbolic_name : URL}; it is empty when
    `ref_path` is not set.
    """

    references = {}
    if not ref_path:
        return references

    known_urls = set()
    with open(ref_path, 'r') as source:
        # Count from 1 so reported positions match what an editor shows.
        for (line_num, line) in enumerate(source, 1):
            matched = P_INTERNAL_LINK_DEF.search(line)
            require(matched,
                    '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip()))
            (name, url) = matched.group(1, 2)
            require(name,
                    'Empty reference at {0}:{1}'.format(ref_path, line_num))

            # Names and URLs are checked independently: the same URL under
            # two different names is reported as well.
            name_is_new = name not in references
            reporter.check(name_is_new,
                           ref_path,
                           'Duplicate reference {0} at line {1}',
                           name, line_num)
            url_is_new = url not in known_urls
            reporter.check(url_is_new,
                           ref_path,
                           'Duplicate definition of URL {0} at line {1}',
                           url, line_num)

            references[name] = url
            known_urls.add(url)

    return references


def read_all_markdown(source_dir, parser):
"""Read source files, returning
{path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}}
Expand Down Expand Up @@ -274,7 +315,7 @@ def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):


def check(self):
"""Run tests on metadata."""
"""Run tests."""

self.check_metadata()
self.check_line_lengths()
Expand Down Expand Up @@ -342,17 +383,16 @@ def check_codeblock_classes(self):
def check_defined_link_references(self):
"""Check that defined links resolve in the file.
Internally-defined links match the pattern [text][label]. If
the label contains '{{...}}', it is hopefully a references to
a configuration value - we should check that, but don't right
now.
Internally-defined links match the pattern [text][label].
"""

result = set()
for node in self.find_all(self.doc, {'type' : 'text'}):
for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']):
if '{{' not in match:
result.add(match)
for match in P_INTERNAL_LINK_REF.findall(node['value']):
text = match[0]
link = match[1]
if link not in self.args.references:
result.add('"{0}"=>"{1}"'.format(text, link))
self.reporter.check(not result,
self.filename,
'Internally-defined links may be missing definitions: {0}',
Expand Down Expand Up @@ -441,6 +481,14 @@ class CheckEpisode(CheckBase):
def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
    """Set up an episode checker by forwarding every argument to the parent class."""

    parent = super(CheckEpisode, self)
    parent.__init__(args, filename, metadata, metadata_len, text, lines, doc)


def check(self):
    """Run the parent class's checks, then the episode-only reference-inclusion check."""

    parent = super(CheckEpisode, self)
    parent.check()
    self.check_reference_inclusion()


def check_metadata(self):
super(CheckEpisode, self).check_metadata()
if self.metadata:
Expand All @@ -467,6 +515,26 @@ def check_metadata_fields(self, expected):
name, type(self.metadata[name]), type_)


def check_reference_inclusion(self):
    """Check that the episode's last non-empty line includes the links file.

    Does nothing when no reference file was given (`args.reference_path`
    is unset).  Otherwise the last non-empty line of `self.lines` must
    contain the base name of the reference file; if it does not, a
    problem is added to the reporter for this file.  `require` is called
    with the line found, so an episode with no non-empty lines is handled
    by `require`.
    """

    if not self.args.reference_path:
        return

    # Entries of self.lines are 3-tuples whose middle element is the text.
    # Defaulting to '' means an episode with no lines at all reaches the
    # `require` below instead of failing on an unbound loop variable.
    last_line = next(
        (text for (_, text, _) in reversed(self.lines) if text),
        '')

    require(last_line,
            'No non-empty lines in {0}'.format(self.filename))

    # Only the file name is compared, so any path prefix used in the
    # include directive does not matter.
    include_filename = os.path.basename(self.args.reference_path)
    if include_filename not in last_line:
        self.reporter.add(self.filename,
                          'episode does not include "{0}"',
                          include_filename)


class CheckReference(CheckBase):
"""Check the reference page."""

Expand Down

0 comments on commit 220eedc

Please sign in to comment.