Skip to content
Snippets Groups Projects
Commit d2710514 authored by Greg Wilson
Browse files

Checking consolidated links

parent 5e89d32e
Branches
Tags
No related merge requests found
......@@ -89,7 +89,7 @@ ${RMD_DST} : ${RMD_SRC}
## lesson-check : validate lesson Markdown.
lesson-check :
@bin/lesson_check.py -s . -p ${PARSER}
@bin/lesson_check.py -s . -p ${PARSER} -r _includes/links.md
## lesson-check-all : validate lesson Markdown, checking line lengths and trailing whitespace.
lesson-check-all :
......
......@@ -14,7 +14,7 @@ from optparse import OptionParser
from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX
__version__ = '0.2'
__version__ = '0.3'
# Where to look for source Markdown files.
SOURCE_DIRS = ['', '_episodes', '_extras']
......@@ -48,7 +48,10 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$')
P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>')
# Pattern to match internally-defined Markdown links.
P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]')
P_INTERNAL_LINK_REF = re.compile(r'\[([^\]]+)\]\[([^\]]+)\]')
# Pattern to match reference links (to resolve internally-defined references).
P_INTERNAL_LINK_DEF = re.compile(r'^\[([^\]]+)\]:\s*(.+)')
# What kinds of blockquotes are allowed?
KNOWN_BLOCKQUOTES = {
......@@ -103,6 +106,8 @@ def main():
args = parse_args()
args.reporter = Reporter()
check_config(args.reporter, args.source_dir)
args.references = read_references(args.reporter, args.reference_path)
docs = read_all_markdown(args.source_dir, args.parser)
check_fileset(args.source_dir, args.reporter, docs.keys())
check_unwanted_files(args.source_dir, args.reporter)
......@@ -110,6 +115,7 @@ def main():
checker = create_checker(args, filename, docs[filename])
checker.check()
check_figures(args.source_dir, args.reporter)
args.reporter.report()
......@@ -126,6 +132,10 @@ def parse_args():
default=None,
dest='parser',
help='path to Markdown parser')
parser.add_option('-r', '--references',
default=None,
dest='reference_path',
help='path to Markdown file of external references')
parser.add_option('-s', '--source',
default=os.curdir,
dest='source_dir',
......@@ -160,6 +170,37 @@ def check_config(reporter, source_dir):
'"root" not set to ".." in configuration')
def read_references(reporter, ref_path):
    """Load the shared file of link definitions.

    Each line of the file at ref_path is matched against
    P_INTERNAL_LINK_DEF, i.e. it is expected to look like '[name]: url'.
    The match result is handed to `require` together with a message naming
    the file and the 1-based line number.  Names and URLs that have already
    been seen are passed to reporter.check with a 'Duplicate ...' message;
    when a name is repeated, the most recent URL is the one kept.

    Returns a dictionary {symbolic_name : URL}; it is empty when ref_path
    is not given.
    """
    # No reference file requested: nothing to read.
    if not ref_path:
        return {}

    references = {}
    known_urls = set()
    with open(ref_path, 'r') as reader:
        # Count from 1 so messages use human-style line numbers.
        for (line_num, line) in enumerate(reader, 1):
            match = P_INTERNAL_LINK_DEF.search(line)
            require(match,
                    '{0}:{1} not valid reference:\n{2}'.format(ref_path, line_num, line.rstrip()))
            (name, url) = match.group(1, 2)
            require(name,
                    'Empty reference at {0}:{1}'.format(ref_path, line_num))

            name_is_new = name not in references
            reporter.check(name_is_new,
                           ref_path,
                           'Duplicate reference {0} at line {1}',
                           name, line_num)

            url_is_new = url not in known_urls
            reporter.check(url_is_new,
                           ref_path,
                           'Duplicate definition of URL {0} at line {1}',
                           url, line_num)

            references[name] = url
            known_urls.add(url)

    return references
def read_all_markdown(source_dir, parser):
"""Read source files, returning
{path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}}
......@@ -274,7 +315,7 @@ class CheckBase(object):
def check(self):
"""Run tests on metadata."""
"""Run tests."""
self.check_metadata()
self.check_line_lengths()
......@@ -342,17 +383,16 @@ class CheckBase(object):
def check_defined_link_references(self):
"""Check that defined links resolve in the file.
Internally-defined links match the pattern [text][label]. If
the label contains '{{...}}', it is hopefully a references to
a configuration value - we should check that, but don't right
now.
Internally-defined links match the pattern [text][label].
"""
result = set()
for node in self.find_all(self.doc, {'type' : 'text'}):
for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']):
if '{{' not in match:
result.add(match)
for match in P_INTERNAL_LINK_REF.findall(node['value']):
text = match[0]
link = match[1]
if link not in self.args.references:
result.add('"{0}"=>"{1}"'.format(text, link))
self.reporter.check(not result,
self.filename,
'Internally-defined links may be missing definitions: {0}',
......@@ -441,6 +481,14 @@ class CheckEpisode(CheckBase):
def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
    """Set up an episode checker by forwarding every argument, unchanged,
    to the parent class's initializer."""
    super(CheckEpisode, self).__init__(
        args, filename, metadata, metadata_len, text, lines, doc)
def check(self):
    """Run the inherited tests, then the episode-specific one."""
    # Everything the parent class checks still applies to an episode.
    super(CheckEpisode, self).check()
    # Episodes must additionally pull in the shared links file.
    self.check_reference_inclusion()
def check_metadata(self):
super(CheckEpisode, self).check_metadata()
if self.metadata:
......@@ -467,6 +515,26 @@ class CheckEpisode(CheckBase):
name, type(self.metadata[name]), type_)
def check_reference_inclusion(self):
    """Check that links file has been included.

    Nothing is checked unless a reference file was given
    (self.args.reference_path).  Otherwise the last non-empty line of the
    episode is located and must contain the base name of the reference
    file; if it does not, a message is added to the reporter.
    """
    if not self.args.reference_path:
        return

    # Start from '' so that a document with no lines at all still reaches
    # the `require` below, rather than failing on an unbound variable.
    last_line = ''
    for (_, line, _) in reversed(self.lines):
        if line:
            last_line = line
            break
    require(last_line,
            'No non-empty lines in {0}'.format(self.filename))

    # Only the file name is compared, not the directory part of the path.
    include_filename = os.path.split(self.args.reference_path)[-1]
    if include_filename not in last_line:
        self.reporter.add(self.filename,
                          'episode does not include "{0}"',
                          include_filename)
class CheckReference(CheckBase):
"""Check the reference page."""
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment