From cebd8558273a274f84b3b129c627f42a2ed09ef1 Mon Sep 17 00:00:00 2001 From: Greg Wilson Date: Sun, 24 Jul 2016 09:12:20 -0400 Subject: [PATCH] Checking that internally-defined links resolve. An internally-defined Markdown link has the form [xxx][yyy]. If the ID 'yyy' doesn't resolve, the text is left as-is, so we check for that, then subtract those that reference configuration values using '{{'. It's a hack, but it'll catch at least a few things. --- bin/lesson_check.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/bin/lesson_check.py b/bin/lesson_check.py index ace8de6..7bc09a3 100755 --- a/bin/lesson_check.py +++ b/bin/lesson_check.py @@ -47,6 +47,9 @@ P_TRAILING_WHITESPACE = re.compile(r'\s+$') # Pattern to match figure references in HTML. P_FIGURE_REFS = re.compile(r'<img[^>]+src="([^"]+)"[^>]*>') +# Pattern to match internally-defined Markdown links. +P_INTERNALLY_DEFINED_LINK = re.compile(r'\[[^\]]+\]\[[^\]]+\]') + # What kinds of blockquotes are allowed? KNOWN_BLOCKQUOTES = { 'callout', @@ -274,6 +277,7 @@ class CheckBase(object): self.check_trailing_whitespace() self.check_blockquote_classes() self.check_codeblock_classes() + self.check_defined_link_references() def check_metadata(self): @@ -331,6 +335,26 @@ class CheckBase(object): cls) + def check_defined_link_references(self): + """Check that defined links resolve in the file. + + Internally-defined links match the pattern [text][label]. If + the label contains '{{...}}', it is hopefully a reference to + a configuration value - we should check that, but don't right + now. 
+ """ + + result = set() + for node in self.find_all(self.doc, {'type' : 'text'}): + for match in P_INTERNALLY_DEFINED_LINK.findall(node['value']): + if '{{' not in match: + result.add(match) + self.reporter.check(not result, + self.filename, + 'Internally-defined links may be missing definitions: {0}', + ', '.join(sorted(result))) + + def find_all(self, node, pattern, accum=None): """Find all matches for a pattern.""" -- GitLab