From 5ce0625754c47af2e44d888f009625d147704989 Mon Sep 17 00:00:00 2001 From: Andy Boughton Date: Sun, 4 Jan 2015 20:59:36 -0500 Subject: [PATCH] Changes to link text validation per #58 and #90. Validate link text in index.md (always), or only in headings (reference.md and instructors.md) Demote msg level when http links skipped Strip anchor from filenames in error messages --- tools/check.py | 113 +++++++++++++++++++++++++----------- tools/test_check.py | 46 +++++++++++++++ tools/validation_helpers.py | 12 +++- 3 files changed, 134 insertions(+), 37 deletions(-) diff --git a/tools/check.py b/tools/check.py index 62eaf1e..dda03f8 100755 --- a/tools/check.py +++ b/tools/check.py @@ -10,7 +10,6 @@ Contains validators for several kinds of template. Call at command line with flag -h to see options and usage instructions. """ -from __future__ import print_function import argparse import glob @@ -203,33 +202,31 @@ class MarkdownValidator(object): return (len(missing_headings) == 0) and \ valid_order and no_extra and correct_level - def _validate_one_link(self, link_node): - """Logic to validate a single external asset (image or link) - + # Link validation methods + def _validate_one_html_link(self, link_node, check_text=False): + """ Any local html file being linked was generated as part of the lesson. Therefore, file links (.html) must have a Markdown file in the expected folder. The title of the linked Markdown document should match the link text. - - For other assets (links or images), just verify that a file exists """ dest, link_text = self.ast.get_link_info(link_node) - if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE): - # HTML files in same folder are made from Markdown; special tests - fn = dest.split("#")[0] # Split anchor name from filename - expected_md_fn = os.path.splitext(fn)[0] + os.extsep + "md" - expected_md_path = os.path.join(self.markdown_dir, - expected_md_fn) - if not os.path.isfile(expected_md_path): - logging.error( - "In {0}: " - "The document links to {1}, but could not find " - "the expected markdown file {2}".format( - self.filename, dest, expected_md_path)) - return False + # HTML files in same folder are made from Markdown; special tests + fn = dest.split("#")[0] # Split anchor name from filename + expected_md_fn = os.path.splitext(fn)[0] + os.extsep + "md" + expected_md_path = os.path.join(self.markdown_dir, + expected_md_fn) + if not os.path.isfile(expected_md_path): + logging.error( + "In {0}: " + "The document links to {1}, but could not find " + "the expected markdown file {2}".format( + self.filename, fn, expected_md_path)) + return False + if check_text is True: # If file exists, parse and validate link text = node title with open(expected_md_path, 'rU') as link_dest_file: dest_contents = link_dest_file.read() @@ -247,37 +244,68 @@ class MarkdownValidator(object): self.filename, dest, link_text, dest_page_title)) return False - elif not re.match(r"^((https?|ftp)://)", dest, re.IGNORECASE)\ + return True + + def _validate_one_link(self, link_node, check_text=False): + """Logic to validate a single link to a file asset + + Performs special checks for links to a local markdown file. + + For links or images, just verify that a file exists. + """ + dest, link_text = self.ast.get_link_info(link_node) + + if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE): + # Validate local html links have matching md file + return self._validate_one_html_link(link_node, + check_text=check_text) + elif not re.match(r"^((https?|ftp)://.+)", dest, re.IGNORECASE)\ and not re.match(r"^#.*", dest): # If not web URL, and not anchor on same page, then # verify that local file exists dest_path = os.path.join(self.lesson_dir, dest) + dest_path = dest_path.split("#")[0] # Split anchor from filename if not os.path.isfile(dest_path): + fn = dest.split("#")[0] # Split anchor name from filename logging.error( "In {0}: " "Could not find the linked asset file " "{1} in {2}. If this is a URL, it must be " "prefixed with http(s):// or ftp://.".format( - self.filename, dest, dest_path)) + self.filename, fn, dest_path)) return False else: - logging.warning( + logging.debug( "In {0}: " "Skipped validation of link {1}".format(self.filename, dest)) return True - def _validate_links(self, links_to_skip=()): + def _partition_links(self): + """Fetch links in document. If this template has special requirements + for link text (eg only some links' text should match dest page title), + filter the list accordingly. + + Default behavior: don't check the text of any links""" + check_text = [] + no_check_text = self.ast.find_external_links() + + return check_text, no_check_text + + def _validate_links(self): """Validate all references to external content This includes links AND images: these are the two types of node that CommonMark assigns a .destination property""" - links = self.ast.find_external_links() + check_text, no_check_text = self._partition_links() valid = True - for link_node in links: - if link_node.destination not in links_to_skip: - res = self._validate_one_link(link_node) - valid = valid and res + for link_node in check_text: + res = self._validate_one_link(link_node, check_text=True) + valid = valid and res + + for link_node in no_check_text: + res = self._validate_one_link(link_node, check_text=False) + valid = valid and res return valid def _run_tests(self): @@ -309,6 +337,11 @@ class IndexPageValidator(MarkdownValidator): DOC_HEADERS = {'layout': vh.is_str, 'title': vh.is_str} + def _partition_links(self): + """Check the text of every link in index.md""" + check_text = self.ast.find_external_links() + return check_text, [] + def _validate_intro_section(self): """Validate the intro section @@ -339,12 +372,6 @@ class IndexPageValidator(MarkdownValidator): self.filename)) return intro_section and prereqs_tests - def _validate_links(self, links_to_skip=('motivation.html', - 'reference.html', - 'discussion.html', - 'instructors.html')): - return super(IndexPageValidator, self)._validate_links(links_to_skip) - def _run_tests(self): parent_tests = super(IndexPageValidator, self)._run_tests() tests = [self._validate_intro_section()] @@ -418,6 +445,15 @@ class ReferencePageValidator(MarkdownValidator): "title": vh.is_str, "subtitle": vh.is_str} + def _partition_links(self): + """For reference.md, only check that text of link matches + dest page subtitle if the link is in a heading""" + all_links = self.ast.find_external_links() + check_text = self.ast.find_external_links( + parent_crit=self.ast.is_heading) + dont_check_text = [n for n in all_links if n not in check_text] + return check_text, dont_check_text + def _validate_glossary_entry(self, glossary_entry): """Validate glossary entry @@ -491,6 +527,15 @@ class InstructorPageValidator(MarkdownValidator): "title": vh.is_str, "subtitle": vh.is_str} + def _partition_links(self): + """For instructors.md, only check that text of link matches + dest page subtitle if the link is in a heading""" + all_links = self.ast.find_external_links() + check_text = self.ast.find_external_links( + parent_crit=self.ast.is_heading) + dont_check_text = [n for n in all_links if n not in check_text] + return check_text, dont_check_text + class LicensePageValidator(MarkdownValidator): """Validate LICENSE.md: user should not edit this file""" diff --git a/tools/test_check.py b/tools/test_check.py index 99cf945..05a8023 100644 --- a/tools/test_check.py +++ b/tools/test_check.py @@ -202,6 +202,18 @@ Paragraph of introductory material. self.assertFalse(validator._validate_intro_section()) # TESTS INVOLVING LINKS TO OTHER CONTENT + def test_should_check_text_of_all_links_in_index(self): + """Text of every local-html link in index.md should + match dest page title""" + validator = self._create_validator(""" +## [This link is in a heading](reference.html) +[Topic Title One](01-one.html#anchor)""") + links = validator.ast.find_external_links() + check_text, dont_check_text = validator._partition_links() + + self.assertEqual(len(dont_check_text), 0) + self.assertEqual(len(check_text), 2) + def test_file_links_validate(self): """Verify that all links in a sample file validate. Involves checking for example files; may fail on "core" branch""" @@ -303,6 +315,18 @@ minutes: not a number Some text""") self.assertFalse(validator._validate_has_no_headings()) + def test_should_not_check_text_of_links_in_topic(self): + """Never check that text of local-html links in topic + matches dest title """ + validator = self._create_validator(""" +## [This link is in a heading](reference.html) +[Topic Title One](01-one.html#anchor)""") + links = validator.ast.find_external_links() + check_text, dont_check_text = validator._partition_links() + + self.assertEqual(len(dont_check_text), 2) + self.assertEqual(len(check_text), 0) + def test_sample_file_passes_validation(self): sample_validator = self.VALIDATOR(self.SAMPLE_FILE) res = sample_validator.validate() @@ -385,6 +409,28 @@ class TestInstructorPage(BaseTemplateTest): SAMPLE_FILE = os.path.join(MARKDOWN_DIR, "instructors.md") VALIDATOR = check.InstructorPageValidator + def test_should_selectively_check_text_of_links_in_topic(self): + """Only verify that text of local-html links in topic + matches dest title if the link is in a heading""" + validator = self._create_validator(""" +## [Reference](reference.html) + +[Topic Title One](01-one.html#anchor)""") + check_text, dont_check_text = validator._partition_links() + + self.assertEqual(len(dont_check_text), 1) + self.assertEqual(len(check_text), 1) + + def test_link_dest_bad_while_text_ignored(self): + validator = self._create_validator(""" +[ignored text](nonexistent.html)""") + self.assertFalse(validator._validate_links()) + + def test_link_dest_good_while_text_ignored(self): + validator = self._create_validator(""" +[ignored text](01-one.html)""") + self.assertTrue(validator._validate_links()) + def test_sample_file_passes_validation(self): sample_validator = self.VALIDATOR(self.SAMPLE_FILE) res = sample_validator.validate() diff --git a/tools/validation_helpers.py b/tools/validation_helpers.py index 6acc11c..864d362 100644 --- a/tools/validation_helpers.py +++ b/tools/validation_helpers.py @@ -121,21 +121,27 @@ class CommonMarkHelper(object): return dest, link_text - def find_external_links(self, ast_node=None): + def find_external_links(self, ast_node=None, parent_crit=None): """Recursive function that locates all references to external content under specified node. (links or images)""" ast_node = ast_node or self.data + if parent_crit is None: + # User can optionally provide a function to filter link list + # based on where link appears. (eg, only links in headings) + # If no filter is provided, accept all links in that node. + parent_crit = lambda n: True # Link can be node itself, or hiding in inline content links = [n for n in ast_node.inline_content - if self.is_external(n)] + if self.is_external(n) and parent_crit(ast_node)] if self.is_external(ast_node): links.append(ast_node) # Also look for links in sub-nodes for n in ast_node.children: - links.extend(self.find_external_links(n)) + links.extend(self.find_external_links(n, + parent_crit=parent_crit)) return links -- GitLab