Skip to content
Snippets Groups Projects
Commit 02430798 authored by abought's avatar abought
Browse files

Merge pull request #113 from abought/link_check_text

Link text validation changes
parents 40c84e19 7c92facb
Branches
Tags
No related merge requests found
*~
*.pyc
.DS_Store
_site
......@@ -10,7 +10,6 @@ Contains validators for several kinds of template.
Call at command line with flag -h to see options and usage instructions.
"""
from __future__ import print_function
import argparse
import glob
......@@ -203,33 +202,31 @@ class MarkdownValidator(object):
return (len(missing_headings) == 0) and \
valid_order and no_extra and correct_level
def _validate_one_link(self, link_node):
"""Logic to validate a single external asset (image or link)
# Link validation methods
def _validate_one_html_link(self, link_node, check_text=False):
"""
Any local html file being linked was generated as part of the lesson.
Therefore, file links (.html) must have a Markdown file
in the expected folder.
The title of the linked Markdown document should match the link text.
For other assets (links or images), just verify that a file exists
"""
dest, link_text = self.ast.get_link_info(link_node)
if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE):
# HTML files in same folder are made from Markdown; special tests
fn = dest.split("#")[0] # Split anchor name from filename
expected_md_fn = os.path.splitext(fn)[0] + os.extsep + "md"
expected_md_path = os.path.join(self.markdown_dir,
expected_md_fn)
if not os.path.isfile(expected_md_path):
logging.error(
"In {0}: "
"The document links to {1}, but could not find "
"the expected markdown file {2}".format(
self.filename, dest, expected_md_path))
return False
# HTML files in same folder are made from Markdown; special tests
fn = dest.split("#")[0] # Split anchor name from filename
expected_md_fn = os.path.splitext(fn)[0] + os.extsep + "md"
expected_md_path = os.path.join(self.markdown_dir,
expected_md_fn)
if not os.path.isfile(expected_md_path):
logging.error(
"In {0}: "
"The document links to {1}, but could not find "
"the expected markdown file {2}".format(
self.filename, fn, expected_md_path))
return False
if check_text is True:
# If file exists, parse and validate link text = node title
with open(expected_md_path, 'rU') as link_dest_file:
dest_contents = link_dest_file.read()
......@@ -247,37 +244,68 @@ class MarkdownValidator(object):
self.filename, dest,
link_text, dest_page_title))
return False
elif not re.match(r"^((https?|ftp)://)", dest, re.IGNORECASE)\
return True
def _validate_one_link(self, link_node, check_text=False):
"""Logic to validate a single link to a file asset
Performs special checks for links to a local markdown file.
For links or images, just verify that a file exists.
"""
dest, link_text = self.ast.get_link_info(link_node)
if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE):
# Validate local html links have matching md file
return self._validate_one_html_link(link_node,
check_text=check_text)
elif not re.match(r"^((https?|ftp)://.+)", dest, re.IGNORECASE)\
and not re.match(r"^#.*", dest):
# If not web URL, and not anchor on same page, then
# verify that local file exists
dest_path = os.path.join(self.lesson_dir, dest)
dest_path = dest_path.split("#")[0] # Split anchor from filename
if not os.path.isfile(dest_path):
fn = dest.split("#")[0] # Split anchor name from filename
logging.error(
"In {0}: "
"Could not find the linked asset file "
"{1} in {2}. If this is a URL, it must be "
"prefixed with http(s):// or ftp://.".format(
self.filename, dest, dest_path))
self.filename, fn, dest_path))
return False
else:
logging.warning(
logging.debug(
"In {0}: "
"Skipped validation of link {1}".format(self.filename, dest))
return True
def _validate_links(self, links_to_skip=()):
def _partition_links(self):
"""Fetch links in document. If this template has special requirements
for link text (eg only some links' text should match dest page title),
filter the list accordingly.
Default behavior: don't check the text of any links"""
check_text = []
no_check_text = self.ast.find_external_links()
return check_text, no_check_text
def _validate_links(self):
"""Validate all references to external content
This includes links AND images: these are the two types of node that
CommonMark assigns a .destination property"""
links = self.ast.find_external_links()
check_text, no_check_text = self._partition_links()
valid = True
for link_node in links:
if link_node.destination not in links_to_skip:
res = self._validate_one_link(link_node)
valid = valid and res
for link_node in check_text:
res = self._validate_one_link(link_node, check_text=True)
valid = valid and res
for link_node in no_check_text:
res = self._validate_one_link(link_node, check_text=False)
valid = valid and res
return valid
def _run_tests(self):
......@@ -309,6 +337,11 @@ class IndexPageValidator(MarkdownValidator):
DOC_HEADERS = {'layout': vh.is_str,
'title': vh.is_str}
def _partition_links(self):
"""Check the text of every link in index.md"""
check_text = self.ast.find_external_links()
return check_text, []
def _validate_intro_section(self):
"""Validate the intro section
......@@ -339,12 +372,6 @@ class IndexPageValidator(MarkdownValidator):
self.filename))
return intro_section and prereqs_tests
def _validate_links(self, links_to_skip=('motivation.html',
'reference.html',
'discussion.html',
'instructors.html')):
return super(IndexPageValidator, self)._validate_links(links_to_skip)
def _run_tests(self):
parent_tests = super(IndexPageValidator, self)._run_tests()
tests = [self._validate_intro_section()]
......@@ -418,6 +445,15 @@ class ReferencePageValidator(MarkdownValidator):
"title": vh.is_str,
"subtitle": vh.is_str}
def _partition_links(self):
"""For reference.md, only check that text of link matches
dest page subtitle if the link is in a heading"""
all_links = self.ast.find_external_links()
check_text = self.ast.find_external_links(
parent_crit=self.ast.is_heading)
dont_check_text = [n for n in all_links if n not in check_text]
return check_text, dont_check_text
def _validate_glossary_entry(self, glossary_entry):
"""Validate glossary entry
......@@ -491,6 +527,15 @@ class InstructorPageValidator(MarkdownValidator):
"title": vh.is_str,
"subtitle": vh.is_str}
def _partition_links(self):
"""For instructors.md, only check that text of link matches
dest page subtitle if the link is in a heading"""
all_links = self.ast.find_external_links()
check_text = self.ast.find_external_links(
parent_crit=self.ast.is_heading)
dont_check_text = [n for n in all_links if n not in check_text]
return check_text, dont_check_text
class LicensePageValidator(MarkdownValidator):
"""Validate LICENSE.md: user should not edit this file"""
......
......@@ -202,6 +202,18 @@ Paragraph of introductory material.
self.assertFalse(validator._validate_intro_section())
# TESTS INVOLVING LINKS TO OTHER CONTENT
def test_should_check_text_of_all_links_in_index(self):
"""Text of every local-html link in index.md should
match dest page title"""
validator = self._create_validator("""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)""")
links = validator.ast.find_external_links()
check_text, dont_check_text = validator._partition_links()
self.assertEqual(len(dont_check_text), 0)
self.assertEqual(len(check_text), 2)
def test_file_links_validate(self):
"""Verify that all links in a sample file validate.
Involves checking for example files; may fail on "core" branch"""
......@@ -303,6 +315,18 @@ minutes: not a number
Some text""")
self.assertFalse(validator._validate_has_no_headings())
def test_should_not_check_text_of_links_in_topic(self):
"""Never check that text of local-html links in topic
matches dest title """
validator = self._create_validator("""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)""")
links = validator.ast.find_external_links()
check_text, dont_check_text = validator._partition_links()
self.assertEqual(len(dont_check_text), 2)
self.assertEqual(len(check_text), 0)
def test_sample_file_passes_validation(self):
sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
res = sample_validator.validate()
......@@ -385,6 +409,28 @@ class TestInstructorPage(BaseTemplateTest):
SAMPLE_FILE = os.path.join(MARKDOWN_DIR, "instructors.md")
VALIDATOR = check.InstructorPageValidator
def test_should_selectively_check_text_of_links_in_topic(self):
"""Only verify that text of local-html links in topic
matches dest title if the link is in a heading"""
validator = self._create_validator("""
## [Reference](reference.html)
[Topic Title One](01-one.html#anchor)""")
check_text, dont_check_text = validator._partition_links()
self.assertEqual(len(dont_check_text), 1)
self.assertEqual(len(check_text), 1)
def test_link_dest_bad_while_text_ignored(self):
validator = self._create_validator("""
[ignored text](nonexistent.html)""")
self.assertFalse(validator._validate_links())
def test_link_dest_good_while_text_ignored(self):
validator = self._create_validator("""
[ignored text](01-one.html)""")
self.assertTrue(validator._validate_links())
def test_sample_file_passes_validation(self):
sample_validator = self.VALIDATOR(self.SAMPLE_FILE)
res = sample_validator.validate()
......
......@@ -121,21 +121,27 @@ class CommonMarkHelper(object):
return dest, link_text
def find_external_links(self, ast_node=None):
def find_external_links(self, ast_node=None, parent_crit=None):
"""Recursive function that locates all references to external content
under specified node. (links or images)"""
ast_node = ast_node or self.data
if parent_crit is None:
# User can optionally provide a function to filter link list
# based on where link appears. (eg, only links in headings)
# If no filter is provided, accept all links in that node.
parent_crit = lambda n: True
# Link can be node itself, or hiding in inline content
links = [n for n in ast_node.inline_content
if self.is_external(n)]
if self.is_external(n) and parent_crit(ast_node)]
if self.is_external(ast_node):
links.append(ast_node)
# Also look for links in sub-nodes
for n in ast_node.children:
links.extend(self.find_external_links(n))
links.extend(self.find_external_links(n,
parent_crit=parent_crit))
return links
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment