diff --git a/pages/index.md b/pages/index.md index 55a249a16ef839ecfea917a9be4d0cadfd344446..f113f1af0c17f6d84deca2c0f7894864a0595c37 100644 --- a/pages/index.md +++ b/pages/index.md @@ -11,8 +11,8 @@ Paragraph of introductory material. ## Topics -1. [Topic Title 1](01-one.html) -2. [Topic Title 2](02-two.html) +1. [Topic Title One](01-one.html) +2. [Topic Title Two](02-two.html) ## Other Resources diff --git a/tools/check b/tools/check index 5457d73910a65ded713ad7ff3599396928c6ecba..12a402cfdf46bcace32be3f8176521ebb5151e07 100755 --- a/tools/check +++ b/tools/check @@ -1,213 +1,656 @@ -#!/usr/bin/python -# -# Software Carpentry Lesson Validator -# -# Check for errors in lessons built using the Software Carpentry template -# found at http://github.com/swcarpentry/lesson-template. -# -# Usage: -# -# $ tools/check +#! /usr/bin/env python -import sys +""" +Validate Software Carpentry lessons +according to the Markdown template specification described here: +http://software-carpentry.org/blog/2014/10/new-lesson-template-v2.html + +Validates the presence of headings, as well as specific sub-nodes. +Contains validators for several kinds of template. + +Call at command line with flag -h to see options and usage instructions. +""" +from __future__ import print_function + +import argparse +import glob +import hashlib +import logging import os import re -import yaml +import sys -#---------------------------------------- -# Error reporting. +try: + # Code tested with CommonMark version 0.5.4; API may change + import CommonMark +except ImportError: + ERROR_MESSAGE = """This program requires the CommonMark python package. +Install using -def report_error(file_path, line_number, line, error_message): - """ - Print information about general error. 
- """ - ERR_MSG = "Error at line {} of {}:\n\t{}\n{}" - print(ERR_MSG.format(line_number, file_path, line, error_message)) + # pip install commonmark -def report_missing(present, file_path, missing_element): - """ - Print information about missing element. - """ - ERR_MSG = "Error on {}: missing {}" - if not present: - print(ERR_MSG.format(file_path, missing_element)) +or -def report_missing_metadata(missing_element): - """ - Print information about missing metadata at YAML header. - """ - ERR_MSG = "Error on YAML header: missing {}" - print(ERR_MSG.format(missing_element)) + # easy_install commonmark +""" + print(ERROR_MESSAGE) + sys.exit(1) -def report_broken_link(file_path, line_number, link): - """ - Print information about broken link. - """ - ERR_MSG = "Broken link at line {} of {}:\n\tCan't find {}." - print(ERR_MSG.format(line_number, file_path, link)) +import validation_helpers as vh -#---------------------------------------- -# Checking. -def check_yaml(metadata): - """ - Check if all metadata are present at YAML header. - """ - METADATA_REQUIRED = {"layout", "title", "minutes"} - for key in METADATA_REQUIRED - set(metadata.keys()): - report_missing_metadata(key) +class MarkdownValidator(object): + """Base class for Markdown validation -# TODO: Implement check_lesson -def check_lesson(file_path): + Contains basic validation skeleton to be extended for specific page types """ - Checks the file ``pages/[0-9]{2}-.*.md`` for: - - - "layout: topic" in YAML header - - "title" as keyword in YAML header - - line "> ## Learning Objectives {.objectives}" after YAML header - - items in learning objectives begin with "*" - - items in learning objective following four-space indentation rule - - code samples be of type input, error, output, python, shell, r, matlab, or sql - - callout box style - - challenge box style - """ - pass + HEADINGS = [] # List of strings containing expected heading text + WARN_ON_EXTRA_HEADINGS = True # Warn when other headings are present? 
-# TODO: Implement check_discussion -def check_discussion(file_path): - """ - Checks the file ``pages/discussion.md`` for: + DOC_HEADERS = {} # Rows in header section (first few lines of document). - FIXME: tell what need to check. - """ - pass + def __init__(self, filename=None, markdown=None): + """Perform validation on a Markdown document. -# TODO: Complete implementation of check_index -# TODO: break check_index into pieces -- it's too long. -def check_index(file_path): - """ - Checks the file ``pages/index.md`` for: - - - "layout: lesson" in YAML header - - "title" as keyword in YAML header - - introductory paragraph(s) right after YAML header - - line with "> ## Prerequisites" - - non-empty prerequisites - - title line with "## Topics" - - items at topic list begin with "*" - - items in topic list follow four-space indentation rule - - links at topic list are valid - - line with "## Other Resources" - - items at other resources list begin with "*" - - link at other resources list are valid - """ - # State variables - in_yaml = False - yaml_metadata = [] - has_prerequisites = False - has_topics = False - has_other_resources = False - - # Load file and process it - with open(file_path, "r") as lines: - for line_number, line in enumerate(lines): - if re.match("---", line): # what if there are multiple YAML blocks?? - in_yaml = not in_yaml - elif in_yaml: - yaml_metadata.append(line) - elif re.match("> ## Prerequisites", line): # check this in the Markdown or in the generated HTML? - has_prerequisites = True - elif re.match("## Topics", line): # as above? - has_topics = True - elif re.match("## Other Resources", line): # as above - has_other_resources = True - else: - ## Push this check into another function - this one is getting too long. 
- # Check if local links are valid - matches = re.search("\[.*\]\((?P.*)\)", line) - if matches and not matches.group("link").startswith("http"): - link = os.path.join(os.path.dirname(file_path), matches.group("link")) - if link.endswith(".html"): - link = link.replace("html", "md") # NO: what about "03-html-editing.html" ? - if not os.path.exists(link): - report_broken_link(file_path, line_number, link) - - ## Again, this function is too long - break it into sub-functions. - # Check YAML - yaml_metadata = yaml.load("\n".join(yaml_metadata)) - check_yaml(yaml_metadata) - - # Check sections - ## Note the refactoring: replaces three conditionals with one. - report_missing(has_prerequisites, file_path, "Prerequisites") - report_missing(has_topics, file_path, "Topics") - report_missing(has_other_resources, file_path, "Other Resources") - -# TODO Implement check_intructors -def check_intructors(file_path): - """ - Checks the file ``pages/instructors.md`` for: + Validator accepts either the path to a file containing Markdown, + OR a valid Markdown string. The latter is useful for unit testing.""" + self.filename = filename - - "title: Instructor"s Guide" in YAML header - - line with "## Overall" - - line with "## General Points" - - lines with topics titles begin with "## " - - points begin with "*" and following four space rules. - """ - pass + if filename: + # Expect Markdown files to be in same directory as the input file + self.markdown_dir = os.path.dirname(filename) + self.lesson_dir = os.path.abspath( # Parent directory of lesson + os.path.join(self.markdown_dir, os.pardir)) + with open(filename, 'rU') as f: + self.markdown = f.read() + else: + # Look for linked content in ../pages (relative to this file) + self.lesson_dir = os.path.abspath( + os.path.join(os.path.dirname(__file__), os.pardir)) -# TODO Implement check_motivation -def check_motivation(file_path): - """ - Checks the file ``pages/motivation.md``. 
+ self.markdown_dir = os.path.join(self.lesson_dir, "pages") + self.markdown = markdown - FIXME: tell what need to check. - """ - pass + ast = self._parse_markdown(self.markdown) + self.ast = vh.CommonMarkHelper(ast) -# TODO Implement check_reference -def check_reference(file_path): - """ - Checks the file ``pages/reference.md`` for: + def _parse_markdown(self, markdown): + parser = CommonMark.DocParser() + ast = parser.parse(markdown) + return ast - - ``layout: reference`` in YAML header - - line with "## Glossary" - - words definitions after at the "Glossary" as:: + def _validate_hrs(self): + """Validate header - > **Key Word 1**: the definition - > relevant to the lesson. - """ - pass + Verify that the header section at top of document + is bracketed by two horizontal rules""" + valid = True + try: + hr_nodes = [self.ast.children[0], self.ast.children[2]] + except IndexError: + logging.error( + "In {0}: " + "Document must include header sections".format(self.filename)) + return False -def check_file(file_path): - """ - Call the correctly check function based on the name of the file. + for hr in hr_nodes: + if not self.ast.is_hr(hr): + logging.error( + "In {0}: " + "Expected --- at line: {1}".format( + self.filename, hr.start_line)) + valid = False + return valid + + def _validate_one_doc_header_row(self, text): + """Validate a single row of the document header section""" + label, content = text.split(":", 1) + if label not in self.DOC_HEADERS: + logging.warning( + "In {0}: " + "Unrecognized label in header section: {1}".format( + self.filename, label)) + return False + + validation_function = self.DOC_HEADERS[label] + validate_header = validation_function(content) + if not validate_header: + logging.error( + "In {0}: " + "Document header field for label {1} " + "does not follow expected format".format(self.filename, label)) + return validate_header + + # Methods related to specific validation. Can override specific tests. 
+ def _validate_doc_headers(self): + """Validate the document header section. + + Pass only if the header of the document contains the specified + sections with the expected contents""" + + # Header section should be wrapped in hrs + has_hrs = self._validate_hrs() + + # Labeled sections in the actual headers should match expected format + header_node = self.ast.children[1] + test_headers = [self._validate_one_doc_header_row(s) + for s in header_node.strings] + + # Must have all expected header lines, and no others. + only_headers = (len(header_node.strings) == len(self.DOC_HEADERS)) + + # Headings must appear in the order expected + valid_order = self._validate_section_heading_order() + + return has_hrs and all(test_headers) and only_headers and valid_order + + def _validate_section_heading_order(self, ast_node=None, headings=None): + """Verify that section headings appear, and in the order expected""" + # TODO: Refactor into individual tests in the future + if ast_node is None: + ast_node = self.ast.data + headings = self.HEADINGS + + heading_nodes = self.ast.get_section_headings(ast_node) + # All headings should be exactly level 2 + correct_level = True + for n in heading_nodes: + if n.level != 2: + logging.error( + "In {0}: " + "Heading at line {1} should be level 2".format( + self.filename, n.start_line)) + correct_level = False + + heading_labels = [vh.strip_attrs(n.strings[0]) for n in heading_nodes] + + # Check for missing and extra headings + missing_headings = [expected_heading for expected_heading in headings + if expected_heading not in heading_labels] + + extra_headings = [found_heading for found_heading in heading_labels + if found_heading not in headings] + + for h in missing_headings: + logging.error( + "In {0}: " + "Document is missing expected heading: {1}".format( + self.filename, h)) + + if self.WARN_ON_EXTRA_HEADINGS is True: + for h in extra_headings: + logging.error( + "In {0}: " + "Document contains heading " + "not specified in the template: 
{1}".format( + self.filename, h)) + no_extra = (len(extra_headings) == 0) + else: + no_extra = True + + # Check that the subset of headings + # in the template spec matches order in the document + valid_order = True + headings_overlap = [h for h in heading_labels if h in headings] + if len(missing_headings) == 0 and headings_overlap != headings: + valid_order = False + logging.error( + "In {0}: " + "Document headings do not match " + "the order specified by the template".format(self.filename)) + + return (len(missing_headings) == 0) and \ + valid_order and no_extra and correct_level + + def _validate_one_link(self, link_node): + """Logic to validate a single external asset (image or link) + + Any local html file being linked was generated as part of the lesson. + Therefore, file links (.html) must have a Markdown file + in the expected folder. + + The title of the linked Markdown document should match the link text. + + For other assets (links or images), just verify that a file exists + """ + dest, link_text = self.ast.get_link_info(link_node) + + if re.match(r"^[\w,\s-]+\.(html?)", dest, re.IGNORECASE): + # HTML files in same folder are made from Markdown; special tests + expected_md_fn = os.path.splitext(dest)[0] + os.extsep + "md" + expected_md_path = os.path.join(self.markdown_dir, + expected_md_fn) + if not os.path.isfile(expected_md_path): + logging.error( + "In {0}: " + "The document links to {1}, but could not find " + "the expected markdown file {2}".format( + self.filename, dest, expected_md_path)) + return False + + # If file exists, parse and validate link text = node title + with open(expected_md_path, 'rU') as link_dest_file: + dest_contents = link_dest_file.read() + + dest_ast = self._parse_markdown(dest_contents) + dest_ast = vh.CommonMarkHelper(dest_ast) + dest_page_title = dest_ast.get_doc_header_subtitle() + + if dest_page_title != link_text: + logging.error( + "In {0}: " + "The linked page {1} exists, but " + "the link text '{2}' does not match 
the " + "(sub)title of that page, '{3}'.".format( + self.filename, dest, + link_text, dest_page_title)) + return False + elif not re.match(r"^((https?|ftp)://)", dest, re.IGNORECASE)\ + and not re.match(r"^#.*", dest): + # If not web URL, and not anchor on same page, then + # verify that local file exists + dest_path = os.path.join(self.lesson_dir, dest) + if not os.path.isfile(dest_path): + logging.error( + "In {0}: " + "Could not find the linked asset file " + "{1} in {2}. If this is a URL, it must be " + "prefixed with http(s):// or ftp://.".format( + self.filename, dest, dest_path)) + return False + else: + logging.warning( + "In {0}: " + "Skipped validation of link {1}".format(self.filename, dest)) + return True + + def _validate_links(self, links_to_skip=()): + """Validate all references to external content + + This includes links AND images: these are the two types of node that + CommonMark assigns a .destination property""" + links = self.ast.find_external_links() + + valid = True + for link_node in links: + if link_node.destination not in links_to_skip: + res = self._validate_one_link(link_node) + valid = valid and res + return valid + + def _run_tests(self): + """ + Let user override the list of tests to be performed. + + Error trapping is handled by the validate() wrapper method. + """ + tests = [self._validate_doc_headers(), + self._validate_section_heading_order(), + self._validate_links()] + + return all(tests) + + def validate(self): + """Perform all required validations. 
Wrap in exception handler""" + try: + return self._run_tests() + except IndexError: + logging.error("Document is missing critical sections") + return False + + +class IndexPageValidator(MarkdownValidator): + """Validate the contents of the homepage (index.md)""" + HEADINGS = ['Topics', + 'Other Resources'] + + DOC_HEADERS = {'layout': vh.is_str, + 'title': vh.is_str} + + def _validate_intro_section(self): + """Validate the intro section + + It must be a paragraph, followed by blockquoted list of prereqs""" + intro_block = self.ast.children[3] + intro_section = self.ast.is_paragraph(intro_block) + if not intro_section: + logging.error( + "In {0}: " + "Expected paragraph of introductory text at {1}".format( + self.filename, intro_block.start_line)) + + # Validate the prerequisites block + prereqs_block = self.ast.get_block_titled("Prerequisites", + heading_level=2) + if prereqs_block: + # Found the expected block; now check contents + prereqs_tests = self.ast.has_number_children(prereqs_block[0], + minc=2) + else: + prereqs_tests = False + + if prereqs_tests is False: + logging.error( + "In {0}: " + "Intro should contain a blockquoted section with level 2 " + "title 'Prerequisites'. 
Section should not be empty.".format( + self.filename)) + return intro_section and prereqs_tests + + def _validate_links(self, links_to_skip=('motivation.html', + 'reference.html', + 'discussion.html', + 'instructors.html')): + return super(IndexPageValidator, self)._validate_links(links_to_skip) + + def _run_tests(self): + tests = [self._validate_intro_section()] + parent_tests = super(IndexPageValidator, self)._run_tests() + return all(tests) and parent_tests + + +class TopicPageValidator(MarkdownValidator): + """Validate the Markdown contents of a topic page, eg 01-topicname.md""" + DOC_HEADERS = {"layout": vh.is_str, + "title": vh.is_str, + "subtitle": vh.is_str, + "minutes": vh.is_numeric} + + # TODO: Write validator for, eg, challenge section + def _validate_learning_objective(self): + learn_node = self.ast.get_block_titled("Learning Objectives", + heading_level=2) + if learn_node: + # In addition to title, the node must have some content + node_tests = self.ast.has_number_children(learn_node[0], minc=2) + else: + node_tests = False + + if node_tests is False: + logging.error( + "In {0}: " + "Learning Objectives should not be empty.".format( + self.filename)) + + return node_tests + + def _validate_has_no_headings(self): + """Check headings + + The top-level document has no headings indicating subtopics. + The only valid subheadings are nested in blockquote elements""" + heading_nodes = self.ast.get_section_headings() + if len(heading_nodes) == 0: + return True + + logging.error( + "In {0}: " + "The topic page should not have sub-headings " + "outside of special blocks. 
" + "If a topic needs sub-headings, " + "it should be broken into multiple topics.".format(self.filename)) + for n in heading_nodes: + logging.warning( + "In {0}: " + "The following sub-heading should be removed: {1}".format( + self.filename, n.strings[0])) + return False + + def _run_tests(self): + tests = [self._validate_has_no_headings(), + self._validate_learning_objective()] + parent_tests = super(TopicPageValidator, self)._run_tests() + return all(tests) and parent_tests + + +class MotivationPageValidator(MarkdownValidator): + """Validate motivation.md""" + DOC_HEADERS = {"layout": vh.is_str, + "title": vh.is_str} + # TODO: How to validate? May be a mix of reveal.js (HTML) + markdown. + + +class ReferencePageValidator(MarkdownValidator): + """Validate reference.md""" + HEADINGS = ["Glossary"] + WARN_ON_EXTRA_HEADINGS = False + + DOC_HEADERS = {"layout": vh.is_str, + "title": vh.is_str, + "subtitle": vh.is_str} + + def _validate_glossary_entry(self, glossary_entry): + """Validate glossary entry + + Glossary entry must be formatted in conformance with Pandoc's + ```definition_lists``` extension. + + That syntax isn't supported by the CommonMark parser, so we identify + terms manually.""" + if len(glossary_entry) < 2: + logging.error( + "In {0}:" + "Glossary entry must have at least two lines- " + "a term and a definition.".format( + self.filename)) + return False + + entry_is_valid = True + for line_index, line in enumerate(glossary_entry): + if line_index == 1: + if not re.match("^: ", line): + logging.error( + "In {0}:" + "First line of definition must " + "start with ': '.".format( + self.filename)) + entry_is_valid = False + elif line_index > 1: + if not re.match("^ ", line): + logging.error( + "In {0}:" + "Subsequent lines of definition must " + "start with ' '.".format( + self.filename)) + entry_is_valid = False + return entry_is_valid + + def _validate_glossary(self): + """Validate the glossary section. 
+ + Assumes that the glossary is at the end of the file: + everything after the header. (and there must be a glossary section) + + Verifies that the only things in the glossary are definition items. + """ + is_glossary_valid = True + in_glossary = False + for node in self.ast.children: + if in_glossary: + is_glossary_valid = is_glossary_valid and \ + self._validate_glossary_entry(node.strings) + elif self.ast.is_heading(node) and "Glossary" in node.strings: + in_glossary = True + + return is_glossary_valid + + def _run_tests(self): + tests = [self._validate_glossary()] + parent_tests = super(ReferencePageValidator, self)._run_tests() + return all(tests) and parent_tests + + +class InstructorPageValidator(MarkdownValidator): + """Simple validator for Instructor's Guide- instructors.md""" + HEADINGS = ["Legend", "Overall"] + WARN_ON_EXTRA_HEADINGS = False + + DOC_HEADERS = {"layout": vh.is_str, + "title": vh.is_str, + "subtitle": vh.is_str} + + +class LicensePageValidator(MarkdownValidator): + """Validate LICENSE.md: user should not edit this file""" + def _run_tests(self): + """Skip the base tests; just check md5 hash""" + # TODO: This hash is specific to the license for english-language repo + expected_hash = '258aa6822fa77f7c49c37c3759017891' + m = hashlib.md5() + try: + m.update(self.markdown) + except TypeError: + # Workaround for hashing in python3 + m.update(self.markdown.encode('utf-8')) + + if m.hexdigest() == expected_hash: + return True + else: + logging.error("The provided license file should not be modified.") + return False + + +class DiscussionPageValidator(MarkdownValidator): """ - # Pair of regex and function to call - CONTROL = ( - ("[0-9]{2}-.*", check_lesson), - ("discussion", check_discussion), - ("index", check_index), - ("instructors", check_intructors), - ("motivation", check_motivation), - ("reference", check_reference) - ) - for (pattern, checker) in CONTROL: - if re.search(pattern, file_path): - checker(file_path) - -def 
main(list_of_files): + Validate the discussion page (discussion.md). + Most of the content is free-form. """ - Call the check function for every file in ``list_of_files``. + WARN_ON_EXTRA_HEADINGS = False + DOC_HEADERS = {"layout": vh.is_str, + "title": vh.is_str, + "subtitle": vh.is_str} - If ``list_of_files`` is empty load all the files from ``pages`` directory. - """ - if not list_of_files: - list_of_files = [os.path.join("pages", filename) for filename in os.listdir("pages")] - for filename in list_of_files: - if filename.endswith(".md"): - check_file(filename) +# Associate lesson template names with validators. This list used by CLI. +# Dict of {name: (Validator, filename_pattern)} +LESSON_TEMPLATES = {"index": (IndexPageValidator, "^index"), + "topic": (TopicPageValidator, "^[0-9]{2}-.*"), + "motivation": (MotivationPageValidator, "^motivation"), + "reference": (ReferencePageValidator, "^reference"), + "instructor": (InstructorPageValidator, "^instructors"), + "license": (LicensePageValidator, "^LICENSE"), + "discussion": (DiscussionPageValidator, "^discussion")} + + +def identify_template(filepath): + """Identify template + + Given the path to a single file, + identify the appropriate template to use""" + for template_name, (validator, pattern) in LESSON_TEMPLATES.items(): + if re.search(pattern, os.path.basename(filepath)): + return template_name + + return None + + +def validate_single(filepath, template=None): + """Validate a single Markdown file based on a specified template""" + template = template or identify_template(filepath) + if template is None: + logging.error( + "Validation failed for {0}: " + "Could not automatically identify correct template.".format( + filepath)) + return False + + logging.debug( + "Beginning validation of {0} using template {1}".format( + filepath, template)) + validator = LESSON_TEMPLATES[template][0] + validate_file = validator(filepath) + + res = validate_file.validate() + if res is True: + logging.debug("File {0} 
successfully passed validation".format( + filepath)) + else: + logging.debug("File {0} failed validation: " + "see error log for details".format(filepath)) + + return res + + +def validate_folder(path, template=None): + """Validate an entire folder of files""" + search_str = os.path.join(path, "*.md") # Find files based on extension + filename_list = glob.glob(search_str) + + if not filename_list: + logging.error( + "No Markdown files were found " + "in specified directory {0}".format(path)) + return False + + all_valid = True + for fn in filename_list: + res = validate_single(fn, template=template) + all_valid = all_valid and res + return all_valid + + +def start_logging(level=logging.INFO): + """Initialize logging and print messages to console.""" + logging.basicConfig(stream=sys.stdout, level=level) + + +def command_line(): + """Handle arguments passed in via the command line""" + parser = argparse.ArgumentParser() + parser.add_argument("file_or_path", + nargs="*", + default=[os.getcwd()], + help="The individual pathname") + + parser.add_argument('-t', '--template', + choices=LESSON_TEMPLATES.keys(), + help="The type of template to apply to all file(s). 
" + "If not specified, will auto-identify template.") + + parser.add_argument('-d', '--debug', + action='store_true', + help="Enable debug information.") + + return parser.parse_args() + + +def main(parsed_args_obj): + if parsed_args_obj.debug: + log_level = "DEBUG" + else: + log_level = "WARNING" + start_logging(log_level) + + template = parsed_args_obj.template + + all_valid = True + for fn in parsed_args_obj.file_or_path: + if os.path.isdir(fn): + res = validate_folder(fn, template=template) + elif os.path.isfile(fn): + res = validate_single(fn, template=template) + else: + res = False + logging.error( + "The specified file or folder {0} does not exist; " + "could not perform validation".format(fn)) + + all_valid = all_valid and res + + if all_valid is True: + logging.debug("All Markdown files successfully passed validation.") + sys.exit(0) + else: + logging.warning( + "Some errors were encountered during validation. " + "See log for details.") + sys.exit(1) + if __name__ == "__main__": - main(sys.argv[1:]) + parsed_args = command_line() + main(parsed_args) + + #### Sample of how validator is used directly + # validator = HomePageValidator('../index.md') + # print validator.validate() diff --git a/tools/test_check.py b/tools/test_check.py new file mode 100644 index 0000000000000000000000000000000000000000..451cf78c4cb8d5ef3f313b75b62521e71394a6fc --- /dev/null +++ b/tools/test_check.py @@ -0,0 +1,366 @@ +#! 
/usr/bin/env python + +import imp, logging, os, unittest +check = imp.load_source("check", # Import non-.py file + os.path.join(os.path.dirname(__file__), "check")) + +# Make log messages visible to help audit test failures +check.start_logging(level=logging.DEBUG) + + +class BaseTemplateTest(unittest.TestCase): + """Common methods for testing template validators""" + SAMPLE_FILE = "" # Path to a file that should pass all tests + VALIDATOR = check.MarkdownValidator + + def setUp(self): + self.sample_validator = self.VALIDATOR(self.SAMPLE_FILE) + + def _create_validator(self, markdown): + """Create validator object from markdown string; useful for failures""" + return self.VALIDATOR(markdown=markdown) + + +class TestAstHelpers(BaseTemplateTest): + SAMPLE_FILE = '../pages/index.md' + VALIDATOR = check.MarkdownValidator + + def test_link_text_extracted(self): + """Verify that link text and destination are extracted correctly""" + validator = self._create_validator("""[This is a link](discussion.html)""") + links = validator.ast.find_external_links(validator.ast.children[0]) + + dest, link_text = validator.ast.get_link_info(links[0]) + self.assertEqual(dest, "discussion.html") + self.assertEqual(link_text, "This is a link") + + +class TestIndexPage(BaseTemplateTest): + """Test the ability to correctly identify and validate specific sections + of a markdown file""" + SAMPLE_FILE = "../pages/index.md" + VALIDATOR = check.IndexPageValidator + + def test_sample_file_passes_validation(self): + res = self.sample_validator.validate() + self.assertTrue(res) + + def test_headers_missing_hrs(self): + validator = self._create_validator("""Blank row + +layout: lesson +title: Lesson Title +keywords: ["some", "key terms", "in a list"] + +Another section that isn't an HR +""") + + self.assertFalse(validator._validate_doc_headers()) + + def test_headers_missing_a_line(self): + """One of the required headers is missing""" + validator = self._create_validator("""--- +layout: lesson 
+keywords: ["some", "key terms", "in a list"] +---""") + self.assertFalse(validator._validate_doc_headers()) + + # TESTS INVOLVING DOCUMENT HEADER SECTION + def test_headers_fail_with_other_content(self): + validator = self._create_validator("""--- +layout: lesson +title: Lesson Title +keywords: ["some", "key terms", "in a list"] +otherline: Nothing +---""") + self.assertFalse(validator._validate_doc_headers()) + + def test_headers_fail_because_invalid_content(self): + validator = self._create_validator("""--- +layout: lesson +title: Lesson Title +keywords: this is not a list +---""") + self.assertFalse(validator._validate_doc_headers()) + + # TESTS INVOLVING SECTION TITLES/HEADINGS + def test_index_has_valid_section_headings(self): + """The provided index page""" + res = self.sample_validator._validate_section_heading_order() + self.assertTrue(res) + + def test_index_fail_when_section_heading_absent(self): + res = self.sample_validator.ast.has_section_heading("Fake heading") + self.assertFalse(res) + + def test_fail_when_section_heading_is_wrong_level(self): + """All headings must be exactly level 2""" + validator = self._create_validator("""--- +layout: page +title: Lesson Title +--- +Paragraph of introductory material. + +> ## Prerequisites +> +> A short paragraph describing what learners need to know +> before tackling this lesson. + +### Topics + +1. [Topic Title 1](01-one.html) +2. [Topic Title 2](02-two.html) + +## Other Resources + +* [Motivation](motivation.html) +* [Reference Guide](reference.html) +* [Next Steps](discussion.html) +* [Instructor's Guide](instructors.html)""") + self.assertFalse(validator._validate_section_heading_order()) + + + def test_fail_when_section_headings_in_wrong_order(self): + validator = self._create_validator("""--- +layout: lesson +title: Lesson Title +keywords: ["some", "key terms", "in a list"] +--- +Paragraph of introductory material. 
+ +> ## Prerequisites +> +> A short paragraph describing what learners need to know +> before tackling this lesson. + +## Other Resources + +* [Motivation](motivation.html) +* [Reference Guide](reference.html) +* [Instructor's Guide](instructors.html) + + +## Topics + +* [Topic Title 1](01-one.html) +* [Topic Title 2](02-two.html)""") + + self.assertFalse(validator._validate_section_heading_order()) + + def test_pass_when_prereq_section_has_correct_heading_level(self): + validator = self._create_validator("""--- +layout: lesson +title: Lesson Title +keywords: ["some", "key terms", "in a list"] +--- +Paragraph of introductory material. + +> ## Prerequisites +> +> A short paragraph describing what learners need to know +> before tackling this lesson. +""") + self.assertTrue(validator._validate_intro_section()) + + def test_fail_when_prereq_section_has_incorrect_heading_level(self): + validator = self._create_validator("""--- +layout: lesson +title: Lesson Title +keywords: ["some", "key terms", "in a list"] +--- +Paragraph of introductory material. + +> # Prerequisites +> +> A short paragraph describing what learners need to know +> before tackling this lesson. +""") + self.assertFalse(validator._validate_intro_section()) + + # TESTS INVOLVING LINKS TO OTHER CONTENT + def test_file_links_validate(self): + res = self.sample_validator._validate_links() + self.assertTrue(res) + + def test_html_link_to_extant_md_file_passes(self): + """Verify that an HTML link with corresponding MD file will pass""" + validator = self._create_validator("""[Topic Title One](01-one.html)""") + self.assertTrue(validator._validate_links()) + + def test_html_link_with_anchor_to_extant_md_passes(self): + """Verify that link is identified correctly even if to page anchor + + For now this just tests that the regex handles #anchors. 
+ It doesn't validate that the named anchor exists in the md file + """ + validator = self._create_validator("""[Topic Title One](01-one.html#anchor)""") + self.assertTrue(validator._validate_links()) + + def test_inpage_anchor_passes_validation(self): + """Links that reference anchors within the page should be ignored""" + # TODO: Revisit once anchor rules are available + validator = self._create_validator("""Most databases also support Booleans and date/time values; +SQLite uses the integers 0 and 1 for the former, and represents the latter as discussed [earlier](#a:dates).""") + self.assertTrue(validator._validate_links()) + + + def test_missing_markdown_file_fails_validation(self): + """Fail validation when an html file is linked without corresponding + markdown file""" + validator = self._create_validator("""[Broken link](nonexistent.html)""") + self.assertFalse(validator._validate_links()) + + def test_website_link_ignored_by_validator(self): + """Don't look for markdown if the file linked isn't local- + remote website links are ignored""" + validator = self._create_validator("""[Broken link](http://website.com/filename.html)""") + self.assertTrue(validator._validate_links()) + + def test_malformed_website_link_fails_validator(self): + """If the link isn't prefixed by http(s):// or ftp://, fail. 
+ This is because there are a lot of edge cases in distinguishing + between filenames and URLs: err on the side of certainty.""" + validator = self._create_validator("""[Broken link](www.website.com/filename.html)""") + self.assertFalse(validator._validate_links()) + + def test_finds_image_asset(self): + """Image asset is found""" + validator = self._create_validator( + """![this is the image's title](fig/example.svg "this is the image's alt text")""") + self.assertTrue(validator._validate_links()) + + def test_image_asset_not_found(self): + """Image asset can't be found if path is invalid""" + validator = self._create_validator( + """![this is the image's title](fig/exemple.svg "this is the image's alt text")""") + self.assertFalse(validator._validate_links()) + + def test_non_html_link_finds_csv(self): + """Look for CSV file in appropriate folder""" + validator = self._create_validator( + """Use [this CSV](data/data.csv) for the exercise.""") + self.assertTrue(validator._validate_links()) + + def test_non_html_links_are_path_sensitive(self): + """Fails to find CSV file with wrong path.""" + validator = self._create_validator( + """Use [this CSV](data.csv) for the exercise.""") + self.assertFalse(validator._validate_links()) + + +class TestTopicPage(BaseTemplateTest): + """Verifies that the topic page validator works as expected""" + SAMPLE_FILE = "../pages/01-one.md" + VALIDATOR = check.TopicPageValidator + + def test_sample_file_passes_validation(self): + res = self.sample_validator.validate() + self.assertTrue(res) + + +class TestMotivationPage(BaseTemplateTest): + """Verifies that the instructors page validator works as expected""" + SAMPLE_FILE = "../pages/motivation.md" + VALIDATOR = check.MotivationPageValidator + + def test_sample_file_passes_validation(self): + res = self.sample_validator.validate() + self.assertTrue(res) + + +class TestReferencePage(BaseTemplateTest): + """Verifies that the reference page validator works as expected""" + SAMPLE_FILE = 
"../pages/reference.md" + VALIDATOR = check.ReferencePageValidator + + def test_missing_glossary_definition(self): + validator = self._create_validator("") + self.assertFalse(validator._validate_glossary_entry( + ["Key word"])) + + def test_missing_colon_at_glossary_definition(self): + validator = self._create_validator("") + self.assertFalse(validator._validate_glossary_entry( + ["Key word", "Definition of term"])) + + def test_wrong_indentation_at_glossary_definition(self): + validator = self._create_validator("") + self.assertFalse(validator._validate_glossary_entry( + ["Key word", ": Definition of term"])) + + def test_wrong_continuation_at_glossary_definition(self): + validator = self._create_validator("") + self.assertFalse(validator._validate_glossary_entry( + ["Key word", ": Definition of term", "continuation"])) + + def test_valid_glossary_definition(self): + validator = self._create_validator("") + self.assertTrue(validator._validate_glossary_entry( + ["Key word", ": Definition of term", " continuation"])) + + def test_only_definitions_can_appear_after_glossary_heading(self): + validator = self._create_validator("""## Glossary + +Key Word 1 +: Definition of first term + +Paragraph + +Key Word 2 +: Definition of second term +""") + self.assertFalse(validator._validate_glossary()) + + def test_glossary(self): + validator = self._create_validator("""## Glossary + +Key Word 1 +: Definition of first term + +Key Word 2 +: Definition of second term +""") + self.assertTrue(validator._validate_glossary()) + + def test_sample_file_passes_validation(self): + res = self.sample_validator.validate() + self.assertTrue(res) + + +class TestInstructorPage(BaseTemplateTest): + """Verifies that the instructors page validator works as expected""" + SAMPLE_FILE = "../pages/instructors.md" + VALIDATOR = check.InstructorPageValidator + + def test_sample_file_passes_validation(self): + res = self.sample_validator.validate() + self.assertTrue(res) + + +class 
TestLicensePage(BaseTemplateTest): + SAMPLE_FILE = '../pages/LICENSE.md' + VALIDATOR = check.LicensePageValidator + + def test_sample_file_passes_validation(self): + res = self.sample_validator.validate() + self.assertTrue(res) + + def test_modified_file_fails_validation(self): + with open(self.SAMPLE_FILE, 'rU') as f: + orig_text = f.read() + mod_text = orig_text.replace("The", "the") + validator = self._create_validator(mod_text) + self.assertFalse(validator.validate()) + + +class TestDiscussionPage(BaseTemplateTest): + SAMPLE_FILE = '../pages/discussion.md' + VALIDATOR = check.DiscussionPageValidator + + def test_sample_file_passes_validation(self): + res = self.sample_validator.validate() + self.assertTrue(res) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/validation_helpers.py b/tools/validation_helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..6acc11c3dfccb80167d860c43b3b4cafe343bbbe --- /dev/null +++ b/tools/validation_helpers.py @@ -0,0 +1,202 @@ +#! 
#! /usr/bin/env python

"""Shared helpers for the lesson validator.

Contains small predicates used to validate YAML header values, text cleanup
functions for Markdown heading strings, and ``CommonMarkHelper``, a thin
wrapper around the abstract syntax tree produced by the CommonMark parser.
"""

import json
import logging
import re
import sys

try:  # Hack to make codebase compatible with python 2 and 3
    basestring
except NameError:
    basestring = str


# Common validation functions
def is_list(text):
    """Validate whether the provided string can be converted to python list

    The string is parsed as JSON; anything that fails to parse, or parses to
    something other than a list, yields False.
    """
    text = text.strip()
    try:
        text_as_list = json.loads(text)
    except ValueError:
        logging.debug("Could not convert string to python object: {0}".format(text))
        return False

    return isinstance(text_as_list, list)


def is_str(text):
    """Validate whether the input is a non-blank python string"""
    return isinstance(text, basestring) and len(text) > 0


def is_numeric(text):
    """Validate whether the string represents a number (including unicode)

    Returns False (rather than raising) for values that float() cannot
    accept at all, such as None or a list.
    """
    try:
        float(text)
    except (TypeError, ValueError):
        # ValueError: unparseable string; TypeError: not a string/number
        return False
    return True


#### Text cleanup functions, pre-validation
def strip_attrs(s):
    """Strip attributes of the form {.name} from a markdown title string"""
    return re.sub(r"\s\{\..*?\}", "", s)


def get_css_class(s):
    """Return any and all CSS classes (when a line is suffixed by {.classname})

    Returns empty list when the line carries no {.classname} attribute."""
    # Raw string: "\{" and "\." are not valid escapes in a normal literal
    return re.findall(r"\{\.(.*?)\}", s)


### Helper objects
class CommonMarkHelper(object):
    """Basic helper functions for working with the internal abstract syntax
    tree produced by CommonMark parser"""
    def __init__(self, ast):
        self.data = ast
        self.children = self.data.children

    def _get_doc_header_field(self, field_name):
        """Return the stripped value of `field_name` from the YAML headers.

        The headers are read from the second top-level node of the document.
        Lines that are not of the form "label: value" are skipped. Returns an
        empty string if the field is absent."""
        doc_headers = self.data.children[1]  # Throw index error if none found

        for s in doc_headers.strings:
            label, sep, contents = s.partition(":")
            # No separator means this is not a "label: value" line; skip it
            # instead of failing on tuple unpacking
            if sep and label.strip().lower() == field_name:
                return contents.strip()

        # If field not found, return an empty string for display purposes
        return ''

    def get_doc_header_title(self):
        """Helper method for SWC templates: get the document title from
        the YAML headers"""
        return self._get_doc_header_field("title")

    def get_doc_header_subtitle(self):
        """Helper method for SWC templates: get the document subtitle from
        the YAML headers"""
        return self._get_doc_header_field("subtitle")

    def get_block_titled(self, title, heading_level=2, ast_node=None):
        """Examine children. Return all children of the given node that:
        a) are blockquoted elements, and
        b) contain a heading with the specified text, at the specified level.
        For example, this can be used to find the "Prerequisites" section
        in index.md

        Returns empty list if no appropriate node is found"""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children
                if self.is_block(n) and
                self.has_section_heading(
                    title,
                    ast_node=n,
                    heading_level=heading_level,
                    show_msg=False)]

    def get_section_headings(self, ast_node=None):
        """Returns a list of ast nodes that are headings"""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children if self.is_heading(n)]

    def get_link_info(self, link_node):
        """Given a link node, return a (destination, link text) tuple.

        The link text is None when the node has no readable label.
        Raises TypeError if the node is neither a link nor an image."""
        if not self.is_external(link_node):
            raise TypeError("Cannot apply this method to something that is not a link")

        dest = link_node.destination
        try:
            link_text = link_node.label[0].c
        except (AttributeError, IndexError, KeyError, TypeError):
            # Label missing, empty, or not shaped as expected
            link_text = None

        return dest, link_text

    def find_external_links(self, ast_node=None):
        """Recursive function that locates all references to external content
        under specified node. (links or images)"""
        if ast_node is None:
            ast_node = self.data

        # Link can be node itself, or hiding in inline content
        links = [n for n in ast_node.inline_content
                 if self.is_external(n)]

        if self.is_external(ast_node):
            links.append(ast_node)

        # Also look for links in sub-nodes
        for n in ast_node.children:
            links.extend(self.find_external_links(n))

        return links

    def has_section_heading(self, section_title, ast_node=None,
                            heading_level=2, limit=sys.maxsize, show_msg=True):
        """Does the file contain (<= x copies of) specified heading text?
        Will strip off any CSS attributes when looking for the section title

        Returns True when between 1 and `limit` matching headings are direct
        children of the node. Logs the outcome unless show_msg is False."""
        if ast_node is None:
            ast_node = self.data

        num_nodes = len([n for n in self.get_section_headings(ast_node)
                         if (strip_attrs(n.strings[0]) == section_title)
                         and (n.level == heading_level)])

        # Suppress error msg if used as a helper method
        if show_msg and num_nodes == 0:
            logging.error("Document does not contain the specified "
                          "heading: {0}".format(section_title))
        elif show_msg and num_nodes > limit:
            logging.error("Document must not contain more than {0} copies of"
                          " the heading {1}".format(limit, section_title or 0))
        elif show_msg:
            logging.info("Verified that document contains the specified"
                         " heading: {0}".format(section_title))
        return (0 < num_nodes <= limit)

    def has_number_children(self, ast_node,
                            exact=None, minc=0, maxc=sys.maxsize):
        """Does the specified node (such as a bulleted list) have the expected
        number of children?

        If `exact` is given (including 0) it overrides minc and maxc."""

        # Compare against None so that exact=0 is honoured
        if exact is not None:
            minc = maxc = exact

        return (minc <= len(ast_node.children) <= maxc)

    # Helpers, in case the evolving CommonMark spec changes the names of nodes
    def is_hr(self, ast_node):
        """Is the node a horizontal rule (hr)?"""
        return ast_node.t == 'HorizontalRule'

    def is_heading(self, ast_node):
        """Is the node a heading/ title?"""
        return ast_node.t == "ATXHeader"

    def is_paragraph(self, ast_node):
        """Is the node a paragraph?"""
        return ast_node.t == "Paragraph"

    def is_list(self, ast_node):
        """Is the node a list? (ordered or unordered)"""
        return ast_node.t == "List"

    def is_link(self, ast_node):
        """Is the node a link?"""
        return ast_node.t == "Link"

    def is_external(self, ast_node):
        """Does the node reference content outside the file? (image or link)"""
        return ast_node.t in ("Link", "Image")

    def is_block(self, ast_node):
        """Is the node a BlockQuoted element?"""
        return ast_node.t == "BlockQuote"