#!/usr/bin/python
#
# Software Carpentry Lesson Validator
#
# Check for errors in lessons built using the Software Carpentry template
# found at http://github.com/swcarpentry/lesson-template.
#
# Usage:
#
# $ tools/check

import sys
import os
import re

try:
    import yaml
except ImportError:
    # PyYAML is a third-party package.  Defer the failure until a YAML header
    # actually has to be parsed so that the remaining checks stay usable.
    yaml = None


# Inline Markdown link "[text](target)".  Both parts are matched non-greedily
# so that several links on the same line are found separately.
_LINK_PATTERN = re.compile(r"\[[^\]]*\]\((?P<link>[^)]*)\)")

# Leading URL scheme such as "http:", "https:" or "mailto:".
_URL_SCHEME = re.compile(r"[A-Za-z][A-Za-z0-9+.-]*:")

# (line prefix, name used in error reports) of the sections that
# ``pages/index.md`` must contain, in reporting order.
_INDEX_SECTIONS = (
    ("> ## Prerequisites", "Prerequisites"),
    ("## Topics", "Topics"),
    ("## Other Resources", "Other Resources"),
)


#----------------------------------------
# Error reporting.

def report_error(file_path, line_number, line, error_message):
    """
    Print information about general error.

    ``line`` is the offending text found at ``line_number`` of ``file_path``
    and ``error_message`` describes what is wrong with it.
    """
    ERR_MSG = "Error at line {} of {}:\n\t{}\n{}"
    print(ERR_MSG.format(line_number, file_path, line, error_message))


def report_missing(present, file_path, missing_element):
    """
    Print information about missing element.

    Nothing is printed when ``present`` is true, which lets callers report
    the outcome of a check without writing their own conditional.
    """
    ERR_MSG = "Error on {}: missing {}"
    if not present:
        print(ERR_MSG.format(file_path, missing_element))


def report_missing_metadata(missing_element):
    """
    Print information about missing metadata at YAML header.
    """
    ERR_MSG = "Error on YAML header: missing {}"
    print(ERR_MSG.format(missing_element))


def report_broken_link(file_path, line_number, link):
    """
    Print information about broken link.

    ``link`` is the path that was looked up on disk and not found.
    """
    ERR_MSG = "Broken link at line {} of {}:\n\tCan't find {}."
    print(ERR_MSG.format(line_number, file_path, link))


#----------------------------------------
# Checking.

def check_yaml(metadata):
    """
    Check if all metadata are present at YAML header.

    ``metadata`` is the parsed YAML header.  Anything that is not a mapping
    (for example ``None`` for an empty header) is treated as having no keys,
    so every required key is reported.  Missing keys are reported in
    alphabetical order to keep the output reproducible.
    """
    METADATA_REQUIRED = {"layout", "title", "minutes"}
    present = set(metadata) if isinstance(metadata, dict) else set()
    for key in sorted(METADATA_REQUIRED - present):
        report_missing_metadata(key)


# TODO: Implement check_lesson
def check_lesson(file_path):
    """
    Checks the file ``pages/[0-9]{2}-.*.md`` for:

    - "layout: topic" in YAML header
    - "title" as keyword in YAML header
    - line "> ## Learning Objectives {.objectives}" after YAML header
    - items in learning objectives begin with "*"
    - items in learning objective following four-space indentation rule
    - code samples be of type input, error, output, python, shell, r, matlab,
      or sql
    - callout box style
    - challenge box style

    Not implemented yet: currently does nothing.
    """
    pass


# TODO: Implement check_discussion
def check_discussion(file_path):
    """
    Checks the file ``pages/discussion.md`` for:

    FIXME: tell what need to check.

    Not implemented yet: currently does nothing.
    """
    pass


def _check_links(file_path, line_number, line):
    """
    Report every local link in ``line`` whose target does not exist.

    Targets are resolved relative to the directory of ``file_path``.  A
    target ending in ``.html`` is looked up as the ``.md`` source it is
    expected to be generated from.
    """
    base_dir = os.path.dirname(file_path)
    for match in _LINK_PATTERN.finditer(line):
        target = match.group("link")
        # External resources cannot be validated against the file system.
        if _URL_SCHEME.match(target) or target.startswith("//"):
            continue
        # A "#fragment" addresses a position inside the page, not a file.
        target = target.partition("#")[0]
        if not target:
            continue
        link = os.path.join(base_dir, target)
        if link.endswith(".html"):
            # Swap only the extension: "03-html-editing.html" must become
            # "03-html-editing.md", not "03-md-editing.md".
            link = link[:-len(".html")] + ".md"
        if not os.path.exists(link):
            report_broken_link(file_path, line_number, link)


def _load_yaml_header(header_lines):
    """
    Parse the lines collected from the YAML header and return the result.

    An empty header yields an empty dictionary without needing PyYAML.
    Raises ``ImportError`` when a non-empty header has to be parsed and
    PyYAML is not installed.
    """
    text = "".join(header_lines)
    if not text.strip():
        return {}
    if yaml is None:
        raise ImportError("PyYAML is required to check YAML headers")
    # safe_load: lesson files are plain data and must never be able to
    # construct arbitrary Python objects.
    return yaml.safe_load(text)


# TODO: Complete implementation of check_index
def check_index(file_path):
    """
    Checks the file ``pages/index.md`` for:

    - "layout: lesson" in YAML header
    - "title" as keyword in YAML header
    - introductory paragraph(s) right after YAML header
    - line with "> ## Prerequisites"
    - non-empty prerequisites
    - title line with "## Topics"
    - items at topic list begin with "*"
    - items in topic list follow four-space indentation rule
    - links at topic list are valid
    - line with "## Other Resources"
    - items at other resources list begin with "*"
    - link at other resources list are valid

    Implemented so far: the required YAML keys (see ``check_yaml``), the
    presence of the three section titles, and the validity of local links on
    every line that is neither part of the YAML header nor a section title.
    """
    in_yaml = False
    yaml_lines = []
    found_sections = set()

    with open(file_path, "r") as lines:
        # Count from 1 so reported numbers match what an editor shows.
        for line_number, line in enumerate(lines, 1):
            if line.startswith("---"):
                # Every "---" line toggles YAML mode, so the contents of
                # several metadata blocks are collected together.
                in_yaml = not in_yaml
            elif in_yaml:
                yaml_lines.append(line)
            else:
                for prefix, name in _INDEX_SECTIONS:
                    if line.startswith(prefix):
                        found_sections.add(name)
                        break
                else:
                    _check_links(file_path, line_number, line)

    # Check YAML
    check_yaml(_load_yaml_header(yaml_lines))

    # Check sections
    for _, name in _INDEX_SECTIONS:
        report_missing(name in found_sections, file_path, name)


# TODO Implement check_instructors
def check_instructors(file_path):
    """
    Checks the file ``pages/instructors.md`` for:

    - "title: Instructor's Guide" in YAML header
    - line with "## Overall"
    - line with "## General Points"
    - lines with topics titles begin with "## "
    - points begin with "*" and following four space rules.

    Not implemented yet: currently does nothing.
    """
    pass


# Backward-compatible alias for the original, misspelled function name.
check_intructors = check_instructors


# TODO Implement check_motivation
def check_motivation(file_path):
    """
    Checks the file ``pages/motivation.md``.

    FIXME: tell what need to check.

    Not implemented yet: currently does nothing.
    """
    pass


# TODO Implement check_reference
def check_reference(file_path):
    """
    Checks the file ``pages/reference.md`` for:

    - ``layout: reference`` in YAML header
    - line with "## Glossary"
    - words definitions after at the "Glossary" as::

        > **Key Word 1**: the definition
        > relevant to the lesson.

    Not implemented yet: currently does nothing.
    """
    pass


def check_file(file_path):
    """
    Call the appropriate check function based on the name of the file.

    Only the base name without its extension is considered and it has to
    match a pattern completely.  This way ``pages/01-index-cards.md`` is
    checked as a lesson only, and directory names never select a checker.
    Files matching no pattern are ignored.
    """
    # Pair of regex and function to call
    CONTROL = (
        ("[0-9]{2}-.*", check_lesson),
        ("discussion", check_discussion),
        ("index", check_index),
        ("instructors", check_instructors),
        ("motivation", check_motivation),
        ("reference", check_reference),
    )
    stem = os.path.splitext(os.path.basename(file_path))[0]
    for (pattern, checker) in CONTROL:
        if re.match("(?:" + pattern + r")\Z", stem):
            checker(file_path)


def main(list_of_files):
    """
    Call the check function for every file in ``list_of_files``.

    If ``list_of_files`` is empty load all the files from ``pages`` directory
    (in sorted order, so that the output is reproducible).  Files whose name
    does not end in ``.md`` are skipped.
    """
    if not list_of_files:
        list_of_files = [os.path.join("pages", filename)
                         for filename in sorted(os.listdir("pages"))]

    for filename in list_of_files:
        if filename.endswith(".md"):
            check_file(filename)


if __name__ == "__main__":
    main(sys.argv[1:])