#!/usr/bin/python
#
# Software Carpentry Lesson Validator
#
# Check for errors in lessons built using the Software Carpentry template
# found at http://github.com/swcarpentry/lesson-template.
#
# Usage:
#
#     $ tools/check

import os
import re
import sys

import yaml

#----------------------------------------
# Error reporting.

def report_error(file_path, line_number, line, error_message):
    """
    Print information about a general error.

    file_path     -- path of the file in which the error was found
    line_number   -- number of the offending line
    line          -- text of the offending line
    error_message -- description of the problem
    """
    ERR_MSG = "Error at line {} of {}:\n\t{}\n{}"
    print(ERR_MSG.format(line_number, file_path, line, error_message))

def report_missing(present, file_path, missing_element):
    """
    Print information about a missing element.

    Nothing is printed when ``present`` is true, so callers can pass the
    result of their check directly instead of wrapping the call in ``if``.

    present         -- whether the element was found
    file_path       -- path of the file that was checked
    missing_element -- name of the element that should be there
    """
    ERR_MSG = "Error on {}: missing {}"
    if not present:
        print(ERR_MSG.format(file_path, missing_element))

def report_missing_metadata(missing_element):
    """
    Print information about metadata missing from the YAML header.

    missing_element -- name of the metadata key that is absent
    """
    ERR_MSG = "Error on YAML header: missing {}"
    print(ERR_MSG.format(missing_element))

def report_broken_link(file_path, line_number, link):
    """
    Print information about a broken link.

    file_path   -- path of the file that contains the link
    line_number -- number of the line on which the link appears
    link        -- the link target that could not be found
    """
    ERR_MSG = "Broken link at line {} of {}:\n\tCan't find {}."
    print(ERR_MSG.format(line_number, file_path, link))

#----------------------------------------
# Checking.

def check_yaml(metadata):
    """
    Check that all required metadata are present in the YAML header.

    ``metadata`` is the parsed YAML header.  Anything that is not a mapping
    (for example ``None``) is treated as a header without any keys, so every
    required key is reported instead of the check crashing.
    """
    METADATA_REQUIRED = {"layout", "title", "minutes"}
    present = set(metadata) if isinstance(metadata, dict) else set()
    # Sorted so the order of the reports does not depend on set ordering.
    for key in sorted(METADATA_REQUIRED - present):
        report_missing_metadata(key)

# TODO: Implement check_lesson
def check_lesson(file_path):
    """
    Check the file ``pages/[0-9]{2}-.*.md``.

    Not implemented yet: the function currently does nothing.  The planned
    checks are:

    - "layout: topic" in YAML header
    - "title" as keyword in YAML header
    - line "> ## Learning Objectives {.objectives}" after YAML header
    - items in learning objectives begin with "*"
    - items in learning objective following four-space indentation rule
    - code samples be of type input, error, output, python, shell, r, matlab, or sql
    - callout box style
    - challenge box style
    """
    pass

# TODO: Implement check_discussion
def check_discussion(file_path):
    """
    Check the file ``pages/discussion.md``.

    Not implemented yet: the function currently does nothing.

    FIXME: describe what needs to be checked.
    """
    pass

# Target of a Markdown inline link: the text between "](" and the first
# whitespace or ")".  Anything after whitespace (a link title) is skipped.
_LINK_TARGET = re.compile(r"\]\(\s*(?P<link>[^)\s]*)[^)]*\)")

# Links that start with a URL scheme ("http:", "https:", "mailto:", ...).
_URL_SCHEME = re.compile(r"[A-Za-z][A-Za-z0-9+.-]*:")


def _split_yaml_header(lines):
    """
    Separate the YAML header of a page from its body.

    Only the first block delimited by lines starting with ``---`` is treated
    as the header; later ``---`` lines (e.g. horizontal rules) are body text.

    Return ``(header, body)``, both lists of ``(line_number, line)`` pairs
    with 1-based line numbers.  The delimiter lines are in neither list.
    """
    header = []
    body = []
    state = "before"  # "before" -> "inside" -> "after" the YAML header
    for line_number, line in enumerate(lines, 1):
        is_delimiter = line.startswith("---")
        if is_delimiter and state == "before":
            state = "inside"
        elif is_delimiter and state == "inside":
            state = "after"
        elif state == "inside":
            header.append((line_number, line))
        else:
            body.append((line_number, line))
    return header, body


def _local_link_targets(file_path, line):
    """
    Yield the path every local Markdown link on ``line`` should resolve to.

    Links with a URL scheme, protocol-relative links and pure in-page
    anchors are skipped.  Paths are resolved relative to the directory of
    ``file_path``.
    """
    directory = os.path.dirname(file_path)
    for match in _LINK_TARGET.finditer(line):
        target = match.group("link")
        if _URL_SCHEME.match(target) or target.startswith("//"):
            continue
        target = target.split("#", 1)[0]  # drop the in-page fragment
        if not target:
            continue
        path = os.path.join(directory, target)
        # Links to ".html" pages are checked against the ".md" file of the
        # same name.  Only the extension is swapped, so "03-html-editing.html"
        # becomes "03-html-editing.md".
        root, extension = os.path.splitext(path)
        if extension == ".html":
            path = root + ".md"
        yield path


def _check_yaml_header(file_path, header):
    """
    Parse the YAML header and report problems with its metadata.

    ``header`` is a list of ``(line_number, line)`` pairs.
    """
    source = "".join(line for (_, line) in header)
    try:
        # safe_load: the header is plain data, and yaml.load() without an
        # explicit Loader is unsafe (and an error in recent PyYAML releases).
        metadata = yaml.safe_load(source)
    except yaml.YAMLError as error:
        # Point at the header line PyYAML complained about when it says so.
        mark = getattr(error, "problem_mark", None)
        if header:
            index = min(mark.line, len(header) - 1) if mark is not None else 0
            line_number, line = header[index]
        else:
            line_number, line = 1, ""
        report_error(file_path, line_number, line.rstrip("\n"),
                     "Invalid YAML header: {}".format(error))
        return
    if not isinstance(metadata, dict):
        # Empty header or a header that is not a mapping: no keys at all.
        metadata = {}
    check_yaml(metadata)


# TODO: Complete implementation of check_index
def check_index(file_path):
    """
    Check the file ``pages/index.md``.

    Currently checked:

    - required metadata in YAML header (see ``check_yaml``)
    - line with "> ## Prerequisites"
    - title line with "## Topics"
    - line with "## Other Resources"
    - local links are valid

    Not checked yet:

    - "layout: lesson" in YAML header
    - introductory paragraph(s) right after YAML header
    - non-empty prerequisites
    - items at topic list begin with "*"
    - items in topic list follow four-space indentation rule
    - items at other resources list begin with "*"
    """
    with open(file_path, "r") as reader:
        header, body = _split_yaml_header(reader)

    has_prerequisites = False
    has_topics = False
    has_other_resources = False
    for line_number, line in body:
        if line.startswith("> ## Prerequisites"):
            has_prerequisites = True
        elif line.startswith("## Topics"):
            has_topics = True
        elif line.startswith("## Other Resources"):
            has_other_resources = True
        else:
            for target in _local_link_targets(file_path, line):
                if not os.path.exists(target):
                    report_broken_link(file_path, line_number, target)

    _check_yaml_header(file_path, header)

    report_missing(has_prerequisites, file_path, "Prerequisites")
    report_missing(has_topics, file_path, "Topics")
    report_missing(has_other_resources, file_path, "Other Resources")

# TODO Implement check_intructors
# NOTE: the name is misspelled ("intructors"), but check_file refers to the
# function by this name, so it is kept as is.
def check_intructors(file_path):
    """
    Check the file ``pages/instructors.md``.

    Not implemented yet: the function currently does nothing.  The planned
    checks are:

    - "title: Instructor's Guide" in YAML header
    - line with "## Overall"
    - line with "## General Points"
    - lines with topics titles begin with "## "
    - points begin with "*" and following four space rules.
    """
    pass

# TODO Implement check_motivation
def check_motivation(file_path):
    """
    Check the file ``pages/motivation.md``.

    Not implemented yet: the function currently does nothing.

    FIXME: describe what needs to be checked.
    """
    pass

# TODO Implement check_reference
def check_reference(file_path):
    """
    Check the file ``pages/reference.md``.

    Not implemented yet: the function currently does nothing.  The planned
    checks are:

    -   ``layout: reference`` in YAML header
    -   line with "## Glossary"
    -   word definitions after the "Glossary" line, written as::

        > **Key Word 1**: the definition
        > relevant to the lesson.
    """
    pass

def check_file(file_path):
    """
    Call the correct check function based on the name of the file.

    The decision is made on the base name of ``file_path`` without its
    extension, and the whole name has to match.  Directory names and names
    that merely contain a keyword (e.g. ``01-indexing.md``) therefore do not
    trigger the wrong checker.
    """
    # Pairs of (regex for the file name, function to call)
    CONTROL = (
        (r"[0-9]{2}-.*", check_lesson),
        (r"discussion",  check_discussion),
        (r"index",       check_index),
        (r"instructors", check_intructors),
        (r"motivation",  check_motivation),
        (r"reference",   check_reference)
    )
    name = os.path.splitext(os.path.basename(file_path))[0]
    for (pattern, checker) in CONTROL:
        if re.match("(?:{})$".format(pattern), name):
            checker(file_path)

def main(list_of_files):
    """
    Call the check function for every Markdown file in ``list_of_files``.

    If ``list_of_files`` is empty, load all the files from the ``pages``
    directory.  Files whose name does not end in ``.md`` are ignored.
    """
    if not list_of_files:
        # Sorted so the reports come out in the same order on every run;
        # os.listdir() returns the entries in arbitrary order.
        list_of_files = [os.path.join("pages", filename)
                         for filename in sorted(os.listdir("pages"))]

    for filename in list_of_files:
        if filename.endswith(".md"):
            check_file(filename)

# Run the validator on the files named on the command line (or on every
# page when none are given) when executed as a script.
if __name__ == "__main__":
    main(sys.argv[1:])