# Check a lesson's Markdown source files, metadata, and file layout.
import sys
import os
import glob
import json
import yaml
import re
from subprocess import Popen, PIPE
from optparse import OptionParser
from util import Reporter
# Version string of this checking script.
__version__ = '0.2'
# Sub-directories (relative to the lesson's source directory) that are
# searched for '*.md' files; the empty string is the source directory itself.
SOURCE_DIRS = [
    '',
    '_episodes',
    '_extras',
]
# Required files: each item is a tuple of (YAML_required, path).
# TODO: We do not yet validate whether any files have the required
# YAML headers, but should in the future.
# The '%' is replaced with the source directory path for checking.
# Episodes are handled specially, and extra files in '_extras' are also handled specially.
# This list must include all the Markdown files listed in the 'bin/initialize' script.
REQUIRED_FILES = [
    (True, '%/CONDUCT.md'),
    (False, '%/CONTRIBUTING.md'),
    (True, '%/LICENSE.md'),
    (False, '%/README.md'),
    (True, '%/_extras/discuss.md'),
    (True, '%/_extras/guide.md'),
    (True, '%/index.md'),
    (True, '%/reference.md'),
    (True, '%/setup.md'),
]
# Episode filename pattern: '/_episodes/NN-some-slug.md', where NN is a
# two-digit episode number captured as group 1.  The dot before 'md' is
# escaped so that only a literal '.md' extension is accepted.
P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+\.md$')
# Class names a blockquote may carry; any other (or missing) class is
# reported by the blockquote check.
KNOWN_BLOCKQUOTES = {
    'callout', 'challenge', 'checklist',
    'keypoints', 'objectives', 'prereq',
    'quotation', 'solution', 'testimonial',
}
# Class names a code block may carry; any other (or missing) class is
# reported by the code block check.
KNOWN_CODEBLOCKS = {'error', 'output', 'source'}
# Metadata every episode must provide, as (field name, exact expected type).
EPISODE_METADATA_FIELDS = {
    ('title', str),
    ('teaching', int), ('exercises', int),
    ('questions', list), ('objectives', list), ('keypoints', list),
}
def main():
    """Run all checks on the lesson and emit the collected report.

    Parses the command line, attaches a Reporter to the options object,
    checks the configuration file, reads every Markdown source file,
    checks the set of files found, runs the matching checker on each
    file, and finally asks the reporter to report.
    """
    args = parse_args()
    args.reporter = Reporter(args)
    check_config(args)

    docs = read_all_markdown(args, args.source_dir)
    check_fileset(args.source_dir, args.reporter, docs.keys())

    for path, info in docs.items():
        create_checker(args, path, info).check()

    args.reporter.report()
def parse_args():
    """Parse and validate the command line.

    Returns the options object produced by OptionParser, with attributes
    'parser' (path to the Markdown parser, mandatory) and 'source_dir'
    (defaults to the current directory).  Exits via require() if the
    parser path is missing or if positional arguments were supplied.
    """
    option_parser = OptionParser()
    option_parser.add_option(
        '-p', '--parser',
        dest='parser',
        default=None,
        help='path to Markdown parser')
    option_parser.add_option(
        '-s', '--source',
        dest='source_dir',
        default=os.curdir,
        help='source directory')

    options, leftovers = option_parser.parse_args()

    parser_given = options.parser is not None
    require(parser_given, 'Path to Markdown parser not provided')

    trailing_message = 'Unexpected trailing command-line arguments "{0}"'.format(leftovers)
    require(not leftovers, trailing_message)

    return options
def check_config(args):
    """Check the lesson's '_config.yml' in the source directory.

    Reads '<source_dir>/_config.yml' and asks the reporter to check that
    its 'kind' field is 'lesson'.

    args must provide 'source_dir' and 'reporter'.  Raises whatever
    open() or the YAML parser raise for a missing or malformed file.
    """
    config_file = os.path.join(args.source_dir, '_config.yml')
    with open(config_file, 'r') as reader:
        # safe_load: a config file needs only plain YAML types, and
        # yaml.load() without an explicit Loader is unsafe (and rejected
        # outright by recent PyYAML releases).
        # An empty file parses to None; use an empty mapping so the field
        # check reports a missing 'kind' instead of failing on None.
        config = yaml.safe_load(reader) or {}
    args.reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson')
def read_all_markdown(args, source_dir):
    """Read every Markdown source file under source_dir.

    Looks for '*.md' in each directory named by SOURCE_DIRS and returns
    {path: info}, where info is the dictionary produced by
    read_markdown() for that path.  Files for which read_markdown()
    returns a falsy value are left out.
    """
    found = {}
    for subdir in SOURCE_DIRS:
        pattern = os.path.join(os.path.join(source_dir, subdir), '*.md')
        for path in glob.glob(pattern):
            parsed = read_markdown(args, path)
            if parsed:
                found[path] = parsed
    return found
def read_markdown(args, path):
    """Get YAML and AST for a Markdown file.

    Returns {'metadata': yaml, 'metadata_len': n, 'doc': doc} where:

    - 'metadata' is the parsed YAML header, or None if the file has none;
    - 'metadata_len' is the number of newlines in the header text, or
      None if the file has no header;
    - 'doc' is the JSON document printed by the Markdown parser script
      (args.parser, run with 'ruby') when fed the rest of the file.

    Raises whatever open(), the YAML parser, Popen or json.loads raise.
    """
    # Split and extract YAML (if present).
    metadata = None
    metadata_len = None
    with open(path, 'r') as reader:
        body = reader.read()
    pieces = body.split('---', 2)
    # A header must open the file: only accept the split when nothing but
    # whitespace precedes the first '---'.  Otherwise any document that
    # merely contains two '---' runs (horizontal rules, table separators)
    # would be mistaken for one with metadata.
    if len(pieces) == 3 and not pieces[0].strip():
        # safe_load: headers need only plain YAML types, and yaml.load()
        # without an explicit Loader is unsafe.
        metadata = yaml.safe_load(pieces[1])
        metadata_len = pieces[1].count('\n')
        body = pieces[2]

    # Parse Markdown.  The command is passed as an argument list without
    # a shell so a parser path containing spaces or shell metacharacters
    # is handed to ruby verbatim.
    p = Popen(['ruby', args.parser],
              stdin=PIPE, stdout=PIPE, close_fds=True, universal_newlines=True)
    stdout_data = p.communicate(body)[0]
    doc = json.loads(stdout_data)

    return {
        'metadata': metadata,
        'metadata_len': metadata_len,
        'doc': doc
    }
def check_fileset(source_dir, reporter, filenames_present):
    """Are all required files present? Are episode files sensibly named?

    source_dir        -- lesson source directory, substituted for '%' in
                         the REQUIRED_FILES paths.
    reporter          -- object whose add() and check() methods record
                         problems.
    filenames_present -- iterable of the Markdown paths that were found.

    Reports, without a file location: each required file that is
    missing, each file under '_episodes' whose name does not match
    P_EPISODE_FILENAME, duplicate episode numbers, and episode numbers
    that are not exactly 1, 2, 3, ... once sorted.
    """
    # Check files with predictable names.
    required = [p[1].replace('%', source_dir) for p in REQUIRED_FILES]
    missing = set(required) - set(filenames_present)
    for m in sorted(missing):
        reporter.add(None, 'Missing required file {0}', m)

    # Check episode files' names, collecting their two-digit numbers.
    seen = []
    for filename in filenames_present:
        if '_episodes' not in filename:
            continue
        m = P_EPISODE_FILENAME.search(filename)
        if m and m.group(1):
            seen.append(m.group(1))
        else:
            reporter.add(None, 'Episode {0} has badly-formatted filename', filename)

    # Check for duplicate episode numbers.
    reporter.check(len(seen) == len(set(seen)),
                   None,
                   'Duplicate episode numbers {0} vs {1}',
                   sorted(seen), sorted(set(seen)))

    # Check that numbers are consecutive, starting from 1.
    seen = [int(s) for s in seen]
    seen.sort()
    reporter.check(all(i + 1 == n for (i, n) in enumerate(seen)),
                   None,
                   'Missing or non-consecutive episode numbers {0}',
                   seen)
def create_checker(args, filename, info):
    """Build the checker for filename.

    Uses the class paired with the first pattern in CHECKERS that is
    found in filename, and constructs it with the entries of info as
    keyword arguments.  Returns None if no pattern matches.
    """
    matching_classes = (cls for (pat, cls) in CHECKERS if pat.search(filename))
    checker_class = next(matching_classes, None)
    if checker_class is None:
        return None
    return checker_class(args, filename, **info)
def require(condition, message):
    """Exit with status 1 after printing message to stderr unless condition holds."""
    if condition:
        return
    print(message, file=sys.stderr)
    sys.exit(1)
class CheckBase(object):
    """Base class for per-file checkers.

    Holds the parsed pieces of one Markdown file and runs the checks
    common to all files: metadata presence (and layout, when a subclass
    sets self.layout), blockquote classes and code block classes.
    Problems are recorded through args.reporter.
    """

    def __init__(self, args, filename, metadata, metadata_len, doc):
        """Store the file's parsed pieces.

        args         -- options object; must carry a 'reporter' attribute.
        filename     -- path of the file being checked.
        metadata     -- parsed YAML header, or None if the file has none.
        metadata_len -- header length in lines, or None if no header.
        doc          -- parsed document tree (nested dictionaries).
        """
        super(CheckBase, self).__init__()
        self.args = args
        self.reporter = self.args.reporter  # for convenience
        self.filename = filename
        self.metadata = metadata
        self.metadata_len = metadata_len
        self.doc = doc
        # Expected value of the metadata 'layout' field; None means the
        # layout is not checked.  Subclasses override this.
        self.layout = None

    def check(self):
        """Run all checks for this file."""
        self.check_metadata()
        self.check_blockquote_classes()
        self.check_codeblock_classes()

    def check_metadata(self):
        """Check that metadata exists and, if required, names the right layout."""
        self.reporter.check(self.metadata is not None,
                            self.filename,
                            'Missing metadata entirely')
        if self.metadata and (self.layout is not None):
            self.reporter.check_field(self.filename, 'metadata', self.metadata, 'layout', self.layout)

    def check_blockquote_classes(self):
        """Check that all blockquotes have known classes."""
        for node in self.find_all(self.doc, {'type': 'blockquote'}):
            cls = self.get_val(node, 'attr', 'class')
            self.reporter.check(cls in KNOWN_BLOCKQUOTES,
                                (self.filename, self.get_loc(node)),
                                'Unknown or missing blockquote type {0}',
                                cls)

    def check_codeblock_classes(self):
        """Check that all code blocks have known classes."""
        for node in self.find_all(self.doc, {'type': 'codeblock'}):
            cls = self.get_val(node, 'attr', 'class')
            self.reporter.check(cls in KNOWN_CODEBLOCKS,
                                (self.filename, self.get_loc(node)),
                                'Unknown or missing code block type {0}',
                                cls)

    def find_all(self, node, pattern, accum=None):
        """Return every node in the tree rooted at node that matches pattern.

        The tree is walked depth-first through each node's 'children'
        list, parents before children.  accum is the list results are
        appended to; callers normally leave it as None.
        """
        assert isinstance(pattern, dict), 'Patterns must be dictionaries'
        if accum is None:
            accum = []
        if self.match(node, pattern):
            accum.append(node)
        for child in node.get('children', []):
            self.find_all(child, pattern, accum)
        return accum

    def match(self, node, pattern):
        """Does node match pattern?

        Every key of pattern must be present in node.  A string value
        must equal the node's value; a dictionary value is matched
        recursively against the node's value; values of any other type
        only require the key to be present.
        """
        for key in pattern:
            if key not in node:
                return False
            val = pattern[key]
            if isinstance(val, str):
                if node[key] != val:
                    return False
            elif isinstance(val, dict):
                if not self.match(node[key], val):
                    return False
        return True

    def get_val(self, node, *chain):
        """Follow the keys in chain down through nested dictionaries.

        Returns the value reached, or None as soon as a key is missing
        (or maps to None).
        """
        curr = node
        for selector in chain:
            curr = curr.get(selector, None)
            if curr is None:
                break
        return curr

    def get_loc(self, node):
        """Convenience method to get node's line number.

        The node's own location is offset by the length of the metadata
        header.  Returns None if the node carries no location.
        """
        location = self.get_val(node, 'options', 'location')
        if location is None:
            # Nothing to offset; adding to None would raise TypeError.
            return None
        # metadata_len is None for files without a header: no offset then.
        return location + (self.metadata_len or 0)
class CheckNonJekyll(CheckBase):
    """Check a file that isn't translated by Jekyll.

    Such a file is expected to carry no metadata at all; the constructor
    is inherited unchanged from CheckBase.
    """

    def check_metadata(self):
        """Report a problem if the file has metadata."""
        metadata_absent = self.metadata is None
        self.reporter.check(metadata_absent, self.filename, 'Unexpected metadata')
class CheckIndex(CheckBase):
    """Check the lesson's home page, whose layout must be 'lesson_homepage'."""

    def __init__(self, args, filename, metadata, metadata_len, doc):
        super().__init__(args, filename, metadata, metadata_len, doc)
        # Layout value the inherited metadata check will require.
        self.layout = 'lesson_homepage'
class CheckEpisode(CheckBase):
    """Check an episode file.

    Adds a check that every field listed in EPISODE_METADATA_FIELDS is
    present in the metadata with exactly the expected type.  The
    constructor is inherited unchanged from CheckBase.
    """

    def check_metadata(self):
        """Run the base metadata check, then check the episode fields."""
        super().check_metadata()
        if not self.metadata:
            return
        for (name, type_) in EPISODE_METADATA_FIELDS:
            # Exact type comparison: a missing field yields NoneType,
            # which matches none of the expected types.
            actual_type = type(self.metadata.get(name, None))
            self.reporter.check(actual_type == type_,
                                self.filename,
                                '"{0}" missing, empty, or has wrong type in metadata',
                                name)
class CheckReference(CheckBase):
    """Check the reference page, whose layout must be 'reference'."""

    def __init__(self, args, filename, metadata, metadata_len, doc):
        super().__init__(args, filename, metadata, metadata_len, doc)
        # Layout value the inherited metadata check will require.
        self.layout = 'reference'
class CheckGeneric(CheckBase):
    """Check any other Markdown file, whose layout must be 'page'."""

    def __init__(self, args, filename, metadata, metadata_len, doc):
        super().__init__(args, filename, metadata, metadata_len, doc)
        # Layout value the inherited metadata check will require.
        self.layout = 'page'
# (compiled filename pattern, checker class) pairs.  Order matters: the
# first pattern found in a filename decides which class checks it, so the
# catch-all '.*\.md' entry must stay last.
CHECKERS = [
    (re.compile(pattern), checker_class)
    for (pattern, checker_class) in (
        (r'CONTRIBUTING\.md', CheckNonJekyll),
        (r'README\.md', CheckNonJekyll),
        (r'index\.md', CheckIndex),
        (r'reference\.md', CheckReference),
        (r'_episodes/.*\.md', CheckEpisode),
        (r'.*\.md', CheckGeneric),
    )
]
# Run the checks only when this file is executed as a script.
if __name__ == '__main__':
    main()